Skip to content

[Zen2] Remove initial master node count setting #37150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
*/
public class GetDiscoveredNodesRequest extends ActionRequest {

private int waitForNodes = 1;

@Nullable // if the request should wait indefinitely
private TimeValue timeout = TimeValue.timeValueSeconds(30);

Expand All @@ -47,35 +45,10 @@ public GetDiscoveredNodesRequest() {

public GetDiscoveredNodesRequest(StreamInput in) throws IOException {
super(in);
waitForNodes = in.readInt();
timeout = in.readOptionalTimeValue();
requiredNodes = in.readList(StreamInput::readString);
}

/**
* Sometimes it is useful only to receive a successful response after discovering a certain number of master-eligible nodes. This
* parameter controls this behaviour.
*
* @param waitForNodes the minimum number of nodes to have discovered before this request will receive a successful response. Must
* be at least 1, because we always discover the local node.
*/
public void setWaitForNodes(int waitForNodes) {
if (waitForNodes < 1) {
throw new IllegalArgumentException("always finds at least one node, waiting for [" + waitForNodes + "] is not allowed");
}
this.waitForNodes = waitForNodes;
}

/**
* Sometimes it is useful only to receive a successful response after discovering a certain number of master-eligible nodes. This
* parameter controls this behaviour.
*
* @return the minimum number of nodes to have discovered before this request will receive a successful response.
*/
public int getWaitForNodes() {
return waitForNodes;
}

/**
* Sometimes it is useful to wait until enough nodes have been discovered, rather than failing immediately. This parameter controls how
* long to wait, and defaults to 30s.
Expand Down Expand Up @@ -133,16 +106,14 @@ public void readFrom(StreamInput in) throws IOException {
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
out.writeInt(waitForNodes);
out.writeOptionalTimeValue(timeout);
out.writeStringList(requiredNodes);
}

@Override
public String toString() {
return "GetDiscoveredNodesRequest{" +
"waitForNodes=" + waitForNodes +
", timeout=" + timeout +
"timeout=" + timeout +
", requiredNodes=" + requiredNodes + "}";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,6 @@ private static boolean matchesRequirement(DiscoveryNode discoveryNode, String re
}

private static boolean checkWaitRequirements(GetDiscoveredNodesRequest request, Set<DiscoveryNode> nodes) {
if (nodes.size() < request.getWaitForNodes()) {
return false;
}

List<String> requirements = request.getRequiredNodes();
final Set<DiscoveryNode> selectedNodes = new HashSet<>();
for (final String requirement : requirements) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,35 +54,28 @@ public class ClusterBootstrapService {

private static final Logger logger = LogManager.getLogger(ClusterBootstrapService.class);

// The number of master-eligible nodes which, if discovered, can be used to bootstrap the cluster. This setting is unsafe in the event
// that more master nodes are started than expected.
public static final Setting<Integer> INITIAL_MASTER_NODE_COUNT_SETTING =
Setting.intSetting("cluster.unsafe_initial_master_node_count", 0, 0, Property.NodeScope);

public static final Setting<List<String>> INITIAL_MASTER_NODES_SETTING =
Setting.listSetting("cluster.initial_master_nodes", Collections.emptyList(), Function.identity(), Property.NodeScope);

public static final Setting<TimeValue> UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING =
Setting.timeSetting("discovery.unconfigured_bootstrap_timeout",
TimeValue.timeValueSeconds(3), TimeValue.timeValueMillis(1), Property.NodeScope);

private final int initialMasterNodeCount;
private final List<String> initialMasterNodes;
@Nullable
private final TimeValue unconfiguredBootstrapTimeout;
private final TransportService transportService;
private volatile boolean running;

public ClusterBootstrapService(Settings settings, TransportService transportService) {
initialMasterNodeCount = INITIAL_MASTER_NODE_COUNT_SETTING.get(settings);
initialMasterNodes = INITIAL_MASTER_NODES_SETTING.get(settings);
unconfiguredBootstrapTimeout = discoveryIsConfigured(settings) ? null : UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING.get(settings);
this.transportService = transportService;
}

public static boolean discoveryIsConfigured(Settings settings) {
return Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING,
INITIAL_MASTER_NODE_COUNT_SETTING, INITIAL_MASTER_NODES_SETTING).anyMatch(s -> s.exists(settings));
return Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, INITIAL_MASTER_NODES_SETTING)
.anyMatch(s -> s.exists(settings));
}

public void start() {
Expand Down Expand Up @@ -144,25 +137,21 @@ public String toString() {
});

}
} else if (initialMasterNodeCount > 0 || initialMasterNodes.isEmpty() == false) {
logger.debug("unsafely waiting for discovery of [{}] master-eligible nodes", initialMasterNodeCount);
} else if (initialMasterNodes.isEmpty() == false) {
logger.debug("waiting for discovery of master-eligible nodes matching {}", initialMasterNodes);

final ThreadContext threadContext = transportService.getThreadPool().getThreadContext();
try (ThreadContext.StoredContext ignore = threadContext.stashContext()) {
threadContext.markAsSystemContext();

final GetDiscoveredNodesRequest request = new GetDiscoveredNodesRequest();
if (initialMasterNodeCount > 0) {
request.setWaitForNodes(initialMasterNodeCount);
}
request.setRequiredNodes(initialMasterNodes);
request.setTimeout(null);
logger.trace("sending {}", request);
transportService.sendRequest(transportService.getLocalNode(), GetDiscoveredNodesAction.NAME, request,
new TransportResponseHandler<GetDiscoveredNodesResponse>() {
@Override
public void handleResponse(GetDiscoveredNodesResponse response) {
assert response.getNodes().size() >= initialMasterNodeCount;
assert response.getNodes().stream().allMatch(DiscoveryNode::isMasterNode);
logger.debug("discovered {}, starting to bootstrap", response.getNodes());
awaitBootstrap(response.getBootstrapConfiguration());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
import java.util.stream.StreamSupport;

import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING;
import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING;

public class ClusterFormationFailureHelper {
private static final Logger logger = LogManager.getLogger(ClusterFormationFailureHelper.class);
Expand Down Expand Up @@ -148,23 +147,13 @@ String getDescription() {

final String bootstrappingDescription;

if (INITIAL_MASTER_NODE_COUNT_SETTING.get(Settings.EMPTY).equals(INITIAL_MASTER_NODE_COUNT_SETTING.get(settings))
&& INITIAL_MASTER_NODES_SETTING.get(Settings.EMPTY).equals(INITIAL_MASTER_NODES_SETTING.get(settings))) {
if (INITIAL_MASTER_NODES_SETTING.get(Settings.EMPTY).equals(INITIAL_MASTER_NODES_SETTING.get(settings))) {
bootstrappingDescription = "[" + INITIAL_MASTER_NODES_SETTING.getKey() + "] is empty on this node";
} else if (INITIAL_MASTER_NODES_SETTING.get(Settings.EMPTY).equals(INITIAL_MASTER_NODES_SETTING.get(settings))) {
bootstrappingDescription = String.format(Locale.ROOT,
"this node must discover at least [%d] master-eligible nodes to bootstrap a cluster",
INITIAL_MASTER_NODE_COUNT_SETTING.get(settings));
} else if (INITIAL_MASTER_NODE_COUNT_SETTING.get(settings) <= INITIAL_MASTER_NODES_SETTING.get(settings).size()) {
} else {
// TODO update this when we can bootstrap on only a quorum of the initial nodes
bootstrappingDescription = String.format(Locale.ROOT,
"this node must discover master-eligible nodes %s to bootstrap a cluster",
INITIAL_MASTER_NODES_SETTING.get(settings));
} else {
// TODO update this when we can bootstrap on only a quorum of the initial nodes
bootstrappingDescription = String.format(Locale.ROOT,
"this node must discover at least [%d] master-eligible nodes, including %s, to bootstrap a cluster",
INITIAL_MASTER_NODE_COUNT_SETTING.get(settings), INITIAL_MASTER_NODES_SETTING.get(settings));
}

return String.format(Locale.ROOT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,6 @@ public void apply(Settings value, Settings current, Settings previous) {
Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION,
TransportAddVotingConfigExclusionsAction.MAXIMUM_VOTING_CONFIG_EXCLUSIONS_SETTING,
ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING,
ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING,
ClusterBootstrapService.UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING,
LagDetector.CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING,
DiscoveryUpgradeService.BWC_PING_TIMEOUT_SETTING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,6 @@

public class GetDiscoveredNodesRequestTests extends ESTestCase {

public void testWaitForNodesValidation() {
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
assertThat("default value is 1", getDiscoveredNodesRequest.getWaitForNodes(), is(1));

final int newWaitForNodes = randomIntBetween(1, 10);
getDiscoveredNodesRequest.setWaitForNodes(newWaitForNodes);
assertThat("value updated", getDiscoveredNodesRequest.getWaitForNodes(), is(newWaitForNodes));

final IllegalArgumentException exception
= expectThrows(IllegalArgumentException.class, () -> getDiscoveredNodesRequest.setWaitForNodes(randomIntBetween(-10, 0)));
assertThat(exception.getMessage(), startsWith("always finds at least one node, waiting for "));
assertThat(exception.getMessage(), endsWith(" is not allowed"));
}

public void testTimeoutValidation() {
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
assertThat("default value is 30s", getDiscoveredNodesRequest.getTimeout(), is(TimeValue.timeValueSeconds(30)));
Expand All @@ -65,10 +51,6 @@ public void testTimeoutValidation() {
public void testSerialization() throws IOException {
final GetDiscoveredNodesRequest originalRequest = new GetDiscoveredNodesRequest();

if (randomBoolean()) {
originalRequest.setWaitForNodes(randomIntBetween(1, 10));
}

if (randomBoolean()) {
originalRequest.setTimeout(TimeValue.parseTimeValue(randomTimeValue(), "timeout"));
} else if (randomBoolean()) {
Expand All @@ -77,7 +59,6 @@ public void testSerialization() throws IOException {

final GetDiscoveredNodesRequest deserialized = copyWriteable(originalRequest, writableRegistry(), GetDiscoveredNodesRequest::new);

assertThat(deserialized.getWaitForNodes(), equalTo(originalRequest.getWaitForNodes()));
assertThat(deserialized.getTimeout(), equalTo(originalRequest.getTimeout()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
import static java.util.Collections.emptyMap;
import static java.util.Collections.emptySet;
import static java.util.Collections.singleton;
import static java.util.Collections.singletonList;
import static org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING;
import static org.elasticsearch.discovery.PeerFinder.REQUEST_PEERS_ACTION_NAME;
import static org.elasticsearch.transport.TransportService.HANDSHAKE_ACTION_NAME;
Expand Down Expand Up @@ -204,8 +205,8 @@ public void testFailsQuicklyWithZeroTimeoutAndAcceptsNullTimeout() throws Interr

{
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
getDiscoveredNodesRequest.setWaitForNodes(2);
getDiscoveredNodesRequest.setTimeout(null);
getDiscoveredNodesRequest.setRequiredNodes(singletonList("not-a-node"));
transportService.sendRequest(localNode, GetDiscoveredNodesAction.NAME, getDiscoveredNodesRequest, new ResponseHandler() {
@Override
public void handleResponse(GetDiscoveredNodesResponse response) {
Expand All @@ -221,8 +222,8 @@ public void handleException(TransportException exp) {

{
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
getDiscoveredNodesRequest.setWaitForNodes(2);
getDiscoveredNodesRequest.setTimeout(TimeValue.ZERO);
getDiscoveredNodesRequest.setRequiredNodes(singletonList("not-a-node"));

final CountDownLatch countDownLatch = new CountDownLatch(1);
transportService.sendRequest(localNode, GetDiscoveredNodesAction.NAME, getDiscoveredNodesRequest, new ResponseHandler() {
Expand Down Expand Up @@ -254,7 +255,6 @@ public void testFailsIfAlreadyBootstrapped() throws InterruptedException {

final CountDownLatch countDownLatch = new CountDownLatch(1);
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
getDiscoveredNodesRequest.setWaitForNodes(2);
getDiscoveredNodesRequest.setTimeout(null);
transportService.sendRequest(localNode, GetDiscoveredNodesAction.NAME, getDiscoveredNodesRequest, new ResponseHandler() {
@Override
Expand Down Expand Up @@ -283,8 +283,8 @@ public void testFailsIfAcceptsClusterStateWithNonemptyConfiguration() throws Int

final CountDownLatch countDownLatch = new CountDownLatch(1);
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
getDiscoveredNodesRequest.setWaitForNodes(3);
getDiscoveredNodesRequest.setTimeout(null);
getDiscoveredNodesRequest.setRequiredNodes(singletonList("not-a-node"));
transportService.sendRequest(localNode, GetDiscoveredNodesAction.NAME, getDiscoveredNodesRequest, new ResponseHandler() {
@Override
public void handleResponse(GetDiscoveredNodesResponse response) {
Expand Down Expand Up @@ -342,7 +342,6 @@ public PublishWithJoinResponse read(StreamInput in) throws IOException {
public void testGetsDiscoveredNodesWithZeroTimeout() throws InterruptedException {
setupGetDiscoveredNodesAction();
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
getDiscoveredNodesRequest.setWaitForNodes(2);
getDiscoveredNodesRequest.setTimeout(TimeValue.ZERO);
assertWaitConditionMet(getDiscoveredNodesRequest);
}
Expand Down Expand Up @@ -377,7 +376,6 @@ public void testGetsDiscoveredNodesDuplicateName() throws InterruptedException {
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
String name = localNode.getName();
getDiscoveredNodesRequest.setRequiredNodes(Arrays.asList(name, name));
getDiscoveredNodesRequest.setWaitForNodes(1);
getDiscoveredNodesRequest.setTimeout(TimeValue.ZERO);
assertWaitConditionFailedOnDuplicate(getDiscoveredNodesRequest, "[" + localNode + "] matches [" + name + ", " + name + ']');
}
Expand All @@ -396,7 +394,6 @@ public void testGetsDiscoveredNodesTimeoutOnMissing() throws InterruptedExceptio
final CountDownLatch latch = new CountDownLatch(1);
final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
getDiscoveredNodesRequest.setRequiredNodes(Arrays.asList(localNode.getAddress().toString(), "_missing"));
getDiscoveredNodesRequest.setWaitForNodes(1);
getDiscoveredNodesRequest.setTimeout(TimeValue.ZERO);
transportService.sendRequest(localNode, GetDiscoveredNodesAction.NAME, getDiscoveredNodesRequest, new ResponseHandler() {
@Override
Expand All @@ -423,8 +420,7 @@ public void testThrowsExceptionIfDuplicateDiscoveredLater() throws InterruptedEx

final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
final String ip = localNode.getAddress().getAddress();
getDiscoveredNodesRequest.setRequiredNodes(Collections.singletonList(ip));
getDiscoveredNodesRequest.setWaitForNodes(2);
getDiscoveredNodesRequest.setRequiredNodes(Arrays.asList(ip, "not-a-node"));

final CountDownLatch countDownLatch = new CountDownLatch(1);
transportService.sendRequest(localNode, GetDiscoveredNodesAction.NAME, getDiscoveredNodesRequest, new ResponseHandler() {
Expand Down Expand Up @@ -480,7 +476,7 @@ private void setupGetDiscoveredNodesAction() throws InterruptedException {
executeRequestPeersAction();

final GetDiscoveredNodesRequest getDiscoveredNodesRequest = new GetDiscoveredNodesRequest();
getDiscoveredNodesRequest.setWaitForNodes(2);
getDiscoveredNodesRequest.setRequiredNodes(Arrays.asList(localNode.getName(), otherNode.getName()));
assertWaitConditionMet(getDiscoveredNodesRequest);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,6 @@ public void testDiscoveryConfiguredCheck() throws NodeValidationException {

ensureChecksPass.accept(Settings.builder().putList(DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING.getKey()));
ensureChecksPass.accept(Settings.builder().putList(SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING.getKey()));
ensureChecksPass.accept(Settings.builder().put(ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 0));
ensureChecksPass.accept(Settings.builder().putList(ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING.getKey()));
}
}
Loading