Skip to content

Commit 9f86e99

Browse files
authored
[Zen2] Support rolling upgrades from Zen1 (#35737)
We support rolling upgrades from Zen1 by keeping the master as a Zen1 node until there are no more Zen1 nodes in the cluster, using the following principles:

- Zen1 nodes will never vote for Zen2 nodes.
- Zen2 nodes will, while not bootstrapped, vote for Zen1 nodes.
- Zen2 nodes that were previously part of a mixed cluster will automatically (and unsafely) bootstrap themselves when the last Zen1 node leaves.
1 parent a27f2ef commit 9f86e99

File tree

13 files changed

+690
-27
lines changed

13 files changed

+690
-27
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationState.java

+12-4
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
import java.util.Map;
3333
import java.util.Optional;
3434

35+
import static org.elasticsearch.cluster.coordination.Coordinator.ZEN1_BWC_TERM;
36+
3537
/**
3638
* The core class of the cluster state coordination algorithm, directly implementing the
3739
* <a href="https://github.com/elastic/elasticsearch-formal-models/blob/master/ZenWithTerms/tla/ZenWithTerms.tla">formal model</a>
@@ -321,10 +323,16 @@ public PublishResponse handlePublishRequest(PublishRequest publishRequest) {
321323
getCurrentTerm());
322324
}
323325
if (clusterState.term() == getLastAcceptedTerm() && clusterState.version() <= getLastAcceptedVersion()) {
324-
logger.debug("handlePublishRequest: ignored publish request due to version mismatch (expected: >[{}], actual: [{}])",
325-
getLastAcceptedVersion(), clusterState.version());
326-
throw new CoordinationStateRejectedException("incoming version " + clusterState.version() +
327-
" lower or equal to current version " + getLastAcceptedVersion());
326+
if (clusterState.term() == ZEN1_BWC_TERM
327+
&& clusterState.nodes().getMasterNode().equals(getLastAcceptedState().nodes().getMasterNode()) == false) {
328+
logger.debug("handling publish request in compatibility mode despite version mismatch (expected: >[{}], actual: [{}])",
329+
getLastAcceptedVersion(), clusterState.version());
330+
} else {
331+
logger.debug("handlePublishRequest: ignored publish request due to version mismatch (expected: >[{}], actual: [{}])",
332+
getLastAcceptedVersion(), clusterState.version());
333+
throw new CoordinationStateRejectedException("incoming version " + clusterState.version() +
334+
" lower or equal to current version " + getLastAcceptedVersion());
335+
}
328336
}
329337

330338
logger.trace("handlePublishRequest: accepting publish request for version [{}] and term [{}]",

server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java

+81-9
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.elasticsearch.cluster.service.ClusterApplier;
4545
import org.elasticsearch.cluster.service.ClusterApplier.ClusterApplyListener;
4646
import org.elasticsearch.cluster.service.MasterService;
47+
import org.elasticsearch.common.Booleans;
4748
import org.elasticsearch.common.Nullable;
4849
import org.elasticsearch.common.Priority;
4950
import org.elasticsearch.common.Strings;
@@ -86,6 +87,9 @@
8687
import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
8788

8889
public class Coordinator extends AbstractLifecycleComponent implements Discovery {
90+
91+
public static final long ZEN1_BWC_TERM = 0;
92+
8993
private static final Logger logger = LogManager.getLogger(Coordinator.class);
9094

9195
// the timeout for the publication of each value
@@ -121,6 +125,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
121125
private long maxTermSeen;
122126
private final Reconfigurator reconfigurator;
123127
private final ClusterBootstrapService clusterBootstrapService;
128+
private final DiscoveryUpgradeService discoveryUpgradeService;
124129
private final LagDetector lagDetector;
125130
private final ClusterFormationFailureHelper clusterFormationFailureHelper;
126131

@@ -161,6 +166,8 @@ public Coordinator(String nodeName, Settings settings, ClusterSettings clusterSe
161166
masterService.setClusterStateSupplier(this::getStateForMasterService);
162167
this.reconfigurator = new Reconfigurator(settings, clusterSettings);
163168
this.clusterBootstrapService = new ClusterBootstrapService(settings, transportService);
169+
this.discoveryUpgradeService = new DiscoveryUpgradeService(settings, clusterSettings, transportService, this::isBootstrapped,
170+
joinHelper, peerFinder::getFoundPeers, this::unsafelySetConfigurationForUpgrade);
164171
this.lagDetector = new LagDetector(settings, transportService.getThreadPool(), n -> removeNode(n, "lagging"),
165172
transportService::getLocalNode);
166173
this.clusterFormationFailureHelper = new ClusterFormationFailureHelper(settings, this::getClusterFormationState,
@@ -256,6 +263,14 @@ PublishWithJoinResponse handlePublishRequest(PublishRequest publishRequest) {
256263
throw new CoordinationStateRejectedException("no longer leading this publication's term: " + publishRequest);
257264
}
258265

266+
if (publishRequest.getAcceptedState().term() == ZEN1_BWC_TERM && getCurrentTerm() == ZEN1_BWC_TERM
267+
&& mode == Mode.FOLLOWER && Optional.of(sourceNode).equals(lastKnownLeader) == false) {
268+
269+
logger.debug("received cluster state from {} but currently following {}, rejecting", sourceNode, lastKnownLeader);
270+
throw new CoordinationStateRejectedException("received cluster state from " + sourceNode + " but currently following "
271+
+ lastKnownLeader + ", rejecting");
272+
}
273+
259274
ensureTermAtLeast(sourceNode, publishRequest.getAcceptedState().term());
260275
final PublishResponse publishResponse = coordinationState.get().handlePublishRequest(publishRequest);
261276

@@ -323,7 +338,11 @@ private void startElection() {
323338
final StartJoinRequest startJoinRequest
324339
= new StartJoinRequest(getLocalNode(), Math.max(getCurrentTerm(), maxTermSeen) + 1);
325340
logger.debug("starting election with {}", startJoinRequest);
326-
getDiscoveredNodes().forEach(node -> joinHelper.sendStartJoinRequest(startJoinRequest, node));
341+
getDiscoveredNodes().forEach(node -> {
342+
if (isZen1Node(node) == false) {
343+
joinHelper.sendStartJoinRequest(startJoinRequest, node);
344+
}
345+
});
327346
}
328347
}
329348
}
@@ -384,6 +403,11 @@ void becomeCandidate(String method) {
384403

385404
peerFinder.activate(coordinationState.get().getLastAcceptedState().nodes());
386405
clusterFormationFailureHelper.start();
406+
407+
if (getCurrentTerm() == ZEN1_BWC_TERM) {
408+
discoveryUpgradeService.activate(lastKnownLeader);
409+
}
410+
387411
leaderChecker.setCurrentNodes(DiscoveryNodes.EMPTY_NODES);
388412
leaderChecker.updateLeader(null);
389413

@@ -414,6 +438,7 @@ void becomeLeader(String method) {
414438

415439
lastKnownLeader = Optional.of(getLocalNode());
416440
peerFinder.deactivate(getLocalNode());
441+
discoveryUpgradeService.deactivate();
417442
clusterFormationFailureHelper.stop();
418443
closePrevotingAndElectionScheduler();
419444
preVoteCollector.update(getPreVoteResponse(), getLocalNode());
@@ -439,6 +464,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) {
439464

440465
lastKnownLeader = Optional.of(leaderNode);
441466
peerFinder.deactivate(leaderNode);
467+
discoveryUpgradeService.deactivate();
442468
clusterFormationFailureHelper.stop();
443469
closePrevotingAndElectionScheduler();
444470
cancelActivePublication();
@@ -647,9 +673,6 @@ public boolean setInitialConfiguration(final VotingConfiguration votingConfigura
647673
return false;
648674
}
649675

650-
assert currentState.term() == 0 : currentState;
651-
assert currentState.version() == 0 : currentState;
652-
653676
if (mode != Mode.CANDIDATE) {
654677
throw new CoordinationStateRejectedException("Cannot set initial configuration in mode " + mode);
655678
}
@@ -681,12 +704,59 @@ public boolean setInitialConfiguration(final VotingConfiguration votingConfigura
681704
}
682705
}
683706

707+
/**
 * Reports whether this node counts as bootstrapped, i.e. whether the last-accepted cluster state
 * carries a non-empty last-accepted voting configuration.
 *
 * @return {@code true} if the last-accepted voting configuration is not empty, {@code false} otherwise
 */
private boolean isBootstrapped() {
708+
return getLastAcceptedState().getLastAcceptedConfiguration().isEmpty() == false;
709+
}
710+
711+
/**
 * Automatically bootstraps this node during a rolling upgrade by installing the given voting
 * configuration as both the last-accepted and the last-committed configuration of a new local
 * cluster state whose term is set to 1.
 *
 * The new state is fed straight into the local coordination state; every call made here targets a
 * local component (NOTE(review): no quorum or agreement step is visible in this method, which is
 * presumably why it is named "unsafely" - confirm against the caller).
 *
 * @param votingConfiguration the voting configuration to install as last-accepted and last-committed
 * @throws IllegalStateException if this node is not in {@code CANDIDATE} mode, if a non-empty voting
 *         configuration is already set, if the last-known leader is absent or is not a Zen1 node, or
 *         if the current term differs from {@code ZEN1_BWC_TERM}
 */
private void unsafelySetConfigurationForUpgrade(VotingConfiguration votingConfiguration) {
712+
// Tripwire: starts failing once the current major version is no longer one above 6.x, as a reminder
// to delete this upgrade-only code path.
assert Version.CURRENT.major == Version.V_6_6_0.major + 1 : "remove this method once unsafe upgrades are no longer needed";
713+
// All checks and state changes below happen while holding the coordinator mutex.
synchronized (mutex) {
714+
// Guard 1: only a candidate may overwrite its configuration.
if (mode != Mode.CANDIDATE) {
715+
throw new IllegalStateException("Cannot overwrite configuration in mode " + mode);
716+
}
717+
718+
// Guard 2: refuse to replace a voting configuration that is already non-empty.
if (isBootstrapped()) {
719+
throw new IllegalStateException("Cannot overwrite configuration: configuration is already set to "
720+
+ getLastAcceptedState().getLastAcceptedConfiguration());
721+
}
722+
723+
// Guard 3: the last-known leader must be present and must be a Zen1 node; an empty Optional maps
// to false and is rejected as well.
if (lastKnownLeader.map(Coordinator::isZen1Node).orElse(false) == false) {
724+
throw new IllegalStateException("Cannot upgrade from last-known leader: " + lastKnownLeader);
725+
}
726+
727+
// Guard 4: only proceed while still in the Zen1 compatibility term.
if (getCurrentTerm() != ZEN1_BWC_TERM) {
728+
throw new IllegalStateException("Cannot upgrade, term is " + getCurrentTerm());
729+
}
730+
731+
logger.info("automatically bootstrapping during rolling upgrade, using initial configuration {}", votingConfiguration);
732+
733+
// Derive the new state from the current master-service state: the builder comes from
// masterService.incrementVersion, and the coordination metadata is overwritten with term 1 and
// the supplied configuration as both last-accepted and last-committed.
final ClusterState currentState = getStateForMasterService();
734+
final Builder builder = masterService.incrementVersion(currentState);
735+
builder.metaData(MetaData.builder(currentState.metaData()).coordinationMetaData(
736+
CoordinationMetaData.builder(currentState.metaData().coordinationMetaData())
737+
.term(1)
738+
.lastAcceptedConfiguration(votingConfiguration)
739+
.lastCommittedConfiguration(votingConfiguration)
740+
.build()));
741+
final ClusterState newClusterState = builder.build();
742+
743+
// Apply the new term first (start-join from the local node), then hand the new state to the
// local coordination state as a publish request. The order matters: the term of the published
// state is the one introduced by the start-join.
coordinationState.get().handleStartJoin(new StartJoinRequest(getLocalNode(), newClusterState.term()));
744+
coordinationState.get().handlePublishRequest(new PublishRequest(newClusterState));
745+
746+
// Reset follower checking and refresh its fast-response state with the now-current term and mode.
followersChecker.clearCurrentNodes();
747+
followersChecker.updateFastResponseState(getCurrentTerm(), mode);
748+
749+
// Restart peer discovery against the node set of the newly built state.
peerFinder.deactivate(getLocalNode());
750+
peerFinder.activate(newClusterState.nodes());
751+
}
752+
}
753+
684754
// Package-private for testing
685755
ClusterState improveConfiguration(ClusterState clusterState) {
686756
assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
687757

688758
final Set<DiscoveryNode> liveNodes = StreamSupport.stream(clusterState.nodes().spliterator(), false)
689-
.filter(this::hasJoinVoteFrom).collect(Collectors.toSet());
759+
.filter(this::hasJoinVoteFrom).filter(discoveryNode -> isZen1Node(discoveryNode) == false).collect(Collectors.toSet());
690760
final VotingConfiguration newConfig = reconfigurator.reconfigure(liveNodes,
691761
clusterState.getVotingConfigExclusions().stream().map(VotingConfigExclusion::getNodeId).collect(Collectors.toSet()),
692762
clusterState.getLastAcceptedConfiguration());
@@ -967,7 +1037,9 @@ public void run() {
9671037
prevotingRound.close();
9681038
}
9691039
final ClusterState lastAcceptedState = coordinationState.get().getLastAcceptedState();
970-
prevotingRound = preVoteCollector.start(lastAcceptedState, getDiscoveredNodes());
1040+
final List<DiscoveryNode> discoveredNodes
1041+
= getDiscoveredNodes().stream().filter(n -> isZen1Node(n) == false).collect(Collectors.toList());
1042+
prevotingRound = preVoteCollector.start(lastAcceptedState, discoveredNodes);
9711043
}
9721044
}
9731045
}
@@ -1176,13 +1248,13 @@ protected void sendApplyCommit(DiscoveryNode destination, ApplyCommitRequest app
11761248
}
11771249

11781250
// TODO: only here temporarily for BWC development, remove once complete
1179-
public static Settings.Builder addZen1Attribute(Settings.Builder builder) {
1180-
return builder.put("node.attr.zen1", true);
1251+
public static Settings.Builder addZen1Attribute(boolean isZen1Node, Settings.Builder builder) {
1252+
return builder.put("node.attr.zen1", isZen1Node);
11811253
}
11821254

11831255
// TODO: only here temporarily for BWC development, remove once complete
11841256
public static boolean isZen1Node(DiscoveryNode discoveryNode) {
11851257
return discoveryNode.getVersion().before(Version.V_7_0_0) ||
1186-
discoveryNode.getAttributes().containsKey("zen1");
1258+
(Booleans.isTrue(discoveryNode.getAttributes().getOrDefault("zen1", "false")));
11871259
}
11881260
}

0 commit comments

Comments
 (0)