Skip to content

Commit bda5914

Browse files
authored
Add elasticsearch-node detach-cluster command (#37979)
This commit adds the second part of the `elasticsearch-node` tool: the `detach-cluster` command, in addition to the existing `unsafe-bootstrap` command. It also changes the semantics of `unsafe-bootstrap`, which now changes the clusterUUID. The algorithm for running the `elasticsearch-node` tool is therefore as follows: 1) Stop all nodes in the cluster. 2) Pick the master-eligible node with the highest (term, version) pair and run the `unsafe-bootstrap` command on it. If there are no surviving master-eligible nodes, skip this step. 3) Run the `detach-cluster` command on the remaining surviving nodes. Detaching a node makes the following changes to the node metadata: 1) Sets clusterUUID committed to false. 2) Sets currentTerm and term to 0. 3) Removes voting tombstones and sets the voting configurations to the special constant MUST_JOIN_ELECTED_MASTER, which prevents initial cluster bootstrap. The abstract base class `ElasticsearchNodeCommand` is introduced because `UnsafeBootstrapMasterCommand` and `DetachClusterCommand` have a lot in common. This commit also adds an "ordinal" parameter to both commands, because it is impossible to write integration tests otherwise. Special handling for the MUST_JOIN_ELECTED_MASTER case is introduced in `ClusterFormationFailureHelper`. Tests for both commands reside in `ElasticsearchNodeCommandIT` (renamed from `UnsafeBootstrapMasterIT`).
1 parent 979e557 commit bda5914

12 files changed

+720
-375
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ String getDescription() {
167167

168168
assert clusterState.getLastCommittedConfiguration().isEmpty() == false;
169169

170+
if (clusterState.getLastCommittedConfiguration().equals(VotingConfiguration.MUST_JOIN_ELECTED_MASTER)) {
171+
return String.format(Locale.ROOT,
172+
"master not discovered yet and this node was detached from its previous cluster, have discovered %s; %s",
173+
foundPeers, discoveryWillContinueDescription);
174+
}
175+
170176
final String quorumDescription;
171177
if (clusterState.getLastAcceptedConfiguration().equals(clusterState.getLastCommittedConfiguration())) {
172178
quorumDescription = describeQuorum(clusterState.getLastAcceptedConfiguration());

server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationMetaData.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,8 @@ public String toString() {
325325
public static class VotingConfiguration implements Writeable, ToXContentFragment {
326326

327327
public static final VotingConfiguration EMPTY_CONFIG = new VotingConfiguration(Collections.emptySet());
328+
public static final VotingConfiguration MUST_JOIN_ELECTED_MASTER = new VotingConfiguration(Collections.singleton(
329+
"_must_join_elected_master_"));
328330

329331
private final Set<String> nodeIds;
330332

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.cluster.coordination;
20+
21+
import joptsimple.OptionSet;
22+
import org.elasticsearch.cli.Terminal;
23+
import org.elasticsearch.cluster.metadata.Manifest;
24+
import org.elasticsearch.cluster.metadata.MetaData;
25+
import org.elasticsearch.common.collect.Tuple;
26+
import org.elasticsearch.env.Environment;
27+
28+
import java.io.IOException;
29+
import java.nio.file.Path;
30+
31+
public class DetachClusterCommand extends ElasticsearchNodeCommand {
32+
33+
static final String NODE_DETACHED_MSG = "Node was successfully detached from the cluster";
34+
static final String CONFIRMATION_MSG =
35+
"-------------------------------------------------------------------------------\n" +
36+
"\n" +
37+
"You should run this tool only if you have permanently lost all\n" +
38+
"your master-eligible nodes, and you cannot restore the cluster\n" +
39+
"from a snapshot, or you have already run `elasticsearch-node unsafe-bootstrap`\n" +
40+
"on a master-eligible node that formed a cluster with this node.\n" +
41+
"This tool can cause arbitrary data loss and its use should be your last resort.\n" +
42+
"Do you want to proceed?\n";
43+
44+
public DetachClusterCommand() {
45+
super("Detaches this node from its cluster, allowing it to unsafely join a new cluster");
46+
}
47+
48+
@Override
49+
protected void execute(Terminal terminal, OptionSet options, Environment env) throws Exception {
50+
super.execute(terminal, options, env);
51+
52+
processNodePathsWithLock(terminal, options, env);
53+
54+
terminal.println(NODE_DETACHED_MSG);
55+
}
56+
57+
@Override
58+
protected void processNodePaths(Terminal terminal, Path[] dataPaths) throws IOException {
59+
final Tuple<Manifest, MetaData> manifestMetaDataTuple = loadMetaData(terminal, dataPaths);
60+
final Manifest manifest = manifestMetaDataTuple.v1();
61+
final MetaData metaData = manifestMetaDataTuple.v2();
62+
63+
confirm(terminal, CONFIRMATION_MSG);
64+
65+
writeNewMetaData(terminal, manifest, updateCurrentTerm(), metaData, updateMetaData(metaData), dataPaths);
66+
}
67+
68+
// package-private for tests
69+
static MetaData updateMetaData(MetaData oldMetaData) {
70+
final CoordinationMetaData coordinationMetaData = CoordinationMetaData.builder()
71+
.lastAcceptedConfiguration(CoordinationMetaData.VotingConfiguration.MUST_JOIN_ELECTED_MASTER)
72+
.lastCommittedConfiguration(CoordinationMetaData.VotingConfiguration.MUST_JOIN_ELECTED_MASTER)
73+
.term(0)
74+
.build();
75+
return MetaData.builder(oldMetaData)
76+
.coordinationMetaData(coordinationMetaData)
77+
.clusterUUIDCommitted(false)
78+
.build();
79+
}
80+
81+
//package-private for tests
82+
static long updateCurrentTerm() {
83+
return 0;
84+
}
85+
}
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.cluster.coordination;
20+
21+
import joptsimple.OptionParser;
22+
import joptsimple.OptionSet;
23+
import joptsimple.OptionSpec;
24+
import org.apache.logging.log4j.LogManager;
25+
import org.apache.logging.log4j.Logger;
26+
import org.apache.lucene.store.LockObtainFailedException;
27+
import org.elasticsearch.ElasticsearchException;
28+
import org.elasticsearch.cli.EnvironmentAwareCommand;
29+
import org.elasticsearch.cli.Terminal;
30+
import org.elasticsearch.cluster.ClusterModule;
31+
import org.elasticsearch.cluster.metadata.Manifest;
32+
import org.elasticsearch.cluster.metadata.MetaData;
33+
import org.elasticsearch.common.collect.Tuple;
34+
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
35+
import org.elasticsearch.env.Environment;
36+
import org.elasticsearch.env.NodeEnvironment;
37+
38+
import java.io.IOException;
39+
import java.nio.file.Files;
40+
import java.nio.file.Path;
41+
import java.util.Arrays;
42+
import java.util.Objects;
43+
44+
public abstract class ElasticsearchNodeCommand extends EnvironmentAwareCommand {
45+
private static final Logger logger = LogManager.getLogger(ElasticsearchNodeCommand.class);
46+
protected final NamedXContentRegistry namedXContentRegistry;
47+
static final String STOP_WARNING_MSG =
48+
"--------------------------------------------------------------------------\n" +
49+
"\n" +
50+
" WARNING: Elasticsearch MUST be stopped before running this tool." +
51+
"\n";
52+
static final String FAILED_TO_OBTAIN_NODE_LOCK_MSG = "failed to lock node's directory, is Elasticsearch still running?";
53+
static final String NO_NODE_FOLDER_FOUND_MSG = "no node folder is found in data folder(s), node has not been started yet?";
54+
static final String NO_MANIFEST_FILE_FOUND_MSG = "no manifest file is found, do you run pre 7.0 Elasticsearch?";
55+
static final String GLOBAL_GENERATION_MISSING_MSG = "no metadata is referenced from the manifest file, cluster has never been " +
56+
"bootstrapped?";
57+
static final String NO_GLOBAL_METADATA_MSG = "failed to find global metadata, metadata corrupted?";
58+
static final String WRITE_METADATA_EXCEPTION_MSG = "exception occurred when writing new metadata to disk";
59+
static final String ABORTED_BY_USER_MSG = "aborted by user";
60+
final OptionSpec<Integer> nodeOrdinalOption;
61+
62+
public ElasticsearchNodeCommand(String description) {
63+
super(description);
64+
nodeOrdinalOption = parser.accepts("ordinal", "Optional node ordinal, 0 if not specified")
65+
.withRequiredArg().ofType(Integer.class);
66+
namedXContentRegistry = new NamedXContentRegistry(ClusterModule.getNamedXWriteables());
67+
}
68+
69+
protected void processNodePathsWithLock(Terminal terminal, OptionSet options, Environment env) throws IOException {
70+
terminal.println(Terminal.Verbosity.VERBOSE, "Obtaining lock for node");
71+
Integer nodeOrdinal = nodeOrdinalOption.value(options);
72+
if (nodeOrdinal == null) {
73+
nodeOrdinal = 0;
74+
}
75+
try (NodeEnvironment.NodeLock lock = new NodeEnvironment.NodeLock(nodeOrdinal, logger, env, Files::exists)) {
76+
final Path[] dataPaths =
77+
Arrays.stream(lock.getNodePaths()).filter(Objects::nonNull).map(p -> p.path).toArray(Path[]::new);
78+
if (dataPaths.length == 0) {
79+
throw new ElasticsearchException(NO_NODE_FOLDER_FOUND_MSG);
80+
}
81+
processNodePaths(terminal, dataPaths);
82+
} catch (LockObtainFailedException ex) {
83+
throw new ElasticsearchException(
84+
FAILED_TO_OBTAIN_NODE_LOCK_MSG + " [" + ex.getMessage() + "]");
85+
}
86+
}
87+
88+
protected Tuple<Manifest, MetaData> loadMetaData(Terminal terminal, Path[] dataPaths) throws IOException {
89+
terminal.println(Terminal.Verbosity.VERBOSE, "Loading manifest file");
90+
final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths);
91+
92+
if (manifest == null) {
93+
throw new ElasticsearchException(NO_MANIFEST_FILE_FOUND_MSG);
94+
}
95+
if (manifest.isGlobalGenerationMissing()) {
96+
throw new ElasticsearchException(GLOBAL_GENERATION_MISSING_MSG);
97+
}
98+
terminal.println(Terminal.Verbosity.VERBOSE, "Loading global metadata file");
99+
final MetaData metaData = MetaData.FORMAT.loadGeneration(logger, namedXContentRegistry, manifest.getGlobalGeneration(),
100+
dataPaths);
101+
if (metaData == null) {
102+
throw new ElasticsearchException(NO_GLOBAL_METADATA_MSG + " [generation = " + manifest.getGlobalGeneration() + "]");
103+
}
104+
105+
return Tuple.tuple(manifest, metaData);
106+
}
107+
108+
protected void confirm(Terminal terminal, String msg) {
109+
terminal.println(msg);
110+
String text = terminal.readText("Confirm [y/N] ");
111+
if (text.equalsIgnoreCase("y") == false) {
112+
throw new ElasticsearchException(ABORTED_BY_USER_MSG);
113+
}
114+
}
115+
116+
@Override
117+
protected void execute(Terminal terminal, OptionSet options, Environment env) throws Exception {
118+
terminal.println(STOP_WARNING_MSG);
119+
}
120+
121+
protected abstract void processNodePaths(Terminal terminal, Path[] dataPaths) throws IOException;
122+
123+
124+
protected void writeNewMetaData(Terminal terminal, Manifest oldManifest, long newCurrentTerm,
125+
MetaData oldMetaData, MetaData newMetaData, Path[] dataPaths) {
126+
try {
127+
terminal.println(Terminal.Verbosity.VERBOSE,
128+
"[clusterUUID = " + oldMetaData.clusterUUID() + ", committed = " + oldMetaData.clusterUUIDCommitted() + "] => " +
129+
"[clusterUUID = " + newMetaData.clusterUUID() + ", committed = " + newMetaData.clusterUUIDCommitted() + "]");
130+
terminal.println(Terminal.Verbosity.VERBOSE, "New coordination metadata is " + newMetaData.coordinationMetaData());
131+
terminal.println(Terminal.Verbosity.VERBOSE, "Writing new global metadata to disk");
132+
long newGeneration = MetaData.FORMAT.write(newMetaData, dataPaths);
133+
Manifest newManifest = new Manifest(newCurrentTerm, oldManifest.getClusterStateVersion(), newGeneration,
134+
oldManifest.getIndexGenerations());
135+
terminal.println(Terminal.Verbosity.VERBOSE, "New manifest is " + newManifest);
136+
terminal.println(Terminal.Verbosity.VERBOSE, "Writing new manifest file to disk");
137+
Manifest.FORMAT.writeAndCleanup(newManifest, dataPaths);
138+
terminal.println(Terminal.Verbosity.VERBOSE, "Cleaning up old metadata");
139+
MetaData.FORMAT.cleanupOldFiles(newGeneration, dataPaths);
140+
} catch (Exception e) {
141+
terminal.println(Terminal.Verbosity.VERBOSE, "Cleaning up new metadata");
142+
MetaData.FORMAT.cleanupOldFiles(oldManifest.getGlobalGeneration(), dataPaths);
143+
throw new ElasticsearchException(WRITE_METADATA_EXCEPTION_MSG, e);
144+
}
145+
}
146+
147+
//package-private for testing
148+
OptionParser getParser() {
149+
return parser;
150+
}
151+
}

server/src/main/java/org/elasticsearch/cluster/coordination/NodeToolCli.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ public NodeToolCli() {
3535
super("A CLI tool to unsafely recover a cluster after the permanent loss of too many master-eligible nodes", ()->{});
3636
CommandLoggingConfigurator.configureLoggingWithoutConfig();
3737
subcommands.put("unsafe-bootstrap", new UnsafeBootstrapMasterCommand());
38+
subcommands.put("detach-cluster", new DetachClusterCommand());
3839
}
3940

4041
public static void main(String[] args) throws Exception {

0 commit comments

Comments
 (0)