Skip to content

Commit c4fcf0b

Browse files
Node repurpose tool (#39403)
When a node is repurposed to master/no-data or no-master/no-data, v7.x will not start (see #37748 and #37347). The `elasticsearch-node repurpose` tool can fix this by cleaning up the problematic data.
1 parent 3bd2dd0 commit c4fcf0b

File tree

10 files changed

+756
-36
lines changed

10 files changed

+756
-36
lines changed

qa/vagrant/src/main/java/org/elasticsearch/packaging/test/ArchiveTestCase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ public void test92ElasticsearchNodeCliPackaging() {
354354
Platforms.PlatformAction action = () -> {
355355
final Result result = sh.run(bin.elasticsearchNode + " -h");
356356
assertThat(result.stdout,
357-
containsString("A CLI tool to unsafely recover a cluster after the permanent loss of too many master-eligible nodes"));
357+
containsString("A CLI tool to do unsafe cluster and index manipulations on current node"));
358358
};
359359

360360
if (distribution().equals(Distribution.DEFAULT_LINUX) || distribution().equals(Distribution.DEFAULT_WINDOWS)) {

server/src/main/java/org/elasticsearch/cluster/coordination/DetachClusterCommand.java

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
*/
1919
package org.elasticsearch.cluster.coordination;
2020

21-
import joptsimple.OptionSet;
2221
import org.elasticsearch.cli.Terminal;
2322
import org.elasticsearch.cluster.metadata.Manifest;
2423
import org.elasticsearch.cluster.metadata.MetaData;
@@ -47,24 +46,18 @@ public DetachClusterCommand() {
4746
super("Detaches this node from its cluster, allowing it to unsafely join a new cluster");
4847
}
4948

50-
@Override
51-
protected void execute(Terminal terminal, OptionSet options, Environment env) throws Exception {
52-
super.execute(terminal, options, env);
53-
54-
processNodePathsWithLock(terminal, options, env);
55-
56-
terminal.println(NODE_DETACHED_MSG);
57-
}
5849

5950
@Override
60-
protected void processNodePaths(Terminal terminal, Path[] dataPaths) throws IOException {
51+
protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException {
6152
final Tuple<Manifest, MetaData> manifestMetaDataTuple = loadMetaData(terminal, dataPaths);
6253
final Manifest manifest = manifestMetaDataTuple.v1();
6354
final MetaData metaData = manifestMetaDataTuple.v2();
6455

6556
confirm(terminal, CONFIRMATION_MSG);
6657

6758
writeNewMetaData(terminal, manifest, updateCurrentTerm(), metaData, updateMetaData(metaData), dataPaths);
59+
60+
terminal.println(NODE_DETACHED_MSG);
6861
}
6962

7063
// package-private for tests

server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,14 @@ public abstract class ElasticsearchNodeCommand extends EnvironmentAwareCommand {
5151
"\n" +
5252
" WARNING: Elasticsearch MUST be stopped before running this tool." +
5353
"\n";
54-
static final String FAILED_TO_OBTAIN_NODE_LOCK_MSG = "failed to lock node's directory, is Elasticsearch still running?";
54+
protected static final String FAILED_TO_OBTAIN_NODE_LOCK_MSG = "failed to lock node's directory, is Elasticsearch still running?";
5555
static final String NO_NODE_FOLDER_FOUND_MSG = "no node folder is found in data folder(s), node has not been started yet?";
5656
static final String NO_MANIFEST_FILE_FOUND_MSG = "no manifest file is found, do you run pre 7.0 Elasticsearch?";
57-
static final String GLOBAL_GENERATION_MISSING_MSG = "no metadata is referenced from the manifest file, cluster has never been " +
58-
"bootstrapped?";
57+
protected static final String GLOBAL_GENERATION_MISSING_MSG =
58+
"no metadata is referenced from the manifest file, cluster has never been bootstrapped?";
5959
static final String NO_GLOBAL_METADATA_MSG = "failed to find global metadata, metadata corrupted?";
6060
static final String WRITE_METADATA_EXCEPTION_MSG = "exception occurred when writing new metadata to disk";
61-
static final String ABORTED_BY_USER_MSG = "aborted by user";
61+
protected static final String ABORTED_BY_USER_MSG = "aborted by user";
6262
final OptionSpec<Integer> nodeOrdinalOption;
6363

6464
public ElasticsearchNodeCommand(String description) {
@@ -80,7 +80,7 @@ protected void processNodePathsWithLock(Terminal terminal, OptionSet options, En
8080
if (dataPaths.length == 0) {
8181
throw new ElasticsearchException(NO_NODE_FOLDER_FOUND_MSG);
8282
}
83-
processNodePaths(terminal, dataPaths);
83+
processNodePaths(terminal, dataPaths, env);
8484
} catch (LockObtainFailedException ex) {
8585
throw new ElasticsearchException(
8686
FAILED_TO_OBTAIN_NODE_LOCK_MSG + " [" + ex.getMessage() + "]");
@@ -116,11 +116,31 @@ protected void confirm(Terminal terminal, String msg) {
116116
}
117117

118118
@Override
119-
protected void execute(Terminal terminal, OptionSet options, Environment env) throws Exception {
119+
protected final void execute(Terminal terminal, OptionSet options, Environment env) throws Exception {
120120
terminal.println(STOP_WARNING_MSG);
121+
if (validateBeforeLock(terminal, env)) {
122+
processNodePathsWithLock(terminal, options, env);
123+
}
124+
}
125+
126+
/**
127+
* Validate that the command can run before taking any locks.
128+
* @param terminal the terminal to print to
129+
* @param env the env to validate.
130+
* @return true to continue, false to stop (must print message in validate).
131+
*/
132+
protected boolean validateBeforeLock(Terminal terminal, Environment env) {
133+
return true;
121134
}
122135

123-
protected abstract void processNodePaths(Terminal terminal, Path[] dataPaths) throws IOException;
136+
137+
/**
138+
* Process the paths. Locks for the paths are held during this method invocation.
139+
* @param terminal the terminal to use for messages
140+
* @param dataPaths the paths of the node to process
141+
* @param env the env of the node to process
142+
*/
143+
protected abstract void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException;
124144

125145

126146
protected void writeNewMetaData(Terminal terminal, Manifest oldManifest, long newCurrentTerm,

server/src/main/java/org/elasticsearch/cluster/coordination/NodeToolCli.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.elasticsearch.cli.CommandLoggingConfigurator;
2222
import org.elasticsearch.cli.MultiCommand;
2323
import org.elasticsearch.cli.Terminal;
24+
import org.elasticsearch.env.NodeRepurposeCommand;
2425

2526
// NodeToolCli does not extend LoggingAwareCommand, because LoggingAwareCommand performs logging initialization
2627
// after LoggingAwareCommand instance is constructed.
@@ -32,10 +33,12 @@
3233
public class NodeToolCli extends MultiCommand {
3334

3435
public NodeToolCli() {
35-
super("A CLI tool to unsafely recover a cluster after the permanent loss of too many master-eligible nodes", ()->{});
36+
super("A CLI tool to do unsafe cluster and index manipulations on current node",
37+
()->{});
3638
CommandLoggingConfigurator.configureLoggingWithoutConfig();
3739
subcommands.put("unsafe-bootstrap", new UnsafeBootstrapMasterCommand());
3840
subcommands.put("detach-cluster", new DetachClusterCommand());
41+
subcommands.put("repurpose", new NodeRepurposeCommand());
3942
}
4043

4144
public static void main(String[] args) throws Exception {

server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
*/
1919
package org.elasticsearch.cluster.coordination;
2020

21-
import joptsimple.OptionSet;
2221
import org.apache.logging.log4j.LogManager;
2322
import org.apache.logging.log4j.Logger;
2423
import org.elasticsearch.ElasticsearchException;
@@ -72,22 +71,18 @@ public class UnsafeBootstrapMasterCommand extends ElasticsearchNodeCommand {
7271
}
7372

7473
@Override
75-
protected void execute(Terminal terminal, OptionSet options, Environment env) throws Exception {
76-
super.execute(terminal, options, env);
77-
74+
protected boolean validateBeforeLock(Terminal terminal, Environment env) {
7875
Settings settings = env.settings();
7976
terminal.println(Terminal.Verbosity.VERBOSE, "Checking node.master setting");
8077
Boolean master = Node.NODE_MASTER_SETTING.get(settings);
8178
if (master == false) {
8279
throw new ElasticsearchException(NOT_MASTER_NODE_MSG);
8380
}
8481

85-
processNodePathsWithLock(terminal, options, env);
86-
87-
terminal.println(MASTER_NODE_BOOTSTRAPPED_MSG);
82+
return true;
8883
}
8984

90-
protected void processNodePaths(Terminal terminal, Path[] dataPaths) throws IOException {
85+
protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException {
9186
terminal.println(Terminal.Verbosity.VERBOSE, "Loading node metadata");
9287
final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths);
9388
if (nodeMetaData == null) {
@@ -130,5 +125,7 @@ protected void processNodePaths(Terminal terminal, Path[] dataPaths) throws IOEx
130125
.build();
131126

132127
writeNewMetaData(terminal, manifest, manifest.getCurrentTerm(), metaData, newMetaData, dataPaths);
128+
129+
terminal.println(MASTER_NODE_BOOTSTRAPPED_MSG);
133130
}
134131
}

server/src/main/java/org/elasticsearch/env/NodeEnvironment.java

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,11 @@ public Path resolve(ShardId shardId) {
128128
* ${data.paths}/nodes/{node.id}/indices/{index.uuid}
129129
*/
130130
public Path resolve(Index index) {
131-
return indicesPath.resolve(index.getUUID());
131+
return resolve(index.getUUID());
132+
}
133+
134+
Path resolve(String uuid) {
135+
return indicesPath.resolve(uuid);
132136
}
133137

134138
@Override
@@ -1050,28 +1054,48 @@ private static void ensureAtomicMoveSupported(final NodePath[] nodePaths) throws
10501054
}
10511055

10521056
private void ensureNoShardData(final NodePath[] nodePaths) throws IOException {
1053-
List<Path> shardDataPaths = collectIndexSubPaths(nodePaths, this::isShardPath);
1057+
List<Path> shardDataPaths = collectShardDataPaths(nodePaths);
10541058
if (shardDataPaths.isEmpty() == false) {
10551059
throw new IllegalStateException("Node is started with "
10561060
+ Node.NODE_DATA_SETTING.getKey()
10571061
+ "=false, but has shard data: "
1058-
+ shardDataPaths);
1062+
+ shardDataPaths
1063+
+ ". Use 'elasticsearch-node repurpose' tool to clean up"
1064+
);
10591065
}
10601066
}
10611067

10621068
private void ensureNoIndexMetaData(final NodePath[] nodePaths) throws IOException {
1063-
List<Path> indexMetaDataPaths = collectIndexSubPaths(nodePaths, this::isIndexMetaDataPath);
1069+
List<Path> indexMetaDataPaths = collectIndexMetaDataPaths(nodePaths);
10641070
if (indexMetaDataPaths.isEmpty() == false) {
10651071
throw new IllegalStateException("Node is started with "
10661072
+ Node.NODE_DATA_SETTING.getKey()
10671073
+ "=false and "
10681074
+ Node.NODE_MASTER_SETTING.getKey()
10691075
+ "=false, but has index metadata: "
1070-
+ indexMetaDataPaths);
1076+
+ indexMetaDataPaths
1077+
+ ". Use 'elasticsearch-node repurpose' tool to clean up"
1078+
);
10711079
}
10721080
}
10731081

1074-
private List<Path> collectIndexSubPaths(NodePath[] nodePaths, Predicate<Path> subPathPredicate) throws IOException {
1082+
/**
1083+
* Collect the paths containing shard data in the indicated node paths. The returned paths will point to the shard data folder.
1084+
*/
1085+
static List<Path> collectShardDataPaths(NodePath[] nodePaths) throws IOException {
1086+
return collectIndexSubPaths(nodePaths, NodeEnvironment::isShardPath);
1087+
}
1088+
1089+
1090+
/**
1091+
* Collect the paths containing index meta data in the indicated node paths. The returned paths will point to the
1092+
* {@link MetaDataStateFormat#STATE_DIR_NAME} folder
1093+
*/
1094+
static List<Path> collectIndexMetaDataPaths(NodePath[] nodePaths) throws IOException {
1095+
return collectIndexSubPaths(nodePaths, NodeEnvironment::isIndexMetaDataPath);
1096+
}
1097+
1098+
private static List<Path> collectIndexSubPaths(NodePath[] nodePaths, Predicate<Path> subPathPredicate) throws IOException {
10751099
List<Path> indexSubPaths = new ArrayList<>();
10761100
for (NodePath nodePath : nodePaths) {
10771101
Path indicesPath = nodePath.indicesPath;
@@ -1093,12 +1117,12 @@ private List<Path> collectIndexSubPaths(NodePath[] nodePaths, Predicate<Path> su
10931117
return indexSubPaths;
10941118
}
10951119

1096-
private boolean isShardPath(Path path) {
1120+
private static boolean isShardPath(Path path) {
10971121
return Files.isDirectory(path)
10981122
&& path.getFileName().toString().chars().allMatch(Character::isDigit);
10991123
}
11001124

1101-
private boolean isIndexMetaDataPath(Path path) {
1125+
private static boolean isIndexMetaDataPath(Path path) {
11021126
return Files.isDirectory(path)
11031127
&& path.getFileName().toString().equals(MetaDataStateFormat.STATE_DIR_NAME);
11041128
}

0 commit comments

Comments
 (0)