Skip to content

Commit c8bfe3d

Browse files
authored
Fold node metadata into new node storage (#50741)
Moves node metadata to use the new storage mechanism (see #48701) as the authoritative source.
1 parent 63a9238 commit c8bfe3d

15 files changed

+223
-163
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public ElasticsearchNodeCommand(String description) {
7272
}
7373

7474
public static PersistedClusterStateService createPersistedClusterStateService(Path[] dataPaths) throws IOException {
75-
final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths);
75+
final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(dataPaths);
7676
if (nodeMetaData == null) {
7777
throw new ElasticsearchException(NO_NODE_METADATA_FOUND_MSG);
7878
}

server/src/main/java/org/elasticsearch/env/NodeEnvironment.java

+28-27
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
5252
import org.elasticsearch.core.internal.io.IOUtils;
5353
import org.elasticsearch.gateway.MetaDataStateFormat;
54+
import org.elasticsearch.gateway.PersistedClusterStateService;
5455
import org.elasticsearch.index.Index;
5556
import org.elasticsearch.index.IndexSettings;
5657
import org.elasticsearch.index.shard.ShardId;
@@ -301,7 +302,7 @@ public NodeEnvironment(Settings settings, Environment environment) throws IOExce
301302
ensureNoShardData(nodePaths);
302303
}
303304

304-
this.nodeMetaData = loadOrCreateNodeMetaData(settings, logger, nodePaths);
305+
this.nodeMetaData = loadNodeMetaData(settings, logger, nodePaths);
305306

306307
success = true;
307308
} finally {
@@ -428,7 +429,7 @@ private static boolean upgradeLegacyNodeFolders(Logger logger, Settings settings
428429
}
429430
// now do the actual upgrade. start by upgrading the node metadata file before moving anything, since a downgrade in an
430431
// intermediate state would be pretty disastrous
431-
loadOrCreateNodeMetaData(settings, logger, legacyNodeLock.getNodePaths());
432+
loadNodeMetaData(settings, logger, legacyNodeLock.getNodePaths());
432433
for (CheckedRunnable<IOException> upgradeAction : upgradeActions) {
433434
upgradeAction.run();
434435
}
@@ -497,36 +498,36 @@ private void maybeLogHeapDetails() {
497498

498499
/**
499500
* scans the node paths and loads existing metaData file. If not found a new meta data will be generated
500-
* and persisted into the nodePaths
501501
*/
502-
private static NodeMetaData loadOrCreateNodeMetaData(Settings settings, Logger logger,
503-
NodePath... nodePaths) throws IOException {
502+
private static NodeMetaData loadNodeMetaData(Settings settings, Logger logger,
503+
NodePath... nodePaths) throws IOException {
504504
final Path[] paths = Arrays.stream(nodePaths).map(np -> np.path).toArray(Path[]::new);
505-
506-
final Set<String> nodeIds = new HashSet<>();
507-
for (final Path path : paths) {
508-
final NodeMetaData metaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, path);
509-
if (metaData != null) {
510-
nodeIds.add(metaData.nodeId());
511-
}
512-
}
513-
if (nodeIds.size() > 1) {
514-
throw new IllegalStateException(
515-
"data paths " + Arrays.toString(paths) + " belong to multiple nodes with IDs " + nodeIds);
516-
}
517-
518-
NodeMetaData metaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, paths);
505+
NodeMetaData metaData = PersistedClusterStateService.nodeMetaData(paths);
519506
if (metaData == null) {
520-
assert nodeIds.isEmpty() : nodeIds;
521-
metaData = new NodeMetaData(generateNodeId(settings), Version.CURRENT);
522-
} else {
523-
assert nodeIds.equals(Collections.singleton(metaData.nodeId())) : nodeIds + " doesn't match " + metaData;
524-
metaData = metaData.upgradeToCurrentVersion();
507+
// load legacy metadata
508+
final Set<String> nodeIds = new HashSet<>();
509+
for (final Path path : paths) {
510+
final NodeMetaData oldStyleMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, path);
511+
if (oldStyleMetaData != null) {
512+
nodeIds.add(oldStyleMetaData.nodeId());
513+
}
514+
}
515+
if (nodeIds.size() > 1) {
516+
throw new IllegalStateException(
517+
"data paths " + Arrays.toString(paths) + " belong to multiple nodes with IDs " + nodeIds);
518+
}
519+
// load legacy metadata
520+
final NodeMetaData legacyMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, paths);
521+
if (legacyMetaData == null) {
522+
assert nodeIds.isEmpty() : nodeIds;
523+
metaData = new NodeMetaData(generateNodeId(settings), Version.CURRENT);
524+
} else {
525+
assert nodeIds.equals(Collections.singleton(legacyMetaData.nodeId())) : nodeIds + " doesn't match " + legacyMetaData;
526+
metaData = legacyMetaData;
527+
}
525528
}
526-
527-
// we write again to make sure all paths have the latest state file
529+
metaData = metaData.upgradeToCurrentVersion();
528530
assert metaData.nodeVersion().equals(Version.CURRENT) : metaData.nodeVersion() + " != " + Version.CURRENT;
529-
NodeMetaData.FORMAT.writeAndCleanup(metaData, paths);
530531

531532
return metaData;
532533
}

server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java

+3-8
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,17 @@
2020

2121
import joptsimple.OptionParser;
2222
import joptsimple.OptionSet;
23-
import org.apache.logging.log4j.LogManager;
24-
import org.apache.logging.log4j.Logger;
2523
import org.elasticsearch.ElasticsearchException;
2624
import org.elasticsearch.Version;
2725
import org.elasticsearch.cli.Terminal;
2826
import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand;
29-
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
27+
import org.elasticsearch.gateway.PersistedClusterStateService;
3028

3129
import java.io.IOException;
3230
import java.nio.file.Path;
3331
import java.util.Arrays;
3432

3533
public class OverrideNodeVersionCommand extends ElasticsearchNodeCommand {
36-
private static final Logger logger = LogManager.getLogger(OverrideNodeVersionCommand.class);
37-
3834
private static final String TOO_NEW_MESSAGE =
3935
DELIMITER +
4036
"\n" +
@@ -75,8 +71,7 @@ public OverrideNodeVersionCommand() {
7571
@Override
7672
protected void processNodePaths(Terminal terminal, Path[] dataPaths, OptionSet options, Environment env) throws IOException {
7773
final Path[] nodePaths = Arrays.stream(toNodePaths(dataPaths)).map(p -> p.path).toArray(Path[]::new);
78-
final NodeMetaData nodeMetaData
79-
= new NodeMetaData.NodeMetaDataStateFormat(true).loadLatestState(logger, NamedXContentRegistry.EMPTY, nodePaths);
74+
final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePaths);
8075
if (nodeMetaData == null) {
8176
throw new ElasticsearchException(NO_METADATA_MESSAGE);
8277
}
@@ -94,7 +89,7 @@ protected void processNodePaths(Terminal terminal, Path[] dataPaths, OptionSet o
9489
.replace("V_NEW", nodeMetaData.nodeVersion().toString())
9590
.replace("V_CUR", Version.CURRENT.toString()));
9691

97-
NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(nodeMetaData.nodeId(), Version.CURRENT), nodePaths);
92+
PersistedClusterStateService.overrideVersion(Version.CURRENT, dataPaths);
9893

9994
terminal.println(SUCCESS_MESSAGE);
10095
}

server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java

+25-2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.elasticsearch.common.collect.Tuple;
3939
import org.elasticsearch.common.settings.Settings;
4040
import org.elasticsearch.core.internal.io.IOUtils;
41+
import org.elasticsearch.env.NodeMetaData;
4142
import org.elasticsearch.plugins.MetaDataUpgrader;
4243
import org.elasticsearch.transport.TransportService;
4344

@@ -114,6 +115,9 @@ public void start(Settings settings, TransportService transportService, ClusterS
114115
} else {
115116
metaStateService.deleteAll(); // delete legacy files
116117
}
118+
// write legacy node metadata to prevent accidental downgrades from spawning empty cluster state
119+
NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(persistedClusterStateService.getNodeId(), Version.CURRENT),
120+
persistedClusterStateService.getDataPaths());
117121
success = true;
118122
} finally {
119123
if (success == false) {
@@ -126,8 +130,27 @@ public void start(Settings settings, TransportService transportService, ClusterS
126130
throw new ElasticsearchException("failed to load metadata", e);
127131
}
128132
} else {
129-
persistedState.set(
130-
new InMemoryPersistedState(0L, ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build()));
133+
final long currentTerm = 0L;
134+
final ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build();
135+
if (persistedClusterStateService.getDataPaths().length > 0) {
136+
// write empty cluster state just so that we have a persistent node id. There is no need to write out global metadata with
137+
// cluster uuid as coordinating-only nodes do not snap into a cluster as they carry no state
138+
try (PersistedClusterStateService.Writer persistenceWriter = persistedClusterStateService.createWriter()) {
139+
persistenceWriter.writeFullStateAndCommit(currentTerm, clusterState);
140+
} catch (IOException e) {
141+
throw new ElasticsearchException("failed to load metadata", e);
142+
}
143+
try {
144+
// delete legacy cluster state files
145+
metaStateService.deleteAll();
146+
// write legacy node metadata to prevent downgrades from spawning empty cluster state
147+
NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(persistedClusterStateService.getNodeId(), Version.CURRENT),
148+
persistedClusterStateService.getDataPaths());
149+
} catch (IOException e) {
150+
throw new UncheckedIOException(e);
151+
}
152+
}
153+
persistedState.set(new InMemoryPersistedState(currentTerm, clusterState));
131154
}
132155
}
133156

server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java

+81-11
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.elasticsearch.cluster.metadata.IndexMetaData;
5252
import org.elasticsearch.cluster.metadata.MetaData;
5353
import org.elasticsearch.common.CheckedConsumer;
54+
import org.elasticsearch.common.Nullable;
5455
import org.elasticsearch.common.io.stream.ReleasableBytesStreamOutput;
5556
import org.elasticsearch.common.lease.Releasable;
5657
import org.elasticsearch.common.logging.Loggers;
@@ -64,6 +65,7 @@
6465
import org.elasticsearch.common.xcontent.XContentType;
6566
import org.elasticsearch.core.internal.io.IOUtils;
6667
import org.elasticsearch.env.NodeEnvironment;
68+
import org.elasticsearch.env.NodeMetaData;
6769
import org.elasticsearch.index.Index;
6870

6971
import java.io.Closeable;
@@ -155,17 +157,7 @@ public Writer createWriter() throws IOException {
155157
final Directory directory = createDirectory(path.resolve(METADATA_DIRECTORY_NAME));
156158
closeables.add(directory);
157159

158-
final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new KeywordAnalyzer());
159-
// start empty since we re-write the whole cluster state to ensure it is all using the same format version
160-
indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
161-
// only commit when specifically instructed, we must not write any intermediate states
162-
indexWriterConfig.setCommitOnClose(false);
163-
// most of the data goes into stored fields which are not buffered, so we only really need a tiny buffer
164-
indexWriterConfig.setRAMBufferSizeMB(1.0);
165-
// merge on the write thread (e.g. while flushing)
166-
indexWriterConfig.setMergeScheduler(new SerialMergeScheduler());
167-
168-
final IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
160+
final IndexWriter indexWriter = createIndexWriter(directory, false);
169161
closeables.add(indexWriter);
170162
metaDataIndexWriters.add(new MetaDataIndexWriter(directory, indexWriter));
171163
}
@@ -178,6 +170,20 @@ public Writer createWriter() throws IOException {
178170
return new Writer(metaDataIndexWriters, nodeId, bigArrays);
179171
}
180172

173+
private static IndexWriter createIndexWriter(Directory directory, boolean openExisting) throws IOException {
174+
final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new KeywordAnalyzer());
175+
// start empty since we re-write the whole cluster state to ensure it is all using the same format version
176+
indexWriterConfig.setOpenMode(openExisting ? IndexWriterConfig.OpenMode.APPEND : IndexWriterConfig.OpenMode.CREATE);
177+
// only commit when specifically instructed, we must not write any intermediate states
178+
indexWriterConfig.setCommitOnClose(false);
179+
// most of the data goes into stored fields which are not buffered, so we only really need a tiny buffer
180+
indexWriterConfig.setRAMBufferSizeMB(1.0);
181+
// merge on the write thread (e.g. while flushing)
182+
indexWriterConfig.setMergeScheduler(new SerialMergeScheduler());
183+
184+
return new IndexWriter(directory, indexWriterConfig);
185+
}
186+
181187
/**
182188
* Remove all persisted cluster states from the given data paths, for use in tests. Should only be called when there is no open
183189
* {@link Writer} on these paths.
@@ -196,6 +202,10 @@ Directory createDirectory(Path path) throws IOException {
196202
return new SimpleFSDirectory(path);
197203
}
198204

205+
public Path[] getDataPaths() {
206+
return dataPaths;
207+
}
208+
199209
public static class OnDiskState {
200210
private static final OnDiskState NO_ON_DISK_STATE = new OnDiskState(null, null, 0L, 0L, MetaData.EMPTY_META_DATA);
201211

@@ -218,6 +228,66 @@ public boolean empty() {
218228
}
219229
}
220230

231+
/**
232+
* Returns the node metadata for the given data paths, and checks if the node ids are unique
233+
* @param dataPaths the data paths to scan
234+
*/
235+
@Nullable
236+
public static NodeMetaData nodeMetaData(Path... dataPaths) throws IOException {
237+
String nodeId = null;
238+
Version version = null;
239+
for (final Path dataPath : dataPaths) {
240+
final Path indexPath = dataPath.resolve(METADATA_DIRECTORY_NAME);
241+
if (Files.exists(indexPath)) {
242+
try (DirectoryReader reader = DirectoryReader.open(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)))) {
243+
final Map<String, String> userData = reader.getIndexCommit().getUserData();
244+
assert userData.get(NODE_VERSION_KEY) != null;
245+
246+
final String thisNodeId = userData.get(NODE_ID_KEY);
247+
assert thisNodeId != null;
248+
if (nodeId != null && nodeId.equals(thisNodeId) == false) {
249+
throw new IllegalStateException("unexpected node ID in metadata, found [" + thisNodeId +
250+
"] in [" + dataPath + "] but expected [" + nodeId + "]");
251+
} else if (nodeId == null) {
252+
nodeId = thisNodeId;
253+
version = Version.fromId(Integer.parseInt(userData.get(NODE_VERSION_KEY)));
254+
}
255+
} catch (IndexNotFoundException e) {
256+
logger.debug(new ParameterizedMessage("no on-disk state at {}", indexPath), e);
257+
}
258+
}
259+
}
260+
if (nodeId == null) {
261+
return null;
262+
}
263+
return new NodeMetaData(nodeId, version);
264+
}
265+
266+
/**
267+
* Overrides the version field for the metadata in the given data path
268+
*/
269+
public static void overrideVersion(Version newVersion, Path... dataPaths) throws IOException {
270+
for (final Path dataPath : dataPaths) {
271+
final Path indexPath = dataPath.resolve(METADATA_DIRECTORY_NAME);
272+
if (Files.exists(indexPath)) {
273+
try (DirectoryReader reader = DirectoryReader.open(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)))) {
274+
final Map<String, String> userData = reader.getIndexCommit().getUserData();
275+
assert userData.get(NODE_VERSION_KEY) != null;
276+
277+
try (IndexWriter indexWriter =
278+
createIndexWriter(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)), true)) {
279+
final Map<String, String> commitData = new HashMap<>(userData);
280+
commitData.put(NODE_VERSION_KEY, Integer.toString(newVersion.id));
281+
indexWriter.setLiveCommitData(commitData.entrySet());
282+
indexWriter.commit();
283+
}
284+
} catch (IndexNotFoundException e) {
285+
logger.debug(new ParameterizedMessage("no on-disk state at {}", indexPath), e);
286+
}
287+
}
288+
}
289+
}
290+
221291
/**
222292
* Loads the best available on-disk cluster state. Returns {@link OnDiskState#NO_ON_DISK_STATE} if no such state was found.
223293
*/

server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommand.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
import org.elasticsearch.env.Environment;
5252
import org.elasticsearch.env.NodeEnvironment;
5353
import org.elasticsearch.env.NodeMetaData;
54-
import org.elasticsearch.gateway.MetaDataStateFormat;
54+
import org.elasticsearch.gateway.PersistedClusterStateService;
5555
import org.elasticsearch.index.Index;
5656
import org.elasticsearch.index.IndexSettings;
5757
import org.elasticsearch.index.engine.Engine;
@@ -439,8 +439,7 @@ private void newAllocationId(ShardPath shardPath, Terminal terminal) throws IOEx
439439
private void printRerouteCommand(ShardPath shardPath, Terminal terminal, boolean allocateStale)
440440
throws IOException {
441441
final Path nodePath = getNodePath(shardPath);
442-
final NodeMetaData nodeMetaData =
443-
NodeMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, nodePath);
442+
final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePath);
444443

445444
if (nodeMetaData == null) {
446445
throw new ElasticsearchException("No node meta data at " + nodePath);
@@ -463,7 +462,8 @@ private void printRerouteCommand(ShardPath shardPath, Terminal terminal, boolean
463462

464463
private Path getNodePath(ShardPath shardPath) {
465464
final Path nodePath = shardPath.getDataPath().getParent().getParent().getParent();
466-
if (Files.exists(nodePath) == false || Files.exists(nodePath.resolve(MetaDataStateFormat.STATE_DIR_NAME)) == false) {
465+
if (Files.exists(nodePath) == false ||
466+
Files.exists(nodePath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME)) == false) {
467467
throw new ElasticsearchException("Unable to resolve node path for " + shardPath);
468468
}
469469
return nodePath;

server/src/main/java/org/elasticsearch/node/Node.java

+7
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.Build;
2828
import org.elasticsearch.ElasticsearchException;
2929
import org.elasticsearch.ElasticsearchTimeoutException;
30+
import org.elasticsearch.Version;
3031
import org.elasticsearch.action.ActionModule;
3132
import org.elasticsearch.action.ActionType;
3233
import org.elasticsearch.action.search.SearchExecutionStatsCollector;
@@ -90,6 +91,7 @@
9091
import org.elasticsearch.discovery.DiscoveryModule;
9192
import org.elasticsearch.env.Environment;
9293
import org.elasticsearch.env.NodeEnvironment;
94+
import org.elasticsearch.env.NodeMetaData;
9395
import org.elasticsearch.gateway.GatewayAllocator;
9496
import org.elasticsearch.gateway.GatewayMetaState;
9597
import org.elasticsearch.gateway.GatewayModule;
@@ -698,6 +700,11 @@ public Node start() throws NodeValidationException {
698700
if (Assertions.ENABLED) {
699701
try {
700702
assert injector.getInstance(MetaStateService.class).loadFullState().v1().isEmpty();
703+
final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY,
704+
nodeEnvironment.nodeDataPaths());
705+
assert nodeMetaData != null;
706+
assert nodeMetaData.nodeVersion().equals(Version.CURRENT);
707+
assert nodeMetaData.nodeId().equals(localNodeFactory.getNode().getId());
701708
} catch (IOException e) {
702709
assert false : e;
703710
}

0 commit comments

Comments
 (0)