elastic
diff --git a/‎server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java
+1-1 b/‎server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java
+1-1
diff --git a/‎server/src/main/java/org/elasticsearch/env/NodeEnvironment.java
+28-27 b/‎server/src/main/java/org/elasticsearch/env/NodeEnvironment.java
+28-27
diff --git a/‎server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java
+3-8 b/‎server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java
+3-8
diff --git a/‎server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java
+25-2 b/‎server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java
+25-2
diff --git a/‎server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java
+81-11 b/‎server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java
+81-11
diff --git a/‎server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommand.java
+4-4 b/‎server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommand.java
+4-4
diff --git a/‎server/src/main/java/org/elasticsearch/node/Node.java
+7 b/‎server/src/main/java/org/elasticsearch/node/Node.java
+7
@@ -72,7 +72,7 @@ public ElasticsearchNodeCommand(String description) {
     }
 
     public static PersistedClusterStateService createPersistedClusterStateService(Path[] dataPaths) throws IOException {
-        final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths);
+        final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(dataPaths);
         if (nodeMetaData == null) {
             throw new ElasticsearchException(NO_NODE_METADATA_FOUND_MSG);
         }
 
@@ -51,6 +51,7 @@
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.gateway.MetaDataStateFormat;
+import org.elasticsearch.gateway.PersistedClusterStateService;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.shard.ShardId;
@@ -301,7 +302,7 @@ public NodeEnvironment(Settings settings, Environment environment) throws IOExce
                 ensureNoShardData(nodePaths);
             }
 
-            this.nodeMetaData = loadOrCreateNodeMetaData(settings, logger, nodePaths);
+            this.nodeMetaData = loadNodeMetaData(settings, logger, nodePaths);
 
             success = true;
         } finally {
@@ -428,7 +429,7 @@ private static boolean upgradeLegacyNodeFolders(Logger logger, Settings settings
             }
             // now do the actual upgrade. start by upgrading the node metadata file before moving anything, since a downgrade in an
             // intermediate state would be pretty disastrous
-            loadOrCreateNodeMetaData(settings, logger, legacyNodeLock.getNodePaths());
+            loadNodeMetaData(settings, logger, legacyNodeLock.getNodePaths());
             for (CheckedRunnable<IOException> upgradeAction : upgradeActions) {
                 upgradeAction.run();
             }
@@ -497,36 +498,36 @@ private void maybeLogHeapDetails() {
 
     /**
      * scans the node paths and loads existing metaData file. If not found a new meta data will be generated
-     * and persisted into the nodePaths
      */
-    private static NodeMetaData loadOrCreateNodeMetaData(Settings settings, Logger logger,
-                                                         NodePath... nodePaths) throws IOException {
+    private static NodeMetaData loadNodeMetaData(Settings settings, Logger logger,
+                                                 NodePath... nodePaths) throws IOException {
         final Path[] paths = Arrays.stream(nodePaths).map(np -> np.path).toArray(Path[]::new);
-
-        final Set<String> nodeIds = new HashSet<>();
-        for (final Path path : paths) {
-            final NodeMetaData metaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, path);
-            if (metaData != null) {
-                nodeIds.add(metaData.nodeId());
-            }
-        }
-        if (nodeIds.size() > 1) {
-            throw new IllegalStateException(
-                "data paths " + Arrays.toString(paths) + " belong to multiple nodes with IDs " + nodeIds);
-        }
-
-        NodeMetaData metaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, paths);
+        NodeMetaData metaData = PersistedClusterStateService.nodeMetaData(paths);
         if (metaData == null) {
-            assert nodeIds.isEmpty() : nodeIds;
-            metaData = new NodeMetaData(generateNodeId(settings), Version.CURRENT);
-        } else {
-            assert nodeIds.equals(Collections.singleton(metaData.nodeId())) : nodeIds + " doesn't match " + metaData;
-            metaData = metaData.upgradeToCurrentVersion();
+            // nothing found via PersistedClusterStateService: collect node IDs from the legacy state file of each path and reject a mix
+            final Set<String> nodeIds = new HashSet<>();
+            for (final Path path : paths) {
+                final NodeMetaData oldStyleMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, path);
+                if (oldStyleMetaData != null) {
+                    nodeIds.add(oldStyleMetaData.nodeId());
+                }
+            }
+            if (nodeIds.size() > 1) {
+                throw new IllegalStateException(
+                    "data paths " + Arrays.toString(paths) + " belong to multiple nodes with IDs " + nodeIds);
+            }
+            // load the legacy metadata across all paths; if there is none either, generate a fresh node ID
+            final NodeMetaData legacyMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, paths);
+            if (legacyMetaData == null) {
+                assert nodeIds.isEmpty() : nodeIds;
+                metaData = new NodeMetaData(generateNodeId(settings), Version.CURRENT);
+            } else {
+                assert nodeIds.equals(Collections.singleton(legacyMetaData.nodeId())) : nodeIds + " doesn't match " + legacyMetaData;
+                metaData = legacyMetaData;
+            }
         }
-
-        // we write again to make sure all paths have the latest state file
+        metaData = metaData.upgradeToCurrentVersion();
         assert metaData.nodeVersion().equals(Version.CURRENT) : metaData.nodeVersion() + " != " + Version.CURRENT;
-        NodeMetaData.FORMAT.writeAndCleanup(metaData, paths);
 
         return metaData;
     }
 
@@ -20,21 +20,17 @@
 
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.Version;
 import org.elasticsearch.cli.Terminal;
 import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand;
-import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.gateway.PersistedClusterStateService;
 
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.Arrays;
 
 public class OverrideNodeVersionCommand extends ElasticsearchNodeCommand {
-    private static final Logger logger = LogManager.getLogger(OverrideNodeVersionCommand.class);
-
     private static final String TOO_NEW_MESSAGE =
         DELIMITER +
             "\n" +
@@ -75,8 +71,7 @@ public OverrideNodeVersionCommand() {
     @Override
     protected void processNodePaths(Terminal terminal, Path[] dataPaths, OptionSet options, Environment env) throws IOException {
         final Path[] nodePaths = Arrays.stream(toNodePaths(dataPaths)).map(p -> p.path).toArray(Path[]::new);
-        final NodeMetaData nodeMetaData
-            = new NodeMetaData.NodeMetaDataStateFormat(true).loadLatestState(logger, NamedXContentRegistry.EMPTY, nodePaths);
+        final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePaths);
         if (nodeMetaData == null) {
             throw new ElasticsearchException(NO_METADATA_MESSAGE);
         }
@@ -94,7 +89,7 @@ protected void processNodePaths(Terminal terminal, Path[] dataPaths, OptionSet o
             .replace("V_NEW", nodeMetaData.nodeVersion().toString())
             .replace("V_CUR", Version.CURRENT.toString()));
 
-        NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(nodeMetaData.nodeId(), Version.CURRENT), nodePaths);
+        PersistedClusterStateService.overrideVersion(Version.CURRENT, dataPaths);
 
         terminal.println(SUCCESS_MESSAGE);
     }
 
@@ -38,6 +38,7 @@
 import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.internal.io.IOUtils;
+import org.elasticsearch.env.NodeMetaData;
 import org.elasticsearch.plugins.MetaDataUpgrader;
 import org.elasticsearch.transport.TransportService;
 
@@ -114,6 +115,9 @@ public void start(Settings settings, TransportService transportService, ClusterS
                     } else {
                         metaStateService.deleteAll(); // delete legacy files
                     }
+                    // write legacy node metadata to prevent accidental downgrades from spawning empty cluster state
+                    NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(persistedClusterStateService.getNodeId(), Version.CURRENT),
+                        persistedClusterStateService.getDataPaths());
                     success = true;
                 } finally {
                     if (success == false) {
@@ -126,8 +130,27 @@ public void start(Settings settings, TransportService transportService, ClusterS
                 throw new ElasticsearchException("failed to load metadata", e);
             }
         } else {
-            persistedState.set(
-                new InMemoryPersistedState(0L, ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build()));
+            final long currentTerm = 0L;
+            final ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build();
+            if (persistedClusterStateService.getDataPaths().length > 0) {
+                // write empty cluster state just so that we have a persistent node id. There is no need to write out global metadata with
+                // cluster uuid as coordinating-only nodes do not snap into a cluster as they carry no state
+                try (PersistedClusterStateService.Writer persistenceWriter = persistedClusterStateService.createWriter()) {
+                    persistenceWriter.writeFullStateAndCommit(currentTerm, clusterState);
+                } catch (IOException e) {
+                    throw new ElasticsearchException("failed to load metadata", e);
+                }
+                try {
+                    // delete legacy cluster state files
+                    metaStateService.deleteAll();
+                    // write legacy node metadata to prevent downgrades from spawning empty cluster state
+                    NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(persistedClusterStateService.getNodeId(), Version.CURRENT),
+                        persistedClusterStateService.getDataPaths());
+                } catch (IOException e) {
+                    throw new UncheckedIOException(e);
+                }
+            }
+            persistedState.set(new InMemoryPersistedState(currentTerm, clusterState));
         }
     }
 
 
@@ -51,6 +51,7 @@
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.common.CheckedConsumer;
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.io.stream.ReleasableBytesStreamOutput;
 import org.elasticsearch.common.lease.Releasable;
 import org.elasticsearch.common.logging.Loggers;
@@ -64,6 +65,7 @@
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.env.NodeEnvironment;
+import org.elasticsearch.env.NodeMetaData;
 import org.elasticsearch.index.Index;
 
 import java.io.Closeable;
@@ -155,17 +157,7 @@ public Writer createWriter() throws IOException {
                 final Directory directory = createDirectory(path.resolve(METADATA_DIRECTORY_NAME));
                 closeables.add(directory);
 
-                final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new KeywordAnalyzer());
-                // start empty since we re-write the whole cluster state to ensure it is all using the same format version
-                indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
-                // only commit when specifically instructed, we must not write any intermediate states
-                indexWriterConfig.setCommitOnClose(false);
-                // most of the data goes into stored fields which are not buffered, so we only really need a tiny buffer
-                indexWriterConfig.setRAMBufferSizeMB(1.0);
-                // merge on the write thread (e.g. while flushing)
-                indexWriterConfig.setMergeScheduler(new SerialMergeScheduler());
-
-                final IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
+                final IndexWriter indexWriter = createIndexWriter(directory, false);
                 closeables.add(indexWriter);
                 metaDataIndexWriters.add(new MetaDataIndexWriter(directory, indexWriter));
             }
@@ -178,6 +170,20 @@ public Writer createWriter() throws IOException {
         return new Writer(metaDataIndexWriters, nodeId, bigArrays);
     }
 
+    /**
+     * Builds the Lucene {@link IndexWriter} used to persist metadata in the given directory.
+     *
+     * @param directory    the directory the writer operates on
+     * @param openExisting {@code true} to open the index in APPEND mode, {@code false} to open it in CREATE mode
+     * @return a new writer that only commits when explicitly asked to
+     * @throws IOException if the {@link IndexWriter} constructor fails
+     */
+    private static IndexWriter createIndexWriter(Directory directory, boolean openExisting) throws IOException {
+        final IndexWriterConfig.OpenMode openMode;
+        if (openExisting) {
+            openMode = IndexWriterConfig.OpenMode.APPEND;
+        } else {
+            // start empty since we re-write the whole cluster state to ensure it is all using the same format version
+            openMode = IndexWriterConfig.OpenMode.CREATE;
+        }
+
+        final IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
+        config.setOpenMode(openMode);
+        // merge on the write thread (e.g. while flushing)
+        config.setMergeScheduler(new SerialMergeScheduler());
+        // most of the data goes into stored fields which are not buffered, so we only really need a tiny buffer
+        config.setRAMBufferSizeMB(1.0);
+        // only commit when specifically instructed, we must not write any intermediate states
+        config.setCommitOnClose(false);
+
+        return new IndexWriter(directory, config);
+    }
+
     /**
      * Remove all persisted cluster states from the given data paths, for use in tests. Should only be called when there is no open
      * {@link Writer} on these paths.
@@ -196,6 +202,10 @@ Directory createDirectory(Path path) throws IOException {
         return new SimpleFSDirectory(path);
     }
 
+    /**
+     * Returns the data paths this service works on.
+     *
+     * @return a copy of the data path array, so that callers cannot modify the array held by this service
+     */
+    public Path[] getDataPaths() {
+        return dataPaths.clone();
+    }
+
     public static class OnDiskState {
         private static final OnDiskState NO_ON_DISK_STATE = new OnDiskState(null, null, 0L, 0L, MetaData.EMPTY_META_DATA);
 
@@ -218,6 +228,66 @@ public boolean empty() {
         }
     }
 
+    /**
+     * Returns the node metadata for the given data paths, and checks if the node ids are unique
+     * @param dataPaths the data paths to scan
+     */
+    @Nullable
+    public static NodeMetaData nodeMetaData(Path... dataPaths) throws IOException {
+        String nodeId = null;
+        Version version = null;
+        for (final Path dataPath : dataPaths) {
+            final Path indexPath = dataPath.resolve(METADATA_DIRECTORY_NAME);
+            if (Files.exists(indexPath)) {
+                try (DirectoryReader reader = DirectoryReader.open(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)))) {
+                    final Map<String, String> userData = reader.getIndexCommit().getUserData();
+                    assert userData.get(NODE_VERSION_KEY) != null;
+
+                    final String thisNodeId = userData.get(NODE_ID_KEY);
+                    assert thisNodeId != null;
+                    if (nodeId != null && nodeId.equals(thisNodeId) == false) {
+                        throw new IllegalStateException("unexpected node ID in metadata, found [" + thisNodeId +
+                            "] in [" + dataPath + "] but expected [" + nodeId + "]");
+                    } else if (nodeId == null) {
+                        nodeId = thisNodeId;
+                        version = Version.fromId(Integer.parseInt(userData.get(NODE_VERSION_KEY)));
+                    }
+                } catch (IndexNotFoundException e) {
+                    logger.debug(new ParameterizedMessage("no on-disk state at {}", indexPath), e);
+                }
+            }
+        }
+        if (nodeId == null) {
+            return null;
+        }
+        return new NodeMetaData(nodeId, version);
+    }
+
+    /**
+     * Overrides the version field for the metadata in the given data path
+     */
+    public static void overrideVersion(Version newVersion, Path... dataPaths) throws IOException {
+        for (final Path dataPath : dataPaths) {
+            final Path indexPath = dataPath.resolve(METADATA_DIRECTORY_NAME);
+            if (Files.exists(indexPath)) {
+                try (DirectoryReader reader = DirectoryReader.open(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)))) {
+                    final Map<String, String> userData = reader.getIndexCommit().getUserData();
+                    assert userData.get(NODE_VERSION_KEY) != null;
+
+                    try (IndexWriter indexWriter =
+                             createIndexWriter(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)), true)) {
+                        final Map<String, String> commitData = new HashMap<>(userData);
+                        commitData.put(NODE_VERSION_KEY, Integer.toString(newVersion.id));
+                        indexWriter.setLiveCommitData(commitData.entrySet());
+                        indexWriter.commit();
+                    }
+                } catch (IndexNotFoundException e) {
+                    logger.debug(new ParameterizedMessage("no on-disk state at {}", indexPath), e);
+                }
+            }
+        }
+    }
+
     /**
      * Loads the best available on-disk cluster state. Returns {@link OnDiskState#NO_ON_DISK_STATE} if no such state was found.
      */
 
@@ -51,7 +51,7 @@
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.env.NodeMetaData;
-import org.elasticsearch.gateway.MetaDataStateFormat;
+import org.elasticsearch.gateway.PersistedClusterStateService;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.engine.Engine;
@@ -439,8 +439,7 @@ private void newAllocationId(ShardPath shardPath, Terminal terminal) throws IOEx
     private void printRerouteCommand(ShardPath shardPath, Terminal terminal, boolean allocateStale)
         throws IOException {
         final Path nodePath = getNodePath(shardPath);
-        final NodeMetaData nodeMetaData =
-            NodeMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, nodePath);
+        final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePath);
 
         if (nodeMetaData == null) {
             throw new ElasticsearchException("No node meta data at " + nodePath);
@@ -463,7 +462,8 @@ private void printRerouteCommand(ShardPath shardPath, Terminal terminal, boolean
 
     private Path getNodePath(ShardPath shardPath) {
         final Path nodePath = shardPath.getDataPath().getParent().getParent().getParent();
-        if (Files.exists(nodePath) == false || Files.exists(nodePath.resolve(MetaDataStateFormat.STATE_DIR_NAME)) == false) {
+        if (Files.exists(nodePath) == false ||
+            Files.exists(nodePath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME)) == false) {
             throw new ElasticsearchException("Unable to resolve node path for " + shardPath);
         }
         return nodePath;
 
@@ -27,6 +27,7 @@
 import org.elasticsearch.Build;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchTimeoutException;
+import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionModule;
 import org.elasticsearch.action.ActionType;
 import org.elasticsearch.action.search.SearchExecutionStatsCollector;
@@ -90,6 +91,7 @@
 import org.elasticsearch.discovery.DiscoveryModule;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.env.NodeEnvironment;
+import org.elasticsearch.env.NodeMetaData;
 import org.elasticsearch.gateway.GatewayAllocator;
 import org.elasticsearch.gateway.GatewayMetaState;
 import org.elasticsearch.gateway.GatewayModule;
@@ -698,6 +700,11 @@ public Node start() throws NodeValidationException {
         if (Assertions.ENABLED) {
             try {
                 assert injector.getInstance(MetaStateService.class).loadFullState().v1().isEmpty();
+                final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY,
+                    nodeEnvironment.nodeDataPaths());
+                assert nodeMetaData != null;
+                assert nodeMetaData.nodeVersion().equals(Version.CURRENT);
+                assert nodeMetaData.nodeId().equals(localNodeFactory.getNode().getId());
             } catch (IOException e) {
                 assert false : e;
             }
Original file line number	Diff line number	Diff line change
`@@ -72,7 +72,7 @@ public ElasticsearchNodeCommand(String description) {`
`72`	`72`	`}`
`73`	`73`
`74`	`74`	`public static PersistedClusterStateService createPersistedClusterStateService(Path[] dataPaths) throws IOException {`
`75`		`- final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths);`
	`75`	`+ final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(dataPaths);`
`76`	`76`	`if (nodeMetaData == null) {`
`77`	`77`	`throw new ElasticsearchException(NO_NODE_METADATA_FOUND_MSG);`
`78`	`78`	`}`