Commit e5f3a62

Fix missing historyUUID in peer recovery during rolling upgrades from 5.x to 6.3 (#31506)
Today we make sure that a 5.x index commit has all required commit tags in the RecoveryTarget#cleanFiles method. We do the check in RecoveryTarget#cleanFiles because the tags are only needed in a file-based recovery, and we assume that #cleanFiles is always called during a file-based recovery. However, this assumption does not hold if the index is sealed (i.e. synced-flushed). That incorrect assumption would prevent users from performing a rolling upgrade from 5.x to 6.3 if their index was sealed.

Closes #31482
1 parent dae520f commit e5f3a62
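For context, an index is sealed through the synced-flush API that the new tests below exercise. The following is a minimal sketch, not part of this commit, of issuing a synced flush with the Elasticsearch low-level REST client and inspecting the per-shard result; the host, port, and index name are illustrative assumptions.

import org.apache.http.HttpHost;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

public class SyncedFlushExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical host/port and index name, for illustration only.
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            // Seal the index: a successful synced flush records a sync id in every active shard copy.
            Response resp = client.performRequest("POST", "/recover_synced_flush_index/_flush/synced");
            // The response body contains a "_shards" object with "total", "successful" and "failed"
            // counts, which the tests in this commit assert on.
            System.out.println(resp.getStatusLine());
        }
    }
}

The tests spin this request inside assertBusy because an in-flight global checkpoint sync holds a shard permit and can make an individual synced-flush attempt report a failed copy.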

File tree: 3 files changed (+43, −5 lines)

qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java

+15 −2
@@ -713,8 +713,21 @@ public void testRecovery() throws Exception {
 
         // make sure all recoveries are done
         ensureGreen(index);
-        // Explicitly flush so we're sure to have a bunch of documents in the Lucene index
-        client().performRequest("POST", "/_flush");
+        // Recovering a synced-flush index from 5.x to 6.x might be subtle as a 5.x index commit does not have all 6.x commit tags.
+        if (randomBoolean()) {
+            // We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
+            // A synced-flush request considers the global checkpoint sync as an ongoing operation because it acquires a shard permit.
+            assertBusy(() -> {
+                Response resp = client().performRequest("POST", index + "/_flush/synced");
+                assertOK(resp);
+                Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
+                assertThat(result.get("successful"), equalTo(result.get("total")));
+                assertThat(result.get("failed"), equalTo(0));
+            });
+        } else {
+            // Explicitly flush so we're sure to have a bunch of documents in the Lucene index
+            assertOK(client().performRequest("POST", "/_flush"));
+        }
         if (shouldHaveTranslog) {
             // Update a few documents so we are sure to have a translog
             indexRandomDocuments(count / 10, false /* Flushing here would invalidate the whole thing....*/, false,

qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java

+25
@@ -283,4 +283,29 @@ public void testSearchGeoPoints() throws Exception {
         }
     }
 
+    public void testRecoverSyncedFlushIndex() throws Exception {
+        final String index = "recover_synced_flush_index";
+        if (CLUSTER_TYPE == ClusterType.OLD) {
+            Settings.Builder settings = Settings.builder()
+                .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
+                .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)
+                // if the node with the replica is the first to be restarted, while a replica is still recovering
+                // then delayed allocation will kick in. When the node comes back, the master will search for a copy
+                // but the recovering copy will be seen as invalid and the cluster health won't return to GREEN
+                // before timing out
+                .put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms")
+                .put(SETTING_ALLOCATION_MAX_RETRY.getKey(), "0"); // fail faster
+            createIndex(index, settings.build());
+            indexDocs(index, 0, randomInt(5));
+            // We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
+            // A synced-flush request considers the global checkpoint sync as an ongoing operation because it acquires a shard permit.
+            assertBusy(() -> {
+                Response resp = client().performRequest("POST", index + "/_flush/synced");
+                assertOK(resp);
+                Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
+                assertThat(result.get("successful"), equalTo(2));
+            });
+        }
+        ensureGreen(index);
+    }
 }

server/src/main/java/org/elasticsearch/indices/recovery/RecoveryTarget.java

+3 −3
@@ -362,6 +362,9 @@ private void ensureRefCount() {
 
     @Override
     public void prepareForTranslogOperations(boolean fileBasedRecovery, int totalTranslogOps) throws IOException {
+        if (fileBasedRecovery && indexShard.indexSettings().getIndexVersionCreated().before(Version.V_6_0_0)) {
+            store.ensureIndexHas6xCommitTags();
+        }
         state().getTranslog().totalOperations(totalTranslogOps);
         indexShard().openEngineAndSkipTranslogRecovery();
     }
@@ -438,9 +441,6 @@ public void cleanFiles(int totalTranslogOps, Store.MetadataSnapshot sourceMetaData
         store.incRef();
         try {
             store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetaData);
-            if (indexShard.indexSettings().getIndexVersionCreated().before(Version.V_6_0_0_rc1)) {
-                store.ensureIndexHas6xCommitTags();
-            }
             // TODO: Assign the global checkpoint to the max_seqno of the safe commit if the index version >= 6.2
             final String translogUUID = Translog.createEmptyTranslog(
                 indexShard.shardPath().resolveTranslog(), SequenceNumbers.UNASSIGNED_SEQ_NO, shardId, indexShard.getPrimaryTerm());
