Skip to content

Commit 9100a70

Browse files
authored
Fix missing historyUUID in peer recovery when rolling upgrade 5.x to 6.3 (#31506)
Today we make sure that a 5.x index commit has all required commit tags in the RecoveryTarget#cleanFiles method. The reason we do this in RecoveryTarget#cleanFiles is that the check is only needed in a file-based recovery, and we assume that #cleanFiles is called only during a file-based recovery. However, this assumption is not valid if the index is sealed (i.e. synced-flushed). This incorrect assumption would prevent users from performing a rolling upgrade from 5.x to 6.3 if their index was sealed. Closes #31482
1 parent 65ce504 commit 9100a70

File tree

3 files changed

+45
-5
lines changed

3 files changed

+45
-5
lines changed

qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java

+16-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.http.entity.StringEntity;
2525
import org.apache.http.util.EntityUtils;
2626
import org.elasticsearch.Version;
27+
import org.elasticsearch.client.Request;
2728
import org.elasticsearch.client.Response;
2829
import org.elasticsearch.cluster.metadata.IndexMetaData;
2930
import org.elasticsearch.common.Booleans;
@@ -713,8 +714,21 @@ public void testRecovery() throws Exception {
713714

714715
// make sure all recoveries are done
715716
ensureGreen(index);
716-
// Explicitly flush so we're sure to have a bunch of documents in the Lucene index
717-
client().performRequest("POST", "/_flush");
717+
// Recovering a synced-flush index from 5.x to 6.x might be subtle as a 5.x index commit does not have all 6.x commit tags.
718+
if (randomBoolean()) {
719+
// We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
720+
// A synced-flush request considers the global checkpoint sync as an ongoing operation because it acquires a shard permit.
721+
assertBusy(() -> {
722+
Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
723+
assertOK(resp);
724+
Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
725+
assertThat(result.get("successful"), equalTo(result.get("total")));
726+
assertThat(result.get("failed"), equalTo(0));
727+
});
728+
} else {
729+
// Explicitly flush so we're sure to have a bunch of documents in the Lucene index
730+
assertOK(client().performRequest(new Request("POST", "/_flush")));
731+
}
718732
if (shouldHaveTranslog) {
719733
// Update a few documents so we are sure to have a translog
720734
indexRandomDocuments(count / 10, false /* Flushing here would invalidate the whole thing....*/, false,

qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java

+26
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.http.entity.StringEntity;
2323
import org.elasticsearch.Version;
2424
import org.elasticsearch.action.support.PlainActionFuture;
25+
import org.elasticsearch.client.Request;
2526
import org.elasticsearch.client.Response;
2627
import org.elasticsearch.cluster.metadata.IndexMetaData;
2728
import org.elasticsearch.common.settings.Settings;
@@ -285,4 +286,29 @@ public void testSearchGeoPoints() throws Exception {
285286
}
286287
}
287288

289+
public void testRecoverSyncedFlushIndex() throws Exception {
290+
final String index = "recover_synced_flush_index";
291+
if (CLUSTER_TYPE == ClusterType.OLD) {
292+
Settings.Builder settings = Settings.builder()
293+
.put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
294+
.put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)
295+
// if the node with the replica is the first to be restarted, while a replica is still recovering
296+
// then delayed allocation will kick in. When the node comes back, the master will search for a copy
297+
// but the recovering copy will be seen as invalid and the cluster health won't return to GREEN
298+
// before timing out
299+
.put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms")
300+
.put(SETTING_ALLOCATION_MAX_RETRY.getKey(), "0"); // fail faster
301+
createIndex(index, settings.build());
302+
indexDocs(index, 0, randomInt(5));
303+
// We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
304+
// A synced-flush request considers the global checkpoint sync as an ongoing operation because it acquires a shard permit.
305+
assertBusy(() -> {
306+
Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
307+
assertOK(resp);
308+
Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
309+
assertThat(result.get("successful"), equalTo(2));
310+
});
311+
}
312+
ensureGreen(index);
313+
}
288314
}

server/src/main/java/org/elasticsearch/indices/recovery/RecoveryTarget.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,9 @@ private void ensureRefCount() {
362362

363363
@Override
364364
public void prepareForTranslogOperations(boolean fileBasedRecovery, int totalTranslogOps) throws IOException {
365+
if (fileBasedRecovery && indexShard.indexSettings().getIndexVersionCreated().before(Version.V_6_0_0)) {
366+
store.ensureIndexHas6xCommitTags();
367+
}
365368
state().getTranslog().totalOperations(totalTranslogOps);
366369
indexShard().openEngineAndSkipTranslogRecovery();
367370
}
@@ -438,9 +441,6 @@ public void cleanFiles(int totalTranslogOps, Store.MetadataSnapshot sourceMetaDa
438441
store.incRef();
439442
try {
440443
store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetaData);
441-
if (indexShard.indexSettings().getIndexVersionCreated().before(Version.V_6_0_0_rc1)) {
442-
store.ensureIndexHas6xCommitTags();
443-
}
444444
// TODO: Assign the global checkpoint to the max_seqno of the safe commit if the index version >= 6.2
445445
final String translogUUID = Translog.createEmptyTranslog(
446446
indexShard.shardPath().resolveTranslog(), SequenceNumbers.UNASSIGNED_SEQ_NO, shardId, indexShard.getPrimaryTerm());

0 commit comments

Comments
 (0)