
Commit 44e1c8e

dnhatn authored and colings86 committed
TEST: Add bwc recovery tests with synced-flush index
Although the master branch is not affected by #31482, it's helpful to have BWC tests that verify peer recovery with a synced-flush index. This commit adds the BWC tests from #31506 to the master branch.

Relates #31482
Relates #31506
1 parent efffa02 · commit 44e1c8e

File tree

2 files changed (+52 -2 lines)


qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java (+20 -2)

@@ -24,7 +24,9 @@
 import org.apache.http.entity.StringEntity;
 import org.apache.http.util.EntityUtils;
 import org.elasticsearch.Version;
+import org.elasticsearch.client.Request;
 import org.elasticsearch.client.Response;
+import org.elasticsearch.client.ResponseException;
 import org.elasticsearch.client.RestClient;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.Booleans;
@@ -701,8 +703,24 @@ public void testRecovery() throws Exception {
 
         // make sure all recoveries are done
         ensureGreen(index);
-        // Explicitly flush so we're sure to have a bunch of documents in the Lucene index
-        client().performRequest("POST", "/_flush");
+        // Recovering a synced-flush index from 5.x to 6.x might be subtle as a 5.x index commit does not have all 6.x commit tags.
+        if (randomBoolean()) {
+            // We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
+            // A synced-flush request considers the global checkpoint sync as an ongoing operation because it acquires a shard permit.
+            assertBusy(() -> {
+                try {
+                    Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
+                    Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
+                    assertThat(result.get("successful"), equalTo(result.get("total")));
+                    assertThat(result.get("failed"), equalTo(0));
+                } catch (ResponseException ex) {
+                    throw new AssertionError(ex); // cause assert busy to retry
+                }
+            });
+        } else {
+            // Explicitly flush so we're sure to have a bunch of documents in the Lucene index
+            assertOK(client().performRequest(new Request("POST", "/_flush")));
+        }
         if (shouldHaveTranslog) {
             // Update a few documents so we are sure to have a translog
             indexRandomDocuments(count / 10, false /* Flushing here would invalidate the whole thing....*/, false,
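The spinning above deserves a note: per the comments in the diff, the last write operation fires a global checkpoint sync that acquires a shard permit, so a synced flush issued immediately afterwards can transiently report a failed shard. ESTestCase.assertBusy re-runs the supplied check until it stops throwing AssertionError or a timeout elapses, which is why the catch block wraps the ResponseException in an AssertionError. The following self-contained sketch only illustrates that retry pattern; the class name, helper, and timing values are assumptions for illustration, not the test framework's actual implementation.

// A minimal sketch of the "spin until the check passes" behavior the test relies on.
// Names and timings here are hypothetical; ESTestCase.assertBusy is the real helper.
import java.util.concurrent.TimeUnit;

public final class SpinUntilGreen {

    // Re-run the check until it passes or the timeout expires; rethrow the last failure.
    static void spinUntil(Runnable check, long timeout, TimeUnit unit) throws InterruptedException {
        long deadline = System.nanoTime() + unit.toNanos(timeout);
        AssertionError lastFailure = null;
        do {
            try {
                check.run();
                return; // the check passed, stop spinning
            } catch (AssertionError e) {
                lastFailure = e; // remember the failure and retry after a short pause
            }
            Thread.sleep(100);
        } while (System.nanoTime() < deadline);
        throw lastFailure;
    }

    public static void main(String[] args) throws InterruptedException {
        long start = System.nanoTime();
        // Stand-in for the synced-flush check: it "fails" for the first second and then passes,
        // much like a synced flush can fail while the global checkpoint sync still holds a permit.
        spinUntil(() -> {
            if (System.nanoTime() - start < TimeUnit.SECONDS.toNanos(1)) {
                throw new AssertionError("synced flush reported pending operations, retrying");
            }
        }, 10, TimeUnit.SECONDS);
        System.out.println("synced flush succeeded on all shards");
    }
}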

qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java (+32)

@@ -22,7 +22,9 @@
 import org.apache.http.entity.StringEntity;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.support.PlainActionFuture;
+import org.elasticsearch.client.Request;
 import org.elasticsearch.client.Response;
+import org.elasticsearch.client.ResponseException;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.AbstractRunnable;
@@ -239,4 +241,34 @@ public void testRelocationWithConcurrentIndexing() throws Exception {
         }
     }
 
+    public void testRecoverSyncedFlushIndex() throws Exception {
+        final String index = "recover_synced_flush_index";
+        if (CLUSTER_TYPE == ClusterType.OLD) {
+            Settings.Builder settings = Settings.builder()
+                .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
+                .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)
+                // if the node with the replica is the first to be restarted, while a replica is still recovering
+                // then delayed allocation will kick in. When the node comes back, the master will search for a copy
+                // but the recovering copy will be seen as invalid and the cluster health won't return to GREEN
+                // before timing out
+                .put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms")
+                .put(SETTING_ALLOCATION_MAX_RETRY.getKey(), "0"); // fail faster
+            createIndex(index, settings.build());
+            indexDocs(index, 0, randomInt(5));
+            // We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
+            // A synced-flush request considers the global checkpoint sync as an ongoing operation because it acquires a shard permit.
+            assertBusy(() -> {
+                try {
+                    Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
+                    Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
+                    assertThat(result.get("successful"), equalTo(result.get("total")));
+                    assertThat(result.get("failed"), equalTo(0));
+                } catch (ResponseException ex) {
+                    throw new AssertionError(ex); // cause assert busy to retry
+                }
+            });
+        }
+        ensureGreen(index);
+    }
+
 }
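Note that the synced flush only runs on the old cluster (CLUSTER_TYPE == ClusterType.OLD), while ensureGreen(index) runs in every upgrade phase, so the peer recovery of the synced-flush index is verified again after each node is upgraded. For readers who want to poke at the same endpoint by hand, the sketch below issues the synced flush with the low-level REST client and inspects the response body. The host, port, and the string-based success check are assumptions for illustration; the test itself reads _shards.total, _shards.successful, and _shards.failed through the test framework's ObjectPath helper.

// A hedged, standalone sketch (not part of this commit) of the same synced-flush check
// using only the low-level REST client. Host, port, and the crude string check are
// illustrative assumptions.
import org.apache.http.HttpHost;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

public final class SyncedFlushCheck {
    public static void main(String[] args) throws Exception {
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            // POST <index>/_flush/synced returns a _shards section with total/successful/failed counts.
            Response resp = client.performRequest(new Request("POST", "recover_synced_flush_index/_flush/synced"));
            String body = EntityUtils.toString(resp.getEntity());
            System.out.println(body);
            // The test treats the flush as complete once successful == total and failed == 0;
            // this string match is only a crude stand-in for that assertion.
            if (!body.contains("\"failed\":0")) {
                throw new AssertionError("synced flush did not succeed on all shards: " + body);
            }
        }
    }
}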
