
Commit 413aad9

Add IT for Snapshot Issue in 47552 (#47627)
Adding a specific integration test that reproduces the problem fixed in #47552. Otherwise, the issue only reproduces in the snapshot resiliency tests, which are not available in 6.8, to which the fix is being backported as well.
1 parent 4f41853 commit 413aad9

File tree

1 file changed: +49 -0 lines changed

server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java

@@ -1234,6 +1234,55 @@ public void testDataNodeRestartWithBusyMasterDuringSnapshot() throws Exception {
         }, 60L, TimeUnit.SECONDS);
     }
 
+    public void testDataNodeRestartAfterShardSnapshotFailure() throws Exception {
+        logger.info("--> starting a master node and two data nodes");
+        internalCluster().startMasterOnlyNode();
+        final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
+        logger.info("--> creating repository");
+        assertAcked(client().admin().cluster().preparePutRepository("test-repo")
+            .setType("mock").setSettings(Settings.builder()
+                .put("location", randomRepoPath())
+                .put("compress", randomBoolean())
+                .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
+        assertAcked(prepareCreate("test-idx", 0, Settings.builder()
+            .put("number_of_shards", 2).put("number_of_replicas", 0)));
+        ensureGreen();
+        logger.info("--> indexing some data");
+        final int numdocs = randomIntBetween(50, 100);
+        IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
+        for (int i = 0; i < builders.length; i++) {
+            builders[i] = client().prepareIndex("test-idx", "type1",
+                Integer.toString(i)).setSource("field1", "bar " + i);
+        }
+        indexRandom(true, builders);
+        flushAndRefresh();
+        blockAllDataNodes("test-repo");
+        logger.info("--> snapshot");
+        client(internalCluster().getMasterName()).admin().cluster()
+            .prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
+        logger.info("--> restarting first data node, which should cause the primary shard on it to be failed");
+        internalCluster().restartNode(dataNodes.get(0), InternalTestCluster.EMPTY_CALLBACK);
+
+        logger.info("--> wait for shard snapshot of first primary to show as failed");
+        assertBusy(() -> assertThat(
+            client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get().getSnapshots()
+                .get(0).getShardsStats().getFailedShards(), is(1)), 60L, TimeUnit.SECONDS);
+
+        logger.info("--> restarting second data node, which should cause the primary shard on it to be failed");
+        internalCluster().restartNode(dataNodes.get(1), InternalTestCluster.EMPTY_CALLBACK);
+
+        // check that snapshot completes with both failed shards being accounted for in the snapshot result
+        assertBusy(() -> {
+            GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
+                .prepareGetSnapshots("test-repo").setSnapshots("test-snap").setIgnoreUnavailable(true).get();
+            assertEquals(1, snapshotsStatusResponse.getSnapshots("test-repo").size());
+            SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots("test-repo").get(0);
+            assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
+            assertThat(snapshotInfo.totalShards(), is(2));
+            assertThat(snapshotInfo.shardFailures(), hasSize(2));
+        }, 60L, TimeUnit.SECONDS);
+    }
+
     public void testRetentionLeasesClearedOnRestore() throws Exception {
         final String repoName = "test-repo-retention-leases";
         assertAcked(client().admin().cluster().preparePutRepository(repoName)
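
To run just this test locally, an invocation along the following lines should work from the repository root. This is a hedged sketch, not part of the commit: the :server:test task path and the tests.class/tests.method filter properties follow the Elasticsearch test tooling conventions of that era and may differ between branches.

    ./gradlew :server:test \
        -Dtests.class="org.elasticsearch.snapshots.DedicatedClusterSnapshotRestoreIT" \
        -Dtests.method="testDataNodeRestartAfterShardSnapshotFailure"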
