Commit 1359ef7

Add IT for Snapshot Issue in 47552 (#47627) (#47634)
* Add IT for Snapshot Issue in 47552 (#47627) Adds a dedicated integration test that reproduces the problem fixed in #47552. Otherwise, the issue only reproduces in the snapshot resiliency tests, which are not available in 6.8, to which the fix is also being backported.
1 parent 6bd0339 commit 1359ef7

File tree

1 file changed: +49 -0 lines changed


server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java

@@ -1236,6 +1236,55 @@ public void testDataNodeRestartWithBusyMasterDuringSnapshot() throws Exception {
         }, 60L, TimeUnit.SECONDS);
     }
 
+    public void testDataNodeRestartAfterShardSnapshotFailure() throws Exception {
+        logger.info("--> starting a master node and two data nodes");
+        internalCluster().startMasterOnlyNode();
+        final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
+        logger.info("--> creating repository");
+        assertAcked(client().admin().cluster().preparePutRepository("test-repo")
+            .setType("mock").setSettings(Settings.builder()
+                .put("location", randomRepoPath())
+                .put("compress", randomBoolean())
+                .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
+        assertAcked(prepareCreate("test-idx", 0, Settings.builder()
+            .put("number_of_shards", 2).put("number_of_replicas", 0)));
+        ensureGreen();
+        logger.info("--> indexing some data");
+        final int numdocs = randomIntBetween(50, 100);
+        IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
+        for (int i = 0; i < builders.length; i++) {
+            builders[i] = client().prepareIndex("test-idx", "type1",
+                Integer.toString(i)).setSource("field1", "bar " + i);
+        }
+        indexRandom(true, builders);
+        flushAndRefresh();
+        blockAllDataNodes("test-repo");
+        logger.info("--> snapshot");
+        client(internalCluster().getMasterName()).admin().cluster()
+            .prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
+        logger.info("--> restarting first data node, which should cause the primary shard on it to be failed");
+        internalCluster().restartNode(dataNodes.get(0), InternalTestCluster.EMPTY_CALLBACK);
+
+        logger.info("--> wait for shard snapshot of first primary to show as failed");
+        assertBusy(() -> assertThat(
+            client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get().getSnapshots()
+                .get(0).getShardsStats().getFailedShards(), is(1)), 60L, TimeUnit.SECONDS);
+
+        logger.info("--> restarting second data node, which should cause the primary shard on it to be failed");
+        internalCluster().restartNode(dataNodes.get(1), InternalTestCluster.EMPTY_CALLBACK);
+
+        // check that snapshot completes with both failed shards being accounted for in the snapshot result
+        assertBusy(() -> {
+            GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
+                .prepareGetSnapshots("test-repo").setSnapshots("test-snap").setIgnoreUnavailable(true).get();
+            assertEquals(1, snapshotsStatusResponse.getSnapshots().size());
+            SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
+            assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
+            assertThat(snapshotInfo.totalShards(), is(2));
+            assertThat(snapshotInfo.shardFailures(), hasSize(2));
+        }, 60L, TimeUnit.SECONDS);
+    }
+
     public void testRetentionLeasesClearedOnRestore() throws Exception {
         final String repoName = "test-repo-retention-leases";
         assertAcked(client().admin().cluster().preparePutRepository(repoName)
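
Note on the blocking mechanism used above: blockAllDataNodes comes from the snapshot test base class (AbstractSnapshotIntegTestCase, which this IT extends) and instructs the "mock" repository on every data node to block snapshot I/O, so shard snapshots stall instead of completing. Restarting a blocked data node then causes its in-progress shard snapshot to be marked failed, which is how the test reaches the state fixed in #47552 without the snapshot resiliency tests. A minimal sketch of that pattern, assuming the helper names from that base class (blockAllDataNodes / unblockAllDataNodes); treat it as an illustration, not the exact framework API:

    // Sketch only: blockAllDataNodes/unblockAllDataNodes are assumed to come from
    // AbstractSnapshotIntegTestCase, the base class of this integration test.
    blockAllDataNodes("test-repo");                  // mock repo stalls shard snapshot I/O on every data node
    client().admin().cluster()
        .prepareCreateSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(false)                 // return once the snapshot is initialized, not finished
        .get();
    // ... disrupt a data node here (e.g. restart it) so its shard snapshot fails ...
    unblockAllDataNodes("test-repo");                // let any surviving shard snapshots finish

The test itself never calls an unblock helper: restarting each data node discards its blocked snapshot threads, and the master marks those shard snapshots as failed, which is exactly the condition the final assertBusy checks for.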
