
Commit 793823e

Fix Index Deletion During Partial Snapshot Create
Index deletes during partial snapshots can be fixed by simply filtering out shard generation updates for indices that were concurrently removed from the cluster state: that completely removes any reference to those shards from the snapshot.

Follow-up to #50202
Closes #50200
1 parent e95ee81 commit 793823e

2 files changed: 30 additions & 8 deletions
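The fix centers on the buildGenerations change shown in the diff below: the name-to-IndexId lookup is built only for indices still present in the cluster state's MetaData, and shard entries without a lookup hit are skipped. Here is a minimal, self-contained sketch of that pattern, using plain JDK collections as stand-ins for Elasticsearch's SnapshotsInProgress, MetaData, and ShardGenerations types (all names below are illustrative, not the real API):

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Illustrative stand-in for a shard identifier; not an Elasticsearch type.
record ShardKey(String indexName, int shardId) {}

public class BuildGenerationsSketch {

    // Mirrors the shape of the fixed buildGenerations: only indices still present
    // in the cluster metadata enter the lookup, so shards of a concurrently
    // deleted index are dropped instead of being recorded against a null IndexId.
    static Map<ShardKey, String> buildGenerations(List<String> snapshotIndices,
                                                  Map<ShardKey, String> shardGenerations,
                                                  Set<String> indicesInClusterState,
                                                  boolean partial) {
        Map<String, String> indexLookup = new HashMap<>();
        for (String index : snapshotIndices) {
            if (indicesInClusterState.contains(index)) {
                indexLookup.put(index, index); // stand-in for name -> IndexId
            } else {
                // An index may only disappear mid-snapshot when the snapshot is partial.
                assert partial : "Index [" + index + "] was deleted during a non-partial snapshot";
            }
        }
        Map<ShardKey, String> result = new HashMap<>();
        shardGenerations.forEach((shard, generation) -> {
            if (indexLookup.containsKey(shard.indexName())) {
                result.put(shard, generation);
            }
        });
        return result;
    }

    public static void main(String[] args) {
        Map<ShardKey, String> shards = Map.of(
            new ShardKey("kept-index", 0), "gen-1",
            new ShardKey("deleted-index", 0), "gen-2");
        // "deleted-index" was removed from the cluster state while the snapshot ran:
        System.out.println(buildGenerations(
            List.of("kept-index", "deleted-index"), shards, Set.of("kept-index"), true));
        // prints only the kept-index shard's generation
    }
}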

server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

Lines changed: 18 additions & 6 deletions

@@ -568,16 +568,17 @@ public void onNoLongerMaster() {
         private void cleanupAfterError(Exception exception) {
             threadPool.generic().execute(() -> {
                 if (snapshotCreated) {
+                    final MetaData metaData = clusterService.state().metaData();
                     repositoriesService.repository(snapshot.snapshot().getRepository())
                         .finalizeSnapshot(snapshot.snapshot().getSnapshotId(),
-                            buildGenerations(snapshot),
+                            buildGenerations(snapshot, metaData),
                             snapshot.startTime(),
                             ExceptionsHelper.stackTrace(exception),
                             0,
                             Collections.emptyList(),
                             snapshot.repositoryStateId(),
                             snapshot.includeGlobalState(),
-                            metaDataForSnapshot(snapshot, clusterService.state().metaData()),
+                            metaDataForSnapshot(snapshot, metaData),
                             snapshot.userMetadata(),
                             snapshot.useShardGenerations(),
                             ActionListener.runAfter(ActionListener.wrap(ignored -> {
@@ -593,11 +594,22 @@ private void cleanupAfterError(Exception exception) {
             }
         }

-    private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snapshot) {
+    private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snapshot, MetaData metaData) {
         ShardGenerations.Builder builder = ShardGenerations.builder();
         final Map<String, IndexId> indexLookup = new HashMap<>();
-        snapshot.indices().forEach(idx -> indexLookup.put(idx.getName(), idx));
-        snapshot.shards().forEach(c -> builder.put(indexLookup.get(c.key.getIndexName()), c.key.id(), c.value.generation()));
+        snapshot.indices().forEach(idx -> {
+            if (metaData.index(idx.getName()) != null) {
+                indexLookup.put(idx.getName(), idx);
+            } else {
+                assert snapshot.partial() : "Index [" + idx + "] was deleted during a snapshot but snapshot was not partial.";
+            }
+        });
+        snapshot.shards().forEach(c -> {
+            final IndexId indexId = indexLookup.get(c.key.getIndexName());
+            if (indexId != null) {
+                builder.put(indexId, c.key.id(), c.value.generation());
+            }
+        });
         return builder.build();
     }

@@ -1034,7 +1046,7 @@ protected void doRun() {
                 }
                 repository.finalizeSnapshot(
                     snapshot.getSnapshotId(),
-                    buildGenerations(entry),
+                    buildGenerations(entry, metaData),
                     entry.startTime(),
                     failure,
                     entry.shards().size(),

server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java

Lines changed: 12 additions & 2 deletions

@@ -211,6 +211,7 @@
 import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.lessThanOrEqualTo;
 import static org.mockito.Mockito.mock;

@@ -514,11 +515,11 @@ public void testConcurrentSnapshotDeleteAndDeleteIndex() {
         testClusterNodes.currentMaster(testClusterNodes.nodes.values().iterator().next().clusterService.state());

         final StepListener<Collection<CreateIndexResponse>> createIndicesListener = new StepListener<>();
+        final int indices = randomIntBetween(5, 20);

         continueOrDie(createRepoAndIndex(repoName, index, 1), createIndexResponse -> {
             // create a few more indices to make it more likely that the subsequent index delete operation happens before snapshot
             // finalization
-            final int indices = randomIntBetween(5, 20);
             final GroupedActionListener<CreateIndexResponse> listener = new GroupedActionListener<>(createIndicesListener, indices);
             for (int i = 0; i < indices; ++i) {
                 client().admin().indices().create(new CreateIndexRequest("index-" + i), listener);
@@ -527,9 +528,11 @@ public void testConcurrentSnapshotDeleteAndDeleteIndex() {

         final StepListener<CreateSnapshotResponse> createSnapshotResponseStepListener = new StepListener<>();

+        final boolean partialSnapshot = randomBoolean();
+
         continueOrDie(createIndicesListener, createIndexResponses ->
             client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(false)
-                .execute(createSnapshotResponseStepListener));
+                .setPartial(partialSnapshot).execute(createSnapshotResponseStepListener));

         continueOrDie(createSnapshotResponseStepListener,
             createSnapshotResponse -> client().admin().indices().delete(new DeleteIndexRequest(index), noopListener()));
@@ -544,6 +547,13 @@ public void testConcurrentSnapshotDeleteAndDeleteIndex() {

         final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotIds.iterator().next());
         assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
+        if (partialSnapshot) {
+            // Single shard for each index so we either get all indices or all except for the deleted index
+            assertThat(snapshotInfo.successfulShards(), either(is(indices + 1)).or(is(indices)));
+        } else {
+            // Index delete must be blocked for non-partial snapshots and we get a snapshot for every index
+            assertEquals(snapshotInfo.successfulShards(), indices + 1);
+        }
         assertEquals(0, snapshotInfo.failedShards());
     }

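For readers checking the new assertions: each index in the test has exactly one shard, and the test creates one base index plus `indices` additional ones. A small illustrative calculation of the expected successfulShards() values follows; the names and the sample count are ours, not the test's:

import java.util.Set;

// Illustrative arithmetic behind the test's successfulShards() assertions.
public class ExpectedShardCounts {
    public static void main(String[] args) {
        int extraIndices = 7;               // stands in for randomIntBetween(5, 20)
        int totalShards = extraIndices + 1; // base index plus the extras, one shard each

        // Partial snapshot: the concurrently deleted base index may or may not
        // have been captured before the delete won the race.
        Set<Integer> allowedPartial = Set.of(totalShards, totalShards - 1);

        // Non-partial snapshot: the index delete is blocked until the snapshot
        // completes, so every shard must succeed.
        int expectedNonPartial = totalShards;

        System.out.println("partial: " + allowedPartial + ", non-partial: " + expectedNonPartial);
    }
}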