Skip to content

Commit afcdc27

Browse files
Fix Index Deletion during Snapshot Finalization (#50202) (#50227)
With #45689 making it so that index metadata is written after all shards have been snapshotted we can't delete indices that are part of the upcoming snapshot finalization any longer and it is not sufficient to check if all shards of an index have been snapshotted before deciding that it is safe to delete it. This change forbids deleting any index that is in the process of being snapshot to avoid issues during snapshot finalization. Relates #50200 (doesn't fully fix yet because we're not fixing the `partial=true` snapshot case here
1 parent 4ced237 commit afcdc27

File tree

2 files changed

+49
-15
lines changed

2 files changed

+49
-15
lines changed

server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

+4-15
Original file line numberDiff line numberDiff line change
@@ -1535,21 +1535,10 @@ public static Set<Index> snapshottingIndices(final ClusterState currentState, fi
15351535
final Set<Index> indices = new HashSet<>();
15361536
for (final SnapshotsInProgress.Entry entry : snapshots.entries()) {
15371537
if (entry.partial() == false) {
1538-
if (entry.state() == State.INIT) {
1539-
for (IndexId index : entry.indices()) {
1540-
IndexMetaData indexMetaData = currentState.metaData().index(index.getName());
1541-
if (indexMetaData != null && indicesToCheck.contains(indexMetaData.getIndex())) {
1542-
indices.add(indexMetaData.getIndex());
1543-
}
1544-
}
1545-
} else {
1546-
for (ObjectObjectCursor<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shard : entry.shards()) {
1547-
Index index = shard.key.getIndex();
1548-
if (indicesToCheck.contains(index)
1549-
&& shard.value.state().completed() == false
1550-
&& currentState.getMetaData().index(index) != null) {
1551-
indices.add(index);
1552-
}
1538+
for (IndexId index : entry.indices()) {
1539+
IndexMetaData indexMetaData = currentState.metaData().index(index.getName());
1540+
if (indexMetaData != null && indicesToCheck.contains(indexMetaData.getIndex())) {
1541+
indices.add(indexMetaData.getIndex());
15531542
}
15541543
}
15551544
}

server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java

+45
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
import org.elasticsearch.action.support.ActiveShardCount;
8181
import org.elasticsearch.action.support.AutoCreateIndex;
8282
import org.elasticsearch.action.support.DestructiveOperations;
83+
import org.elasticsearch.action.support.GroupedActionListener;
8384
import org.elasticsearch.action.support.PlainActionFuture;
8485
import org.elasticsearch.action.support.TransportAction;
8586
import org.elasticsearch.action.support.WriteRequest;
@@ -504,6 +505,50 @@ public void testConcurrentSnapshotCreateAndDeleteOther() {
504505
}
505506
}
506507

508+
public void testConcurrentSnapshotDeleteAndDeleteIndex() {
509+
setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10));
510+
511+
String repoName = "repo";
512+
String snapshotName = "snapshot";
513+
final String index = "test";
514+
515+
TestClusterNodes.TestClusterNode masterNode =
516+
testClusterNodes.currentMaster(testClusterNodes.nodes.values().iterator().next().clusterService.state());
517+
518+
final StepListener<Collection<CreateIndexResponse>> createIndicesListener = new StepListener<>();
519+
520+
continueOrDie(createRepoAndIndex(repoName, index, 1), createIndexResponse -> {
521+
// create a few more indices to make it more likely that the subsequent index delete operation happens before snapshot
522+
// finalization
523+
final int indices = randomIntBetween(5, 20);
524+
final GroupedActionListener<CreateIndexResponse> listener = new GroupedActionListener<>(createIndicesListener, indices);
525+
for (int i = 0; i < indices; ++i) {
526+
client().admin().indices().create(new CreateIndexRequest("index-" + i), listener);
527+
}
528+
});
529+
530+
final StepListener<CreateSnapshotResponse> createSnapshotResponseStepListener = new StepListener<>();
531+
532+
continueOrDie(createIndicesListener, createIndexResponses ->
533+
client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(false)
534+
.execute(createSnapshotResponseStepListener));
535+
536+
continueOrDie(createSnapshotResponseStepListener,
537+
createSnapshotResponse -> client().admin().indices().delete(new DeleteIndexRequest(index), noopListener()));
538+
539+
deterministicTaskQueue.runAllRunnableTasks();
540+
541+
SnapshotsInProgress finalSnapshotsInProgress = masterNode.clusterService.state().custom(SnapshotsInProgress.TYPE);
542+
assertFalse(finalSnapshotsInProgress.entries().stream().anyMatch(entry -> entry.state().completed() == false));
543+
final Repository repository = masterNode.repositoriesService.repository(repoName);
544+
Collection<SnapshotId> snapshotIds = getRepositoryData(repository).getSnapshotIds();
545+
assertThat(snapshotIds, hasSize(1));
546+
547+
final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotIds.iterator().next());
548+
assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
549+
assertEquals(0, snapshotInfo.failedShards());
550+
}
551+
507552
/**
508553
* Simulates concurrent restarts of data and master nodes as well as relocating a primary shard, while starting and subsequently
509554
* deleting a snapshot.

0 commit comments

Comments
 (0)