
Commit d219dfa

CR: handle index recreate and stronger tests
1 parent: 8a80b78

3 files changed: +45 -12 lines


server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java

Lines changed: 7 additions & 0 deletions
@@ -54,6 +54,13 @@ private ShardGenerations(Map<IndexId, List<String>> shardGenerations) {
         this.shardGenerations = shardGenerations;
     }
 
+    /**
+     * Returns the total number of shards tracked by this instance.
+     */
+    public int totalShards() {
+        return shardGenerations.values().stream().mapToInt(List::size).sum();
+    }
+
     /**
      * Returns all indices for which shard generations are tracked.
      *
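Note on the new accessor: each IndexId maps to a list holding one generation entry per shard, so totalShards() is simply the sum of the list sizes. A minimal usage sketch (the index names, UUIDs, and generation strings below are invented, and it assumes the builder's build() method):

    IndexId idxA = new IndexId("index-a", "uuid-a");
    IndexId idxB = new IndexId("index-b", "uuid-b");
    ShardGenerations generations = ShardGenerations.builder()
        .put(idxA, 0, "gen-a0") // shard 0 of index-a
        .put(idxA, 1, "gen-a1") // shard 1 of index-a
        .put(idxB, 0, "gen-b0") // shard 0 of index-b
        .build();
    // Two entries for index-a plus one for index-b:
    assert generations.totalShards() == 3;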

server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

Lines changed: 9 additions & 9 deletions
@@ -597,14 +597,13 @@ private void cleanupAfterError(Exception exception) {
     private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snapshot, MetaData metaData) {
         ShardGenerations.Builder builder = ShardGenerations.builder();
         final Map<String, IndexId> indexLookup = new HashMap<>();
-        snapshot.indices().forEach(idx -> {
-            if (metaData.index(idx.getName()) != null) {
-                indexLookup.put(idx.getName(), idx);
-            } else {
-                assert snapshot.partial() : "Index [" + idx + "] was deleted during a snapshot but snapshot was not partial.";
-            }
-        });
+        snapshot.indices().forEach(idx -> indexLookup.put(idx.getName(), idx));
         snapshot.shards().forEach(c -> {
+            if (metaData.index(c.key.getIndex()) == null) {
+                assert snapshot.partial() :
+                    "Index [" + c.key.getIndex() + "] was deleted during a snapshot but snapshot was not partial.";
+                return;
+            }
             final IndexId indexId = indexLookup.get(c.key.getIndexName());
             if (indexId != null) {
                 builder.put(indexId, c.key.id(), c.value.generation());
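Note on this hunk: the deleted-index check moves from the index-name loop into the per-shard loop and now resolves metadata via c.key.getIndex(), i.e. the concrete Index carrying both name and UUID. That is what makes a delete-then-recreate visible: a lookup by name alone would still find the recreated index. A rough sketch of the distinction, with invented names and UUIDs (metaData stands for cluster metadata that only contains the recreated index):

    Index original = new Index("test-index", "uuid-1");  // existed when the snapshot started
    Index recreated = new Index("test-index", "uuid-2"); // deleted and recreated mid-snapshot
    // A lookup by name alone still succeeds after the recreate ...
    assert metaData.index("test-index") != null;
    // ... but the Index-based lookup also compares UUIDs, so the original
    // index no longer resolves and its stale shard entries are skipped.
    assert metaData.index(original) == null;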
@@ -1044,12 +1043,13 @@ protected void doRun() {
                         shardFailures.add(new SnapshotShardFailure(status.nodeId(), shardId, status.reason()));
                     }
                 }
+                final ShardGenerations shardGenerations = buildGenerations(entry, metaData);
                 repository.finalizeSnapshot(
                     snapshot.getSnapshotId(),
-                    buildGenerations(entry, metaData),
+                    shardGenerations,
                     entry.startTime(),
                     failure,
-                    entry.shards().size(),
+                    entry.partial() ? shardGenerations.totalShards() : entry.shards().size(),
                     unmodifiableList(shardFailures),
                     entry.repositoryStateId(),
                     entry.includeGlobalState(),
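Note on the shard count: for a partial snapshot, the finalized total now comes from the generations that were actually built, which exclude shards of indices deleted mid-snapshot, rather than from the in-progress entry. A worked example with invented numbers:

    int inProgressShards = 6; // entry.shards().size(): all shards when the snapshot started
    int trackedShards = 4;    // shardGenerations.totalShards(): a 2-shard index was deleted
    boolean partial = true;   // entry.partial()
    int totalShards = partial ? trackedShards : inProgressShards;
    // The partial snapshot records 4 total shards, consistent with the
    // shard generations it actually wrote, instead of the original 6.
    assert totalShards == 4;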

server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java

Lines changed: 29 additions & 3 deletions
@@ -21,6 +21,7 @@
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.util.SetOnce;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionListener;
@@ -143,6 +144,7 @@
 import org.elasticsearch.env.TestEnvironment;
 import org.elasticsearch.gateway.MetaStateService;
 import org.elasticsearch.gateway.TransportNodesListGatewayStartedShards;
+import org.elasticsearch.index.Index;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
 import org.elasticsearch.index.seqno.GlobalCheckpointSyncAction;
 import org.elasticsearch.index.seqno.RetentionLeaseSyncer;
@@ -504,7 +506,7 @@ public void testConcurrentSnapshotCreateAndDeleteOther() {
         }
     }
 
-    public void testConcurrentSnapshotDeleteAndDeleteIndex() {
+    public void testConcurrentSnapshotDeleteAndDeleteIndex() throws IOException {
         setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10));
 
         String repoName = "repo";
@@ -517,7 +519,9 @@ public void testConcurrentSnapshotDeleteAndDeleteIndex() {
         final StepListener<Collection<CreateIndexResponse>> createIndicesListener = new StepListener<>();
         final int indices = randomIntBetween(5, 20);
 
+        final SetOnce<Index> firstIndex = new SetOnce<>();
         continueOrDie(createRepoAndIndex(repoName, index, 1), createIndexResponse -> {
+            firstIndex.set(masterNode.clusterService.state().metaData().index(index).getIndex());
             // create a few more indices to make it more likely that the subsequent index delete operation happens before snapshot
             // finalization
             final GroupedActionListener<CreateIndexResponse> listener = new GroupedActionListener<>(createIndicesListener, indices);
@@ -535,21 +539,43 @@ public void testConcurrentSnapshotDeleteAndDeleteIndex() {
             .setPartial(partialSnapshot).execute(createSnapshotResponseStepListener));
 
         continueOrDie(createSnapshotResponseStepListener,
-            createSnapshotResponse -> client().admin().indices().delete(new DeleteIndexRequest(index), noopListener()));
+            createSnapshotResponse -> client().admin().indices().delete(new DeleteIndexRequest(index), new ActionListener<>() {
+                @Override
+                public void onResponse(AcknowledgedResponse acknowledgedResponse) {
+                    if (partialSnapshot) {
+                        // Recreate index by the same name to test that we don't snapshot conflicting metadata in this scenario
+                        client().admin().indices().create(new CreateIndexRequest(index), noopListener());
+                    }
+                }
+
+                @Override
+                public void onFailure(Exception e) {
+                    if (partialSnapshot) {
+                        throw new AssertionError("Delete index should always work during partial snapshots", e);
+                    }
+                }
+            }));
 
         deterministicTaskQueue.runAllRunnableTasks();
 
         SnapshotsInProgress finalSnapshotsInProgress = masterNode.clusterService.state().custom(SnapshotsInProgress.TYPE);
         assertFalse(finalSnapshotsInProgress.entries().stream().anyMatch(entry -> entry.state().completed() == false));
         final Repository repository = masterNode.repositoriesService.repository(repoName);
-        Collection<SnapshotId> snapshotIds = getRepositoryData(repository).getSnapshotIds();
+        final RepositoryData repositoryData = getRepositoryData(repository);
+        Collection<SnapshotId> snapshotIds = repositoryData.getSnapshotIds();
         assertThat(snapshotIds, hasSize(1));
 
         final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotIds.iterator().next());
         assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
         if (partialSnapshot) {
             // Single shard for each index so we either get all indices or all except for the deleted index
             assertThat(snapshotInfo.successfulShards(), either(is(indices + 1)).or(is(indices)));
+            if (snapshotInfo.successfulShards() == indices + 1) {
+                final IndexMetaData indexMetaData =
+                    repository.getSnapshotIndexMetaData(snapshotInfo.snapshotId(), repositoryData.resolveIndexId(index));
+                // Make sure we snapshotted the metadata of this index and not the recreated version
+                assertEquals(indexMetaData.getIndex(), firstIndex.get());
+            }
         } else {
             // Index delete must be blocked for non-partial snapshots and we get a snapshot for every index
             assertEquals(snapshotInfo.successfulShards(), indices + 1);
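Note on the final assertion: it works because Index equality covers the UUID as well as the name, so index metadata captured from the recreated index would not match the Index stored in firstIndex before the delete. A minimal sketch (UUIDs invented):

    Index first = new Index("test", "uuid-before-delete");
    Index recreated = new Index("test", "uuid-after-recreate");
    // Same name, different UUID: Index.equals() returns false, so the test
    // fails if the snapshot picked up the recreated index's metadata.
    assert first.equals(recreated) == false;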
