Skip to content

Commit 9ae57de

Browse files
Track Repository Gen. in BlobStoreRepository (#48944) (#49119)
This is intended as a stop-gap solution/improvement to #38941 that prevents repo modifications without an intermittent master failover from causing inconsistent (outdated due to inconsistent listing of index-N blobs) `RepositoryData` to be written. Tracking the latest repository generation will move to the cluster state in a separate pull request. This is intended as a low-risk change to be backported as far as possible and motived by the recently increased chance of #38941 causing trouble via SLM (see #47520). Closes #47834 Closes #49048
1 parent fa8cc46 commit 9ae57de

File tree

4 files changed

+113
-31
lines changed

4 files changed

+113
-31
lines changed

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

+73-13
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
import java.util.Set;
106106
import java.util.concurrent.Executor;
107107
import java.util.concurrent.atomic.AtomicBoolean;
108+
import java.util.concurrent.atomic.AtomicLong;
108109
import java.util.stream.Collectors;
109110
import java.util.stream.Stream;
110111

@@ -400,7 +401,7 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, Action
400401
} else {
401402
try {
402403
final Map<String, BlobMetaData> rootBlobs = blobContainer().listBlobs();
403-
final RepositoryData repositoryData = getRepositoryData(latestGeneration(rootBlobs.keySet()));
404+
final RepositoryData repositoryData = safeRepositoryData(repositoryStateId, rootBlobs);
404405
// Cache the indices that were found before writing out the new index-N blob so that a stuck master will never
405406
// delete an index that was created by another master node after writing this index-N blob.
406407
final Map<String, BlobContainer> foundIndices = blobStore().blobContainer(indicesPath()).children();
@@ -411,6 +412,30 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, Action
411412
}
412413
}
413414

415+
/**
416+
* Loads {@link RepositoryData} ensuring that it is consistent with the given {@code rootBlobs} as well of the assumed generation.
417+
*
418+
* @param repositoryStateId Expected repository generation
419+
* @param rootBlobs Blobs at the repository root
420+
* @return RepositoryData
421+
*/
422+
private RepositoryData safeRepositoryData(long repositoryStateId, Map<String, BlobMetaData> rootBlobs) {
423+
final long generation = latestGeneration(rootBlobs.keySet());
424+
final long genToLoad = latestKnownRepoGen.updateAndGet(known -> Math.max(known, repositoryStateId));
425+
if (genToLoad > generation) {
426+
// It's always a possibility to not see the latest index-N in the listing here on an eventually consistent blob store, just
427+
// debug log it. Any blobs leaked as a result of an inconsistent listing here will be cleaned up in a subsequent cleanup or
428+
// snapshot delete run anyway.
429+
logger.debug("Determined repository's generation from its contents to [" + generation + "] but " +
430+
"current generation is at least [" + genToLoad + "]");
431+
}
432+
if (genToLoad != repositoryStateId) {
433+
throw new RepositoryException(metadata.name(), "concurrent modification of the index-N file, expected current generation [" +
434+
repositoryStateId + "], actual current generation [" + genToLoad + "]");
435+
}
436+
return getRepositoryData(genToLoad);
437+
}
438+
414439
/**
415440
* After updating the {@link RepositoryData} each of the shards directories is individually first moved to the next shard generation
416441
* and then has all now unreferenced blobs in it deleted.
@@ -514,14 +539,8 @@ public void cleanup(long repositoryStateId, ActionListener<RepositoryCleanupResu
514539
if (isReadOnly()) {
515540
throw new RepositoryException(metadata.name(), "cannot run cleanup on readonly repository");
516541
}
517-
final RepositoryData repositoryData = getRepositoryData();
518-
if (repositoryData.getGenId() != repositoryStateId) {
519-
// Check that we are working on the expected repository version before gathering the data to clean up
520-
throw new RepositoryException(metadata.name(), "concurrent modification of the repository before cleanup started, " +
521-
"expected current generation [" + repositoryStateId + "], actual current generation ["
522-
+ repositoryData.getGenId() + "]");
523-
}
524542
Map<String, BlobMetaData> rootBlobs = blobContainer().listBlobs();
543+
final RepositoryData repositoryData = safeRepositoryData(repositoryStateId, rootBlobs);
525544
final Map<String, BlobContainer> foundIndices = blobStore().blobContainer(indicesPath()).children();
526545
final Set<String> survivingIndexIds =
527546
repositoryData.getIndices().values().stream().map(IndexId::getId).collect(Collectors.toSet());
@@ -845,12 +864,36 @@ public void endVerification(String seed) {
845864
}
846865
}
847866

867+
// Tracks the latest known repository generation in a best-effort way to detect inconsistent listing of root level index-N blobs
868+
// and concurrent modifications.
869+
// Protected for use in MockEventuallyConsistentRepository
870+
protected final AtomicLong latestKnownRepoGen = new AtomicLong(RepositoryData.EMPTY_REPO_GEN);
871+
848872
@Override
849873
public RepositoryData getRepositoryData() {
850-
try {
851-
return getRepositoryData(latestIndexBlobId());
852-
} catch (IOException ioe) {
853-
throw new RepositoryException(metadata.name(), "Could not determine repository generation from root blobs", ioe);
874+
// Retry loading RepositoryData in a loop in case we run into concurrent modifications of the repository.
875+
while (true) {
876+
final long generation;
877+
try {
878+
generation = latestIndexBlobId();
879+
} catch (IOException ioe) {
880+
throw new RepositoryException(metadata.name(), "Could not determine repository generation from root blobs", ioe);
881+
}
882+
final long genToLoad = latestKnownRepoGen.updateAndGet(known -> Math.max(known, generation));
883+
if (genToLoad > generation) {
884+
logger.info("Determined repository generation [" + generation
885+
+ "] from repository contents but correct generation must be at least [" + genToLoad + "]");
886+
}
887+
try {
888+
return getRepositoryData(genToLoad);
889+
} catch (RepositoryException e) {
890+
if (genToLoad != latestKnownRepoGen.get()) {
891+
logger.warn("Failed to load repository data generation [" + genToLoad +
892+
"] because a concurrent operation moved the current generation to [" + latestKnownRepoGen.get() + "]", e);
893+
continue;
894+
}
895+
throw e;
896+
}
854897
}
855898
}
856899

@@ -868,6 +911,12 @@ private RepositoryData getRepositoryData(long indexGen) {
868911
return RepositoryData.snapshotsFromXContent(parser, indexGen);
869912
}
870913
} catch (IOException ioe) {
914+
// If we fail to load the generation we tracked in latestKnownRepoGen we reset it.
915+
// This is done as a fail-safe in case a user manually deletes the contents of the repository in which case subsequent
916+
// operations must start from the EMPTY_REPO_GEN again
917+
if (latestKnownRepoGen.compareAndSet(indexGen, RepositoryData.EMPTY_REPO_GEN)) {
918+
logger.warn("Resetting repository generation tracker because we failed to read generation [" + indexGen + "]", ioe);
919+
}
871920
throw new RepositoryException(metadata.name(), "could not read repository data from index blob", ioe);
872921
}
873922
}
@@ -892,10 +941,21 @@ protected void writeIndexGen(final RepositoryData repositoryData, final long exp
892941
"] - possibly due to simultaneous snapshot deletion requests");
893942
}
894943
final long newGen = currentGen + 1;
944+
if (latestKnownRepoGen.get() >= newGen) {
945+
throw new IllegalArgumentException(
946+
"Tried writing generation [" + newGen + "] but repository is at least at generation [" + newGen + "] already");
947+
}
895948
// write the index file
896949
final String indexBlob = INDEX_FILE_PREFIX + Long.toString(newGen);
897950
logger.debug("Repository [{}] writing new index generational blob [{}]", metadata.name(), indexBlob);
898-
writeAtomic(indexBlob, BytesReference.bytes(repositoryData.snapshotsToXContent(XContentFactory.jsonBuilder())), true);
951+
writeAtomic(indexBlob,
952+
BytesReference.bytes(repositoryData.snapshotsToXContent(XContentFactory.jsonBuilder())), true);
953+
final long latestKnownGen = latestKnownRepoGen.updateAndGet(known -> Math.max(known, newGen));
954+
if (newGen < latestKnownGen) {
955+
// Don't mess up the index.latest blob
956+
throw new IllegalStateException(
957+
"Wrote generation [" + newGen + "] but latest known repo gen concurrently changed to [" + latestKnownGen + "]");
958+
}
899959
// write the current generation to the index-latest file
900960
final BytesReference genBytes;
901961
try (BytesStreamOutput bStream = new BytesStreamOutput()) {

server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ public void testConcurrentSnapshotCreateAndDelete() {
396396
final StepListener<CreateSnapshotResponse> createAnotherSnapshotResponseStepListener = new StepListener<>();
397397

398398
continueOrDie(deleteSnapshotStepListener, acknowledgedResponse -> masterNode.client.admin().cluster()
399-
.prepareCreateSnapshot(repoName, snapshotName).execute(createAnotherSnapshotResponseStepListener));
399+
.prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(true).execute(createAnotherSnapshotResponseStepListener));
400400
continueOrDie(createAnotherSnapshotResponseStepListener, createSnapshotResponse ->
401401
assertEquals(createSnapshotResponse.getSnapshotInfo().state(), SnapshotState.SUCCESS));
402402

@@ -1146,7 +1146,7 @@ protected void assertSnapshotOrGenericThread() {
11461146
} else {
11471147
return metaData -> {
11481148
final Repository repository = new MockEventuallyConsistentRepository(
1149-
metaData, xContentRegistry(), deterministicTaskQueue.getThreadPool(), blobStoreContext);
1149+
metaData, xContentRegistry(), deterministicTaskQueue.getThreadPool(), blobStoreContext, random());
11501150
repository.start();
11511151
return repository;
11521152
};

server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepository.java

+32-10
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,11 @@
4343
import java.io.InputStream;
4444
import java.nio.file.NoSuchFileException;
4545
import java.util.ArrayList;
46+
import java.util.Collections;
4647
import java.util.HashMap;
4748
import java.util.List;
4849
import java.util.Map;
50+
import java.util.Random;
4951
import java.util.concurrent.atomic.AtomicBoolean;
5052
import java.util.concurrent.atomic.AtomicLong;
5153
import java.util.function.Function;
@@ -63,18 +65,22 @@
6365
*/
6466
public class MockEventuallyConsistentRepository extends BlobStoreRepository {
6567

68+
private final Random random;
69+
6670
private final Context context;
6771

6872
private final NamedXContentRegistry namedXContentRegistry;
6973

7074
public MockEventuallyConsistentRepository(
71-
RepositoryMetaData metadata,
72-
NamedXContentRegistry namedXContentRegistry,
73-
ThreadPool threadPool,
74-
Context context) {
75-
super(metadata,false, namedXContentRegistry, threadPool);
75+
final RepositoryMetaData metadata,
76+
final NamedXContentRegistry namedXContentRegistry,
77+
final ThreadPool threadPool,
78+
final Context context,
79+
final Random random) {
80+
super(metadata, false, namedXContentRegistry, threadPool);
7681
this.context = context;
7782
this.namedXContentRegistry = namedXContentRegistry;
83+
this.random = random;
7884
}
7985

8086
// Filters out all actions that are super-seeded by subsequent actions
@@ -111,6 +117,9 @@ public BlobPath basePath() {
111117
*/
112118
public static final class Context {
113119

120+
// Eventual consistency is only simulated as long as this flag is false
121+
private boolean consistent;
122+
114123
private final List<BlobStoreAction> actions = new ArrayList<>();
115124

116125
/**
@@ -121,6 +130,7 @@ public void forceConsistent() {
121130
final List<BlobStoreAction> consistentActions = consistentView(actions);
122131
actions.clear();
123132
actions.addAll(consistentActions);
133+
consistent = true;
124134
}
125135
}
126136
}
@@ -244,14 +254,14 @@ public Map<String, BlobMetaData> listBlobs() {
244254
ensureNotClosed();
245255
final String thisPath = path.buildAsString();
246256
synchronized (context.actions) {
247-
return consistentView(context.actions).stream()
257+
return maybeMissLatestIndexN(consistentView(context.actions).stream()
248258
.filter(
249259
action -> action.path.startsWith(thisPath) && action.path.substring(thisPath.length()).indexOf('/') == -1
250260
&& action.operation == Operation.PUT)
251261
.collect(
252262
Collectors.toMap(
253263
action -> action.path.substring(thisPath.length()),
254-
action -> new PlainBlobMetaData(action.path.substring(thisPath.length()), action.data.length)));
264+
action -> new PlainBlobMetaData(action.path.substring(thisPath.length()), action.data.length))));
255265
}
256266
}
257267

@@ -272,9 +282,21 @@ public Map<String, BlobContainer> children() {
272282

273283
@Override
274284
public Map<String, BlobMetaData> listBlobsByPrefix(String blobNamePrefix) {
275-
return
276-
listBlobs().entrySet().stream().filter(entry -> entry.getKey().startsWith(blobNamePrefix)).collect(
277-
Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
285+
return maybeMissLatestIndexN(
286+
listBlobs().entrySet().stream().filter(entry -> entry.getKey().startsWith(blobNamePrefix))
287+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
288+
}
289+
290+
// Randomly filter out the latest /index-N blob from a listing to test that tracking of it in latestKnownRepoGen
291+
// overrides an inconsistent listing
292+
private Map<String, BlobMetaData> maybeMissLatestIndexN(Map<String, BlobMetaData> listing) {
293+
// Only filter out latest index-N at the repo root and only as long as we're not in a forced consistent state
294+
if (path.parent() == null && context.consistent == false && random.nextBoolean()) {
295+
final Map<String, BlobMetaData> filtered = new HashMap<>(listing);
296+
filtered.remove(BlobStoreRepository.INDEX_FILE_PREFIX + latestKnownRepoGen.get());
297+
return Collections.unmodifiableMap(filtered);
298+
}
299+
return listing;
278300
}
279301

280302
@Override

server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepositoryTests.java

+6-6
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public void testReadAfterWriteConsistently() throws IOException {
4949
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
5050
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
5151
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
52-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
52+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
5353
repository.start();
5454
final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
5555
final String blobName = randomAlphaOfLength(10);
@@ -69,7 +69,7 @@ public void testReadAfterWriteAfterReadThrows() throws IOException {
6969
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
7070
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
7171
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
72-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
72+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
7373
repository.start();
7474
final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
7575
final String blobName = randomAlphaOfLength(10);
@@ -85,7 +85,7 @@ public void testReadAfterDeleteAfterWriteThrows() throws IOException {
8585
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
8686
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
8787
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
88-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
88+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
8989
repository.start();
9090
final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
9191
final String blobName = randomAlphaOfLength(10);
@@ -103,7 +103,7 @@ public void testOverwriteRandomBlobFails() throws IOException {
103103
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
104104
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
105105
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
106-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
106+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
107107
repository.start();
108108
final BlobContainer container = repository.blobStore().blobContainer(repository.basePath());
109109
final String blobName = randomAlphaOfLength(10);
@@ -120,7 +120,7 @@ public void testOverwriteShardSnapBlobFails() throws IOException {
120120
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
121121
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
122122
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
123-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
123+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
124124
repository.start();
125125
final BlobContainer container =
126126
repository.blobStore().blobContainer(repository.basePath().add("indices").add("someindex").add("0"));
@@ -140,7 +140,7 @@ public void testOverwriteSnapshotInfoBlob() {
140140
when(threadPool.executor(ThreadPool.Names.SNAPSHOT)).thenReturn(new SameThreadExecutorService());
141141
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
142142
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
143-
xContentRegistry(), threadPool, blobStoreContext)) {
143+
xContentRegistry(), threadPool, blobStoreContext, random())) {
144144
repository.start();
145145

146146
// We create a snap- blob for snapshot "foo" in the first generation

0 commit comments

Comments
 (0)