Skip to content

Commit fc505aa

Browse files
Track Repository Gen. in BlobStoreRepository (#48944) (#49116)
This is intended as a stop-gap solution/improvement to #38941 that prevents repo modifications without an intervening master failover from causing inconsistent (outdated due to inconsistent listing of index-N blobs) `RepositoryData` to be written. Tracking the latest repository generation will move to the cluster state in a separate pull request. This is intended as a low-risk change to be backported as far as possible and is motivated by the recently increased chance of #38941 causing trouble via SLM (see #47520). Closes #47834. Closes #49048.
1 parent a370008 commit fc505aa

File tree

4 files changed

+111
-30
lines changed

4 files changed

+111
-30
lines changed

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

+71-12
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113
import java.util.concurrent.Executor;
114114
import java.util.concurrent.LinkedBlockingQueue;
115115
import java.util.concurrent.TimeUnit;
116+
import java.util.concurrent.atomic.AtomicLong;
116117
import java.util.stream.Collectors;
117118
import java.util.stream.Stream;
118119

@@ -372,7 +373,7 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, boolea
372373
} else {
373374
try {
374375
final Map<String, BlobMetaData> rootBlobs = blobContainer().listBlobs();
375-
final RepositoryData repositoryData = getRepositoryData(latestGeneration(rootBlobs.keySet()));
376+
final RepositoryData repositoryData = safeRepositoryData(repositoryStateId, rootBlobs);
376377
// Cache the indices that were found before writing out the new index-N blob so that a stuck master will never
377378
// delete an index that was created by another master node after writing this index-N blob.
378379
final Map<String, BlobContainer> foundIndices = blobStore().blobContainer(indicesPath()).children();
@@ -383,6 +384,30 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, boolea
383384
}
384385
}
385386

387+
/**
388+
* Loads {@link RepositoryData} ensuring that it is consistent with the given {@code rootBlobs} as well as the assumed generation.
* The generation that gets loaded is the higher of {@code repositoryStateId} and the generation tracked in
* {@code latestKnownRepoGen}; the generation derived from {@code rootBlobs} is only used to log a possibly stale listing.
389+
*
390+
* @param repositoryStateId Expected repository generation
391+
* @param rootBlobs Blobs at the repository root
392+
* @return RepositoryData loaded for generation {@code repositoryStateId}
* @throws RepositoryException if the tracked generation is newer than {@code repositoryStateId}
393+
*/
394+
private RepositoryData safeRepositoryData(long repositoryStateId, Map<String, BlobMetaData> rootBlobs) {
395+
// Generation as visible in the (possibly stale) listing of the repository root
final long generation = latestGeneration(rootBlobs.keySet());
396+
// Raise the tracked generation to the caller's expectation if that is higher; the result is the generation to trust
final long genToLoad = latestKnownRepoGen.updateAndGet(known -> Math.max(known, repositoryStateId));
397+
if (genToLoad > generation) {
398+
// It's always a possibility to not see the latest index-N in the listing here on an eventually consistent blob store, just
399+
// debug log it. Any blobs leaked as a result of an inconsistent listing here will be cleaned up in a subsequent cleanup or
400+
// snapshot delete run anyway.
401+
logger.debug("Determined repository's generation from its contents to [" + generation + "] but " +
402+
"current generation is at least [" + genToLoad + "]");
403+
}
404+
// genToLoad is max(tracked, expected), so a mismatch means a newer generation than the caller expects is already tracked
if (genToLoad != repositoryStateId) {
405+
throw new RepositoryException(metadata.name(), "concurrent modification of the index-N file, expected current generation [" +
406+
repositoryStateId + "], actual current generation [" + genToLoad + "]");
407+
}
408+
return getRepositoryData(genToLoad);
409+
}
410+
386411
/**
387412
* After updating the {@link RepositoryData} each of the shards directories is individually first moved to the next shard generation
388413
* and then has all now unreferenced blobs in it deleted.
@@ -610,14 +635,8 @@ public void cleanup(long repositoryStateId, boolean writeShardGens, ActionListen
610635
if (isReadOnly()) {
611636
throw new RepositoryException(metadata.name(), "cannot run cleanup on readonly repository");
612637
}
613-
final RepositoryData repositoryData = getRepositoryData();
614-
if (repositoryData.getGenId() != repositoryStateId) {
615-
// Check that we are working on the expected repository version before gathering the data to clean up
616-
throw new RepositoryException(metadata.name(), "concurrent modification of the repository before cleanup started, " +
617-
"expected current generation [" + repositoryStateId + "], actual current generation ["
618-
+ repositoryData.getGenId() + "]");
619-
}
620638
Map<String, BlobMetaData> rootBlobs = blobContainer().listBlobs();
639+
final RepositoryData repositoryData = safeRepositoryData(repositoryStateId, rootBlobs);
621640
final Map<String, BlobContainer> foundIndices = blobStore().blobContainer(indicesPath()).children();
622641
final Set<String> survivingIndexIds =
623642
repositoryData.getIndices().values().stream().map(IndexId::getId).collect(Collectors.toSet());
@@ -903,12 +922,36 @@ public void endVerification(String seed) {
903922
}
904923
}
905924

925+
// Tracks the latest known repository generation in a best-effort way to detect inconsistent listing of root level index-N blobs
926+
// and concurrent modifications.
927+
// Protected for use in MockEventuallyConsistentRepository
928+
protected final AtomicLong latestKnownRepoGen = new AtomicLong(RepositoryData.EMPTY_REPO_GEN);
929+
906930
@Override
907931
public RepositoryData getRepositoryData() {
908-
try {
909-
return getRepositoryData(latestIndexBlobId());
910-
} catch (IOException ioe) {
911-
throw new RepositoryException(metadata.name(), "Could not determine repository generation from root blobs", ioe);
932+
// Retry loading RepositoryData in a loop in case we run into concurrent modifications of the repository.
933+
while (true) {
934+
final long generation;
935+
try {
936+
generation = latestIndexBlobId();
937+
} catch (IOException ioe) {
938+
throw new RepositoryException(metadata.name(), "Could not determine repository generation from root blobs", ioe);
939+
}
940+
final long genToLoad = latestKnownRepoGen.updateAndGet(known -> Math.max(known, generation));
941+
if (genToLoad > generation) {
942+
logger.info("Determined repository generation [" + generation
943+
+ "] from repository contents but correct generation must be at least [" + genToLoad + "]");
944+
}
945+
try {
946+
return getRepositoryData(genToLoad);
947+
} catch (RepositoryException e) {
948+
if (genToLoad != latestKnownRepoGen.get()) {
949+
logger.warn("Failed to load repository data generation [" + genToLoad +
950+
"] because a concurrent operation moved the current generation to [" + latestKnownRepoGen.get() + "]", e);
951+
continue;
952+
}
953+
throw e;
954+
}
912955
}
913956
}
914957

@@ -926,6 +969,12 @@ private RepositoryData getRepositoryData(long indexGen) {
926969
return RepositoryData.snapshotsFromXContent(parser, indexGen);
927970
}
928971
} catch (IOException ioe) {
972+
// If we fail to load the generation we tracked in latestKnownRepoGen we reset it.
973+
// This is done as a fail-safe in case a user manually deletes the contents of the repository in which case subsequent
974+
// operations must start from the EMPTY_REPO_GEN again
975+
if (latestKnownRepoGen.compareAndSet(indexGen, RepositoryData.EMPTY_REPO_GEN)) {
976+
logger.warn("Resetting repository generation tracker because we failed to read generation [" + indexGen + "]", ioe);
977+
}
929978
throw new RepositoryException(metadata.name(), "could not read repository data from index blob", ioe);
930979
}
931980
}
@@ -951,11 +1000,21 @@ protected void writeIndexGen(final RepositoryData repositoryData, final long exp
9511000
"] - possibly due to simultaneous snapshot deletion requests");
9521001
}
9531002
final long newGen = currentGen + 1;
1003+
if (latestKnownRepoGen.get() >= newGen) {
1004+
throw new IllegalArgumentException(
1005+
"Tried writing generation [" + newGen + "] but repository is at least at generation [" + newGen + "] already");
1006+
}
9541007
// write the index file
9551008
final String indexBlob = INDEX_FILE_PREFIX + Long.toString(newGen);
9561009
logger.debug("Repository [{}] writing new index generational blob [{}]", metadata.name(), indexBlob);
9571010
writeAtomic(indexBlob,
9581011
BytesReference.bytes(repositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), writeShardGens)), true);
1012+
final long latestKnownGen = latestKnownRepoGen.updateAndGet(known -> Math.max(known, newGen));
1013+
if (newGen < latestKnownGen) {
1014+
// Don't mess up the index.latest blob
1015+
throw new IllegalStateException(
1016+
"Wrote generation [" + newGen + "] but latest known repo gen concurrently changed to [" + latestKnownGen + "]");
1017+
}
9591018
// write the current generation to the index-latest file
9601019
final BytesReference genBytes;
9611020
try (BytesStreamOutput bStream = new BytesStreamOutput()) {

server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ public void testConcurrentSnapshotCreateAndDelete() {
396396
final StepListener<CreateSnapshotResponse> createAnotherSnapshotResponseStepListener = new StepListener<>();
397397

398398
continueOrDie(deleteSnapshotStepListener, acknowledgedResponse -> masterNode.client.admin().cluster()
399-
.prepareCreateSnapshot(repoName, snapshotName).execute(createAnotherSnapshotResponseStepListener));
399+
.prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(true).execute(createAnotherSnapshotResponseStepListener));
400400
continueOrDie(createAnotherSnapshotResponseStepListener, createSnapshotResponse ->
401401
assertEquals(createSnapshotResponse.getSnapshotInfo().state(), SnapshotState.SUCCESS));
402402

@@ -1146,7 +1146,7 @@ protected void assertSnapshotOrGenericThread() {
11461146
} else {
11471147
return metaData -> {
11481148
final Repository repository = new MockEventuallyConsistentRepository(
1149-
metaData, xContentRegistry(), deterministicTaskQueue.getThreadPool(), blobStoreContext);
1149+
metaData, xContentRegistry(), deterministicTaskQueue.getThreadPool(), blobStoreContext, random());
11501150
repository.start();
11511151
return repository;
11521152
};

server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepository.java

+32-10
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,11 @@
4343
import java.io.InputStream;
4444
import java.nio.file.NoSuchFileException;
4545
import java.util.ArrayList;
46+
import java.util.Collections;
4647
import java.util.HashMap;
4748
import java.util.List;
4849
import java.util.Map;
50+
import java.util.Random;
4951
import java.util.concurrent.atomic.AtomicBoolean;
5052
import java.util.concurrent.atomic.AtomicLong;
5153
import java.util.function.Function;
@@ -63,18 +65,22 @@
6365
*/
6466
public class MockEventuallyConsistentRepository extends BlobStoreRepository {
6567

68+
private final Random random;
69+
6670
private final Context context;
6771

6872
private final NamedXContentRegistry namedXContentRegistry;
6973

7074
public MockEventuallyConsistentRepository(
71-
RepositoryMetaData metadata,
72-
NamedXContentRegistry namedXContentRegistry,
73-
ThreadPool threadPool,
74-
Context context) {
75-
super(metadata,false, namedXContentRegistry, threadPool);
75+
final RepositoryMetaData metadata,
76+
final NamedXContentRegistry namedXContentRegistry,
77+
final ThreadPool threadPool,
78+
final Context context,
79+
final Random random) {
80+
super(metadata, false, namedXContentRegistry, threadPool);
7681
this.context = context;
7782
this.namedXContentRegistry = namedXContentRegistry;
83+
this.random = random;
7884
}
7985

8086
// Filters out all actions that are superseded by subsequent actions
@@ -111,6 +117,9 @@ public BlobPath basePath() {
111117
*/
112118
public static final class Context {
113119

120+
// Eventual consistency is only simulated as long as this flag is false
121+
private boolean consistent;
122+
114123
private final List<BlobStoreAction> actions = new ArrayList<>();
115124

116125
/**
@@ -121,6 +130,7 @@ public void forceConsistent() {
121130
final List<BlobStoreAction> consistentActions = consistentView(actions);
122131
actions.clear();
123132
actions.addAll(consistentActions);
133+
consistent = true;
124134
}
125135
}
126136
}
@@ -244,14 +254,14 @@ public Map<String, BlobMetaData> listBlobs() {
244254
ensureNotClosed();
245255
final String thisPath = path.buildAsString();
246256
synchronized (context.actions) {
247-
return consistentView(context.actions).stream()
257+
return maybeMissLatestIndexN(consistentView(context.actions).stream()
248258
.filter(
249259
action -> action.path.startsWith(thisPath) && action.path.substring(thisPath.length()).indexOf('/') == -1
250260
&& action.operation == Operation.PUT)
251261
.collect(
252262
Collectors.toMap(
253263
action -> action.path.substring(thisPath.length()),
254-
action -> new PlainBlobMetaData(action.path.substring(thisPath.length()), action.data.length)));
264+
action -> new PlainBlobMetaData(action.path.substring(thisPath.length()), action.data.length))));
255265
}
256266
}
257267

@@ -272,9 +282,21 @@ public Map<String, BlobContainer> children() {
272282

273283
@Override
274284
public Map<String, BlobMetaData> listBlobsByPrefix(String blobNamePrefix) {
275-
return
276-
listBlobs().entrySet().stream().filter(entry -> entry.getKey().startsWith(blobNamePrefix)).collect(
277-
Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
285+
return maybeMissLatestIndexN(
286+
listBlobs().entrySet().stream().filter(entry -> entry.getKey().startsWith(blobNamePrefix))
287+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
288+
}
289+
290+
// Randomly filter out the latest /index-N blob from a listing to test that tracking of it in latestKnownRepoGen
291+
// overrides an inconsistent listing.
// Returns either the given listing unchanged or an unmodifiable copy of it without the entry named
// INDEX_FILE_PREFIX + latestKnownRepoGen; the given map itself is never modified.
292+
private Map<String, BlobMetaData> maybeMissLatestIndexN(Map<String, BlobMetaData> listing) {
293+
// Only filter out latest index-N at the repo root and only as long as we're not in a forced consistent state
294+
if (path.parent() == null && context.consistent == false && random.nextBoolean()) {
295+
// Work on a copy so that the caller's map is left untouched
final Map<String, BlobMetaData> filtered = new HashMap<>(listing);
296+
// Removing is a no-op if the listing does not contain an index-N blob for the tracked generation
filtered.remove(BlobStoreRepository.INDEX_FILE_PREFIX + latestKnownRepoGen.get());
297+
return Collections.unmodifiableMap(filtered);
298+
}
299+
return listing;
278300
}
279301

280302
@Override

server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepositoryTests.java

+6-6
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public void testReadAfterWriteConsistently() throws IOException {
5050
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
5151
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
5252
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
53-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
53+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
5454
repository.start();
5555
final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
5656
final String blobName = randomAlphaOfLength(10);
@@ -70,7 +70,7 @@ public void testReadAfterWriteAfterReadThrows() throws IOException {
7070
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
7171
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
7272
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
73-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
73+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
7474
repository.start();
7575
final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
7676
final String blobName = randomAlphaOfLength(10);
@@ -86,7 +86,7 @@ public void testReadAfterDeleteAfterWriteThrows() throws IOException {
8686
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
8787
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
8888
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
89-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
89+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
9090
repository.start();
9191
final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
9292
final String blobName = randomAlphaOfLength(10);
@@ -104,7 +104,7 @@ public void testOverwriteRandomBlobFails() throws IOException {
104104
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
105105
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
106106
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
107-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
107+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
108108
repository.start();
109109
final BlobContainer container = repository.blobStore().blobContainer(repository.basePath());
110110
final String blobName = randomAlphaOfLength(10);
@@ -121,7 +121,7 @@ public void testOverwriteShardSnapBlobFails() throws IOException {
121121
MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
122122
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
123123
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
124-
xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
124+
xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
125125
repository.start();
126126
final BlobContainer container =
127127
repository.blobStore().blobContainer(repository.basePath().add("indices").add("someindex").add("0"));
@@ -143,7 +143,7 @@ public void testOverwriteSnapshotInfoBlob() {
143143
new ThreadPool.Info(ThreadPool.Names.SNAPSHOT, ThreadPool.ThreadPoolType.FIXED, randomIntBetween(1, 10)));
144144
try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
145145
new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
146-
xContentRegistry(), threadPool, blobStoreContext)) {
146+
xContentRegistry(), threadPool, blobStoreContext, random())) {
147147
repository.start();
148148

149149
// We create a snap- blob for snapshot "foo" in the first generation

0 commit comments

Comments
 (0)