113
113
import java .util .concurrent .Executor ;
114
114
import java .util .concurrent .LinkedBlockingQueue ;
115
115
import java .util .concurrent .TimeUnit ;
116
+ import java .util .concurrent .atomic .AtomicLong ;
116
117
import java .util .stream .Collectors ;
117
118
import java .util .stream .Stream ;
118
119
@@ -366,7 +367,7 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, boolea
366
367
} else {
367
368
try {
368
369
final Map <String , BlobMetaData > rootBlobs = blobContainer ().listBlobs ();
369
- final RepositoryData repositoryData = getRepositoryData ( latestGeneration ( rootBlobs . keySet ()) );
370
+ final RepositoryData repositoryData = safeRepositoryData ( repositoryStateId , rootBlobs );
370
371
// Cache the indices that were found before writing out the new index-N blob so that a stuck master will never
371
372
// delete an index that was created by another master node after writing this index-N blob.
372
373
final Map <String , BlobContainer > foundIndices = blobStore ().blobContainer (indicesPath ()).children ();
@@ -377,6 +378,30 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, boolea
377
378
}
378
379
}
379
380
381
+ /**
382
+ * Loads {@link RepositoryData} ensuring that it is consistent with the given {@code rootBlobs} as well of the assumed generation.
383
+ *
384
+ * @param repositoryStateId Expected repository generation
385
+ * @param rootBlobs Blobs at the repository root
386
+ * @return RepositoryData
387
+ */
388
+ private RepositoryData safeRepositoryData (long repositoryStateId , Map <String , BlobMetaData > rootBlobs ) {
389
+ final long generation = latestGeneration (rootBlobs .keySet ());
390
+ final long genToLoad = latestKnownRepoGen .updateAndGet (known -> Math .max (known , repositoryStateId ));
391
+ if (genToLoad > generation ) {
392
+ // It's always a possibility to not see the latest index-N in the listing here on an eventually consistent blob store, just
393
+ // debug log it. Any blobs leaked as a result of an inconsistent listing here will be cleaned up in a subsequent cleanup or
394
+ // snapshot delete run anyway.
395
+ logger .debug ("Determined repository's generation from its contents to [" + generation + "] but " +
396
+ "current generation is at least [" + genToLoad + "]" );
397
+ }
398
+ if (genToLoad != repositoryStateId ) {
399
+ throw new RepositoryException (metadata .name (), "concurrent modification of the index-N file, expected current generation [" +
400
+ repositoryStateId + "], actual current generation [" + genToLoad + "]" );
401
+ }
402
+ return getRepositoryData (genToLoad );
403
+ }
404
+
380
405
/**
381
406
* After updating the {@link RepositoryData} each of the shards directories is individually first moved to the next shard generation
382
407
* and then has all now unreferenced blobs in it deleted.
@@ -604,14 +629,8 @@ public void cleanup(long repositoryStateId, boolean writeShardGens, ActionListen
604
629
if (isReadOnly ()) {
605
630
throw new RepositoryException (metadata .name (), "cannot run cleanup on readonly repository" );
606
631
}
607
- final RepositoryData repositoryData = getRepositoryData ();
608
- if (repositoryData .getGenId () != repositoryStateId ) {
609
- // Check that we are working on the expected repository version before gathering the data to clean up
610
- throw new RepositoryException (metadata .name (), "concurrent modification of the repository before cleanup started, " +
611
- "expected current generation [" + repositoryStateId + "], actual current generation ["
612
- + repositoryData .getGenId () + "]" );
613
- }
614
632
Map <String , BlobMetaData > rootBlobs = blobContainer ().listBlobs ();
633
+ final RepositoryData repositoryData = safeRepositoryData (repositoryStateId , rootBlobs );
615
634
final Map <String , BlobContainer > foundIndices = blobStore ().blobContainer (indicesPath ()).children ();
616
635
final Set <String > survivingIndexIds =
617
636
repositoryData .getIndices ().values ().stream ().map (IndexId ::getId ).collect (Collectors .toSet ());
@@ -897,12 +916,36 @@ public void endVerification(String seed) {
897
916
}
898
917
}
899
918
919
+ // Tracks the latest known repository generation in a best-effort way to detect inconsistent listing of root level index-N blobs
920
+ // and concurrent modifications. Raised via Math.max when loading or writing; reset to EMPTY_REPO_GEN if reading the tracked generation fails.
921
+ // Protected for use in MockEventuallyConsistentRepository
922
+ protected final AtomicLong latestKnownRepoGen = new AtomicLong (RepositoryData .EMPTY_REPO_GEN );
923
+
900
924
@ Override
901
925
public RepositoryData getRepositoryData () {
902
- try {
903
- return getRepositoryData (latestIndexBlobId ());
904
- } catch (IOException ioe ) {
905
- throw new RepositoryException (metadata .name (), "Could not determine repository generation from root blobs" , ioe );
926
+ // Retry loading RepositoryData in a loop in case we run into concurrent modifications of the repository.
927
+ while (true ) {
928
+ final long generation ;
929
+ try {
930
+ generation = latestIndexBlobId ();
931
+ } catch (IOException ioe ) {
932
+ throw new RepositoryException (metadata .name (), "Could not determine repository generation from root blobs" , ioe );
933
+ }
934
+ final long genToLoad = latestKnownRepoGen .updateAndGet (known -> Math .max (known , generation ));
935
+ if (genToLoad > generation ) {
936
+ logger .info ("Determined repository generation [" + generation
937
+ + "] from repository contents but correct generation must be at least [" + genToLoad + "]" );
938
+ }
939
+ try {
940
+ return getRepositoryData (genToLoad );
941
+ } catch (RepositoryException e ) {
942
+ if (genToLoad != latestKnownRepoGen .get ()) {
943
+ logger .warn ("Failed to load repository data generation [" + genToLoad +
944
+ "] because a concurrent operation moved the current generation to [" + latestKnownRepoGen .get () + "]" , e );
945
+ continue ;
946
+ }
947
+ throw e ;
948
+ }
906
949
}
907
950
}
908
951
@@ -920,6 +963,12 @@ private RepositoryData getRepositoryData(long indexGen) {
920
963
return RepositoryData .snapshotsFromXContent (parser , indexGen );
921
964
}
922
965
} catch (IOException ioe ) {
966
+ // If we fail to load the generation we tracked in latestKnownRepoGen we reset it.
967
+ // This is done as a fail-safe in case a user manually deletes the contents of the repository in which case subsequent
968
+ // operations must start from the EMPTY_REPO_GEN again
969
+ if (latestKnownRepoGen .compareAndSet (indexGen , RepositoryData .EMPTY_REPO_GEN )) {
970
+ logger .warn ("Resetting repository generation tracker because we failed to read generation [" + indexGen + "]" , ioe );
971
+ }
923
972
throw new RepositoryException (metadata .name (), "could not read repository data from index blob" , ioe );
924
973
}
925
974
}
@@ -945,11 +994,21 @@ protected void writeIndexGen(final RepositoryData repositoryData, final long exp
945
994
"] - possibly due to simultaneous snapshot deletion requests" );
946
995
}
947
996
final long newGen = currentGen + 1 ;
997
+ if (latestKnownRepoGen .get () >= newGen ) {
998
+ throw new IllegalArgumentException (
999
+ "Tried writing generation [" + newGen + "] but repository is at least at generation [" + newGen + "] already" );
1000
+ }
948
1001
// write the index file
949
1002
final String indexBlob = INDEX_FILE_PREFIX + Long .toString (newGen );
950
1003
logger .debug ("Repository [{}] writing new index generational blob [{}]" , metadata .name (), indexBlob );
951
1004
writeAtomic (indexBlob ,
952
1005
BytesReference .bytes (repositoryData .snapshotsToXContent (XContentFactory .jsonBuilder (), writeShardGens )), true );
1006
+ final long latestKnownGen = latestKnownRepoGen .updateAndGet (known -> Math .max (known , newGen ));
1007
+ if (newGen < latestKnownGen ) {
1008
+ // Don't mess up the index.latest blob
1009
+ throw new IllegalStateException (
1010
+ "Wrote generation [" + newGen + "] but latest known repo gen concurrently changed to [" + latestKnownGen + "]" );
1011
+ }
953
1012
// write the current generation to the index-latest file
954
1013
final BytesReference genBytes ;
955
1014
try (BytesStreamOutput bStream = new BytesStreamOutput ()) {
0 commit comments