import java.util.Comparator;
import java.util.List;
import java.util.Locale;
+ import java.util.OptionalLong;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;
@@ -196,7 +197,6 @@ && isTargetSameHistory()
        }

        final StepListener<SendFileResult> sendFileStep = new StepListener<>();
-       final StepListener<ReplicationResponse> establishRetentionLeaseStep = new StepListener<>();
        final StepListener<TimeValue> prepareEngineStep = new StepListener<>();
        final StepListener<SendSnapshotResult> sendSnapshotStep = new StepListener<>();
        final StepListener<Void> finalizeStep = new StepListener<>();
@@ -264,7 +264,16 @@ && isTargetSameHistory()

            deleteRetentionLeaseStep.whenComplete(ignored -> {
                assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[phase1]");
-               phase1(safeCommitRef.getIndexCommit(), shard.getLastKnownGlobalCheckpoint(), () -> estimateNumOps, sendFileStep);
+
+               final Consumer<ActionListener<Long>> getGlobalCheckpoint;
+               if (useRetentionLeases) {
+                   getGlobalCheckpoint = l -> createRetentionLease(startingSeqNo, l);
+               } else {
+                   final long globalCheckpoint = shard.getLastKnownGlobalCheckpoint();
+                   getGlobalCheckpoint = l -> l.onResponse(globalCheckpoint);
+               }
+
+               phase1(safeCommitRef.getIndexCommit(), getGlobalCheckpoint, () -> estimateNumOps, sendFileStep);
            }, onFailure);

        } catch (final Exception e) {
@@ -274,41 +283,6 @@ && isTargetSameHistory()
        assert startingSeqNo >= 0 : "startingSeqNo must be non negative. got: " + startingSeqNo;

        sendFileStep.whenComplete(r -> {
-           if (useRetentionLeases && isSequenceNumberBasedRecovery == false) {
-               // We can in general use retention leases for peer recovery, but there is no lease for the target node right now.
-               runUnderPrimaryPermit(() -> {
-                   // Clone the peer recovery retention lease belonging to the source shard. We are retaining history between the
-                   // local checkpoint of the safe commit we're creating and this lease's retained seqno with the retention
-                   // lock, and by cloning an existing lease we (approximately) know that all our peers are also retaining history
-                   // as requested by the cloned lease. If the recovery now fails before copying enough history over then a
-                   // subsequent attempt will find this lease, determine it is not enough, and fall back to a file-based recovery.
-                   //
-                   // (approximately) because we do not guarantee to be able to satisfy every lease on every peer.
-                   logger.trace("cloning primary's retention lease");
-                   try {
-                       final RetentionLease clonedLease = shard.cloneLocalPeerRecoveryRetentionLease(request.targetNode().getId(),
-                           new ThreadedActionListener<>(logger, shard.getThreadPool(),
-                               ThreadPool.Names.GENERIC, establishRetentionLeaseStep, false));
-                       logger.trace("cloned primary's retention lease as [{}]", clonedLease);
-                   } catch (RetentionLeaseNotFoundException e) {
-                       // it's possible that the primary has no retention lease yet if we are doing a rolling upgrade from a
-                       // version before 7.4, and in that case we just create a lease using the local checkpoint of the safe commit
-                       // which we're using for recovery as a conservative estimate for the global checkpoint.
-                       assert shard.indexSettings().getIndexVersionCreated().before(Version.V_7_4_0);
-                       final long estimatedGlobalCheckpoint = startingSeqNo - 1;
-                       shard.addPeerRecoveryRetentionLease(request.targetNode().getId(),
-                           estimatedGlobalCheckpoint, new ThreadedActionListener<>(logger, shard.getThreadPool(),
-                               ThreadPool.Names.GENERIC, establishRetentionLeaseStep, false));
-                       logger.trace("created retention lease with estimated checkpoint of [{}]", estimatedGlobalCheckpoint);
-                   }
-               }, shardId + " establishing retention lease for [" + request.targetAllocationId() + "]",
-                   shard, cancellableThreads, logger);
-           } else {
-               establishRetentionLeaseStep.onResponse(null);
-           }
-       }, onFailure);
-
-       establishRetentionLeaseStep.whenComplete(r -> {
            assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[prepareTargetForTranslog]");
            // For a sequence based recovery, the target can keep its local translog
            prepareTargetForTranslog(isSequenceNumberBasedRecovery == false,
@@ -455,7 +429,8 @@ static final class SendFileResult {
     * segments that are missing. Only segments that have the same size and
     * checksum can be reused
     */
-   void phase1(IndexCommit snapshot, long globalCheckpoint, IntSupplier translogOps, ActionListener<SendFileResult> listener) {
+   void phase1(IndexCommit snapshot, Consumer<ActionListener<Long>> getGlobalCheckpoint,
+               IntSupplier translogOps, ActionListener<SendFileResult> listener) {
        cancellableThreads.checkForCancel();
        // Total size of segment files that are recovered
        long totalSizeInBytes = 0;
@@ -518,6 +493,7 @@ void phase1(IndexCommit snapshot, long globalCheckpoint, IntSupplier translogOps
            phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSizeInBytes));
        final StepListener<Void> sendFileInfoStep = new StepListener<>();
        final StepListener<Void> sendFilesStep = new StepListener<>();
+       final StepListener<Long> getGlobalCheckpointStep = new StepListener<>();
        final StepListener<Void> cleanFilesStep = new StepListener<>();
        cancellableThreads.execute(() ->
            recoveryTarget.receiveFileInfo(phase1FileNames, phase1FileSizes, phase1ExistingFileNames,
@@ -526,7 +502,9 @@ void phase1(IndexCommit snapshot, long globalCheckpoint, IntSupplier translogOps
        sendFileInfoStep.whenComplete(r ->
            sendFiles(store, phase1Files.toArray(new StoreFileMetaData[0]), translogOps, sendFilesStep), listener::onFailure);

-       sendFilesStep.whenComplete(r ->
+       sendFilesStep.whenComplete(r -> getGlobalCheckpoint.accept(getGlobalCheckpointStep), listener::onFailure);
+
+       getGlobalCheckpointStep.whenComplete(globalCheckpoint ->
            cleanFiles(store, recoverySourceMetadata, translogOps, globalCheckpoint, cleanFilesStep), listener::onFailure);

        final long totalSize = totalSizeInBytes;
@@ -550,6 +528,45 @@ void phase1(IndexCommit snapshot, long globalCheckpoint, IntSupplier translogOps
        }
    }

+   private void createRetentionLease(final long startingSeqNo, ActionListener<Long> listener) {
+       runUnderPrimaryPermit(() -> {
+           // Clone the peer recovery retention lease belonging to the source shard. We are retaining history between the local
+           // checkpoint of the safe commit we're creating and this lease's retained seqno with the retention lock, and by cloning an
+           // existing lease we (approximately) know that all our peers are also retaining history as requested by the cloned lease. If
+           // the recovery now fails before copying enough history over then a subsequent attempt will find this lease, determine it is
+           // not enough, and fall back to a file-based recovery.
+           //
+           // (approximately) because we do not guarantee to be able to satisfy every lease on every peer.
+           logger.trace("cloning primary's retention lease");
+           try {
+               final StepListener<ReplicationResponse> cloneRetentionLeaseStep = new StepListener<>();
+               final RetentionLease clonedLease
+                   = shard.cloneLocalPeerRecoveryRetentionLease(request.targetNode().getId(),
+                       new ThreadedActionListener<>(logger, shard.getThreadPool(),
+                           ThreadPool.Names.GENERIC, cloneRetentionLeaseStep, false));
+               logger.trace("cloned primary's retention lease as [{}]", clonedLease);
+               cloneRetentionLeaseStep.whenComplete(
+                   rr -> listener.onResponse(clonedLease.retainingSequenceNumber() - 1),
+                   listener::onFailure);
+           } catch (RetentionLeaseNotFoundException e) {
+               // it's possible that the primary has no retention lease yet if we are doing a rolling upgrade from a version before
+               // 7.4, and in that case we just create a lease using the local checkpoint of the safe commit which we're using for
+               // recovery as a conservative estimate for the global checkpoint.
+               assert shard.indexSettings().getIndexVersionCreated().before(Version.V_7_4_0);
+               final StepListener<ReplicationResponse> addRetentionLeaseStep = new StepListener<>();
+               final long estimatedGlobalCheckpoint = startingSeqNo - 1;
+               shard.addPeerRecoveryRetentionLease(request.targetNode().getId(),
+                   estimatedGlobalCheckpoint, new ThreadedActionListener<>(logger, shard.getThreadPool(),
+                       ThreadPool.Names.GENERIC, addRetentionLeaseStep, false));
+               addRetentionLeaseStep.whenComplete(
+                   rr -> listener.onResponse(estimatedGlobalCheckpoint),
+                   listener::onFailure);
+               logger.trace("created retention lease with estimated checkpoint of [{}]", estimatedGlobalCheckpoint);
+           }
+       }, shardId + " establishing retention lease for [" + request.targetAllocationId() + "]",
+           shard, cancellableThreads, logger);
+   }
+
    boolean canSkipPhase1(Store.MetadataSnapshot source, Store.MetadataSnapshot target) {
        if (source.getSyncId() == null || source.getSyncId().equals(target.getSyncId()) == false) {
            return false;
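
The following is a minimal, self-contained sketch, not part of the diff above, of the callback shape the refactored phase1 relies on: a Consumer that either answers immediately with an already-known global checkpoint or defers until an asynchronous step (such as establishing a retention lease) completes. It uses a hypothetical Listener interface as a stand-in for Elasticsearch's ActionListener, purely for illustration.

// Illustrative sketch only; Listener is a simplified stand-in for ActionListener<T>.
import java.util.function.Consumer;

interface Listener<T> {
    void onResponse(T value);
    void onFailure(Exception e);
}

public class GlobalCheckpointCallbackSketch {

    // Without retention leases the checkpoint is already known, so the callback answers at once.
    static Consumer<Listener<Long>> immediate(long knownGlobalCheckpoint) {
        return listener -> listener.onResponse(knownGlobalCheckpoint);
    }

    // With retention leases the checkpoint only becomes known once the lease is established,
    // so the callback simply forwards the listener to that asynchronous operation.
    static Consumer<Listener<Long>> deferred(Consumer<Listener<Long>> establishLeaseAsync) {
        return establishLeaseAsync;
    }

    public static void main(String[] args) {
        Consumer<Listener<Long>> getGlobalCheckpoint = immediate(42L);
        getGlobalCheckpoint.accept(new Listener<Long>() {
            @Override public void onResponse(Long globalCheckpoint) {
                System.out.println("cleaning files up to checkpoint " + globalCheckpoint);
            }
            @Override public void onFailure(Exception e) {
                e.printStackTrace();
            }
        });
    }
}

Either variant plugs into the same call site, which is why the diff can chain sendFilesStep into getGlobalCheckpointStep without caring how the checkpoint is obtained.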