
Commit d63e777

Sample the GCP later and merely assert that it's ahead of the leased GCP

1 parent 24b5806 · commit d63e777
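For context, the invariant behind the commit message: a peer-recovery retention lease created from some global checkpoint (GCP) retains operations strictly above that checkpoint, and the GCP only ever moves forwards, so a GCP sampled after the lease exists must be at or ahead of the leased one. A minimal self-contained sketch of that invariant, using hypothetical values and a stand-in Lease type rather than the real Elasticsearch classes (run with java -ea to enable the assertion):

// Stand-in type and values for illustration; not the Elasticsearch RetentionLease API.
public class LeasedGcpInvariant {
    record Lease(long retainingSequenceNumber) {}

    public static void main(String[] args) {
        Lease lease = new Lease(43);                       // retains ops >= 43, i.e. based on GCP 42
        long leasedGcp = lease.retainingSequenceNumber() - 1;
        long gcpSampledLater = 45;                         // primary's GCP, sampled after the lease exists
        assert leasedGcp <= gcpSampledLater : leasedGcp + " vs " + gcpSampledLater;
        System.out.println("seed the replica's translog with GCP " + gcpSampledLater);
    }
}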

4 files changed: +34 -27 lines

server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java (+4 -2)

@@ -472,8 +472,10 @@ public boolean assertRetentionLeasesPersisted(final Path path) throws IOException
      * containing the persistent node ID calculated by {@link ReplicationTracker#getPeerRecoveryRetentionLeaseId}, and retain operations
      * with sequence numbers strictly greater than the given global checkpoint.
      */
-    public void addPeerRecoveryRetentionLease(String nodeId, long globalCheckpoint, ActionListener<ReplicationResponse> listener) {
-        addRetentionLease(getPeerRecoveryRetentionLeaseId(nodeId), globalCheckpoint + 1, PEER_RECOVERY_RETENTION_LEASE_SOURCE, listener);
+    public RetentionLease addPeerRecoveryRetentionLease(String nodeId, long globalCheckpoint,
+                                                        ActionListener<ReplicationResponse> listener) {
+        return addRetentionLease(getPeerRecoveryRetentionLeaseId(nodeId), globalCheckpoint + 1,
+            PEER_RECOVERY_RETENTION_LEASE_SOURCE, listener);
     }

     public RetentionLease cloneLocalPeerRecoveryRetentionLease(String nodeId, ActionListener<ReplicationResponse> listener) {
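The signature change above means callers get the new lease synchronously while the listener still reports when the lease change has been replicated. A sketch of that calling pattern with stub types (not the real Elasticsearch classes or listener API):

import java.util.function.Consumer;

class AddLeaseSketch {
    record Lease(String id, long retainingSequenceNumber) {}

    // Stand-in for addPeerRecoveryRetentionLease after this commit: the lease is
    // returned immediately; the callback fires once the lease change is replicated.
    static Lease addPeerRecoveryRetentionLease(String nodeId, long globalCheckpoint,
                                               Consumer<Void> onReplicated) {
        Lease lease = new Lease("peer_recovery/" + nodeId, globalCheckpoint + 1);
        onReplicated.accept(null); // pretend replication completed synchronously
        return lease;
    }

    public static void main(String[] args) {
        Lease lease = addPeerRecoveryRetentionLease("node-1", 42, ignored -> {});
        assert lease.retainingSequenceNumber() == 43; // retains ops strictly above GCP 42
    }
}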

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java (+3 -2)

@@ -2598,11 +2598,12 @@ public boolean isRelocatedPrimary() {
         return replicationTracker.isRelocated();
     }

-    public void addPeerRecoveryRetentionLease(String nodeId, long globalCheckpoint, ActionListener<ReplicationResponse> listener) {
+    public RetentionLease addPeerRecoveryRetentionLease(String nodeId, long globalCheckpoint,
+                                                        ActionListener<ReplicationResponse> listener) {
         assert assertPrimaryMode();
         // only needed for BWC reasons involving rolling upgrades from versions that do not support PRRLs:
         assert indexSettings.getIndexVersionCreated().before(Version.V_7_4_0);
-        replicationTracker.addPeerRecoveryRetentionLease(nodeId, globalCheckpoint, listener);
+        return replicationTracker.addPeerRecoveryRetentionLease(nodeId, globalCheckpoint, listener);
     }

     public RetentionLease cloneLocalPeerRecoveryRetentionLease(String nodeId, ActionListener<ReplicationResponse> listener) {
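The IndexShard method is a pass-through, so the only change is returning the lease from the tracker. Note that it keeps its BWC assertion: per the comment in the diff, this method is only reachable during rolling upgrades from versions before 7.4, which predate peer-recovery retention leases (PRRLs).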

server/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java (+23 -20)

@@ -264,14 +264,14 @@ && isTargetSameHistory()
             deleteRetentionLeaseStep.whenComplete(ignored -> {
                 assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[phase1]");

-                final Consumer<ActionListener<Long>> getInitialGlobalCheckpoint;
+                final Consumer<ActionListener<RetentionLease>> createRetentionLeaseAsync;
                 if (useRetentionLeases) {
-                    getInitialGlobalCheckpoint = l -> createRetentionLease(startingSeqNo, l);
+                    createRetentionLeaseAsync = l -> createRetentionLease(startingSeqNo, l);
                 } else {
-                    getInitialGlobalCheckpoint = l -> l.onResponse(SequenceNumbers.UNASSIGNED_SEQ_NO);
+                    createRetentionLeaseAsync = l -> l.onResponse(null);
                 }

-                phase1(safeCommitRef.getIndexCommit(), getInitialGlobalCheckpoint, () -> estimateNumOps, sendFileStep);
+                phase1(safeCommitRef.getIndexCommit(), createRetentionLeaseAsync, () -> estimateNumOps, sendFileStep);
             }, onFailure);

         } catch (final Exception e) {

@@ -427,7 +427,7 @@ static final class SendFileResult {
      * segments that are missing. Only segments that have the same size and
      * checksum can be reused
      */
-    void phase1(IndexCommit snapshot, Consumer<ActionListener<Long>> getInitialGlobalCheckpoint,
+    void phase1(IndexCommit snapshot, Consumer<ActionListener<RetentionLease>> createRetentionLease,
                 IntSupplier translogOps, ActionListener<SendFileResult> listener) {
         cancellableThreads.checkForCancel();
         // Total size of segment files that are recovered

@@ -491,7 +491,7 @@ void phase1(IndexCommit snapshot, Consumer<ActionListener<Long>> getInitialGlobalCheckpoint,
                     phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSizeInBytes));
                 final StepListener<Void> sendFileInfoStep = new StepListener<>();
                 final StepListener<Void> sendFilesStep = new StepListener<>();
-                final StepListener<Long> getInitialGlobalCheckpointStep = new StepListener<>();
+                final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
                 final StepListener<Void> cleanFilesStep = new StepListener<>();
                 cancellableThreads.execute(() ->
                     recoveryTarget.receiveFileInfo(phase1FileNames, phase1FileSizes, phase1ExistingFileNames,

@@ -500,12 +500,19 @@ void phase1(IndexCommit snapshot, Consumer<ActionListener<Long>> getInitialGlobalCheckpoint,
                 sendFileInfoStep.whenComplete(r ->
                     sendFiles(store, phase1Files.toArray(new StoreFileMetaData[0]), translogOps, sendFilesStep), listener::onFailure);

-                sendFilesStep.whenComplete(r -> getInitialGlobalCheckpoint.accept(getInitialGlobalCheckpointStep), listener::onFailure);
-
-                getInitialGlobalCheckpointStep.whenComplete(initialGlobalCheckpoint ->
-                    cleanFiles(store, recoverySourceMetadata, translogOps,
-                        Math.max(initialGlobalCheckpoint, Long.parseLong(snapshot.getUserData().get(SequenceNumbers.MAX_SEQ_NO))),
-                        cleanFilesStep),
+                sendFilesStep.whenComplete(r -> createRetentionLease.accept(createRetentionLeaseStep), listener::onFailure);
+
+                createRetentionLeaseStep.whenComplete(retentionLease ->
+                    {
+                        final long lastKnownGlobalCheckpoint = shard.getLastKnownGlobalCheckpoint();
+                        assert retentionLease == null || retentionLease.retainingSequenceNumber() - 1 <= lastKnownGlobalCheckpoint
+                            : retentionLease + " vs " + lastKnownGlobalCheckpoint;
+                        // Establishes new empty translog on the replica with global checkpoint set to lastKnownGlobalCheckpoint. We want
+                        // the commit we just copied to be a safe commit on the replica, so why not set the global checkpoint on the replica
+                        // to the max seqno of this commit? Because (in rare corner cases) this commit might not be a safe commit here on
+                        // the primary, and in these cases the max seqno would be too high to be valid as a global checkpoint.
+                        cleanFiles(store, recoverySourceMetadata, translogOps, lastKnownGlobalCheckpoint, cleanFilesStep);
+                    },
                     listener::onFailure);

                 final long totalSize = totalSizeInBytes;

@@ -529,7 +536,7 @@ void phase1(IndexCommit snapshot, Consumer<ActionListener<Long>> getInitialGlobalCheckpoint,
         }
     }

-    private void createRetentionLease(final long startingSeqNo, ActionListener<Long> initialGlobalCheckpointListener) {
+    private void createRetentionLease(final long startingSeqNo, ActionListener<RetentionLease> listener) {
         runUnderPrimaryPermit(() -> {
             // Clone the peer recovery retention lease belonging to the source shard. We are retaining history between the local
             // checkpoint of the safe commit we're creating and this lease's retained seqno with the retention lock, and by cloning an

@@ -546,22 +553,18 @@ private void createRetentionLease(final long startingSeqNo, ActionListener<Long> initialGlobalCheckpointListener) {
                     new ThreadedActionListener<>(logger, shard.getThreadPool(),
                         ThreadPool.Names.GENERIC, cloneRetentionLeaseStep, false));
                 logger.trace("cloned primary's retention lease as [{}]", clonedLease);
-                cloneRetentionLeaseStep.whenComplete(
-                    rr -> initialGlobalCheckpointListener.onResponse(clonedLease.retainingSequenceNumber() - 1),
-                    initialGlobalCheckpointListener::onFailure);
+                cloneRetentionLeaseStep.whenComplete(rr -> listener.onResponse(clonedLease), listener::onFailure);
             } catch (RetentionLeaseNotFoundException e) {
                 // it's possible that the primary has no retention lease yet if we are doing a rolling upgrade from a version before
                 // 7.4, and in that case we just create a lease using the local checkpoint of the safe commit which we're using for
                 // recovery as a conservative estimate for the global checkpoint.
                 assert shard.indexSettings().getIndexVersionCreated().before(Version.V_7_4_0);
                 final StepListener<ReplicationResponse> addRetentionLeaseStep = new StepListener<>();
                 final long estimatedGlobalCheckpoint = startingSeqNo - 1;
-                shard.addPeerRecoveryRetentionLease(request.targetNode().getId(),
+                final RetentionLease newLease = shard.addPeerRecoveryRetentionLease(request.targetNode().getId(),
                     estimatedGlobalCheckpoint, new ThreadedActionListener<>(logger, shard.getThreadPool(),
                         ThreadPool.Names.GENERIC, addRetentionLeaseStep, false));
-                addRetentionLeaseStep.whenComplete(
-                    rr -> initialGlobalCheckpointListener.onResponse(estimatedGlobalCheckpoint),
-                    initialGlobalCheckpointListener::onFailure);
+                addRetentionLeaseStep.whenComplete(rr -> listener.onResponse(newLease), listener::onFailure);
                 logger.trace("created retention lease with estimated checkpoint of [{}]", estimatedGlobalCheckpoint);
             }
         }, shardId + " establishing retention lease for [" + request.targetAllocationId() + "]",
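Taken together, phase1 now chains send-files, then create-lease, then clean-files, and the global checkpoint used to seed the replica's translog is sampled after the lease exists; the lease, when present, is merely asserted to be behind that sample. A condensed self-contained sketch of the chain, with a plain Consumer standing in for the StepListener machinery and a nullable stub lease for the no-leases path:

import java.util.function.Consumer;

class Phase1ChainSketch {
    record Lease(long retainingSequenceNumber) {}

    // Stand-in for the tail of phase1: the lease is null when retention leases
    // are not in use (the pre-7.4 recovery path).
    static void finishPhase1(Consumer<Consumer<Lease>> createRetentionLease, long lastKnownGcp) {
        createRetentionLease.accept(lease -> {
            // The lease was created from an earlier GCP sample, so the later
            // sample can only be at or ahead of it.
            assert lease == null || lease.retainingSequenceNumber() - 1 <= lastKnownGcp
                : lease + " vs " + lastKnownGcp;
            System.out.println("cleanFiles(..., globalCheckpoint=" + lastKnownGcp + ")");
        });
    }

    public static void main(String[] args) {
        finishPhase1(l -> l.accept(new Lease(43)), 45); // leases enabled: lease based on GCP 42
        finishPhase1(l -> l.accept(null), 45);          // no-leases path: nothing to assert
    }
}

The design choice is spelled out in the new comment in the diff: the copied commit's max seqno cannot simply be used as the replica's global checkpoint, because in rare corner cases that commit is not a safe commit on the primary, so its max seqno would be too high to be a valid global checkpoint.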

server/src/test/java/org/elasticsearch/indices/recovery/RecoverySourceHandlerTests.java (+4 -3)

@@ -64,6 +64,7 @@
 import org.elasticsearch.index.mapper.SeqNoFieldMapper;
 import org.elasticsearch.index.mapper.Uid;
 import org.elasticsearch.index.seqno.ReplicationTracker;
+import org.elasticsearch.index.seqno.RetentionLease;
 import org.elasticsearch.index.seqno.RetentionLeases;
 import org.elasticsearch.index.seqno.SeqNoStats;
 import org.elasticsearch.index.seqno.SequenceNumbers;

@@ -467,10 +468,10 @@ public void testThrowExceptionOnPrimaryRelocatedBeforePhase1Started() throws IOException
                 between(1, 8)) {

                 @Override
-                void phase1(IndexCommit snapshot, Consumer<ActionListener<Long>> getInitialGlobalCheckpoint,
+                void phase1(IndexCommit snapshot, Consumer<ActionListener<RetentionLease>> createRetentionLease,
                             IntSupplier translogOps, ActionListener<SendFileResult> listener) {
                     phase1Called.set(true);
-                    super.phase1(snapshot, getInitialGlobalCheckpoint, translogOps, listener);
+                    super.phase1(snapshot, createRetentionLease, translogOps, listener);
                 }

                 @Override

@@ -686,7 +687,7 @@ public void cleanFiles(int totalTranslogOps, long globalCheckpoint, Store.MetadataSnapshot
         try {
             final CountDownLatch latch = new CountDownLatch(1);
             handler.phase1(DirectoryReader.listCommits(dir).get(0),
-                l -> recoveryExecutor.execute(() -> l.onResponse(randomNonNegativeLong())),
+                l -> recoveryExecutor.execute(() -> l.onResponse(null)),
                 () -> 0,
                 new LatchedActionListener<>(phase1Listener, latch));
             latch.await();
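The test change mirrors the production no-leases branch: the stubbed lease-creation step now completes with null, just as the useRetentionLeases == false path does, instead of fabricating an initial global checkpoint via randomNonNegativeLong(), since phase1 no longer takes a checkpoint from this step.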
