@@ -171,6 +171,7 @@
 import java.util.function.BiConsumer;
 import java.util.function.Consumer;
 import java.util.function.Function;
+import java.util.function.LongSupplier;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
@@ -1407,7 +1408,7 @@ public long recoverLocallyUpToGlobalCheckpoint() {
                 recoveryState.getTranslog().totalLocal(recoveredOps); // adjust the total local to reflect the actual count
                 return recoveredOps;
             };
-            innerOpenEngineAndTranslog();
+            innerOpenEngineAndTranslog(() -> globalCheckpoint);
             getEngine().recoverFromTranslog(translogRecoveryRunner, globalCheckpoint);
             logger.trace("shard locally recovered up to {}", getEngine().getSeqNoStats(globalCheckpoint));
         } finally {
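
In the local-recovery path above, the target global checkpoint is already known when the engine is opened, so it is captured in a lambda; the regular open paths further down instead pass the replication tracker itself as the supplier. The following self-contained sketch uses stand-in names (not the real Elasticsearch classes) to show the two LongSupplier shapes side by side:

import java.util.function.LongSupplier;

// Toy model of the two supplier shapes used in this change: a fixed value frozen into a
// lambda (local recovery up to a known global checkpoint) and a live tracker object that
// is consulted lazily (the regular engine-open path). All names are illustrative only.
class GlobalCheckpointSupplierSketch {

    // Stand-in for the engine side: it only ever sees a LongSupplier.
    static long openEngine(LongSupplier globalCheckpointSupplier) {
        return globalCheckpointSupplier.getAsLong();
    }

    // Stand-in for a replication tracker that can be handed over wherever a LongSupplier is expected.
    static class TrackerStub implements LongSupplier {
        volatile long globalCheckpoint = -1; // not yet known

        @Override
        public long getAsLong() {
            return globalCheckpoint;
        }
    }

    public static void main(String[] args) {
        // Local recovery: the checkpoint is a plain local variable, frozen into the lambda.
        final long globalCheckpoint = 41;
        System.out.println(openEngine(() -> globalCheckpoint)); // prints 41

        // Regular open: the tracker is primed first, then passed directly.
        TrackerStub tracker = new TrackerStub();
        tracker.globalCheckpoint = 42;
        System.out.println(openEngine(tracker));                // prints 42
    }
}

Either way the engine only sees a LongSupplier, so it neither knows nor cares whether the value behind it is fixed or tracks live replication state.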
@@ -1533,6 +1534,15 @@ int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot, Engine.Operat
         return opsRecovered;
     }
 
+    private void loadGlobalCheckpointToReplicationTracker() throws IOException {
+        // we have to set it before we open an engine and recover from the translog because
+        // acquiring a snapshot from the translog causes a sync which causes the global checkpoint to be pulled in,
+        // and an engine can be forced to close in ctor which also causes the global checkpoint to be pulled in.
+        final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY);
+        final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID);
+        replicationTracker.updateGlobalCheckpointOnReplica(globalCheckpoint, "read from translog checkpoint");
+    }
+
     /**
      * opens the engine on top of the existing lucene engine and translog.
      * Operations from the translog will be replayed to bring lucene up to date.
@@ -1548,7 +1558,8 @@ public void openEngineAndRecoverFromTranslog() throws IOException {
             return runTranslogRecovery(engine, snapshot, Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY,
                 translogRecoveryStats::incrementRecoveredOperations);
         };
-        innerOpenEngineAndTranslog();
+        loadGlobalCheckpointToReplicationTracker();
+        innerOpenEngineAndTranslog(replicationTracker);
         getEngine().recoverFromTranslog(translogRecoveryRunner, Long.MAX_VALUE);
     }
 
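The comment in the new loadGlobalCheckpointToReplicationTracker() helper spells out the ordering constraint: the tracker must already know the global checkpoint before the engine is constructed, because opening the translog (or an engine constructor that fails and closes) reads it immediately. A rough sketch of that contract, with hypothetical names standing in for the real tracker and engine:

import java.util.function.LongSupplier;

// Minimal model of the ordering contract: whatever supplier is handed to the engine must
// already return a real global checkpoint at construction time. Hypothetical names only.
class EngineOpenOrderingSketch {

    static final long UNKNOWN = -1;

    static class FakeTracker implements LongSupplier {
        private long globalCheckpoint = UNKNOWN;

        // mirrors updateGlobalCheckpointOnReplica(checkpoint, reason) in shape only
        void update(long checkpoint, String reason) {
            this.globalCheckpoint = checkpoint;
        }

        @Override
        public long getAsLong() {
            return globalCheckpoint;
        }
    }

    static class FakeEngine {
        FakeEngine(LongSupplier globalCheckpointSupplier) {
            // The real engine uses the checkpoint while opening the translog, so an
            // uninitialized supplier at this point would be a bug.
            if (globalCheckpointSupplier.getAsLong() == UNKNOWN) {
                throw new IllegalStateException("global checkpoint must be loaded before opening the engine");
            }
        }
    }

    public static void main(String[] args) {
        FakeTracker tracker = new FakeTracker();
        tracker.update(7, "read from translog checkpoint"); // step 1: prime the tracker
        new FakeEngine(tracker);                            // step 2: only then open the engine
    }
}
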
@@ -1559,25 +1570,20 @@ public void openEngineAndRecoverFromTranslog() throws IOException {
     public void openEngineAndSkipTranslogRecovery() throws IOException {
         assert routingEntry().recoverySource().getType() == RecoverySource.Type.PEER : "not a peer recovery [" + routingEntry() + "]";
         assert recoveryState.getStage() == RecoveryState.Stage.TRANSLOG : "unexpected recovery stage [" + recoveryState.getStage() + "]";
-        innerOpenEngineAndTranslog();
+        loadGlobalCheckpointToReplicationTracker();
+        innerOpenEngineAndTranslog(replicationTracker);
         getEngine().skipTranslogRecovery();
     }
 
-    private void innerOpenEngineAndTranslog() throws IOException {
+    private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) throws IOException {
         if (state != IndexShardState.RECOVERING) {
             throw new IndexShardNotRecoveringException(shardId, state);
         }
-        final EngineConfig config = newEngineConfig();
+        final EngineConfig config = newEngineConfig(globalCheckpointSupplier);
 
         // we disable deletes since we allow for operations to be executed against the shard while recovering
         // but we need to make sure we don't loose deletes until we are done recovering
         config.setEnableGcDeletes(false);
-        // we have to set it before we open an engine and recover from the translog because
-        // acquiring a snapshot from the translog causes a sync which causes the global checkpoint to be pulled in,
-        // and an engine can be forced to close in ctor which also causes the global checkpoint to be pulled in.
-        final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY);
-        final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID);
-        replicationTracker.updateGlobalCheckpointOnReplica(globalCheckpoint, "read from translog checkpoint");
         updateRetentionLeasesOnReplica(loadRetentionLeases());
         assert recoveryState.getRecoverySource().expectEmptyRetentionLeases() == false || getRetentionLeases().leases().isEmpty()
             : "expected empty set of retention leases with recovery source [" + recoveryState.getRecoverySource()
@@ -2646,7 +2652,7 @@ private DocumentMapperForType docMapper(String type) {
             mapperService.resolveDocumentType(type));
     }
 
-    private EngineConfig newEngineConfig() {
+    private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) {
         Sort indexSort = indexSortSupplier.get();
         return new EngineConfig(shardId, shardRouting.allocationId().getId(),
                 threadPool, indexSettings, warmer, store, indexSettings.getMergePolicy(),
@@ -2656,7 +2662,7 @@ private EngineConfig newEngineConfig() {
                 IndexingMemoryController.SHARD_INACTIVE_TIME_SETTING.get(indexSettings.getSettings()),
                 Collections.singletonList(refreshListeners),
                 Collections.singletonList(new RefreshMetricUpdater(refreshMetric)),
-                indexSort, circuitBreakerService, replicationTracker, replicationTracker::getRetentionLeases,
+                indexSort, circuitBreakerService, globalCheckpointSupplier, replicationTracker::getRetentionLeases,
                 () -> getOperationPrimaryTerm(), tombstoneDocSupplier());
     }
 
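With this change newEngineConfig no longer hard-wires the replication tracker as the checkpoint source; the caller decides which supplier the EngineConfig carries, and the engine keeps consulting that supplier over its lifetime (for example when the translog writes its checkpoint file). A small illustrative sketch of why a supplier is stored rather than a plain long:

import java.util.function.LongSupplier;

// Hypothetical, much-reduced config object: it only stores the supplier and lets the
// "engine" read through it later, so the caller controls whether the value can move.
class EngineConfigSketch {

    static class Config {
        final LongSupplier globalCheckpointSupplier;

        Config(LongSupplier globalCheckpointSupplier) {
            this.globalCheckpointSupplier = globalCheckpointSupplier;
        }
    }

    public static void main(String[] args) {
        long[] live = {5};
        Config fixed = new Config(() -> 5);          // local recovery: the value can never move
        Config tracking = new Config(() -> live[0]); // normal path: the value follows the tracker

        live[0] = 9; // the tracker advanced after the config was built
        System.out.println(fixed.globalCheckpointSupplier.getAsLong());    // still 5
        System.out.println(tracking.globalCheckpointSupplier.getAsLong()); // now 9
    }
}
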
@@ -3293,7 +3299,7 @@ assert getActiveOperationsCount() == OPERATIONS_BLOCKED
             // we must create both new read-only engine and new read-write engine under mutex to ensure snapshotStoreMetadata,
             // acquireXXXCommit and close works.
             final Engine readOnlyEngine =
-                new ReadOnlyEngine(newEngineConfig(), seqNoStats, translogStats, false, Function.identity()) {
+                new ReadOnlyEngine(newEngineConfig(replicationTracker), seqNoStats, translogStats, false, Function.identity()) {
                     @Override
                     public IndexCommitRef acquireLastIndexCommit(boolean flushFirst) {
                         synchronized (mutex) {
@@ -3322,7 +3328,7 @@ public void close() throws IOException {
                     }
                 };
             IOUtils.close(currentEngineReference.getAndSet(readOnlyEngine));
-            newEngineReference.set(engineFactory.newReadWriteEngine(newEngineConfig()));
+            newEngineReference.set(engineFactory.newReadWriteEngine(newEngineConfig(replicationTracker)));
             onNewEngine(newEngineReference.get());
         }
         final Engine.TranslogRecoveryRunner translogRunner = (engine, snapshot) -> runTranslogRecovery(