Skip to content

Commit 8de4659

Browse files
committed
Use Lucene soft-deletes in peer recovery (#30522)
This commit adds Lucene soft-deletes as another source for peer-recovery besides translog. Relates #29530
1 parent 07ecc03 commit 8de4659

25 files changed

+652
-189
lines changed

docs/reference/indices/flush.asciidoc

+2-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ which returns something similar to:
101101
"translog_generation" : "2",
102102
"max_seq_no" : "-1",
103103
"sync_id" : "AVvFY-071siAOuFGEO9P", <1>
104-
"max_unsafe_auto_id_timestamp" : "-1"
104+
"max_unsafe_auto_id_timestamp" : "-1",
105+
"min_retained_seq_no": "0"
105106
},
106107
"num_docs" : 0
107108
}

server/src/main/java/org/elasticsearch/index/engine/CombinedDeletionPolicy.java

+9-3
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,17 @@
4646
public final class CombinedDeletionPolicy extends IndexDeletionPolicy {
4747
private final Logger logger;
4848
private final TranslogDeletionPolicy translogDeletionPolicy;
49+
private final SoftDeletesPolicy softDeletesPolicy;
4950
private final LongSupplier globalCheckpointSupplier;
5051
private final ObjectIntHashMap<IndexCommit> snapshottedCommits; // Number of snapshots held against each commit point.
5152
private volatile IndexCommit safeCommit; // the most recent safe commit point - its max_seqno at most the persisted global checkpoint.
5253
private volatile IndexCommit lastCommit; // the most recent commit point
5354

54-
CombinedDeletionPolicy(Logger logger, TranslogDeletionPolicy translogDeletionPolicy, LongSupplier globalCheckpointSupplier) {
55+
CombinedDeletionPolicy(Logger logger, TranslogDeletionPolicy translogDeletionPolicy,
56+
SoftDeletesPolicy softDeletesPolicy, LongSupplier globalCheckpointSupplier) {
5557
this.logger = logger;
5658
this.translogDeletionPolicy = translogDeletionPolicy;
59+
this.softDeletesPolicy = softDeletesPolicy;
5760
this.globalCheckpointSupplier = globalCheckpointSupplier;
5861
this.snapshottedCommits = new ObjectIntHashMap<>();
5962
}
@@ -80,7 +83,7 @@ public synchronized void onCommit(List<? extends IndexCommit> commits) throws IO
8083
deleteCommit(commits.get(i));
8184
}
8285
}
83-
updateTranslogDeletionPolicy();
86+
updateRetentionPolicy();
8487
}
8588

8689
private void deleteCommit(IndexCommit commit) throws IOException {
@@ -90,7 +93,7 @@ private void deleteCommit(IndexCommit commit) throws IOException {
9093
assert commit.isDeleted() : "Deletion commit [" + commitDescription(commit) + "] was suppressed";
9194
}
9295

93-
private void updateTranslogDeletionPolicy() throws IOException {
96+
private void updateRetentionPolicy() throws IOException {
9497
assert Thread.holdsLock(this);
9598
logger.debug("Safe commit [{}], last commit [{}]", commitDescription(safeCommit), commitDescription(lastCommit));
9699
assert safeCommit.isDeleted() == false : "The safe commit must not be deleted";
@@ -101,6 +104,9 @@ private void updateTranslogDeletionPolicy() throws IOException {
101104
assert minRequiredGen <= lastGen : "minRequiredGen must not be greater than lastGen";
102105
translogDeletionPolicy.setTranslogGenerationOfLastCommit(lastGen);
103106
translogDeletionPolicy.setMinTranslogGenerationForRecovery(minRequiredGen);
107+
108+
softDeletesPolicy.setLocalCheckpointOfSafeCommit(
109+
Long.parseLong(safeCommit.getUserData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY)));
104110
}
105111

106112
/**

server/src/main/java/org/elasticsearch/index/engine/Engine.java

+18-6
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ public abstract class Engine implements Closeable {
9898

9999
public static final String SYNC_COMMIT_ID = "sync_id";
100100
public static final String HISTORY_UUID_KEY = "history_uuid";
101+
public static final String MIN_RETAINED_SEQNO = "min_retained_seq_no";
101102

102103
protected final ShardId shardId;
103104
protected final String allocationId;
@@ -578,19 +579,17 @@ public enum SearcherScope {
578579

579580
public abstract void syncTranslog() throws IOException;
580581

581-
public abstract Closeable acquireTranslogRetentionLock();
582+
/**
583+
* Acquires a lock on the translog files and Lucene soft-deleted documents to prevent them from being trimmed
584+
*/
585+
public abstract Closeable acquireRetentionLockForPeerRecovery();
582586

583587
/**
584588
* Creates a new translog snapshot from this engine for reading translog operations whose seq# in the provided range.
585589
* The caller has to close the returned snapshot after finishing the reading.
586590
*/
587591
public abstract Translog.Snapshot newTranslogSnapshotBetween(long minSeqNo, long maxSeqNo) throws IOException;
588592

589-
/**
590-
* Returns the estimated number of translog operations in this engine whose seq# at least the provided seq#.
591-
*/
592-
public abstract int estimateTranslogOperationsFromMinSeq(long minSeqNo);
593-
594593
public abstract TranslogStats getTranslogStats();
595594

596595
/**
@@ -604,6 +603,19 @@ public enum SearcherScope {
604603
public abstract Translog.Snapshot newLuceneChangesSnapshot(String source, MapperService mapperService,
605604
long minSeqNo, long maxSeqNo, boolean requiredFullRange) throws IOException;
606605

606+
/**
607+
* Creates a new history snapshot for reading operations since the provided seqno.
608+
* The returned snapshot can be retrieved from either Lucene index or translog files.
609+
*/
610+
public abstract Translog.Snapshot readHistoryOperations(String source, MapperService mapperService, long startingSeqNo) throws IOException;
611+
612+
/**
613+
* Returns the estimated number of history operations whose seq# at least the provided seq# in this engine.
614+
*/
615+
public abstract int estimateNumberOfHistoryOperations(String source, MapperService mapperService, long startingSeqNo) throws IOException;
616+
617+
public abstract boolean hasCompleteOperationHistory(String source, MapperService mapperService, long startingSeqNo) throws IOException;
618+
607619
protected final void ensureOpen(Exception suppressed) {
608620
if (isClosed.get()) {
609621
AlreadyClosedException ace = new AlreadyClosedException(shardId + " engine is closed", failedEngine.get());

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

+85-26
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import org.apache.logging.log4j.Logger;
2323
import org.apache.logging.log4j.message.ParameterizedMessage;
2424
import org.apache.lucene.document.Field;
25-
import org.apache.lucene.document.LongPoint;
2625
import org.apache.lucene.document.NumericDocValuesField;
2726
import org.apache.lucene.index.DirectoryReader;
2827
import org.apache.lucene.index.IndexCommit;
@@ -38,7 +37,6 @@
3837
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
3938
import org.apache.lucene.index.Term;
4039
import org.apache.lucene.search.IndexSearcher;
41-
import org.apache.lucene.search.Query;
4240
import org.apache.lucene.search.ReferenceManager;
4341
import org.apache.lucene.search.SearcherFactory;
4442
import org.apache.lucene.search.SearcherManager;
@@ -157,6 +155,7 @@ public class InternalEngine extends Engine {
157155
private final CounterMetric numDocUpdates = new CounterMetric();
158156
private final NumericDocValuesField softDeleteField = Lucene.newSoftDeleteField();
159157
private final boolean softDeleteEnabled;
158+
private final SoftDeletesPolicy softDeletesPolicy;
160159
private final LastRefreshedCheckpointListener lastRefreshedCheckpointListener;
161160

162161
/**
@@ -182,7 +181,6 @@ public InternalEngine(EngineConfig engineConfig) {
182181
maxUnsafeAutoIdTimestamp.set(Long.MAX_VALUE);
183182
}
184183
this.uidField = engineConfig.getIndexSettings().isSingleType() ? IdFieldMapper.NAME : UidFieldMapper.NAME;
185-
this.softDeleteEnabled = engineConfig.getIndexSettings().isSoftDeleteEnabled();
186184
final TranslogDeletionPolicy translogDeletionPolicy = new TranslogDeletionPolicy(
187185
engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(),
188186
engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis()
@@ -204,8 +202,10 @@ public InternalEngine(EngineConfig engineConfig) {
204202
assert translog.getGeneration() != null;
205203
this.translog = translog;
206204
this.localCheckpointTracker = createLocalCheckpointTracker(localCheckpointTrackerSupplier);
205+
this.softDeleteEnabled = engineConfig.getIndexSettings().isSoftDeleteEnabled();
206+
this.softDeletesPolicy = newSoftDeletesPolicy();
207207
this.combinedDeletionPolicy =
208-
new CombinedDeletionPolicy(logger, translogDeletionPolicy, translog::getLastSyncedGlobalCheckpoint);
208+
new CombinedDeletionPolicy(logger, translogDeletionPolicy, softDeletesPolicy, translog::getLastSyncedGlobalCheckpoint);
209209
writer = createWriter();
210210
bootstrapAppendOnlyInfoFromWriter(writer);
211211
historyUUID = loadHistoryUUID(writer);
@@ -262,6 +262,18 @@ private LocalCheckpointTracker createLocalCheckpointTracker(
262262
return localCheckpointTrackerSupplier.apply(maxSeqNo, localCheckpoint);
263263
}
264264

265+
private SoftDeletesPolicy newSoftDeletesPolicy() throws IOException {
266+
final Map<String, String> commitUserData = store.readLastCommittedSegmentsInfo().userData;
267+
final long lastMinRetainedSeqNo;
268+
if (commitUserData.containsKey(Engine.MIN_RETAINED_SEQNO)) {
269+
lastMinRetainedSeqNo = Long.parseLong(commitUserData.get(Engine.MIN_RETAINED_SEQNO));
270+
} else {
271+
lastMinRetainedSeqNo = Long.parseLong(commitUserData.get(SequenceNumbers.MAX_SEQ_NO)) + 1;
272+
}
273+
return new SoftDeletesPolicy(translog::getLastSyncedGlobalCheckpoint, lastMinRetainedSeqNo,
274+
engineConfig.getIndexSettings().getSoftDeleteRetentionOperations());
275+
}
276+
265277
/**
266278
* This reference manager delegates all it's refresh calls to another (internal) SearcherManager
267279
* The main purpose for this is that if we have external refreshes happening we don't issue extra
@@ -481,18 +493,39 @@ public void syncTranslog() throws IOException {
481493
}
482494

483495
@Override
484-
public Closeable acquireTranslogRetentionLock() {
485-
return getTranslog().acquireRetentionLock();
496+
public Translog.Snapshot newTranslogSnapshotBetween(long minSeqNo, long maxSeqNo) throws IOException {
497+
return getTranslog().getSnapshotBetween(minSeqNo, maxSeqNo);
486498
}
487499

500+
/**
501+
* Creates a new history snapshot for reading operations since the provided seqno.
502+
* The returned snapshot can be retrieved from either Lucene index or translog files.
503+
*/
488504
@Override
489-
public Translog.Snapshot newTranslogSnapshotBetween(long minSeqNo, long maxSeqNo) throws IOException {
490-
return getTranslog().getSnapshotBetween(minSeqNo, maxSeqNo);
505+
public Translog.Snapshot readHistoryOperations(String source, MapperService mapperService, long startingSeqNo) throws IOException {
506+
if (engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
507+
return newLuceneChangesSnapshot(source, mapperService, Math.max(0, startingSeqNo), Long.MAX_VALUE, false);
508+
} else {
509+
return getTranslog().getSnapshotBetween(startingSeqNo, Long.MAX_VALUE);
510+
}
491511
}
492512

513+
/**
514+
* Returns the estimated number of history operations whose seq# at least the provided seq# in this engine.
515+
*/
493516
@Override
494-
public int estimateTranslogOperationsFromMinSeq(long minSeqNo) {
495-
return getTranslog().estimateTotalOperationsFromMinSeq(minSeqNo);
517+
public int estimateNumberOfHistoryOperations(String source, MapperService mapperService, long startingSeqNo) throws IOException {
518+
if (engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
519+
try (Translog.Snapshot snapshot =
520+
newLuceneChangesSnapshot(source, mapperService, Math.max(0, startingSeqNo), Long.MAX_VALUE, false)) {
521+
return snapshot.totalOperations();
522+
} catch (IOException ex) {
523+
maybeFailEngine(source, ex);
524+
throw ex;
525+
}
526+
} else {
527+
return getTranslog().estimateTotalOperationsFromMinSeq(startingSeqNo);
528+
}
496529
}
497530

498531
@Override
@@ -2127,8 +2160,8 @@ private IndexWriterConfig getIndexWriterConfig() {
21272160
MergePolicy mergePolicy = config().getMergePolicy();
21282161
if (softDeleteEnabled) {
21292162
iwc.setSoftDeletesField(Lucene.SOFT_DELETE_FIELD);
2130-
mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, this::softDeletesRetentionQuery,
2131-
new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, this::softDeletesRetentionQuery, mergePolicy));
2163+
mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
2164+
new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
21322165
}
21332166
iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
21342167
iwc.setSimilarity(engineConfig.getSimilarity());
@@ -2141,20 +2174,6 @@ private IndexWriterConfig getIndexWriterConfig() {
21412174
return iwc;
21422175
}
21432176

2144-
/**
2145-
* Documents including tombstones are soft-deleted and matched this query will be retained and won't cleaned up by merges.
2146-
*/
2147-
private Query softDeletesRetentionQuery() {
2148-
ensureOpen();
2149-
// TODO: We send the safe commit in peer-recovery, thus we need to retain all operations after the local checkpoint of that commit.
2150-
final long retainedExtraOps = engineConfig.getIndexSettings().getSoftDeleteRetentionOperations();
2151-
// Prefer using the global checkpoint which is persisted on disk than an in-memory value.
2152-
// If we failed to fsync checkpoint but already used a higher global checkpoint value to clean up soft-deleted ops,
2153-
// then we may not have all required operations whose seq# greater than the global checkpoint after restarted.
2154-
final long persistedGlobalCheckpoint = translog.getLastSyncedGlobalCheckpoint();
2155-
return LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, persistedGlobalCheckpoint + 1 - retainedExtraOps, Long.MAX_VALUE);
2156-
}
2157-
21582177
/** Extended SearcherFactory that warms the segments if needed when acquiring a new searcher */
21592178
static final class SearchFactory extends EngineSearcherFactory {
21602179
private final Engine.Warmer warmer;
@@ -2341,6 +2360,9 @@ protected void commitIndexWriter(final IndexWriter writer, final Translog transl
23412360
commitData.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(localCheckpointTracker.getMaxSeqNo()));
23422361
commitData.put(MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID, Long.toString(maxUnsafeAutoIdTimestamp.get()));
23432362
commitData.put(HISTORY_UUID_KEY, historyUUID);
2363+
if (softDeleteEnabled) {
2364+
commitData.put(Engine.MIN_RETAINED_SEQNO, Long.toString(softDeletesPolicy.getMinRetainedSeqNo()));
2365+
}
23442366
logger.trace("committing writer with commit data [{}]", commitData);
23452367
return commitData.entrySet().iterator();
23462368
});
@@ -2396,6 +2418,8 @@ public void onSettingsChanged() {
23962418
final IndexSettings indexSettings = engineConfig.getIndexSettings();
23972419
translogDeletionPolicy.setRetentionAgeInMillis(indexSettings.getTranslogRetentionAge().getMillis());
23982420
translogDeletionPolicy.setRetentionSizeInBytes(indexSettings.getTranslogRetentionSize().getBytes());
2421+
2422+
softDeletesPolicy.setRetentionOperations(indexSettings.getSoftDeleteRetentionOperations());
23992423
}
24002424

24012425
public MergeStats getMergeStats() {
@@ -2509,6 +2533,41 @@ public Translog.Snapshot newLuceneChangesSnapshot(String source, MapperService m
25092533
}
25102534
}
25112535

2536+
@Override
2537+
public boolean hasCompleteOperationHistory(String source, MapperService mapperService, long startingSeqNo) throws IOException {
2538+
if (engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
2539+
return getMinRetainedSeqNo() <= startingSeqNo;
2540+
} else {
2541+
final long currentLocalCheckpoint = getLocalCheckpointTracker().getCheckpoint();
2542+
final LocalCheckpointTracker tracker = new LocalCheckpointTracker(startingSeqNo, startingSeqNo - 1);
2543+
try (Translog.Snapshot snapshot = getTranslog().getSnapshotBetween(startingSeqNo, Long.MAX_VALUE)) {
2544+
Translog.Operation operation;
2545+
while ((operation = snapshot.next()) != null) {
2546+
if (operation.seqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) {
2547+
tracker.markSeqNoAsCompleted(operation.seqNo());
2548+
}
2549+
}
2550+
}
2551+
return tracker.getCheckpoint() >= currentLocalCheckpoint;
2552+
}
2553+
}
2554+
2555+
/**
2556+
* Returns the minimum seqno that is retained in the Lucene index.
2557+
* Operations whose seq# are at least this value should exist in the Lucene index.
2558+
*/
2559+
final long getMinRetainedSeqNo() {
2560+
assert softDeleteEnabled : Thread.currentThread().getName();
2561+
return softDeletesPolicy.getMinRetainedSeqNo();
2562+
}
2563+
2564+
@Override
2565+
public Closeable acquireRetentionLockForPeerRecovery() {
2566+
final Closeable translogLock = translog.acquireRetentionLock();
2567+
final Releasable softDeletesLock = softDeletesPolicy.acquireRetentionLock();
2568+
return () -> IOUtils.close(translogLock, softDeletesLock);
2569+
}
2570+
25122571
@Override
25132572
public boolean isRecovering() {
25142573
return pendingTranslogRecovery.get();

server/src/main/java/org/elasticsearch/index/engine/LuceneChangesSnapshot.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public int totalOperations() {
116116
}
117117

118118
@Override
119-
public int overriddenOperations() {
119+
public int skippedOperations() {
120120
return skippedOperations;
121121
}
122122

0 commit comments

Comments
 (0)