Skip to content

Commit 601ea76

Browse files
authored
Use Lucene soft-deletes in peer recovery (elastic#30522)
This commit adds Lucene soft-deletes as another source for peer-recovery besides translog. Relates elastic#29530
1 parent d467be3 commit 601ea76

25 files changed

+647
-186
lines changed

docs/reference/indices/flush.asciidoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ which returns something similar to:
101101
"translog_generation" : "2",
102102
"max_seq_no" : "-1",
103103
"sync_id" : "AVvFY-071siAOuFGEO9P", <1>
104-
"max_unsafe_auto_id_timestamp" : "-1"
104+
"max_unsafe_auto_id_timestamp" : "-1",
105+
"min_retained_seq_no": "0"
105106
},
106107
"num_docs" : 0
107108
}

server/src/main/java/org/elasticsearch/index/engine/CombinedDeletionPolicy.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,17 @@
4646
public final class CombinedDeletionPolicy extends IndexDeletionPolicy {
4747
private final Logger logger;
4848
private final TranslogDeletionPolicy translogDeletionPolicy;
49+
private final SoftDeletesPolicy softDeletesPolicy;
4950
private final LongSupplier globalCheckpointSupplier;
5051
private final ObjectIntHashMap<IndexCommit> snapshottedCommits; // Number of snapshots held against each commit point.
5152
private volatile IndexCommit safeCommit; // the most recent safe commit point - its max_seqno at most the persisted global checkpoint.
5253
private volatile IndexCommit lastCommit; // the most recent commit point
5354

54-
CombinedDeletionPolicy(Logger logger, TranslogDeletionPolicy translogDeletionPolicy, LongSupplier globalCheckpointSupplier) {
55+
CombinedDeletionPolicy(Logger logger, TranslogDeletionPolicy translogDeletionPolicy,
56+
SoftDeletesPolicy softDeletesPolicy, LongSupplier globalCheckpointSupplier) {
5557
this.logger = logger;
5658
this.translogDeletionPolicy = translogDeletionPolicy;
59+
this.softDeletesPolicy = softDeletesPolicy;
5760
this.globalCheckpointSupplier = globalCheckpointSupplier;
5861
this.snapshottedCommits = new ObjectIntHashMap<>();
5962
}
@@ -80,7 +83,7 @@ public synchronized void onCommit(List<? extends IndexCommit> commits) throws IO
8083
deleteCommit(commits.get(i));
8184
}
8285
}
83-
updateTranslogDeletionPolicy();
86+
updateRetentionPolicy();
8487
}
8588

8689
private void deleteCommit(IndexCommit commit) throws IOException {
@@ -90,7 +93,7 @@ private void deleteCommit(IndexCommit commit) throws IOException {
9093
assert commit.isDeleted() : "Deletion commit [" + commitDescription(commit) + "] was suppressed";
9194
}
9295

93-
private void updateTranslogDeletionPolicy() throws IOException {
96+
private void updateRetentionPolicy() throws IOException {
9497
assert Thread.holdsLock(this);
9598
logger.debug("Safe commit [{}], last commit [{}]", commitDescription(safeCommit), commitDescription(lastCommit));
9699
assert safeCommit.isDeleted() == false : "The safe commit must not be deleted";
@@ -101,6 +104,9 @@ private void updateTranslogDeletionPolicy() throws IOException {
101104
assert minRequiredGen <= lastGen : "minRequiredGen must not be greater than lastGen";
102105
translogDeletionPolicy.setTranslogGenerationOfLastCommit(lastGen);
103106
translogDeletionPolicy.setMinTranslogGenerationForRecovery(minRequiredGen);
107+
108+
softDeletesPolicy.setLocalCheckpointOfSafeCommit(
109+
Long.parseLong(safeCommit.getUserData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY)));
104110
}
105111

106112
/**

server/src/main/java/org/elasticsearch/index/engine/Engine.java

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ public abstract class Engine implements Closeable {
9898

9999
public static final String SYNC_COMMIT_ID = "sync_id";
100100
public static final String HISTORY_UUID_KEY = "history_uuid";
101+
public static final String MIN_RETAINED_SEQNO = "min_retained_seq_no";
101102

102103
protected final ShardId shardId;
103104
protected final String allocationId;
@@ -578,19 +579,17 @@ public enum SearcherScope {
578579

579580
public abstract void syncTranslog() throws IOException;
580581

581-
public abstract Closeable acquireTranslogRetentionLock();
582+
/**
583+
* Acquires a lock on the translog files and Lucene soft-deleted documents to prevent them from being trimmed
584+
*/
585+
public abstract Closeable acquireRetentionLockForPeerRecovery();
582586

583587
/**
584588
* Creates a new translog snapshot from this engine for reading translog operations whose seq# in the provided range.
585589
* The caller has to close the returned snapshot after finishing the reading.
586590
*/
587591
public abstract Translog.Snapshot newTranslogSnapshotBetween(long minSeqNo, long maxSeqNo) throws IOException;
588592

589-
/**
590-
* Returns the estimated number of translog operations in this engine whose seq# at least the provided seq#.
591-
*/
592-
public abstract int estimateTranslogOperationsFromMinSeq(long minSeqNo);
593-
594593
public abstract TranslogStats getTranslogStats();
595594

596595
/**
@@ -604,6 +603,19 @@ public enum SearcherScope {
604603
public abstract Translog.Snapshot newLuceneChangesSnapshot(String source, MapperService mapperService,
605604
long minSeqNo, long maxSeqNo, boolean requiredFullRange) throws IOException;
606605

606+
/**
607+
* Creates a new history snapshot for reading operations since the provided seqno.
608+
* The returned snapshot can be retrieved from either Lucene index or translog files.
609+
*/
610+
public abstract Translog.Snapshot readHistoryOperations(String source, MapperService mapperService, long startingSeqNo) throws IOException;
611+
612+
/**
613+
* Returns the estimated number of history operations whose seq# at least the provided seq# in this engine.
614+
*/
615+
public abstract int estimateNumberOfHistoryOperations(String source, MapperService mapperService, long startingSeqNo) throws IOException;
616+
617+
public abstract boolean hasCompleteOperationHistory(String source, MapperService mapperService, long startingSeqNo) throws IOException;
618+
607619
protected final void ensureOpen(Exception suppressed) {
608620
if (isClosed.get()) {
609621
AlreadyClosedException ace = new AlreadyClosedException(shardId + " engine is closed", failedEngine.get());

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 85 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import org.apache.logging.log4j.Logger;
2323
import org.apache.logging.log4j.message.ParameterizedMessage;
2424
import org.apache.lucene.document.Field;
25-
import org.apache.lucene.document.LongPoint;
2625
import org.apache.lucene.document.NumericDocValuesField;
2726
import org.apache.lucene.index.DirectoryReader;
2827
import org.apache.lucene.index.IndexCommit;
@@ -38,7 +37,6 @@
3837
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
3938
import org.apache.lucene.index.Term;
4039
import org.apache.lucene.search.IndexSearcher;
41-
import org.apache.lucene.search.Query;
4240
import org.apache.lucene.search.ReferenceManager;
4341
import org.apache.lucene.search.SearcherFactory;
4442
import org.apache.lucene.search.SearcherManager;
@@ -153,6 +151,7 @@ public class InternalEngine extends Engine {
153151
private final CounterMetric numDocUpdates = new CounterMetric();
154152
private final NumericDocValuesField softDeleteField = Lucene.newSoftDeleteField();
155153
private final boolean softDeleteEnabled;
154+
private final SoftDeletesPolicy softDeletesPolicy;
156155
private final LastRefreshedCheckpointListener lastRefreshedCheckpointListener;
157156

158157
/**
@@ -177,7 +176,6 @@ public InternalEngine(EngineConfig engineConfig) {
177176
if (engineConfig.isAutoGeneratedIDsOptimizationEnabled() == false) {
178177
maxUnsafeAutoIdTimestamp.set(Long.MAX_VALUE);
179178
}
180-
this.softDeleteEnabled = engineConfig.getIndexSettings().isSoftDeleteEnabled();
181179
final TranslogDeletionPolicy translogDeletionPolicy = new TranslogDeletionPolicy(
182180
engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(),
183181
engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis()
@@ -199,8 +197,10 @@ public InternalEngine(EngineConfig engineConfig) {
199197
assert translog.getGeneration() != null;
200198
this.translog = translog;
201199
this.localCheckpointTracker = createLocalCheckpointTracker(localCheckpointTrackerSupplier);
200+
this.softDeleteEnabled = engineConfig.getIndexSettings().isSoftDeleteEnabled();
201+
this.softDeletesPolicy = newSoftDeletesPolicy();
202202
this.combinedDeletionPolicy =
203-
new CombinedDeletionPolicy(logger, translogDeletionPolicy, translog::getLastSyncedGlobalCheckpoint);
203+
new CombinedDeletionPolicy(logger, translogDeletionPolicy, softDeletesPolicy, translog::getLastSyncedGlobalCheckpoint);
204204
writer = createWriter();
205205
bootstrapAppendOnlyInfoFromWriter(writer);
206206
historyUUID = loadHistoryUUID(writer);
@@ -257,6 +257,18 @@ private LocalCheckpointTracker createLocalCheckpointTracker(
257257
return localCheckpointTrackerSupplier.apply(maxSeqNo, localCheckpoint);
258258
}
259259

260+
private SoftDeletesPolicy newSoftDeletesPolicy() throws IOException {
261+
final Map<String, String> commitUserData = store.readLastCommittedSegmentsInfo().userData;
262+
final long lastMinRetainedSeqNo;
263+
if (commitUserData.containsKey(Engine.MIN_RETAINED_SEQNO)) {
264+
lastMinRetainedSeqNo = Long.parseLong(commitUserData.get(Engine.MIN_RETAINED_SEQNO));
265+
} else {
266+
lastMinRetainedSeqNo = Long.parseLong(commitUserData.get(SequenceNumbers.MAX_SEQ_NO)) + 1;
267+
}
268+
return new SoftDeletesPolicy(translog::getLastSyncedGlobalCheckpoint, lastMinRetainedSeqNo,
269+
engineConfig.getIndexSettings().getSoftDeleteRetentionOperations());
270+
}
271+
260272
/**
261273
* This reference manager delegates all it's refresh calls to another (internal) SearcherManager
262274
* The main purpose for this is that if we have external refreshes happening we don't issue extra
@@ -468,18 +480,39 @@ public void syncTranslog() throws IOException {
468480
}
469481

470482
@Override
471-
public Closeable acquireTranslogRetentionLock() {
472-
return getTranslog().acquireRetentionLock();
483+
public Translog.Snapshot newTranslogSnapshotBetween(long minSeqNo, long maxSeqNo) throws IOException {
484+
return getTranslog().getSnapshotBetween(minSeqNo, maxSeqNo);
473485
}
474486

487+
/**
488+
* Creates a new history snapshot for reading operations since the provided seqno.
489+
* The returned snapshot can be retrieved from either Lucene index or translog files.
490+
*/
475491
@Override
476-
public Translog.Snapshot newTranslogSnapshotBetween(long minSeqNo, long maxSeqNo) throws IOException {
477-
return getTranslog().getSnapshotBetween(minSeqNo, maxSeqNo);
492+
public Translog.Snapshot readHistoryOperations(String source, MapperService mapperService, long startingSeqNo) throws IOException {
493+
if (engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
494+
return newLuceneChangesSnapshot(source, mapperService, Math.max(0, startingSeqNo), Long.MAX_VALUE, false);
495+
} else {
496+
return getTranslog().getSnapshotBetween(startingSeqNo, Long.MAX_VALUE);
497+
}
478498
}
479499

500+
/**
501+
* Returns the estimated number of history operations whose seq# at least the provided seq# in this engine.
502+
*/
480503
@Override
481-
public int estimateTranslogOperationsFromMinSeq(long minSeqNo) {
482-
return getTranslog().estimateTotalOperationsFromMinSeq(minSeqNo);
504+
public int estimateNumberOfHistoryOperations(String source, MapperService mapperService, long startingSeqNo) throws IOException {
505+
if (engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
506+
try (Translog.Snapshot snapshot =
507+
newLuceneChangesSnapshot(source, mapperService, Math.max(0, startingSeqNo), Long.MAX_VALUE, false)) {
508+
return snapshot.totalOperations();
509+
} catch (IOException ex) {
510+
maybeFailEngine(source, ex);
511+
throw ex;
512+
}
513+
} else {
514+
return getTranslog().estimateTotalOperationsFromMinSeq(startingSeqNo);
515+
}
483516
}
484517

485518
@Override
@@ -2070,8 +2103,8 @@ private IndexWriterConfig getIndexWriterConfig() {
20702103
MergePolicy mergePolicy = config().getMergePolicy();
20712104
if (softDeleteEnabled) {
20722105
iwc.setSoftDeletesField(Lucene.SOFT_DELETE_FIELD);
2073-
mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, this::softDeletesRetentionQuery,
2074-
new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, this::softDeletesRetentionQuery, mergePolicy));
2106+
mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
2107+
new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
20752108
}
20762109
iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
20772110
iwc.setSimilarity(engineConfig.getSimilarity());
@@ -2084,20 +2117,6 @@ private IndexWriterConfig getIndexWriterConfig() {
20842117
return iwc;
20852118
}
20862119

2087-
/**
2088-
* Documents including tombstones are soft-deleted and matched this query will be retained and won't cleaned up by merges.
2089-
*/
2090-
private Query softDeletesRetentionQuery() {
2091-
ensureOpen();
2092-
// TODO: We send the safe commit in peer-recovery, thus we need to retain all operations after the local checkpoint of that commit.
2093-
final long retainedExtraOps = engineConfig.getIndexSettings().getSoftDeleteRetentionOperations();
2094-
// Prefer using the global checkpoint which is persisted on disk than an in-memory value.
2095-
// If we failed to fsync checkpoint but already used a higher global checkpoint value to clean up soft-deleted ops,
2096-
// then we may not have all required operations whose seq# greater than the global checkpoint after restarted.
2097-
final long persistedGlobalCheckpoint = translog.getLastSyncedGlobalCheckpoint();
2098-
return LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, persistedGlobalCheckpoint + 1 - retainedExtraOps, Long.MAX_VALUE);
2099-
}
2100-
21012120
/** Extended SearcherFactory that warms the segments if needed when acquiring a new searcher */
21022121
static final class SearchFactory extends EngineSearcherFactory {
21032122
private final Engine.Warmer warmer;
@@ -2284,6 +2303,9 @@ protected void commitIndexWriter(final IndexWriter writer, final Translog transl
22842303
commitData.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(localCheckpointTracker.getMaxSeqNo()));
22852304
commitData.put(MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID, Long.toString(maxUnsafeAutoIdTimestamp.get()));
22862305
commitData.put(HISTORY_UUID_KEY, historyUUID);
2306+
if (softDeleteEnabled) {
2307+
commitData.put(Engine.MIN_RETAINED_SEQNO, Long.toString(softDeletesPolicy.getMinRetainedSeqNo()));
2308+
}
22872309
logger.trace("committing writer with commit data [{}]", commitData);
22882310
return commitData.entrySet().iterator();
22892311
});
@@ -2339,6 +2361,8 @@ public void onSettingsChanged() {
23392361
final IndexSettings indexSettings = engineConfig.getIndexSettings();
23402362
translogDeletionPolicy.setRetentionAgeInMillis(indexSettings.getTranslogRetentionAge().getMillis());
23412363
translogDeletionPolicy.setRetentionSizeInBytes(indexSettings.getTranslogRetentionSize().getBytes());
2364+
2365+
softDeletesPolicy.setRetentionOperations(indexSettings.getSoftDeleteRetentionOperations());
23422366
}
23432367

23442368
public MergeStats getMergeStats() {
@@ -2452,6 +2476,41 @@ public Translog.Snapshot newLuceneChangesSnapshot(String source, MapperService m
24522476
}
24532477
}
24542478

2479+
@Override
2480+
public boolean hasCompleteOperationHistory(String source, MapperService mapperService, long startingSeqNo) throws IOException {
2481+
if (engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
2482+
return getMinRetainedSeqNo() <= startingSeqNo;
2483+
} else {
2484+
final long currentLocalCheckpoint = getLocalCheckpointTracker().getCheckpoint();
2485+
final LocalCheckpointTracker tracker = new LocalCheckpointTracker(startingSeqNo, startingSeqNo - 1);
2486+
try (Translog.Snapshot snapshot = getTranslog().getSnapshotBetween(startingSeqNo, Long.MAX_VALUE)) {
2487+
Translog.Operation operation;
2488+
while ((operation = snapshot.next()) != null) {
2489+
if (operation.seqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) {
2490+
tracker.markSeqNoAsCompleted(operation.seqNo());
2491+
}
2492+
}
2493+
}
2494+
return tracker.getCheckpoint() >= currentLocalCheckpoint;
2495+
}
2496+
}
2497+
2498+
/**
2499+
* Returns the minimum seqno that is retained in the Lucene index.
2500+
* Operations whose seq# are at least this value should exist in the Lucene index.
2501+
*/
2502+
final long getMinRetainedSeqNo() {
2503+
assert softDeleteEnabled : Thread.currentThread().getName();
2504+
return softDeletesPolicy.getMinRetainedSeqNo();
2505+
}
2506+
2507+
@Override
2508+
public Closeable acquireRetentionLockForPeerRecovery() {
2509+
final Closeable translogLock = translog.acquireRetentionLock();
2510+
final Releasable softDeletesLock = softDeletesPolicy.acquireRetentionLock();
2511+
return () -> IOUtils.close(translogLock, softDeletesLock);
2512+
}
2513+
24552514
@Override
24562515
public boolean isRecovering() {
24572516
return pendingTranslogRecovery.get();

server/src/main/java/org/elasticsearch/index/engine/LuceneChangesSnapshot.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public int totalOperations() {
116116
}
117117

118118
@Override
119-
public int overriddenOperations() {
119+
public int skippedOperations() {
120120
return skippedOperations;
121121
}
122122

0 commit comments

Comments
 (0)