Skip to content

Commit 4f62b51

Browse files
authored
Use lastSyncedGlobalCheckpoint in deletion policy (#27826)
Today we use the in-memory global checkpoint from SequenceNumbersService to clean up unneeded commit points; however, the latest global checkpoint may not have been fsynced to disk yet. If the translog checkpoint fsync fails after we have already used a higher global checkpoint to clean up commit points, then we may have removed a safe commit that we were trying to keep for recovery. This commit updates the deletion policy to use lastSyncedGlobalCheckpoint from Translog rather than the in-memory global checkpoint. Relates #27606
1 parent 43ff38c commit 4f62b51

File tree

2 files changed

+29
-20
lines changed

2 files changed

+29
-20
lines changed

core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,11 @@ public InternalEngine(EngineConfig engineConfig) {
182182
final SeqNoStats seqNoStats = loadSeqNoStats(openMode);
183183
logger.trace("recovered [{}]", seqNoStats);
184184
this.seqNoService = seqNoServiceSupplier.apply(engineConfig, seqNoStats);
185+
translog = openTranslog(engineConfig, translogDeletionPolicy, seqNoService::getGlobalCheckpoint);
186+
assert translog.getGeneration() != null;
187+
this.translog = translog;
185188
this.snapshotDeletionPolicy = new SnapshotDeletionPolicy(
186-
new CombinedDeletionPolicy(openMode, translogDeletionPolicy, seqNoService::getGlobalCheckpoint)
189+
new CombinedDeletionPolicy(openMode, translogDeletionPolicy, translog::getLastSyncedGlobalCheckpoint)
187190
);
188191
writer = createWriter(openMode == EngineConfig.OpenMode.CREATE_INDEX_AND_TRANSLOG);
189192
updateMaxUnsafeAutoIdTimestampFromWriter(writer);
@@ -195,9 +198,6 @@ public InternalEngine(EngineConfig engineConfig) {
195198
historyUUID = loadOrGenerateHistoryUUID(writer, engineConfig.getForceNewHistoryUUID());
196199
Objects.requireNonNull(historyUUID, "history uuid should not be null");
197200
indexWriter = writer;
198-
translog = openTranslog(engineConfig, writer, translogDeletionPolicy, () -> seqNoService.getGlobalCheckpoint());
199-
assert translog.getGeneration() != null;
200-
this.translog = translog;
201201
updateWriterOnOpen();
202202
} catch (IOException | TranslogCorruptedException e) {
203203
throw new EngineCreationFailureException(shardId, "failed to create engine", e);
@@ -437,12 +437,12 @@ private void recoverFromTranslogInternal() throws IOException {
437437
translog.trimUnreferencedReaders();
438438
}
439439

440-
private Translog openTranslog(EngineConfig engineConfig, IndexWriter writer, TranslogDeletionPolicy translogDeletionPolicy, LongSupplier globalCheckpointSupplier) throws IOException {
440+
private Translog openTranslog(EngineConfig engineConfig, TranslogDeletionPolicy translogDeletionPolicy, LongSupplier globalCheckpointSupplier) throws IOException {
441441
assert openMode != null;
442442
final TranslogConfig translogConfig = engineConfig.getTranslogConfig();
443443
String translogUUID = null;
444444
if (openMode == EngineConfig.OpenMode.OPEN_INDEX_AND_TRANSLOG) {
445-
translogUUID = loadTranslogUUIDFromCommit(writer);
445+
translogUUID = loadTranslogUUIDFromLastCommit();
446446
// We expect that this shard already exists, so it must already have an existing translog else something is badly wrong!
447447
if (translogUUID == null) {
448448
throw new IndexFormatTooOldException("translog", "translog has no generation nor a UUID - this might be an index from a previous version consider upgrading to N-1 first");
@@ -492,14 +492,13 @@ public long getWritingBytes() {
492492
}
493493

494494
/**
495-
* Reads the current stored translog ID from the IW commit data. If the id is not found, recommits the current
496-
* translog id into lucene and returns null.
495+
* Reads the current stored translog ID from the last commit data.
497496
*/
498497
@Nullable
499-
private String loadTranslogUUIDFromCommit(IndexWriter writer) throws IOException {
500-
// commit on a just opened writer will commit even if there are no changes done to it
501-
// we rely on that for the commit data translog id key
502-
final Map<String, String> commitUserData = commitDataAsMap(writer);
498+
private String loadTranslogUUIDFromLastCommit() throws IOException {
499+
assert openMode == EngineConfig.OpenMode.OPEN_INDEX_AND_TRANSLOG :
500+
"Only reuse existing translogUUID with OPEN_INDEX_AND_TRANSLOG; openMode = [" + openMode + "]";
501+
final Map<String, String> commitUserData = store.readLastCommittedSegmentsInfo().getUserData();
503502
if (commitUserData.containsKey(Translog.TRANSLOG_UUID_KEY)) {
504503
if (commitUserData.containsKey(Translog.TRANSLOG_GENERATION_KEY) == false) {
505504
throw new IllegalStateException("commit doesn't contain translog generation id");

core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,6 @@
168168
import static org.hamcrest.Matchers.greaterThan;
169169
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
170170
import static org.hamcrest.Matchers.hasKey;
171-
import static org.hamcrest.Matchers.hasSize;
172171
import static org.hamcrest.Matchers.lessThanOrEqualTo;
173172
import static org.hamcrest.Matchers.not;
174173
import static org.hamcrest.Matchers.notNullValue;
@@ -4263,29 +4262,40 @@ public long getGlobalCheckpoint() {
42634262
.put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), randomFrom("-1", "512b", "1gb")));
42644263
indexSettings.updateIndexMetaData(builder.build());
42654264

4265+
final Path translogPath = createTempDir();
42664266
store = createStore();
4267-
try (InternalEngine engine
4268-
= createEngine(indexSettings, store, createTempDir(), NoMergePolicy.INSTANCE, null, seqNoServiceSupplier)) {
4267+
final EngineConfig engineConfig = config(indexSettings, store, translogPath, NoMergePolicy.INSTANCE, null);
4268+
try (Engine engine = new InternalEngine(engineConfig, seqNoServiceSupplier) {
4269+
@Override
4270+
protected void commitIndexWriter(IndexWriter writer, Translog translog, String syncId) throws IOException {
4271+
// Advance the global checkpoint during the flush to create a lag between a persisted global checkpoint in the translog
4272+
// (this value is visible to the deletion policy) and an in memory global checkpoint in the SequenceNumbersService.
4273+
if (rarely()) {
4274+
globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), seqNoService().getLocalCheckpoint()));
4275+
}
4276+
super.commitIndexWriter(writer, translog, syncId);
4277+
}
4278+
}){
42694279
int numDocs = scaledRandomIntBetween(10, 100);
42704280
for (int docId = 0; docId < numDocs; docId++) {
42714281
ParseContext.Document document = testDocumentWithTextField();
42724282
document.add(new Field(SourceFieldMapper.NAME, BytesReference.toBytes(B_1), SourceFieldMapper.Defaults.FIELD_TYPE));
42734283
engine.index(indexForDoc(testParsedDocument(Integer.toString(docId), null, document, B_1, null)));
42744284
if (frequently()) {
4275-
globalCheckpoint.set(randomIntBetween(
4276-
Math.toIntExact(engine.seqNoService().getGlobalCheckpoint()),
4277-
Math.toIntExact(engine.seqNoService().getLocalCheckpoint())));
4285+
globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), engine.seqNoService().getLocalCheckpoint()));
4286+
engine.getTranslog().sync();
42784287
}
42794288
if (frequently()) {
4289+
final long lastSyncedGlobalCheckpoint = Translog.readGlobalCheckpoint(translogPath);
42804290
engine.flush(randomBoolean(), true);
42814291
final List<IndexCommit> commits = DirectoryReader.listCommits(store.directory());
42824292
// Keep only one safe commit as the oldest commit.
42834293
final IndexCommit safeCommit = commits.get(0);
42844294
assertThat(Long.parseLong(safeCommit.getUserData().get(SequenceNumbers.MAX_SEQ_NO)),
4285-
lessThanOrEqualTo(globalCheckpoint.get()));
4295+
lessThanOrEqualTo(lastSyncedGlobalCheckpoint));
42864296
for (int i = 1; i < commits.size(); i++) {
42874297
assertThat(Long.parseLong(commits.get(i).getUserData().get(SequenceNumbers.MAX_SEQ_NO)),
4288-
greaterThan(globalCheckpoint.get()));
4298+
greaterThan(lastSyncedGlobalCheckpoint));
42894299
}
42904300
// Make sure we keep all translog operations after the local checkpoint of the safe commit.
42914301
long localCheckpointFromSafeCommit = Long.parseLong(safeCommit.getUserData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY));

0 commit comments

Comments
 (0)