Skip to content

Commit a520cc5

Browse files
authored
Always fail engine if delete operation fails (#40117)
Unlike index operations which can fail at the document level to analyzing errors, delete operations should never fail at the document level whether soft-deletes is enabled or not. With this change, we will always fail the engine if we fail to apply a delete operation to Lucene. Closes #33256
1 parent a87b139 commit a520cc5

File tree

4 files changed

+66
-71
lines changed

4 files changed

+66
-71
lines changed

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,26 +1256,16 @@ public DeleteResult delete(Delete delete) throws IOException {
12561256
plan.versionOfDeletion, getPrimaryTerm(), delete.seqNo(), plan.currentlyDeleted == false);
12571257
}
12581258
}
1259-
if (delete.origin().isFromTranslog() == false) {
1260-
final Translog.Location location;
1261-
if (deleteResult.getResultType() == Result.Type.SUCCESS) {
1262-
location = translog.add(new Translog.Delete(delete, deleteResult));
1263-
} else if (deleteResult.getSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) {
1264-
// if we have document failure, record it as a no-op in the translog and Lucene with the generated seq_no
1265-
final NoOp noOp = new NoOp(deleteResult.getSeqNo(), delete.primaryTerm(), delete.origin(),
1266-
delete.startTime(), deleteResult.getFailure().toString());
1267-
location = innerNoOp(noOp).getTranslogLocation();
1268-
} else {
1269-
location = null;
1270-
}
1259+
if (delete.origin().isFromTranslog() == false && deleteResult.getResultType() == Result.Type.SUCCESS) {
1260+
final Translog.Location location = translog.add(new Translog.Delete(delete, deleteResult));
12711261
deleteResult.setTranslogLocation(location);
12721262
}
12731263
localCheckpointTracker.markSeqNoAsCompleted(deleteResult.getSeqNo());
12741264
deleteResult.setTook(System.nanoTime() - delete.startTime());
12751265
deleteResult.freeze();
12761266
} catch (RuntimeException | IOException e) {
12771267
try {
1278-
maybeFailEngine("index", e);
1268+
maybeFailEngine("delete", e);
12791269
} catch (Exception inner) {
12801270
e.addSuppressed(inner);
12811271
}
@@ -1395,12 +1385,9 @@ private DeleteResult deleteInLucene(Delete delete, DeletionStrategy plan) throws
13951385
plan.versionOfDeletion, getPrimaryTerm(), delete.seqNo(), plan.currentlyDeleted == false);
13961386
} catch (Exception ex) {
13971387
if (indexWriter.getTragicException() == null) {
1398-
// there is no tragic event and such it must be a document level failure
1399-
return new DeleteResult(
1400-
ex, plan.versionOfDeletion, delete.primaryTerm(), delete.seqNo(), plan.currentlyDeleted == false);
1401-
} else {
1402-
throw ex;
1388+
throw new AssertionError("delete operation should never fail at document level", ex);
14031389
}
1390+
throw ex;
14041391
}
14051392
}
14061393

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3259,22 +3259,6 @@ public void testHandleDocumentFailure() throws Exception {
32593259
assertNotNull(indexResult.getTranslogLocation());
32603260
engine.index(indexForDoc(doc2));
32613261

3262-
// test failure while deleting
3263-
// all these simulated exceptions are not fatal to the IW so we treat them as document failures
3264-
final Engine.DeleteResult deleteResult;
3265-
if (randomBoolean()) {
3266-
throwingIndexWriter.get().setThrowFailure(() -> new IOException("simulated"));
3267-
deleteResult = engine.delete(new Engine.Delete("test", "1", newUid(doc1), primaryTerm.get()));
3268-
assertThat(deleteResult.getFailure(), instanceOf(IOException.class));
3269-
} else {
3270-
throwingIndexWriter.get().setThrowFailure(() -> new IllegalArgumentException("simulated max token length"));
3271-
deleteResult = engine.delete(new Engine.Delete("test", "1", newUid(doc1), primaryTerm.get()));
3272-
assertThat(deleteResult.getFailure(),
3273-
instanceOf(IllegalArgumentException.class));
3274-
}
3275-
assertThat(deleteResult.getVersion(), equalTo(2L));
3276-
assertThat(deleteResult.getSeqNo(), equalTo(3L));
3277-
32783262
// test non document level failure is thrown
32793263
if (randomBoolean()) {
32803264
// simulate close by corruption
@@ -3308,6 +3292,40 @@ public BytesRef binaryValue() {
33083292
}
33093293
}
33103294

3295+
public void testDeleteWithFatalError() throws Exception {
3296+
final IllegalStateException tragicException = new IllegalStateException("fail to store tombstone");
3297+
try (Store store = createStore()) {
3298+
EngineConfig.TombstoneDocSupplier tombstoneDocSupplier = new EngineConfig.TombstoneDocSupplier() {
3299+
@Override
3300+
public ParsedDocument newDeleteTombstoneDoc(String type, String id) {
3301+
ParsedDocument parsedDocument = tombstoneDocSupplier().newDeleteTombstoneDoc(type, id);
3302+
parsedDocument.rootDoc().add(new StoredField("foo", "bar") {
3303+
// this is a hack to add a failure during store document which triggers a tragic event
3304+
// and in turn fails the engine
3305+
@Override
3306+
public BytesRef binaryValue() {
3307+
throw tragicException;
3308+
}
3309+
});
3310+
return parsedDocument;
3311+
}
3312+
3313+
@Override
3314+
public ParsedDocument newNoopTombstoneDoc(String reason) {
3315+
return tombstoneDocSupplier().newNoopTombstoneDoc(reason);
3316+
}
3317+
};
3318+
try (InternalEngine engine = createEngine(null, null, null, config(this.engine.config(), store, tombstoneDocSupplier))) {
3319+
final ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField(), SOURCE, null);
3320+
engine.index(indexForDoc(doc));
3321+
expectThrows(IllegalStateException.class,
3322+
() -> engine.delete(new Engine.Delete("test", "1", newUid("1"), primaryTerm.get())));
3323+
assertTrue(engine.isClosed.get());
3324+
assertSame(tragicException, engine.failedEngine.get());
3325+
}
3326+
}
3327+
}
3328+
33113329
public void testDoubleDeliveryPrimary() throws IOException {
33123330
final ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField(),
33133331
new BytesArray("{}".getBytes(Charset.defaultCharset())), null);

server/src/test/java/org/elasticsearch/index/replication/IndexLevelReplicationTests.java

Lines changed: 10 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
*/
1919
package org.elasticsearch.index.replication;
2020

21-
import org.apache.lucene.document.Field;
2221
import org.apache.lucene.index.IndexWriter;
2322
import org.apache.lucene.index.IndexableField;
2423
import org.apache.lucene.index.Term;
@@ -58,7 +57,6 @@
5857
import org.hamcrest.Matcher;
5958

6059
import java.io.IOException;
61-
import java.nio.charset.StandardCharsets;
6260
import java.util.ArrayList;
6361
import java.util.Collections;
6462
import java.util.List;
@@ -418,10 +416,8 @@ public void testReplicaOperationWithConcurrentPrimaryPromotion() throws Exceptio
418416
*/
419417
public void testDocumentFailureReplication() throws Exception {
420418
final IOException indexException = new IOException("simulated indexing failure");
421-
final IOException deleteException = new IOException("simulated deleting failure");
422419
final EngineFactory engineFactory = config -> InternalEngineTests.createInternalEngine((dir, iwc) ->
423420
new IndexWriter(dir, iwc) {
424-
final AtomicBoolean throwAfterIndexedOneDoc = new AtomicBoolean(); // need one document to trigger delete in IW.
425421
@Override
426422
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException {
427423
boolean isTombstone = false;
@@ -430,20 +426,12 @@ public long addDocument(Iterable<? extends IndexableField> doc) throws IOExcepti
430426
isTombstone = true;
431427
}
432428
}
433-
if (isTombstone == false && throwAfterIndexedOneDoc.getAndSet(true)) {
434-
throw indexException;
429+
if (isTombstone) {
430+
return super.addDocument(doc); // allow to add Noop
435431
} else {
436-
return super.addDocument(doc);
432+
throw indexException;
437433
}
438434
}
439-
@Override
440-
public long deleteDocuments(Term... terms) throws IOException {
441-
throw deleteException;
442-
}
443-
@Override
444-
public long softUpdateDocument(Term term, Iterable<? extends IndexableField> doc, Field...fields) throws IOException {
445-
throw deleteException; // a delete uses softUpdateDocument API if soft-deletes enabled
446-
}
447435
}, null, null, config);
448436
try (ReplicationGroup shards = new ReplicationGroup(buildIndexMetaData(0)) {
449437
@Override
@@ -454,20 +442,13 @@ public long softUpdateDocument(Term term, Iterable<? extends IndexableField> doc
454442
long primaryTerm = shards.getPrimary().getPendingPrimaryTerm();
455443
List<Translog.Operation> expectedTranslogOps = new ArrayList<>();
456444
BulkItemResponse indexResp = shards.index(new IndexRequest(index.getName(), "type", "1").source("{}", XContentType.JSON));
457-
assertThat(indexResp.isFailed(), equalTo(false));
458-
expectedTranslogOps.add(new Translog.Index("type", "1", 0, primaryTerm, 1, "{}".getBytes(StandardCharsets.UTF_8), null, -1));
445+
assertThat(indexResp.isFailed(), equalTo(true));
446+
assertThat(indexResp.getFailure().getCause(), equalTo(indexException));
447+
expectedTranslogOps.add(new Translog.NoOp(0, primaryTerm, indexException.toString()));
459448
try (Translog.Snapshot snapshot = getTranslog(shards.getPrimary()).newSnapshot()) {
460449
assertThat(snapshot, SnapshotMatchers.containsOperationsInAnyOrder(expectedTranslogOps));
461450
}
462-
463-
indexResp = shards.index(new IndexRequest(index.getName(), "type", "any").source("{}", XContentType.JSON));
464-
assertThat(indexResp.getFailure().getCause(), equalTo(indexException));
465-
expectedTranslogOps.add(new Translog.NoOp(1, primaryTerm, indexException.toString()));
466-
467-
BulkItemResponse deleteResp = shards.delete(new DeleteRequest(index.getName(), "type", "1"));
468-
assertThat(deleteResp.getFailure().getCause(), equalTo(deleteException));
469-
expectedTranslogOps.add(new Translog.NoOp(2, primaryTerm, deleteException.toString()));
470-
shards.assertAllEqual(1);
451+
shards.assertAllEqual(0);
471452

472453
int nReplica = randomIntBetween(1, 3);
473454
for (int i = 0; i < nReplica; i++) {
@@ -482,14 +463,10 @@ public long softUpdateDocument(Term term, Iterable<? extends IndexableField> doc
482463
assertThat(snapshot, SnapshotMatchers.containsOperationsInAnyOrder(expectedTranslogOps));
483464
}
484465
}
485-
// unlike previous failures, these two failures replicated directly from the replication channel.
466+
// the failure replicated directly from the replication channel.
486467
indexResp = shards.index(new IndexRequest(index.getName(), "type", "any").source("{}", XContentType.JSON));
487468
assertThat(indexResp.getFailure().getCause(), equalTo(indexException));
488-
expectedTranslogOps.add(new Translog.NoOp(3, primaryTerm, indexException.toString()));
489-
490-
deleteResp = shards.delete(new DeleteRequest(index.getName(), "type", "1"));
491-
assertThat(deleteResp.getFailure().getCause(), equalTo(deleteException));
492-
expectedTranslogOps.add(new Translog.NoOp(4, primaryTerm, deleteException.toString()));
469+
expectedTranslogOps.add(new Translog.NoOp(1, primaryTerm, indexException.toString()));
493470

494471
for (IndexShard shard : shards) {
495472
try (Translog.Snapshot snapshot = getTranslog(shard).newSnapshot()) {
@@ -499,7 +476,7 @@ public long softUpdateDocument(Term term, Iterable<? extends IndexableField> doc
499476
assertThat(snapshot, SnapshotMatchers.containsOperationsInAnyOrder(expectedTranslogOps));
500477
}
501478
}
502-
shards.assertAllEqual(1);
479+
shards.assertAllEqual(0);
503480
}
504481
}
505482

test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -500,10 +500,10 @@ protected InternalEngine createEngine(EngineConfig config) throws IOException {
500500
return createEngine(null, null, null, config);
501501
}
502502

503-
private InternalEngine createEngine(@Nullable IndexWriterFactory indexWriterFactory,
504-
@Nullable BiFunction<Long, Long, LocalCheckpointTracker> localCheckpointTrackerSupplier,
505-
@Nullable ToLongBiFunction<Engine, Engine.Operation> seqNoForOperation,
506-
EngineConfig config) throws IOException {
503+
protected InternalEngine createEngine(@Nullable IndexWriterFactory indexWriterFactory,
504+
@Nullable BiFunction<Long, Long, LocalCheckpointTracker> localCheckpointTrackerSupplier,
505+
@Nullable ToLongBiFunction<Engine, Engine.Operation> seqNoForOperation,
506+
EngineConfig config) throws IOException {
507507
final Store store = config.getStore();
508508
final Directory directory = store.directory();
509509
if (Lucene.indexExists(directory) == false) {
@@ -697,6 +697,19 @@ public EngineConfig config(
697697
tombstoneDocSupplier());
698698
}
699699

700+
protected EngineConfig config(EngineConfig config, Store store, EngineConfig.TombstoneDocSupplier tombstoneDocSupplier) {
701+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test",
702+
Settings.builder().put(config.getIndexSettings().getSettings())
703+
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true).build());
704+
return new EngineConfig(config.getShardId(), config.getAllocationId(), config.getThreadPool(),
705+
indexSettings, config.getWarmer(), store, config.getMergePolicy(), config.getAnalyzer(), config.getSimilarity(),
706+
new CodecService(null, logger), config.getEventListener(), config.getQueryCache(), config.getQueryCachingPolicy(),
707+
config.getTranslogConfig(), config.getFlushMergesAfter(), config.getExternalRefreshListener(),
708+
config.getInternalRefreshListener(), config.getIndexSort(), config.getCircuitBreakerService(),
709+
config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(),
710+
config.getPrimaryTermSupplier(), tombstoneDocSupplier);
711+
}
712+
700713
protected EngineConfig noOpConfig(IndexSettings indexSettings, Store store, Path translogPath) {
701714
return noOpConfig(indexSettings, store, translogPath, null);
702715
}

0 commit comments

Comments
 (0)