Skip to content

Commit 1249e6b

Browse files
committed
Handle no-op document level failures (#46083)
Today we assume that document failures can not occur for no-ops. This assumption is bogus, as they can fail for a variety of reasons such as the Lucene index having reached the document limit. Because of this assumption, we were asserting that such a document-level failure would never happen. When this bogus assertion is violated, we fail the node, a catastrophe. Instead, we need to treat this as a fatal engine exception.
1 parent 564b803 commit 1249e6b

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

+8-2
Original file line numberDiff line numberDiff line change
@@ -1514,9 +1514,14 @@ private NoOpResult innerNoOp(final NoOp noOp) throws IOException {
15141514
: "Noop tombstone document but _tombstone field is not set [" + doc + " ]";
15151515
doc.add(softDeletesField);
15161516
indexWriter.addDocument(doc);
1517-
} catch (Exception ex) {
1517+
} catch (final Exception ex) {
1518+
/*
1519+
* Document level failures when adding a no-op are unexpected, we likely hit something fatal such as the Lucene
1520+
* index being corrupt, or the Lucene document limit. We have already issued a sequence number here so this is
1521+
* fatal, fail the engine.
1522+
*/
15181523
if (ex instanceof AlreadyClosedException == false && indexWriter.getTragicException() == null) {
1519-
throw new AssertionError("noop operation should never fail at document level", ex);
1524+
failEngine("no-op origin[" + noOp.origin() + "] seq#[" + noOp.seqNo() + "] failed at document level", ex);
15201525
}
15211526
throw ex;
15221527
}
@@ -2852,4 +2857,5 @@ private void restoreVersionMapAndCheckpointTracker(DirectoryReader directoryRead
28522857
// remove live entries in the version map
28532858
refresh("restore_version_map_and_checkpoint_tracker", SearcherScope.INTERNAL, true);
28542859
}
2860+
28552861
}

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

+25
Original file line numberDiff line numberDiff line change
@@ -5972,4 +5972,29 @@ public void testAlwaysRecordReplicaOrPeerRecoveryOperationsToTranslog() throws E
59725972
equalTo(seqNos));
59735973
}
59745974
}
5975+
5976+
public void testNoOpFailure() throws IOException {
5977+
engine.close();
5978+
final Settings settings = Settings.builder()
5979+
.put(defaultSettings.getSettings())
5980+
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true).build();
5981+
final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(
5982+
IndexMetaData.builder(defaultSettings.getIndexMetaData()).settings(settings).build());
5983+
try (Store store = createStore();
5984+
Engine engine = createEngine((dir, iwc) -> new IndexWriter(dir, iwc) {
5985+
@Override
5986+
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException {
5987+
throw new IllegalArgumentException("fatal");
5988+
}
5989+
}, null, null, config(indexSettings, store, createTempDir(), NoMergePolicy.INSTANCE, null))) {
5990+
final Engine.NoOp op = new Engine.NoOp(0, 0, PRIMARY, System.currentTimeMillis(), "test");
5991+
final IllegalArgumentException e = expectThrows(IllegalArgumentException. class, () -> engine.noOp(op));
5992+
assertThat(e.getMessage(), equalTo("fatal"));
5993+
assertTrue(engine.isClosed.get());
5994+
assertThat(engine.failedEngine.get(), not(nullValue()));
5995+
assertThat(engine.failedEngine.get(), instanceOf(IllegalArgumentException.class));
5996+
assertThat(engine.failedEngine.get().getMessage(), equalTo("fatal"));
5997+
}
5998+
}
5999+
59756000
}

0 commit comments

Comments
 (0)