Skip to content

Commit 01e326d

Browse files
Fix ref count handling in Engine.failEngine (elastic#48639) (elastic#48646)
The store may already be closed by the time we get here, in which case incrementing its ref count throws. This change switches to the guarded ref count increment (`store.tryIncRef()`) so that case is handled gracefully. Closes elastic#48625
1 parent f2d6893 commit 01e326d

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

server/src/main/java/org/elasticsearch/index/engine/Engine.java

+13-8
Original file line numberDiff line numberDiff line change
@@ -1157,7 +1157,6 @@ public void failEngine(String reason, @Nullable Exception failure) {
11571157
maybeDie(reason, failure);
11581158
}
11591159
if (failEngineLock.tryLock()) {
1160-
store.incRef();
11611160
try {
11621161
if (failedEngine.get() != null) {
11631162
logger.warn(() ->
@@ -1179,11 +1178,19 @@ public void failEngine(String reason, @Nullable Exception failure) {
11791178
// on the same node that we don't see the corrupted marker file when
11801179
// the shard is initializing
11811180
if (Lucene.isCorruptionException(failure)) {
1182-
try {
1183-
store.markStoreCorrupted(new IOException("failed engine (reason: [" + reason + "])",
1184-
ExceptionsHelper.unwrapCorruption(failure)));
1185-
} catch (IOException e) {
1186-
logger.warn("Couldn't mark store corrupted", e);
1181+
if (store.tryIncRef()) {
1182+
try {
1183+
store.markStoreCorrupted(new IOException("failed engine (reason: [" + reason + "])",
1184+
ExceptionsHelper.unwrapCorruption(failure)));
1185+
} catch (IOException e) {
1186+
logger.warn("Couldn't mark store corrupted", e);
1187+
} finally {
1188+
store.decRef();
1189+
}
1190+
} else {
1191+
logger.warn(() ->
1192+
new ParameterizedMessage("tried to mark store as corrupted but store is already closed. [{}]", reason),
1193+
failure);
11871194
}
11881195
}
11891196
eventListener.onFailedEngine(reason, failure);
@@ -1192,8 +1199,6 @@ public void failEngine(String reason, @Nullable Exception failure) {
11921199
if (failure != null) inner.addSuppressed(failure);
11931200
// don't bubble up these exceptions up
11941201
logger.warn("failEngine threw exception", inner);
1195-
} finally {
1196-
store.decRef();
11971202
}
11981203
} else {
11991204
logger.debug(() -> new ParameterizedMessage("tried to fail engine but could not acquire lock - engine should " +

server/src/test/java/org/elasticsearch/recovery/RelocationIT.java

-1
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,6 @@ public void testIndexAndRelocateConcurrently() throws Exception {
490490
docs[i] = client().prepareIndex("test", "type1", id).setSource("field1", English.intToEnglish(numDocs + i));
491491
}
492492
indexRandom(true, docs);
493-
numDocs *= 2;
494493

495494
logger.info(" --> waiting for relocation to complete");
496495
ensureGreen(TimeValue.timeValueSeconds(60), "test"); // move all shards to the new nodes (it waits on relocation)

0 commit comments

Comments
 (0)