Skip to content

Commit d0ac828

Browse files
Bubble-up exceptions from scheduler (#38441)
Instead of logging warnings we now rethrow exceptions thrown inside scheduled/submitted tasks. This will still log them as warnings in production but has the added benefit that if they are thrown during unit/integration test runs, the test will be flagged as an error. Fixed NPE in GlobalCheckPointListeners that caused CCR tests (IndexFollowingIT and likely others) to fail. This is a continuation of #38014 Backports #38317
1 parent 401c45c commit d0ac828

File tree

5 files changed

+82
-103
lines changed

5 files changed

+82
-103
lines changed

qa/evil-tests/src/test/java/org/elasticsearch/threadpool/EvilThreadPoolTests.java

Lines changed: 61 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -19,32 +19,29 @@
1919

2020
package org.elasticsearch.threadpool;
2121

22-
import org.apache.logging.log4j.Level;
23-
import org.apache.logging.log4j.LogManager;
24-
import org.apache.logging.log4j.Logger;
25-
import org.apache.logging.log4j.core.LogEvent;
26-
import org.elasticsearch.common.logging.Loggers;
22+
import org.elasticsearch.common.SuppressForbidden;
2723
import org.elasticsearch.common.settings.Settings;
2824
import org.elasticsearch.common.unit.TimeValue;
2925
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
3026
import org.elasticsearch.common.util.concurrent.EsExecutors;
3127
import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor;
3228
import org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor;
3329
import org.elasticsearch.test.ESTestCase;
34-
import org.elasticsearch.test.MockLogAppender;
3530
import org.junit.After;
3631
import org.junit.Before;
3732

3833
import java.util.Optional;
3934
import java.util.concurrent.CountDownLatch;
35+
import java.util.concurrent.ExecutionException;
4036
import java.util.concurrent.ExecutorService;
37+
import java.util.concurrent.ScheduledFuture;
4138
import java.util.concurrent.ScheduledThreadPoolExecutor;
4239
import java.util.concurrent.TimeUnit;
4340
import java.util.concurrent.atomic.AtomicReference;
41+
import java.util.function.BiFunction;
4442
import java.util.function.Consumer;
4543

4644
import static org.hamcrest.Matchers.containsString;
47-
import static org.hamcrest.Matchers.equalTo;
4845
import static org.hamcrest.Matchers.hasToString;
4946
import static org.hamcrest.Matchers.instanceOf;
5047

@@ -260,6 +257,50 @@ public void testExecutionExceptionOnScheduler() throws InterruptedException {
260257
}
261258
}
262259

260+
@SuppressForbidden(reason = "this tests that the deprecated method still works")
261+
public void testDeprecatedSchedule() throws ExecutionException, InterruptedException {
262+
verifyDeprecatedSchedule(((threadPool, runnable)
263+
-> threadPool.schedule(TimeValue.timeValueMillis(randomInt(10)), ThreadPool.Names.SAME, runnable)));
264+
}
265+
266+
public void testThreadPoolScheduleDeprecated() throws ExecutionException, InterruptedException {
267+
verifyDeprecatedSchedule(((threadPool, runnable)
268+
-> threadPool.scheduleDeprecated(TimeValue.timeValueMillis(randomInt(10)), ThreadPool.Names.SAME, runnable)));
269+
}
270+
271+
private void verifyDeprecatedSchedule(BiFunction<ThreadPool,
272+
Runnable, ScheduledFuture<?>> scheduleFunction) throws InterruptedException, ExecutionException {
273+
Thread.UncaughtExceptionHandler originalHandler = Thread.getDefaultUncaughtExceptionHandler();
274+
CountDownLatch missingExceptions = new CountDownLatch(1);
275+
Thread.setDefaultUncaughtExceptionHandler((t, e) -> {
276+
missingExceptions.countDown();
277+
});
278+
try {
279+
ThreadPool threadPool = new TestThreadPool("test");
280+
CountDownLatch missingExecutions = new CountDownLatch(1);
281+
try {
282+
scheduleFunction.apply(threadPool, missingExecutions::countDown)
283+
.get();
284+
assertEquals(0, missingExecutions.getCount());
285+
286+
ExecutionException exception = expectThrows(ExecutionException.class,
287+
"schedule(...).get() must throw exception from runnable",
288+
() -> scheduleFunction.apply(threadPool,
289+
() -> {
290+
throw new IllegalArgumentException("FAIL");
291+
}
292+
).get());
293+
294+
assertEquals(IllegalArgumentException.class, exception.getCause().getClass());
295+
assertTrue(missingExceptions.await(30, TimeUnit.SECONDS));
296+
} finally {
297+
ThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS);
298+
}
299+
} finally {
300+
Thread.setDefaultUncaughtExceptionHandler(originalHandler);
301+
}
302+
}
303+
263304
private Runnable delayMillis(Runnable r, int ms) {
264305
return () -> {
265306
try {
@@ -369,60 +410,26 @@ private void runExecutionTest(
369410

370411
final CountDownLatch supplierLatch = new CountDownLatch(1);
371412

372-
Runnable job = () -> {
373-
try {
374-
runnable.run();
375-
} finally {
376-
supplierLatch.countDown();
377-
}
378-
};
379-
380-
// snoop on logging to also handle the cases where exceptions are simply logged in Scheduler.
381-
final Logger schedulerLogger = LogManager.getLogger(Scheduler.SafeScheduledThreadPoolExecutor.class);
382-
final MockLogAppender appender = new MockLogAppender();
383-
appender.addExpectation(
384-
new MockLogAppender.LoggingExpectation() {
385-
@Override
386-
public void match(LogEvent event) {
387-
if (event.getLevel() == Level.WARN) {
388-
assertThat("no other warnings than those expected",
389-
event.getMessage().getFormattedMessage(),
390-
equalTo("uncaught exception in scheduled thread [" + Thread.currentThread().getName() + "]"));
391-
assertTrue(expectThrowable);
392-
assertNotNull(event.getThrown());
393-
assertTrue("only one message allowed", throwableReference.compareAndSet(null, event.getThrown()));
394-
uncaughtExceptionHandlerLatch.countDown();
395-
}
396-
}
397-
398-
@Override
399-
public void assertMatched() {
413+
try {
414+
runner.accept(() -> {
415+
try {
416+
runnable.run();
417+
} finally {
418+
supplierLatch.countDown();
400419
}
401420
});
421+
} catch (Throwable t) {
422+
consumer.accept(Optional.of(t));
423+
return;
424+
}
402425

403-
appender.start();
404-
Loggers.addAppender(schedulerLogger, appender);
405-
try {
406-
try {
407-
runner.accept(job);
408-
} catch (Throwable t) {
409-
consumer.accept(Optional.of(t));
410-
return;
411-
}
412-
413-
supplierLatch.await();
426+
supplierLatch.await();
414427

415-
if (expectThrowable) {
416-
uncaughtExceptionHandlerLatch.await();
417-
}
418-
} finally {
419-
Loggers.removeAppender(schedulerLogger, appender);
420-
appender.stop();
428+
if (expectThrowable) {
429+
uncaughtExceptionHandlerLatch.await();
421430
}
422431

423432
consumer.accept(Optional.ofNullable(throwableReference.get()));
424-
} catch (IllegalAccessException e) {
425-
throw new RuntimeException(e);
426433
} finally {
427434
Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler);
428435
}

server/src/main/java/org/elasticsearch/index/shard/GlobalCheckpointListeners.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ synchronized void add(final long waitingForGlobalCheckpoint, final GlobalCheckpo
135135
* before we could be cancelled by the notification. In this case, our listener here would
136136
* not be in the map and we should not fire the timeout logic.
137137
*/
138-
removed = listeners.remove(listener).v2() != null;
138+
removed = listeners.remove(listener) != null;
139139
}
140140
if (removed) {
141141
final TimeoutException e = new TimeoutException(timeout.getStringRep());

server/src/main/java/org/elasticsearch/threadpool/Scheduler.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
import org.apache.logging.log4j.LogManager;
2323
import org.apache.logging.log4j.Logger;
24-
import org.apache.logging.log4j.message.ParameterizedMessage;
24+
import org.elasticsearch.ExceptionsHelper;
2525
import org.elasticsearch.common.SuppressForbidden;
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.common.unit.TimeValue;
@@ -47,8 +47,8 @@ public interface Scheduler {
4747
/**
4848
* Create a scheduler that can be used client side. Server side, please use <code>ThreadPool.schedule</code> instead.
4949
*
50-
* Notice that if any scheduled jobs fail with an exception, they will be logged as a warning. This includes jobs started
51-
* using execute, submit and schedule.
50+
* Notice that if any scheduled jobs fail with an exception, these will bubble up to the uncaught exception handler where they will
51+
* be logged as a warning. This includes jobs started using execute, submit and schedule.
5252
* @param settings the settings to use
5353
* @return executor
5454
*/
@@ -272,7 +272,8 @@ public void onAfter() {
272272
}
273273

274274
/**
275-
* This subclass ensures to properly bubble up Throwable instances of type Error and logs exceptions thrown in submitted/scheduled tasks
275+
* This subclass ensures to properly bubble up Throwable instances of both type Error and Exception thrown in submitted/scheduled
276+
* tasks to the uncaught exception handler
276277
*/
277278
class SafeScheduledThreadPoolExecutor extends ScheduledThreadPoolExecutor {
278279
private static final Logger logger = LogManager.getLogger(SafeScheduledThreadPoolExecutor.class);
@@ -294,12 +295,10 @@ public SafeScheduledThreadPoolExecutor(int corePoolSize) {
294295

295296
@Override
296297
protected void afterExecute(Runnable r, Throwable t) {
297-
Throwable exception = EsExecutors.rethrowErrors(r);
298-
if (exception != null) {
299-
logger.warn(() ->
300-
new ParameterizedMessage("uncaught exception in scheduled thread [{}]", Thread.currentThread().getName()),
301-
exception);
302-
}
298+
if (t != null) return;
299+
// Scheduler only allows Runnable's so we expect no checked exceptions here. If anyone uses submit directly on `this`, we
300+
// accept the wrapped exception in the output.
301+
ExceptionsHelper.reThrowIfNotNull(EsExecutors.rethrowErrors(r));
303302
}
304303
}
305304
}

server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.logging.log4j.Logger;
2424
import org.apache.logging.log4j.message.ParameterizedMessage;
2525
import org.apache.lucene.util.Counter;
26+
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
2627
import org.elasticsearch.core.internal.io.IOUtils;
2728
import org.elasticsearch.Version;
2829
import org.elasticsearch.common.Nullable;
@@ -497,7 +498,16 @@ class ThreadedRunnable implements Runnable {
497498

498499
@Override
499500
public void run() {
500-
executor.execute(runnable);
501+
try {
502+
executor.execute(runnable);
503+
} catch (EsRejectedExecutionException e) {
504+
if (e.isExecutorShutdown()) {
505+
logger.debug(new ParameterizedMessage("could not schedule execution of [{}] on [{}] as executor is shut down",
506+
runnable, executor), e);
507+
} else {
508+
throw e;
509+
}
510+
}
501511
}
502512

503513
@Override

server/src/test/java/org/elasticsearch/threadpool/SchedulerTests.java

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
package org.elasticsearch.threadpool;
2121

22-
import org.elasticsearch.common.SuppressForbidden;
2322
import org.elasticsearch.common.settings.Settings;
2423
import org.elasticsearch.common.unit.TimeValue;
2524
import org.elasticsearch.test.ESTestCase;
@@ -29,12 +28,9 @@
2928
import java.util.Collections;
3029
import java.util.List;
3130
import java.util.concurrent.CountDownLatch;
32-
import java.util.concurrent.ExecutionException;
33-
import java.util.concurrent.ScheduledFuture;
3431
import java.util.concurrent.ScheduledThreadPoolExecutor;
3532
import java.util.concurrent.TimeUnit;
3633
import java.util.concurrent.atomic.AtomicLong;
37-
import java.util.function.BiFunction;
3834
import java.util.stream.Collectors;
3935
import java.util.stream.LongStream;
4036

@@ -157,37 +153,4 @@ public void testScheduledOnScheduler() throws InterruptedException {
157153
Scheduler.terminate(executor, 10, TimeUnit.SECONDS);
158154
}
159155
}
160-
161-
@SuppressForbidden(reason = "this tests that the deprecated method still works")
162-
public void testDeprecatedSchedule() throws ExecutionException, InterruptedException {
163-
verifyDeprecatedSchedule(((threadPool, runnable)
164-
-> threadPool.schedule(TimeValue.timeValueMillis(randomInt(10)), ThreadPool.Names.SAME, runnable)));
165-
}
166-
167-
public void testThreadPoolScheduleDeprecated() throws ExecutionException, InterruptedException {
168-
verifyDeprecatedSchedule(((threadPool, runnable)
169-
-> threadPool.scheduleDeprecated(TimeValue.timeValueMillis(randomInt(10)), ThreadPool.Names.SAME, runnable)));
170-
}
171-
172-
private void verifyDeprecatedSchedule(BiFunction<ThreadPool,
173-
Runnable, ScheduledFuture<?>> scheduleFunction) throws InterruptedException, ExecutionException {
174-
ThreadPool threadPool = new TestThreadPool("test");
175-
CountDownLatch missingExecutions = new CountDownLatch(1);
176-
try {
177-
scheduleFunction.apply(threadPool, missingExecutions::countDown)
178-
.get();
179-
assertEquals(0, missingExecutions.getCount());
180-
181-
ExecutionException exception = expectThrows(ExecutionException.class,
182-
"schedule(...).get() must throw exception from runnable",
183-
() -> scheduleFunction.apply(threadPool,
184-
() -> { throw new IllegalArgumentException("FAIL"); }
185-
).get());
186-
187-
assertEquals(IllegalArgumentException.class, exception.getCause().getClass());
188-
} finally {
189-
ThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS);
190-
}
191-
}
192-
193156
}

0 commit comments

Comments
 (0)