From 5f85da69f80a4fd573faa4a2cf4d191e74b053eb Mon Sep 17 00:00:00 2001 From: Tim Vernum Date: Thu, 18 Jul 2019 12:33:32 +1000 Subject: [PATCH 1/2] Log summarised description of StartupExceptions When a Node fails to start, it will throw a StartupException on the main thread which will cause the process to exit. Previously these were simply logged in the same way as any other uncaught exception, which would potentially result in long stack traces with the key details (the primary cause) being nested somewhere in the middle of the log lines. This was particularly true if the failure was due to an exception being thrown within a plugin - the primary cause may well have been wrapped in two or three other exceptions before it was logged. This commit adds a new summarised description whenever there is an uncaught StartupException. This summary is logged before and after the standard stack trace logging to make it more prominent and increase the likelihood that it will be noticed and understood. The summary focuses on printing messages from ElasticsearchExceptions as these are the most likely to hold clear, specific and actionable information and also prints the message for each cause of the ElasticsearchException which may contain the precise details (e.g. the pathname in a FileNotFoundException or AccessDeniedException). Resolves: #34895 --- ...ElasticsearchUncaughtExceptionHandler.java | 41 +++++++++++++++++++ ...icsearchUncaughtExceptionHandlerTests.java | 34 +++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java b/server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java index 1c3c0ccf6b61f..c60a0d6015255 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java @@ -22,6 +22,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.SuppressForbidden; import java.io.IOError; @@ -53,6 +55,10 @@ public void uncaughtException(Thread t, Throwable e) { halt(1); } } + } else if (e instanceof StartupException) { + // StartupException means that this server didn't start, and we want to do everything we can to make that + // error clear to anyone who consults the logs so that they're not simply overwhelmed by a stack trace. + onStartupException(t.getName(), (StartupException) e); } else { onNonFatalUncaught(t.getName(), e); } @@ -70,6 +76,41 @@ void onNonFatalUncaught(final String threadName, final Throwable t) { logger.warn(() -> new ParameterizedMessage("uncaught exception in thread [{}]", threadName), t); } + void onStartupException(final String threadName, final StartupException e) { + String bannerMessage = describeStartupException(e); + logger.error(bannerMessage); + logger.warn(() -> new ParameterizedMessage("uncaught exception in thread [{}]", threadName), e); + // Log the error message twice (before and after the stack trace) so that it is super-obvious to anyone reading the logs + logger.error(bannerMessage); + } + + // accessible for testing + static String describeStartupException(StartupException e) { + StringBuilder bannerMessage = new StringBuilder("an exception was thrown that prevented this node from starting (") + // Append the top message so that it as clear as possible that this message is just a summary of the stacktrace next to it. + .append(e.getMessage()) + .append(")"); + // Find the first elasticsearch exception, that message is the most likely to provide a helpful explanation + ElasticsearchException esCause = (ElasticsearchException) ExceptionsHelper.unwrap(e, ElasticsearchException.class); + if (esCause != null) { + bannerMessage.append("\nthis was caused by:"); + // Allow the elasticsearch exception to decide on the best root cause(s to report) + for (ElasticsearchException root : esCause.guessRootCauses()) { + bannerMessage.append("\n * ").append(root.getMessage()) + .append(" (").append(ElasticsearchException.getExceptionName(root)).append(")"); + String indent = " "; + Throwable cause = root.getCause(); + for (int counter = 0; counter < 3 && cause != null; counter++) { + bannerMessage.append('\n').append(indent).append("- caused by: ") + .append(cause.getMessage()).append(" (").append(ElasticsearchException.getExceptionName(cause)).append(")"); + cause = cause.getCause(); + indent += " "; + } + } + } + return bannerMessage.toString(); + } + void halt(int status) { AccessController.doPrivileged(new PrivilegedHaltAction(status)); } diff --git a/server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java b/server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java index df415d713d103..f0c7b4ed4ed2e 100644 --- a/server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java +++ b/server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.bootstrap; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchSecurityException; import org.elasticsearch.test.ESTestCase; import java.io.IOError; @@ -129,6 +131,38 @@ public void testIsFatalCause() { assertNonFatal(new UncheckedIOException(new IOException())); } + public void testStartupExceptionMessageWithoutElasticsearchException() { + final StartupException exception = new StartupException(new IndexOutOfBoundsException("test")); + final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception); + assertThat(message, equalTo("an exception was thrown that prevented this node from starting" + + " (java.lang.IndexOutOfBoundsException: test)")); + } + + public void testStartupExceptionMessageWithElasticsearchException() { + final StartupException exception = new StartupException(new RuntimeException("test", + new ElasticsearchException("es-exception", new RuntimeException("the-cause", new IndexOutOfBoundsException("root-cause"))))); + final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception); + assertThat(message, equalTo( + "an exception was thrown that prevented this node from starting (java.lang.RuntimeException: test)\n" + + "this was caused by:\n" + + " * es-exception (exception)\n" + + " - caused by: the-cause (runtime_exception)\n" + + " - caused by: root-cause (index_out_of_bounds_exception)")); + } + + public void testStartupExceptionMessageWithChainOfElasticsearchExceptions() { + final StartupException exception = new StartupException(new RuntimeException("test", new ElasticsearchException("es-exception-1", + new ElasticsearchSecurityException("es-exception-2", new RuntimeException("the-cause", + new IndexOutOfBoundsException("root-cause")))))); + final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception); + assertThat(message, equalTo( + "an exception was thrown that prevented this node from starting (java.lang.RuntimeException: test)\n" + + "this was caused by:\n" + + " * es-exception-2 (security_exception)\n" + + " - caused by: the-cause (runtime_exception)\n" + + " - caused by: root-cause (index_out_of_bounds_exception)")); + } + private void assertFatal(Throwable cause) { assertTrue(ElasticsearchUncaughtExceptionHandler.isFatalUncaught(cause)); } From 2be200648b22b616914d3fb6bb8b32ce83164494 Mon Sep 17 00:00:00 2001 From: Tim Vernum Date: Wed, 12 Jan 2022 20:44:38 +1100 Subject: [PATCH 2/2] Update docs/changelog/44536.yaml --- docs/changelog/44536.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/44536.yaml diff --git a/docs/changelog/44536.yaml b/docs/changelog/44536.yaml new file mode 100644 index 0000000000000..ddae6474d3d91 --- /dev/null +++ b/docs/changelog/44536.yaml @@ -0,0 +1,5 @@ +pr: 44536 +summary: Log summarised description of `StartupExceptions` +area: Infra/Logging +type: enhancement +issues: []