Skip to content

Commit 5f85da6

Browse files
committed
Log summarised description of StartupExceptions
When a Node fails to start, it will throw a StartupException on the main thread, which will cause the process to exit. Previously these were simply logged in the same way as any other uncaught exception, which would potentially result in long stack traces with the key details (the primary cause) being nested somewhere in the middle of the log lines. This was particularly true if the failure was due to an exception being thrown within a plugin - the primary cause may well have been wrapped in two or three other exceptions before it was logged. This commit adds a new summarised description whenever there is an uncaught StartupException. This summary is logged before and after the standard stack trace logging to make it more prominent and increase the likelihood that it will be noticed and understood. The summary focuses on printing messages from ElasticsearchExceptions, as these are the most likely to hold clear, specific and actionable information. It also prints the message for each cause of the ElasticsearchException, which may contain the precise details (e.g. the pathname in a FileNotFoundException or AccessDeniedException). Resolves: elastic#34895
1 parent 2b65489 commit 5f85da6

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java

+41
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import org.apache.logging.log4j.LogManager;
2323
import org.apache.logging.log4j.Logger;
2424
import org.apache.logging.log4j.message.ParameterizedMessage;
25+
import org.elasticsearch.ElasticsearchException;
26+
import org.elasticsearch.ExceptionsHelper;
2527
import org.elasticsearch.common.SuppressForbidden;
2628

2729
import java.io.IOError;
@@ -53,6 +55,10 @@ public void uncaughtException(Thread t, Throwable e) {
5355
halt(1);
5456
}
5557
}
58+
} else if (e instanceof StartupException) {
59+
// StartupException means that this server didn't start, and we want to do everything we can to make that
60+
// error clear to anyone who consults the logs so that they're not simply overwhelmed by a stack trace.
61+
onStartupException(t.getName(), (StartupException) e);
5662
} else {
5763
onNonFatalUncaught(t.getName(), e);
5864
}
@@ -70,6 +76,41 @@ void onNonFatalUncaught(final String threadName, final Throwable t) {
7076
logger.warn(() -> new ParameterizedMessage("uncaught exception in thread [{}]", threadName), t);
7177
}
7278

79+
void onStartupException(final String threadName, final StartupException e) {
80+
String bannerMessage = describeStartupException(e);
81+
logger.error(bannerMessage);
82+
logger.warn(() -> new ParameterizedMessage("uncaught exception in thread [{}]", threadName), e);
83+
// Log the error message twice (before and after the stack trace) so that it is super-obvious to anyone reading the logs
84+
logger.error(bannerMessage);
85+
}
86+
87+
// accessible for testing
88+
static String describeStartupException(StartupException e) {
89+
StringBuilder bannerMessage = new StringBuilder("an exception was thrown that prevented this node from starting (")
90+
// Append the top message so that it is as clear as possible that this message is just a summary of the stacktrace next to it.
91+
.append(e.getMessage())
92+
.append(")");
93+
// Find the first elasticsearch exception, that message is the most likely to provide a helpful explanation
94+
ElasticsearchException esCause = (ElasticsearchException) ExceptionsHelper.unwrap(e, ElasticsearchException.class);
95+
if (esCause != null) {
96+
bannerMessage.append("\nthis was caused by:");
97+
// Allow the elasticsearch exception to decide on the best root cause(s) to report
98+
for (ElasticsearchException root : esCause.guessRootCauses()) {
99+
bannerMessage.append("\n * ").append(root.getMessage())
100+
.append(" (").append(ElasticsearchException.getExceptionName(root)).append(")");
101+
String indent = " ";
102+
Throwable cause = root.getCause();
103+
for (int counter = 0; counter < 3 && cause != null; counter++) {
104+
bannerMessage.append('\n').append(indent).append("- caused by: ")
105+
.append(cause.getMessage()).append(" (").append(ElasticsearchException.getExceptionName(cause)).append(")");
106+
cause = cause.getCause();
107+
indent += " ";
108+
}
109+
}
110+
}
111+
return bannerMessage.toString();
112+
}
113+
73114
void halt(int status) {
74115
AccessController.doPrivileged(new PrivilegedHaltAction(status));
75116
}

server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java

+34
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.elasticsearch.bootstrap;
2121

22+
import org.elasticsearch.ElasticsearchException;
23+
import org.elasticsearch.ElasticsearchSecurityException;
2224
import org.elasticsearch.test.ESTestCase;
2325

2426
import java.io.IOError;
@@ -129,6 +131,38 @@ public void testIsFatalCause() {
129131
assertNonFatal(new UncheckedIOException(new IOException()));
130132
}
131133

134+
public void testStartupExceptionMessageWithoutElasticsearchException() {
135+
final StartupException exception = new StartupException(new IndexOutOfBoundsException("test"));
136+
final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception);
137+
assertThat(message, equalTo("an exception was thrown that prevented this node from starting" +
138+
" (java.lang.IndexOutOfBoundsException: test)"));
139+
}
140+
141+
public void testStartupExceptionMessageWithElasticsearchException() {
142+
final StartupException exception = new StartupException(new RuntimeException("test",
143+
new ElasticsearchException("es-exception", new RuntimeException("the-cause", new IndexOutOfBoundsException("root-cause")))));
144+
final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception);
145+
assertThat(message, equalTo(
146+
"an exception was thrown that prevented this node from starting (java.lang.RuntimeException: test)\n" +
147+
"this was caused by:\n" +
148+
" * es-exception (exception)\n" +
149+
" - caused by: the-cause (runtime_exception)\n" +
150+
" - caused by: root-cause (index_out_of_bounds_exception)"));
151+
}
152+
153+
public void testStartupExceptionMessageWithChainOfElasticsearchExceptions() {
154+
final StartupException exception = new StartupException(new RuntimeException("test", new ElasticsearchException("es-exception-1",
155+
new ElasticsearchSecurityException("es-exception-2", new RuntimeException("the-cause",
156+
new IndexOutOfBoundsException("root-cause"))))));
157+
final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception);
158+
assertThat(message, equalTo(
159+
"an exception was thrown that prevented this node from starting (java.lang.RuntimeException: test)\n" +
160+
"this was caused by:\n" +
161+
" * es-exception-2 (security_exception)\n" +
162+
" - caused by: the-cause (runtime_exception)\n" +
163+
" - caused by: root-cause (index_out_of_bounds_exception)"));
164+
}
165+
132166
private void assertFatal(Throwable cause) {
133167
assertTrue(ElasticsearchUncaughtExceptionHandler.isFatalUncaught(cause));
134168
}

0 commit comments

Comments
 (0)