Skip to content

Commit 743fcd7

Browse files
Add Netty ByteBuf Leak Check to REST Test Clusters (#64528) (#65864)
We do this check in all tests that inherit from `EsTestCase` but didn't check for `ByteBuf` leaks in rest test clusters, which means we have very little coverage of the REST layer. With recent reports of very rare leak warnings in logs I think it's worthwhile to do this check in REST tests as well.
1 parent de0a545 commit 743fcd7

File tree

1 file changed

+42
-25
lines changed

1 file changed

+42
-25
lines changed

buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java

+42-25
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,9 @@ private Map<String, String> getESEnvironment() {
769769
.map(p -> p.replace("${ES_PATH_CONF}", configFile.getParent().toString()))
770770
.collect(Collectors.joining(" "));
771771
}
772+
if (systemProperties.containsKey("io.netty.leakDetection.level") == false) {
773+
systemPropertiesString = systemPropertiesString + " -Dio.netty.leakDetection.level=paranoid";
774+
}
772775
String jvmArgsString = "";
773776
if (jvmArgs.isEmpty() == false) {
774777
jvmArgsString = " " + jvmArgs.stream().peek(argument -> {
@@ -921,10 +924,6 @@ public synchronized void stop(boolean tailLogs) {
921924
// Test clusters are not reused, don't spend time on a graceful shutdown
922925
stopHandle(esProcess.toHandle(), true);
923926
reaper.unregister(toString());
924-
if (tailLogs) {
925-
logFileContents("Standard output of node", esStdoutFile);
926-
logFileContents("Standard error of node", esStderrFile);
927-
}
928927
esProcess = null;
929928
// Clean up the ports file in case this is started again.
930929
try {
@@ -937,6 +936,8 @@ public synchronized void stop(boolean tailLogs) {
937936
} catch (IOException e) {
938937
throw new UncheckedIOException(e);
939938
}
939+
logFileContents("Standard output of node", esStdoutFile, tailLogs);
940+
logFileContents("Standard error of node", esStderrFile, tailLogs);
940941
}
941942

942943
@Override
@@ -989,7 +990,7 @@ private void logProcessInfo(String prefix, ProcessHandle.Info info) {
989990
);
990991
}
991992

992-
private void logFileContents(String description, Path from) {
993+
private void logFileContents(String description, Path from, boolean tailLogs) {
993994
final Map<String, Integer> errorsAndWarnings = new LinkedHashMap<>();
994995
LinkedList<String> ring = new LinkedList<>();
995996
try (LineNumberReader reader = new LineNumberReader(Files.newBufferedReader(from))) {
@@ -1018,31 +1019,47 @@ private void logFileContents(String description, Path from) {
10181019
}
10191020
}
10201021
} catch (IOException e) {
1021-
throw new UncheckedIOException("Failed to tail log " + this, e);
1022+
if (tailLogs) {
1023+
throw new UncheckedIOException("Failed to tail log " + this, e);
1024+
}
1025+
return;
10221026
}
10231027

1024-
if (errorsAndWarnings.isEmpty() == false || ring.isEmpty() == false) {
1025-
LOGGER.error("\n=== {} `{}` ===", description, this);
1026-
}
1027-
if (errorsAndWarnings.isEmpty() == false) {
1028-
LOGGER.lifecycle("\n» ↓ errors and warnings from " + from + " ↓");
1029-
errorsAndWarnings.forEach((message, count) -> {
1030-
LOGGER.lifecycle("» " + message.replace("\n", "\n» "));
1031-
if (count > 1) {
1032-
LOGGER.lifecycle("» ↑ repeated " + count + " times ↑");
1033-
}
1034-
});
1028+
boolean foundNettyLeaks = false;
1029+
for (String logLine : errorsAndWarnings.keySet()) {
1030+
if (logLine.contains("ResourceLeakDetector]")) {
1031+
tailLogs = true;
1032+
foundNettyLeaks = true;
1033+
break;
1034+
}
10351035
}
1036+
if (tailLogs) {
1037+
if (errorsAndWarnings.isEmpty() == false || ring.isEmpty() == false) {
1038+
LOGGER.error("\n=== {} `{}` ===", description, this);
1039+
}
1040+
if (errorsAndWarnings.isEmpty() == false) {
1041+
LOGGER.lifecycle("\n» ↓ errors and warnings from " + from + " ↓");
1042+
errorsAndWarnings.forEach((message, count) -> {
1043+
LOGGER.lifecycle("» " + message.replace("\n", "\n» "));
1044+
if (count > 1) {
1045+
LOGGER.lifecycle("» ↑ repeated " + count + " times ↑");
1046+
}
1047+
});
1048+
}
10361049

1037-
ring.removeIf(line -> MESSAGES_WE_DONT_CARE_ABOUT.stream().anyMatch(line::contains));
1050+
ring.removeIf(line -> MESSAGES_WE_DONT_CARE_ABOUT.stream().anyMatch(line::contains));
10381051

1039-
if (ring.isEmpty() == false) {
1040-
LOGGER.lifecycle("» ↓ last " + TAIL_LOG_MESSAGES_COUNT + " non error or warning messages from " + from + " ↓");
1041-
ring.forEach(message -> {
1042-
if (errorsAndWarnings.containsKey(normalizeLogLine(message)) == false) {
1043-
LOGGER.lifecycle("» " + message.replace("\n", "\n» "));
1044-
}
1045-
});
1052+
if (ring.isEmpty() == false) {
1053+
LOGGER.lifecycle("» ↓ last " + TAIL_LOG_MESSAGES_COUNT + " non error or warning messages from " + from + " ↓");
1054+
ring.forEach(message -> {
1055+
if (errorsAndWarnings.containsKey(normalizeLogLine(message)) == false) {
1056+
LOGGER.lifecycle("» " + message.replace("\n", "\n» "));
1057+
}
1058+
});
1059+
}
1060+
}
1061+
if (foundNettyLeaks) {
1062+
throw new TestClustersException("Found Netty ByteBuf leaks in node logs.");
10461063
}
10471064
}
10481065

0 commit comments

Comments
 (0)