Skip to content

Commit 133ac01

Browse files
committed
[ML] Include message in field_stats for text log files (#34861)
This change ensures the `message` field is always included in the `field_stats` for the semi-structured text log file structure. Previously it was omitted because it will almost certainly contain all distinct values. However, for consistency in the UI, it is useful to include it.
1 parent 56f9ee5 commit 133ac01

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java

+1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ static TextLogFileStructureFinder makeTextLogFileStructureFinder(List<String> ex
8989
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date"));
9090

9191
SortedMap<String, FieldStats> fieldStats = new TreeMap<>();
92+
fieldStats.put("message", FileStructureUtils.calculateFieldStats(sampleMessages, timeoutChecker));
9293

9394
GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, fieldStats, timeoutChecker);
9495
// We can't parse directly into @timestamp using Grok, so parse to some other time field, which the date filter will then remove

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java

+22
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,16 @@
77

88
import org.elasticsearch.common.collect.Tuple;
99
import org.elasticsearch.common.util.set.Sets;
10+
import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
1011
import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure;
1112
import org.elasticsearch.xpack.ml.filestructurefinder.TimestampFormatFinder.TimestampMatch;
1213

1314
import java.util.Collections;
1415
import java.util.Set;
16+
import java.util.stream.Collectors;
17+
18+
import static org.hamcrest.Matchers.hasItem;
19+
import static org.hamcrest.Matchers.not;
1520

1621
public class TextLogFileStructureFinderTests extends FileStructureTestCase {
1722

@@ -127,6 +132,11 @@ public void testCreateConfigsGivenElasticsearchLog() throws Exception {
127132
assertEquals("\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern());
128133
assertEquals("timestamp", structure.getTimestampField());
129134
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
135+
FieldStats messageFieldStats = structure.getFieldStats().get("message");
136+
assertNotNull(messageFieldStats);
137+
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
138+
assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
139+
}
130140
}
131141

132142
public void testCreateConfigsGivenElasticsearchLogAndTimestampFieldOverride() throws Exception {
@@ -158,6 +168,11 @@ public void testCreateConfigsGivenElasticsearchLogAndTimestampFieldOverride() th
158168
assertEquals("\\[%{TIMESTAMP_ISO8601:my_time}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern());
159169
assertEquals("my_time", structure.getTimestampField());
160170
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
171+
FieldStats messageFieldStats = structure.getFieldStats().get("message");
172+
assertNotNull(messageFieldStats);
173+
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
174+
assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
175+
}
161176
}
162177

163178
public void testCreateConfigsGivenElasticsearchLogAndGrokPatternOverride() throws Exception {
@@ -191,6 +206,13 @@ public void testCreateConfigsGivenElasticsearchLogAndGrokPatternOverride() throw
191206
"\\[%{JAVACLASS:class} *\\] \\[%{HOSTNAME:node}\\] %{JAVALOGMESSAGE:message}", structure.getGrokPattern());
192207
assertEquals("timestamp", structure.getTimestampField());
193208
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
209+
FieldStats messageFieldStats = structure.getFieldStats().get("message");
210+
assertNotNull(messageFieldStats);
211+
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
212+
// In this case the "message" field was output by the Grok pattern, so "message"
213+
// at the end of the processing will _not_ contain a complete sample message
214+
assertThat(structureFinder.getSampleMessages(), not(hasItem(statMessage)));
215+
}
194216
}
195217

196218
public void testCreateConfigsGivenElasticsearchLogAndImpossibleGrokPatternOverride() {

0 commit comments

Comments
 (0)