Skip to content

Commit 3f8071e

Browse files
authored
[ML] Make datafeeds work with nanosecond time fields (#51180)
Allows ML datafeeds to work with time fields that have the "date_nanos" type _and make use of the extra precision_. (Previously datafeeds only worked with time fields that were exact multiples of milliseconds. So datafeeds would work with "date_nanos" only if the extra precision over "date" was not used.) Relates #49889
1 parent d64e115 commit 3f8071e

File tree

5 files changed

+82
-25
lines changed

5 files changed

+82
-25
lines changed

x-pack/plugin/ml/qa/basic-multi-node/src/test/java/org/elasticsearch/xpack/ml/integration/MlBasicMultiNodeIT.java

+11-6
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.yaml.snakeyaml.util.UriEncoder;
1818

1919
import java.io.IOException;
20+
import java.io.UncheckedIOException;
2021
import java.util.Collections;
2122
import java.util.List;
2223
import java.util.Map;
@@ -36,7 +37,7 @@ public void testMachineLearningInstalled() throws Exception {
3637
assertTrue((Boolean) ml.get("enabled"));
3738
}
3839

39-
public void testInvalidJob() throws Exception {
40+
public void testInvalidJob() {
4041
// The job name is invalid because it contains a space
4142
String jobId = "invalid job";
4243
ResponseException e = expectThrows(ResponseException.class, () -> createFarequoteJob(jobId));
@@ -103,22 +104,26 @@ public void testMiniFarequote() throws Exception {
103104
}
104105

105106
public void testMiniFarequoteWithDatafeeder() throws Exception {
107+
boolean datesHaveNanoSecondResolution = randomBoolean();
108+
String dateMappingType = datesHaveNanoSecondResolution ? "date_nanos" : "date";
109+
String dateFormat = datesHaveNanoSecondResolution ? "strict_date_optional_time_nanos" : "strict_date_optional_time";
110+
String randomNanos = datesHaveNanoSecondResolution ? "," + randomIntBetween(100000000, 999999999) : "";
106111
Request createAirlineDataRequest = new Request("PUT", "/airline-data");
107112
createAirlineDataRequest.setJsonEntity("{"
108113
+ " \"mappings\": {"
109114
+ " \"properties\": {"
110-
+ " \"time\": { \"type\":\"date\"},"
115+
+ " \"time\": { \"type\":\"" + dateMappingType + "\", \"format\":\"" + dateFormat + "\"},"
111116
+ " \"airline\": { \"type\":\"keyword\"},"
112117
+ " \"responsetime\": { \"type\":\"float\"}"
113118
+ " }"
114119
+ " }"
115120
+ "}");
116121
client().performRequest(createAirlineDataRequest);
117122
Request airlineData1 = new Request("PUT", "/airline-data/_doc/1");
118-
airlineData1.setJsonEntity("{\"time\":\"2016-06-01T00:00:00Z\",\"airline\":\"AAA\",\"responsetime\":135.22}");
123+
airlineData1.setJsonEntity("{\"time\":\"2016-06-01T00:00:00" + randomNanos + "Z\",\"airline\":\"AAA\",\"responsetime\":135.22}");
119124
client().performRequest(airlineData1);
120125
Request airlineData2 = new Request("PUT", "/airline-data/_doc/2");
121-
airlineData2.setJsonEntity("{\"time\":\"2016-06-01T01:59:00Z\",\"airline\":\"AAA\",\"responsetime\":541.76}");
126+
airlineData2.setJsonEntity("{\"time\":\"2016-06-01T01:59:00" + randomNanos + "Z\",\"airline\":\"AAA\",\"responsetime\":541.76}");
122127
client().performRequest(airlineData2);
123128

124129
// Ensure all data is searchable
@@ -147,7 +152,7 @@ public void testMiniFarequoteWithDatafeeder() throws Exception {
147152
assertEquals(2, dataCountsDoc.get("input_record_count"));
148153
assertEquals(2, dataCountsDoc.get("processed_record_count"));
149154
} catch (IOException e) {
150-
throw new RuntimeException(e);
155+
throw new UncheckedIOException(e);
151156
}
152157
});
153158

@@ -233,7 +238,7 @@ public void testMiniFarequoteReopen() throws Exception {
233238
assertEquals(1000, responseBody2.get("bucket_count"));
234239

235240
// unintuitive: should return the earliest record timestamp of this feed???
236-
assertEquals(null, responseBody2.get("earliest_record_timestamp"));
241+
assertNull(responseBody2.get("earliest_record_timestamp"));
237242
assertEquals(1407082000000L, responseBody2.get("latest_record_timestamp"));
238243

239244
assertEquals(Collections.singletonMap("closed", true),

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public static TimeBasedExtractedFields build(Job job, DatafeedConfig datafeed, F
6161
List<String> remainingFields = job.allInputFields().stream().filter(f -> !f.equals(timeField)).collect(Collectors.toList());
6262
List<ExtractedField> allExtractedFields = new ArrayList<>(remainingFields.size() + 1);
6363
allExtractedFields.add(timeExtractedField);
64-
remainingFields.stream().forEach(field -> allExtractedFields.add(extractionMethodDetector.detect(field)));
64+
remainingFields.forEach(field -> allExtractedFields.add(extractionMethodDetector.detect(field)));
6565

6666
return new TimeBasedExtractedFields(timeExtractedField, allExtractedFields);
6767
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java

+8-3
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ public ExtractedField detect(String field) {
9494
}
9595

9696
private ExtractedField detectNonScriptField(String field) {
97-
if (isFieldOfType(field, TimeField.TYPE) && isAggregatable(field)) {
97+
if (isFieldOfTypes(field, TimeField.TYPES) && isAggregatable(field)) {
9898
return new TimeField(field, ExtractedField.Method.DOC_VALUE);
9999
}
100100
if (isFieldOfType(field, GeoPointField.TYPE)) {
@@ -129,9 +129,14 @@ public boolean isAggregatable(String field) {
129129
}
130130

131131
private boolean isFieldOfType(String field, String type) {
132+
return isFieldOfTypes(field, Collections.singleton(type));
133+
}
134+
135+
private boolean isFieldOfTypes(String field, Set<String> types) {
136+
assert types.isEmpty() == false;
132137
Map<String, FieldCapabilities> fieldCaps = fieldsCapabilities.getField(field);
133-
if (fieldCaps != null && fieldCaps.size() == 1) {
134-
return fieldCaps.containsKey(type);
138+
if (fieldCaps != null && fieldCaps.isEmpty() == false) {
139+
return types.containsAll(fieldCaps.keySet());
135140
}
136141
return false;
137142
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java

+23-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*/
66
package org.elasticsearch.xpack.ml.extractor;
77

8+
import org.elasticsearch.common.util.set.Sets;
89
import org.elasticsearch.search.SearchHit;
910

1011
import java.util.Collections;
@@ -13,15 +14,17 @@
1314

1415
public class TimeField extends AbstractField {
1516

16-
static final String TYPE = "date";
17-
18-
private static final Set<String> TYPES = Collections.singleton(TYPE);
17+
static final Set<String> TYPES = Collections.unmodifiableSet(Sets.newHashSet("date", "date_nanos"));
1918

2019
private static final String EPOCH_MILLIS_FORMAT = "epoch_millis";
2120

2221
private final Method method;
2322

2423
public TimeField(String name, Method method) {
24+
// This class intentionally reports the possible types rather than the types reported by
25+
// field caps at the point of construction. This means that it will continue to work if,
26+
// for example, a newly created index has a "date_nanos" time field when in all the indices
27+
// that matched the pattern when this constructor was called the field had type "date".
2528
super(name, TYPES);
2629
if (method == Method.SOURCE) {
2730
throw new IllegalArgumentException("time field [" + name + "] cannot be extracted from source");
@@ -41,7 +44,23 @@ public Object[] value(SearchHit hit) {
4144
return value;
4245
}
4346
if (value[0] instanceof String) { // doc_value field with the epoch_millis format
44-
value[0] = Long.parseLong((String) value[0]);
47+
// Since nanosecond support was added epoch_millis timestamps may have a fractional component.
48+
// We discard this, taking just whole milliseconds. Arguably it would be better to retain the
49+
// precision here and let the downstream component decide whether it wants the accuracy, but
50+
// that makes it hard to pass around the value as a number. The double type doesn't have
51+
// enough digits of accuracy, and obviously long cannot store the fraction. BigDecimal would
52+
// work, but that isn't supported by the JSON parser if the number gets round-tripped through
53+
// JSON. So String is really the only format that could be used, but the ML consumers of time
54+
// are expecting a number.
55+
String strVal0 = (String) value[0];
56+
int dotPos = strVal0.indexOf('.');
57+
if (dotPos == -1) {
58+
value[0] = Long.parseLong(strVal0);
59+
} else if (dotPos > 0) {
60+
value[0] = Long.parseLong(strVal0.substring(0, dotPos));
61+
} else {
62+
value[0] = 0L;
63+
}
4564
} else if (value[0] instanceof Long == false) { // pre-6.0 field
4665
throw new IllegalStateException("Unexpected value for a time field: " + value[0].getClass());
4766
}

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/TimeFieldTests.java

+39-11
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,61 @@
55
*/
66
package org.elasticsearch.xpack.ml.extractor;
77

8+
import org.elasticsearch.common.time.DateFormatter;
89
import org.elasticsearch.search.SearchHit;
910
import org.elasticsearch.test.ESTestCase;
1011
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
1112

12-
import static org.hamcrest.Matchers.contains;
13+
import java.time.Instant;
14+
15+
import static org.hamcrest.Matchers.containsInAnyOrder;
1316
import static org.hamcrest.Matchers.equalTo;
1417
import static org.hamcrest.Matchers.is;
1518
import static org.hamcrest.Matchers.startsWith;
1619

1720
public class TimeFieldTests extends ESTestCase {
1821

19-
public void testDocValueWithStringValue() {
20-
long millis = randomLong();
21-
SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", Long.toString(millis)).build();
22+
public void testDocValueWithWholeMillisecondStringValue() {
23+
long millis = randomNonNegativeLong();
24+
Instant time = Instant.ofEpochMilli(millis);
25+
DateFormatter formatter = DateFormatter.forPattern("epoch_millis");
26+
String timeAsString = formatter.format(time);
27+
SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", timeAsString).build();
28+
29+
ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
30+
31+
assertThat(timeField.value(hit), equalTo(new Object[] { millis }));
32+
assertThat(timeField.getName(), equalTo("time"));
33+
assertThat(timeField.getSearchField(), equalTo("time"));
34+
assertThat(timeField.getTypes(), containsInAnyOrder("date", "date_nanos"));
35+
assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
36+
assertThat(timeField.getDocValueFormat(), equalTo("epoch_millis"));
37+
assertThat(timeField.supportsFromSource(), is(false));
38+
expectThrows(UnsupportedOperationException.class, timeField::newFromSource);
39+
assertThat(timeField.isMultiField(), is(false));
40+
expectThrows(UnsupportedOperationException.class, timeField::getParentField);
41+
}
42+
43+
public void testDocValueWithFractionalMillisecondStringValue() {
44+
long millis = randomNonNegativeLong();
45+
int extraNanos = randomIntBetween(1, 999999);
46+
Instant time = Instant.ofEpochMilli(millis).plusNanos(extraNanos);
47+
DateFormatter formatter = DateFormatter.forPattern("epoch_millis");
48+
String timeAsString = formatter.format(time);
49+
SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", timeAsString).build();
2250

2351
ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
2452

2553
assertThat(timeField.value(hit), equalTo(new Object[] { millis }));
2654
assertThat(timeField.getName(), equalTo("time"));
2755
assertThat(timeField.getSearchField(), equalTo("time"));
28-
assertThat(timeField.getTypes(), contains("date"));
56+
assertThat(timeField.getTypes(), containsInAnyOrder("date", "date_nanos"));
2957
assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
3058
assertThat(timeField.getDocValueFormat(), equalTo("epoch_millis"));
3159
assertThat(timeField.supportsFromSource(), is(false));
32-
expectThrows(UnsupportedOperationException.class, () -> timeField.newFromSource());
60+
expectThrows(UnsupportedOperationException.class, timeField::newFromSource);
3361
assertThat(timeField.isMultiField(), is(false));
34-
expectThrows(UnsupportedOperationException.class, () -> timeField.getParentField());
62+
expectThrows(UnsupportedOperationException.class, timeField::getParentField);
3563
}
3664

3765
public void testScriptWithLongValue() {
@@ -43,13 +71,13 @@ public void testScriptWithLongValue() {
4371
assertThat(timeField.value(hit), equalTo(new Object[] { millis }));
4472
assertThat(timeField.getName(), equalTo("time"));
4573
assertThat(timeField.getSearchField(), equalTo("time"));
46-
assertThat(timeField.getTypes(), contains("date"));
74+
assertThat(timeField.getTypes(), containsInAnyOrder("date", "date_nanos"));
4775
assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD));
48-
expectThrows(UnsupportedOperationException.class, () -> timeField.getDocValueFormat());
76+
expectThrows(UnsupportedOperationException.class, timeField::getDocValueFormat);
4977
assertThat(timeField.supportsFromSource(), is(false));
50-
expectThrows(UnsupportedOperationException.class, () -> timeField.newFromSource());
78+
expectThrows(UnsupportedOperationException.class, timeField::newFromSource);
5179
assertThat(timeField.isMultiField(), is(false));
52-
expectThrows(UnsupportedOperationException.class, () -> timeField.getParentField());
80+
expectThrows(UnsupportedOperationException.class, timeField::getParentField);
5381
}
5482

5583
public void testUnknownFormat() {

0 commit comments

Comments
 (0)