Skip to content

Commit b202a59

Browse files
committed
[ML] Add earliest and latest timestamps to field stats (#42890)
This change adds the earliest and latest timestamps to the field stats for fields of type "date" in the output of the ML find_file_structure endpoint. This will allow the cards for date fields in the UI's file data visualizer to look more like the cards for date fields in the index data visualizer.
1 parent 280a2c9 commit b202a59

File tree

16 files changed

+365
-100
lines changed

16 files changed

+365
-100
lines changed

client/rest-high-level/src/main/java/org/elasticsearch/client/ml/filestructurefinder/FieldStats.java

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,14 @@ public class FieldStats implements ToXContentObject {
3838
public static final ParseField MAX_VALUE = new ParseField("max_value");
3939
public static final ParseField MEAN_VALUE = new ParseField("mean_value");
4040
public static final ParseField MEDIAN_VALUE = new ParseField("median_value");
41+
public static final ParseField EARLIEST = new ParseField("earliest");
42+
public static final ParseField LATEST = new ParseField("latest");
4143
public static final ParseField TOP_HITS = new ParseField("top_hits");
4244

4345
@SuppressWarnings("unchecked")
4446
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", true,
4547
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
46-
(List<Map<String, Object>>) a[6]));
48+
(String) a[6], (String) a[7], (List<Map<String, Object>>) a[8]));
4749

4850
static {
4951
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
@@ -52,6 +54,8 @@ public class FieldStats implements ToXContentObject {
5254
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
5355
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
5456
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
57+
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), EARLIEST);
58+
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), LATEST);
5559
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
5660
}
5761

@@ -61,16 +65,20 @@ public class FieldStats implements ToXContentObject {
6165
private final Double maxValue;
6266
private final Double meanValue;
6367
private final Double medianValue;
68+
private final String earliestTimestamp;
69+
private final String latestTimestamp;
6470
private final List<Map<String, Object>> topHits;
6571

6672
FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
67-
List<Map<String, Object>> topHits) {
73+
String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
6874
this.count = count;
6975
this.cardinality = cardinality;
7076
this.minValue = minValue;
7177
this.maxValue = maxValue;
7278
this.meanValue = meanValue;
7379
this.medianValue = medianValue;
80+
this.earliestTimestamp = earliestTimestamp;
81+
this.latestTimestamp = latestTimestamp;
7482
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
7583
}
7684

@@ -98,6 +106,14 @@ public Double getMedianValue() {
98106
return medianValue;
99107
}
100108

109+
public String getEarliestTimestamp() {
110+
return earliestTimestamp;
111+
}
112+
113+
public String getLatestTimestamp() {
114+
return latestTimestamp;
115+
}
116+
101117
public List<Map<String, Object>> getTopHits() {
102118
return topHits;
103119
}
@@ -120,6 +136,12 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
120136
if (medianValue != null) {
121137
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
122138
}
139+
if (earliestTimestamp != null) {
140+
builder.field(EARLIEST.getPreferredName(), earliestTimestamp);
141+
}
142+
if (latestTimestamp != null) {
143+
builder.field(LATEST.getPreferredName(), latestTimestamp);
144+
}
123145
if (topHits.isEmpty() == false) {
124146
builder.field(TOP_HITS.getPreferredName(), topHits);
125147
}
@@ -140,7 +162,7 @@ static Number toIntegerIfInteger(double d) {
140162
@Override
141163
public int hashCode() {
142164

143-
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
165+
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
144166
}
145167

146168
@Override
@@ -161,6 +183,8 @@ public boolean equals(Object other) {
161183
Objects.equals(this.maxValue, that.maxValue) &&
162184
Objects.equals(this.meanValue, that.meanValue) &&
163185
Objects.equals(this.medianValue, that.medianValue) &&
186+
Objects.equals(this.earliestTimestamp, that.earliestTimestamp) &&
187+
Objects.equals(this.latestTimestamp, that.latestTimestamp) &&
164188
Objects.equals(this.topHits, that.topHits);
165189
}
166190
}

client/rest-high-level/src/test/java/org/elasticsearch/client/ml/filestructurefinder/FieldStatsTests.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ static FieldStats createTestFieldStats() {
4343
Double maxValue = null;
4444
Double meanValue = null;
4545
Double medianValue = null;
46+
String earliestTimestamp = null;
47+
String latestTimestamp = null;
4648
boolean isMetric = randomBoolean();
4749
if (isMetric) {
4850
if (randomBoolean()) {
@@ -54,6 +56,12 @@ static FieldStats createTestFieldStats() {
5456
}
5557
meanValue = randomDouble();
5658
medianValue = randomDouble();
59+
} else {
60+
boolean isDate = randomBoolean();
61+
if (isDate) {
62+
earliestTimestamp = randomAlphaOfLength(20);
63+
latestTimestamp = randomAlphaOfLength(20);
64+
}
5765
}
5866

5967
List<Map<String, Object>> topHits = new ArrayList<>();
@@ -68,7 +76,7 @@ static FieldStats createTestFieldStats() {
6876
topHits.add(topHit);
6977
}
7078

71-
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
79+
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
7280
}
7381

7482
@Override

docs/reference/ml/apis/find-file-structure.asciidoc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,8 @@ If the request does not encounter errors, you receive the following result:
445445
"release_date" : {
446446
"count" : 24,
447447
"cardinality" : 20,
448+
"earliest" : "1932-06-01",
449+
"latest" : "2011-06-02",
448450
"top_hits" : [
449451
{
450452
"value" : "1985-06-01",
@@ -1152,6 +1154,8 @@ If the request does not encounter errors, you receive the following result:
11521154
"tpep_dropoff_datetime" : {
11531155
"count" : 19998,
11541156
"cardinality" : 9066,
1157+
"earliest" : "2018-05-31 06:18:15",
1158+
"latest" : "2018-06-02 02:25:44",
11551159
"top_hits" : [
11561160
{
11571161
"value" : "2018-06-01 01:12:12",
@@ -1198,6 +1202,8 @@ If the request does not encounter errors, you receive the following result:
11981202
"tpep_pickup_datetime" : {
11991203
"count" : 19998,
12001204
"cardinality" : 8760,
1205+
"earliest" : "2018-05-31 06:08:31",
1206+
"latest" : "2018-06-02 01:21:21",
12011207
"top_hits" : [
12021208
{
12031209
"value" : "2018-06-01 00:01:23",
@@ -1457,6 +1463,8 @@ this:
14571463
"timestamp" : {
14581464
"count" : 53,
14591465
"cardinality" : 28,
1466+
"earliest" : "2018-09-27T14:39:28,518",
1467+
"latest" : "2018-09-27T14:39:37,012",
14601468
"top_hits" : [
14611469
{
14621470
"value" : "2018-09-27T14:39:29,859",
@@ -1719,6 +1727,8 @@ this:
17191727
"timestamp" : {
17201728
"count" : 53,
17211729
"cardinality" : 28,
1730+
"earliest" : "2018-09-27T14:39:28,518",
1731+
"latest" : "2018-09-27T14:39:37,012",
17221732
"top_hits" : [
17231733
{
17241734
"value" : "2018-09-27T14:39:29,859",

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
*/
66
package org.elasticsearch.xpack.core.ml.filestructurefinder;
77

8+
import org.elasticsearch.Version;
89
import org.elasticsearch.common.ParseField;
10+
import org.elasticsearch.common.Strings;
911
import org.elasticsearch.common.io.stream.StreamInput;
1012
import org.elasticsearch.common.io.stream.StreamOutput;
1113
import org.elasticsearch.common.io.stream.Writeable;
@@ -27,12 +29,14 @@ public class FieldStats implements ToXContentObject, Writeable {
2729
static final ParseField MAX_VALUE = new ParseField("max_value");
2830
static final ParseField MEAN_VALUE = new ParseField("mean_value");
2931
static final ParseField MEDIAN_VALUE = new ParseField("median_value");
32+
static final ParseField EARLIEST = new ParseField("earliest");
33+
static final ParseField LATEST = new ParseField("latest");
3034
static final ParseField TOP_HITS = new ParseField("top_hits");
3135

3236
@SuppressWarnings("unchecked")
3337
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", false,
3438
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
35-
(List<Map<String, Object>>) a[6]));
39+
(String) a[6], (String) a[7], (List<Map<String, Object>>) a[8]));
3640

3741
static {
3842
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
@@ -41,6 +45,8 @@ public class FieldStats implements ToXContentObject, Writeable {
4145
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
4246
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
4347
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
48+
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), EARLIEST);
49+
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), LATEST);
4450
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
4551
}
4652

@@ -50,20 +56,33 @@ public class FieldStats implements ToXContentObject, Writeable {
5056
private final Double maxValue;
5157
private final Double meanValue;
5258
private final Double medianValue;
59+
private final String earliestTimestamp;
60+
private final String latestTimestamp;
5361
private final List<Map<String, Object>> topHits;
5462

5563
public FieldStats(long count, int cardinality, List<Map<String, Object>> topHits) {
56-
this(count, cardinality, null, null, null, null, topHits);
64+
this(count, cardinality, null, null, null, null, null, null, topHits);
65+
}
66+
67+
public FieldStats(long count, int cardinality, String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
68+
this(count, cardinality, null, null, null, null, earliestTimestamp, latestTimestamp, topHits);
5769
}
5870

5971
public FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
6072
List<Map<String, Object>> topHits) {
73+
this(count, cardinality, minValue, maxValue, meanValue, medianValue, null, null, topHits);
74+
}
75+
76+
FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
77+
String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
6178
this.count = count;
6279
this.cardinality = cardinality;
6380
this.minValue = minValue;
6481
this.maxValue = maxValue;
6582
this.meanValue = meanValue;
6683
this.medianValue = medianValue;
84+
this.earliestTimestamp = earliestTimestamp;
85+
this.latestTimestamp = latestTimestamp;
6786
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
6887
}
6988

@@ -74,6 +93,13 @@ public FieldStats(StreamInput in) throws IOException {
7493
maxValue = in.readOptionalDouble();
7594
meanValue = in.readOptionalDouble();
7695
medianValue = in.readOptionalDouble();
96+
if (in.getVersion().onOrAfter(Version.V_7_3_0)) {
97+
earliestTimestamp = in.readOptionalString();
98+
latestTimestamp = in.readOptionalString();
99+
} else {
100+
earliestTimestamp = null;
101+
latestTimestamp = null;
102+
}
77103
topHits = in.readList(StreamInput::readMap);
78104
}
79105

@@ -85,6 +111,10 @@ public void writeTo(StreamOutput out) throws IOException {
85111
out.writeOptionalDouble(maxValue);
86112
out.writeOptionalDouble(meanValue);
87113
out.writeOptionalDouble(medianValue);
114+
if (out.getVersion().onOrAfter(Version.V_7_3_0)) {
115+
out.writeOptionalString(earliestTimestamp);
116+
out.writeOptionalString(latestTimestamp);
117+
}
88118
out.writeCollection(topHits, StreamOutput::writeMap);
89119
}
90120

@@ -112,6 +142,14 @@ public Double getMedianValue() {
112142
return medianValue;
113143
}
114144

145+
public String getEarliestTimestamp() {
146+
return earliestTimestamp;
147+
}
148+
149+
public String getLatestTimestamp() {
150+
return latestTimestamp;
151+
}
152+
115153
public List<Map<String, Object>> getTopHits() {
116154
return topHits;
117155
}
@@ -134,6 +172,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
134172
if (medianValue != null) {
135173
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
136174
}
175+
if (earliestTimestamp != null) {
176+
builder.field(EARLIEST.getPreferredName(), earliestTimestamp);
177+
}
178+
if (latestTimestamp != null) {
179+
builder.field(LATEST.getPreferredName(), latestTimestamp);
180+
}
137181
if (topHits.isEmpty() == false) {
138182
builder.field(TOP_HITS.getPreferredName(), topHits);
139183
}
@@ -154,7 +198,7 @@ public static Number toIntegerIfInteger(double d) {
154198
@Override
155199
public int hashCode() {
156200

157-
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
201+
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
158202
}
159203

160204
@Override
@@ -175,6 +219,13 @@ public boolean equals(Object other) {
175219
Objects.equals(this.maxValue, that.maxValue) &&
176220
Objects.equals(this.meanValue, that.meanValue) &&
177221
Objects.equals(this.medianValue, that.medianValue) &&
222+
Objects.equals(this.earliestTimestamp, that.earliestTimestamp) &&
223+
Objects.equals(this.latestTimestamp, that.latestTimestamp) &&
178224
Objects.equals(this.topHits, that.topHits);
179225
}
226+
227+
@Override
228+
public String toString() {
229+
return Strings.toString(this);
230+
}
180231
}

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ static FieldStats createTestFieldStats() {
3030
Double maxValue = null;
3131
Double meanValue = null;
3232
Double medianValue = null;
33+
String earliestTimestamp = null;
34+
String latestTimestamp = null;
3335
boolean isMetric = randomBoolean();
3436
if (isMetric) {
3537
if (randomBoolean()) {
@@ -41,6 +43,12 @@ static FieldStats createTestFieldStats() {
4143
}
4244
meanValue = randomDouble();
4345
medianValue = randomDouble();
46+
} else {
47+
boolean isDate = randomBoolean();
48+
if (isDate) {
49+
earliestTimestamp = randomAlphaOfLength(20);
50+
latestTimestamp = randomAlphaOfLength(20);
51+
}
4452
}
4553

4654
List<Map<String, Object>> topHits = new ArrayList<>();
@@ -55,7 +63,7 @@ static FieldStats createTestFieldStats() {
5563
topHits.add(topHit);
5664
}
5765

58-
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
66+
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
5967
}
6068

6169
@Override

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,7 @@ static DelimitedFileStructureFinder makeDelimitedFileStructureFinder(List<String
159159

160160
SortedMap<String, Object> mappings = mappingsAndFieldStats.v1();
161161
if (timeField != null) {
162-
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD,
163-
Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date"));
162+
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
164163
}
165164

166165
if (mappingsAndFieldStats.v2() != null) {

0 commit comments

Comments
 (0)