Skip to content

Commit 69fe13c

Browse files
author
Hendrik Muhs
authored
[Transform] Optimize transform for date histogram (#54068)
Optimize transform for group_by on date_histogram by injecting an additional range query. This limits the number of search and index requests and avoids unnecessary updates. Only recent buckets get re-written. Fixes #54254
1 parent 8168895 commit 69fe13c

File tree

11 files changed

+221
-140
lines changed

11 files changed

+221
-140
lines changed

client/rest-high-level/src/test/java/org/elasticsearch/client/transform/transforms/pivot/hlrc/DateHistogramGroupSourceTests.java

+4-5
Original file line numberDiff line numberDiff line change
@@ -74,19 +74,18 @@ public static DateHistogramGroupSource randomDateHistogramGroupSource() {
7474
dateHistogramGroupSource = new DateHistogramGroupSource(
7575
field,
7676
scriptConfig,
77-
new DateHistogramGroupSource.FixedInterval(new DateHistogramInterval(randomPositiveTimeValue()))
77+
new DateHistogramGroupSource.FixedInterval(new DateHistogramInterval(randomPositiveTimeValue())),
78+
randomBoolean() ? randomZone() : null
7879
);
7980
} else {
8081
dateHistogramGroupSource = new DateHistogramGroupSource(
8182
field,
8283
scriptConfig,
83-
new DateHistogramGroupSource.CalendarInterval(new DateHistogramInterval(randomTimeValue(1, 1, "m", "h", "d", "w")))
84+
new DateHistogramGroupSource.CalendarInterval(new DateHistogramInterval(randomTimeValue(1, 1, "m", "h", "d", "w"))),
85+
randomBoolean() ? randomZone() : null
8486
);
8587
}
8688

87-
if (randomBoolean()) {
88-
dateHistogramGroupSource.setTimeZone(randomZone());
89-
}
9089
return dateHistogramGroupSource;
9190
}
9291

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/transforms/SyncConfig.java

+2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ public interface SyncConfig extends ToXContentObject, NamedWriteable {
1919
*/
2020
boolean isValid();
2121

22+
String getField();
23+
2224
QueryBuilder getRangeQuery(TransformCheckpoint newCheckpoint);
2325

2426
QueryBuilder getRangeQuery(TransformCheckpoint oldCheckpoint, TransformCheckpoint newCheckpoint);

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/transforms/TimeSyncConfig.java

+16-14
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
2626
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
2727

28-
public class TimeSyncConfig implements SyncConfig {
28+
public class TimeSyncConfig implements SyncConfig {
2929

3030
public static final TimeValue DEFAULT_DELAY = TimeValue.timeValueSeconds(60);
3131
private static final String NAME = "data_frame_transform_pivot_sync_time";
@@ -37,17 +37,18 @@ public class TimeSyncConfig implements SyncConfig {
3737
private static final ConstructingObjectParser<TimeSyncConfig, Void> LENIENT_PARSER = createParser(true);
3838

3939
private static ConstructingObjectParser<TimeSyncConfig, Void> createParser(boolean lenient) {
40-
ConstructingObjectParser<TimeSyncConfig, Void> parser = new ConstructingObjectParser<>(NAME, lenient,
41-
args -> {
42-
String field = (String) args[0];
43-
TimeValue delay = (TimeValue) args[1];
44-
return new TimeSyncConfig(field, delay);
45-
});
40+
ConstructingObjectParser<TimeSyncConfig, Void> parser = new ConstructingObjectParser<>(NAME, lenient, args -> {
41+
String field = (String) args[0];
42+
TimeValue delay = (TimeValue) args[1];
43+
return new TimeSyncConfig(field, delay);
44+
});
4645
parser.declareString(constructorArg(), TransformField.FIELD);
47-
parser.declareField(optionalConstructorArg(),
46+
parser.declareField(
47+
optionalConstructorArg(),
4848
(p, c) -> TimeValue.parseTimeValue(p.text(), DEFAULT_DELAY, TransformField.DELAY.getPreferredName()),
4949
TransformField.DELAY,
50-
ObjectParser.ValueType.STRING);
50+
ObjectParser.ValueType.STRING
51+
);
5152
return parser;
5253
}
5354

@@ -65,6 +66,7 @@ public TimeSyncConfig(StreamInput in) throws IOException {
6566
this.delay = in.readTimeValue();
6667
}
6768

69+
@Override
6870
public String getField() {
6971
return field;
7072
}
@@ -105,12 +107,11 @@ public boolean equals(Object other) {
105107

106108
final TimeSyncConfig that = (TimeSyncConfig) other;
107109

108-
return Objects.equals(this.field, that.field)
109-
&& Objects.equals(this.delay, that.delay);
110+
return Objects.equals(this.field, that.field) && Objects.equals(this.delay, that.delay);
110111
}
111112

112113
@Override
113-
public int hashCode(){
114+
public int hashCode() {
114115
return Objects.hash(field, delay);
115116
}
116117

@@ -139,7 +140,8 @@ public QueryBuilder getRangeQuery(TransformCheckpoint newCheckpoint) {
139140

140141
@Override
141142
public QueryBuilder getRangeQuery(TransformCheckpoint oldCheckpoint, TransformCheckpoint newCheckpoint) {
142-
return new RangeQueryBuilder(field).gte(oldCheckpoint.getTimeUpperBound()).lt(newCheckpoint.getTimeUpperBound())
143-
.format("epoch_millis");
143+
return new RangeQueryBuilder(field).gte(oldCheckpoint.getTimeUpperBound())
144+
.lt(newCheckpoint.getTimeUpperBound())
145+
.format("epoch_millis");
144146
}
145147
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/transforms/pivot/DateHistogramGroupSource.java

+45-9
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,18 @@
77

88
import org.elasticsearch.Version;
99
import org.elasticsearch.common.ParseField;
10+
import org.elasticsearch.common.Rounding;
1011
import org.elasticsearch.common.io.stream.StreamInput;
1112
import org.elasticsearch.common.io.stream.StreamOutput;
1213
import org.elasticsearch.common.io.stream.Writeable;
14+
import org.elasticsearch.common.unit.TimeValue;
1315
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
1416
import org.elasticsearch.common.xcontent.ObjectParser;
1517
import org.elasticsearch.common.xcontent.ToXContentFragment;
1618
import org.elasticsearch.common.xcontent.XContentBuilder;
1719
import org.elasticsearch.common.xcontent.XContentParser;
1820
import org.elasticsearch.index.query.QueryBuilder;
21+
import org.elasticsearch.index.query.RangeQueryBuilder;
1922
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
2023
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
2124

@@ -105,6 +108,11 @@ public boolean equals(Object other) {
105108
public int hashCode() {
106109
return Objects.hash(interval);
107110
}
111+
112+
@Override
113+
public String toString() {
114+
return interval.toString();
115+
}
108116
}
109117

110118
public static class CalendarInterval implements Interval {
@@ -169,6 +177,11 @@ public boolean equals(Object other) {
169177
public int hashCode() {
170178
return Objects.hash(interval);
171179
}
180+
181+
@Override
182+
public String toString() {
183+
return interval.toString();
184+
}
172185
}
173186

174187
private Interval readInterval(StreamInput in) throws IOException {
@@ -195,11 +208,26 @@ private void writeInterval(Interval interval, StreamOutput out) throws IOExcepti
195208
private static final ConstructingObjectParser<DateHistogramGroupSource, Void> LENIENT_PARSER = createParser(true);
196209

197210
private final Interval interval;
198-
private ZoneId timeZone;
211+
private final ZoneId timeZone;
212+
private Rounding rounding;
199213

200-
public DateHistogramGroupSource(String field, ScriptConfig scriptConfig, Interval interval) {
214+
public DateHistogramGroupSource(String field, ScriptConfig scriptConfig, Interval interval, ZoneId timeZone) {
201215
super(field, scriptConfig);
202216
this.interval = interval;
217+
this.timeZone = timeZone;
218+
219+
Rounding.DateTimeUnit timeUnit = DateHistogramAggregationBuilder.DATE_FIELD_UNITS.get(interval.toString());
220+
final Rounding.Builder roundingBuilder;
221+
if (timeUnit != null) {
222+
roundingBuilder = new Rounding.Builder(timeUnit);
223+
} else {
224+
roundingBuilder = new Rounding.Builder(TimeValue.parseTimeValue(interval.toString(), interval.getName()));
225+
}
226+
227+
if (timeZone != null) {
228+
roundingBuilder.timeZone(timeZone);
229+
}
230+
this.rounding = roundingBuilder.build();
203231
}
204232

205233
public DateHistogramGroupSource(StreamInput in) throws IOException {
@@ -218,6 +246,7 @@ private static ConstructingObjectParser<DateHistogramGroupSource, Void> createPa
218246
ScriptConfig scriptConfig = (ScriptConfig) args[1];
219247
String fixedInterval = (String) args[2];
220248
String calendarInterval = (String) args[3];
249+
ZoneId zoneId = (ZoneId) args[4];
221250

222251
Interval interval = null;
223252

@@ -231,15 +260,15 @@ private static ConstructingObjectParser<DateHistogramGroupSource, Void> createPa
231260
throw new IllegalArgumentException("You must specify either fixed_interval or calendar_interval, found none");
232261
}
233262

234-
return new DateHistogramGroupSource(field, scriptConfig, interval);
263+
return new DateHistogramGroupSource(field, scriptConfig, interval, zoneId);
235264
});
236265

237266
declareValuesSourceFields(parser, lenient);
238267

239268
parser.declareString(optionalConstructorArg(), new ParseField(FixedInterval.NAME));
240269
parser.declareString(optionalConstructorArg(), new ParseField(CalendarInterval.NAME));
241270

242-
parser.declareField(DateHistogramGroupSource::setTimeZone, p -> {
271+
parser.declareField(optionalConstructorArg(), p -> {
243272
if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
244273
return ZoneId.of(p.text());
245274
} else {
@@ -267,8 +296,8 @@ public ZoneId getTimeZone() {
267296
return timeZone;
268297
}
269298

270-
public void setTimeZone(ZoneId timeZone) {
271-
this.timeZone = timeZone;
299+
public Rounding getRounding() {
300+
return rounding;
272301
}
273302

274303
@Override
@@ -315,9 +344,16 @@ public int hashCode() {
315344
}
316345

317346
@Override
318-
public QueryBuilder getIncrementalBucketUpdateFilterQuery(Set<String> changedBuckets) {
319-
// no need for an extra range filter as this is already done by checkpoints
320-
return null;
347+
public QueryBuilder getIncrementalBucketUpdateFilterQuery(
348+
Set<String> changedBuckets,
349+
String synchronizationField,
350+
long synchronizationTimestamp
351+
) {
352+
if (synchronizationField != null && synchronizationField.equals(field) && synchronizationTimestamp > 0) {
353+
return new RangeQueryBuilder(field).gte(rounding.round(synchronizationTimestamp)).format("epoch_millis");
354+
} else {
355+
return null;
356+
}
321357
}
322358

323359
@Override

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/transforms/pivot/HistogramGroupSource.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,11 @@ public int hashCode() {
101101
}
102102

103103
@Override
104-
public QueryBuilder getIncrementalBucketUpdateFilterQuery(Set<String> changedBuckets) {
104+
public QueryBuilder getIncrementalBucketUpdateFilterQuery(
105+
Set<String> changedBuckets,
106+
String synchronizationField,
107+
long synchronizationTimestamp
108+
) {
105109
// histograms are simple and cheap, so we skip this optimization
106110
return null;
107111
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/transforms/pivot/SingleGroupSource.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,11 @@ public void writeTo(StreamOutput out) throws IOException {
116116

117117
public abstract boolean supportsIncrementalBucketUpdate();
118118

119-
public abstract QueryBuilder getIncrementalBucketUpdateFilterQuery(Set<String> changedBuckets);
119+
public abstract QueryBuilder getIncrementalBucketUpdateFilterQuery(
120+
Set<String> changedBuckets,
121+
String synchronizationField,
122+
long synchronizationTimestamp
123+
);
120124

121125
public String getField() {
122126
return field;

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/transforms/pivot/TermsGroupSource.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,15 @@ public static TermsGroupSource fromXContent(final XContentParser parser, boolean
5454
}
5555

5656
@Override
57-
public QueryBuilder getIncrementalBucketUpdateFilterQuery(Set<String> changedBuckets) {
58-
return new TermsQueryBuilder(field, changedBuckets);
57+
public QueryBuilder getIncrementalBucketUpdateFilterQuery(
58+
Set<String> changedBuckets,
59+
String synchronizationField,
60+
long synchronizationTimestamp
61+
) {
62+
if (changedBuckets != null && changedBuckets.isEmpty() == false) {
63+
return new TermsQueryBuilder(field, changedBuckets);
64+
}
65+
return null;
5966
}
6067

6168
@Override

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/transform/transforms/TransformConfigUpdateTests.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
import java.util.Collections;
2222
import java.util.Map;
2323

24-
import static org.elasticsearch.xpack.core.transform.transforms.TransformConfigTests.randomTransformConfig;
2524
import static org.elasticsearch.xpack.core.transform.transforms.DestConfigTests.randomDestConfig;
2625
import static org.elasticsearch.xpack.core.transform.transforms.SourceConfigTests.randomSourceConfig;
26+
import static org.elasticsearch.xpack.core.transform.transforms.TransformConfigTests.randomTransformConfig;
2727
import static org.hamcrest.Matchers.equalTo;
2828

2929
public class TransformConfigUpdateTests extends AbstractSerializingTransformTestCase<TransformConfigUpdate> {
@@ -184,6 +184,11 @@ public String getWriteableName() {
184184
return "foo";
185185
}
186186

187+
@Override
188+
public String getField() {
189+
return "foo";
190+
}
191+
187192
@Override
188193
public void writeTo(StreamOutput out) throws IOException {}
189194

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/transform/transforms/pivot/DateHistogramGroupSourceTests.java

+64-5
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,17 @@
1010
import org.elasticsearch.common.io.stream.BytesStreamOutput;
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.Writeable.Reader;
13+
import org.elasticsearch.common.time.DateFormatter;
14+
import org.elasticsearch.common.time.DateFormatters;
1315
import org.elasticsearch.common.xcontent.XContentParser;
1416
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
1517
import org.elasticsearch.test.AbstractSerializingTestCase;
1618

1719
import java.io.IOException;
20+
import java.time.ZoneOffset;
21+
import java.time.temporal.TemporalAccessor;
22+
23+
import static org.hamcrest.Matchers.equalTo;
1824

1925
public class DateHistogramGroupSourceTests extends AbstractSerializingTestCase<DateHistogramGroupSource> {
2026

@@ -26,19 +32,20 @@ public static DateHistogramGroupSource randomDateHistogramGroupSource() {
2632
dateHistogramGroupSource = new DateHistogramGroupSource(
2733
field,
2834
scriptConfig,
29-
new DateHistogramGroupSource.FixedInterval(new DateHistogramInterval(randomPositiveTimeValue()))
35+
new DateHistogramGroupSource.FixedInterval(new DateHistogramInterval(randomTimeValue(1, 100, "d", "h", "ms", "s", "m"))),
36+
randomBoolean() ? randomZone() : null
3037
);
3138
} else {
3239
dateHistogramGroupSource = new DateHistogramGroupSource(
3340
field,
3441
scriptConfig,
35-
new DateHistogramGroupSource.CalendarInterval(new DateHistogramInterval(randomTimeValue(1, 1, "m", "h", "d", "w")))
42+
new DateHistogramGroupSource.CalendarInterval(
43+
new DateHistogramInterval(randomTimeValue(1, 1, "m", "h", "d", "w", "M", "q", "y"))
44+
),
45+
randomBoolean() ? randomZone() : null
3646
);
3747
}
3848

39-
if (randomBoolean()) {
40-
dateHistogramGroupSource.setTimeZone(randomZone());
41-
}
4249
return dateHistogramGroupSource;
4350
}
4451

@@ -70,4 +77,56 @@ protected Reader<DateHistogramGroupSource> instanceReader() {
7077
return DateHistogramGroupSource::new;
7178
}
7279

80+
public void testRoundingDateHistogramFixedInterval() {
81+
String field = randomBoolean() ? null : randomAlphaOfLengthBetween(1, 20);
82+
DateHistogramGroupSource dateHistogramGroupSource = new DateHistogramGroupSource(
83+
field,
84+
null,
85+
new DateHistogramGroupSource.FixedInterval(new DateHistogramInterval("1d")),
86+
null
87+
);
88+
89+
// not meant to be complete rounding tests, see {@link RoundingTests} for more
90+
assertNotNull(dateHistogramGroupSource.getRounding());
91+
92+
assertThat(
93+
dateHistogramGroupSource.getRounding().round(time("2020-03-26T23:59:59.000Z")),
94+
equalTo(time("2020-03-26T00:00:00.000Z"))
95+
);
96+
assertThat(
97+
dateHistogramGroupSource.getRounding().round(time("2020-03-26T00:00:01.000Z")),
98+
equalTo(time("2020-03-26T00:00:00.000Z"))
99+
);
100+
}
101+
102+
public void testRoundingDateHistogramCalendarInterval() {
103+
String field = randomBoolean() ? null : randomAlphaOfLengthBetween(1, 20);
104+
DateHistogramGroupSource dateHistogramGroupSource = new DateHistogramGroupSource(
105+
field,
106+
null,
107+
new DateHistogramGroupSource.CalendarInterval(new DateHistogramInterval("1w")),
108+
null
109+
);
110+
111+
// not meant to be complete rounding tests, see {@link RoundingTests} for more
112+
assertNotNull(dateHistogramGroupSource.getRounding());
113+
114+
assertThat(
115+
dateHistogramGroupSource.getRounding().round(time("2020-03-26T23:59:59.000Z")),
116+
equalTo(time("2020-03-23T00:00:00.000Z"))
117+
);
118+
assertThat(
119+
dateHistogramGroupSource.getRounding().round(time("2020-03-29T23:59:59.000Z")),
120+
equalTo(time("2020-03-23T00:00:00.000Z"))
121+
);
122+
assertThat(
123+
dateHistogramGroupSource.getRounding().round(time("2020-03-23T00:00:01.000Z")),
124+
equalTo(time("2020-03-23T00:00:00.000Z"))
125+
);
126+
}
127+
128+
private static long time(String time) {
129+
TemporalAccessor accessor = DateFormatter.forPattern("date_optional_time").withZone(ZoneOffset.UTC).parse(time);
130+
return DateFormatters.from(accessor).toInstant().toEpochMilli();
131+
}
73132
}

0 commit comments

Comments
 (0)