Skip to content

Commit 0483f7c

Browse files
authored
Begin moving date_histogram to offset rounding (take two) (#51271)
We added a new rounding in #50609 that handles offsets to the start and end of the rounding so that we could support `offset` in the `composite` aggregation. This starts moving `date_histogram` to that new offset. This is a redo of #50873 with more integration tests. This reverts commit d114c9d.
1 parent d1de74e commit 0483f7c

File tree

10 files changed

+287
-35
lines changed

10 files changed

+287
-35
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml

+209
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,213 @@ setup:
139139

140140
- match: { aggregations.histo.buckets.3.doc_count: 1 }
141141

142+
---
143+
"date_histogram":
144+
- skip:
145+
version: " - 7.1.99"
146+
reason: calendar_interval introduced in 7.2.0
147+
148+
- do:
149+
indices.create:
150+
index: test_2
151+
body:
152+
settings:
153+
# There was a BWC issue that only showed up on empty shards. This
154+
# test has 4 docs and 5 shards makes sure we get one empty.
155+
number_of_shards: 5
156+
mappings:
157+
properties:
158+
date:
159+
type : date
160+
161+
- do:
162+
bulk:
163+
index: test_2
164+
refresh: true
165+
body:
166+
- '{"index": {}}'
167+
- '{"date": "2016-01-01"}'
168+
- '{"index": {}}'
169+
- '{"date": "2016-01-02"}'
170+
- '{"index": {}}'
171+
- '{"date": "2016-02-01"}'
172+
- '{"index": {}}'
173+
- '{"date": "2016-03-01"}'
174+
175+
- do:
176+
search:
177+
body:
178+
size: 0
179+
aggs:
180+
histo:
181+
date_histogram:
182+
field: date
183+
calendar_interval: month
184+
185+
- match: { hits.total.value: 4 }
186+
- length: { aggregations.histo.buckets: 3 }
187+
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
188+
- match: { aggregations.histo.buckets.0.doc_count: 2 }
189+
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
190+
- match: { aggregations.histo.buckets.1.doc_count: 1 }
191+
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
192+
- match: { aggregations.histo.buckets.2.doc_count: 1 }
193+
194+
---
195+
"date_histogram with offset":
196+
- skip:
197+
version: " - 7.1.99"
198+
reason: calendar_interval introduced in 7.2.0
199+
200+
- do:
201+
indices.create:
202+
index: test_2
203+
body:
204+
settings:
205+
# There was a BWC issue that only showed up on empty shards. This
206+
# test has 4 docs and 5 shards makes sure we get one empty.
207+
number_of_shards: 5
208+
mappings:
209+
properties:
210+
date:
211+
type : date
212+
213+
- do:
214+
bulk:
215+
index: test_2
216+
refresh: true
217+
body:
218+
- '{"index": {}}'
219+
- '{"date": "2016-01-01"}'
220+
- '{"index": {}}'
221+
- '{"date": "2016-01-02"}'
222+
- '{"index": {}}'
223+
- '{"date": "2016-02-01"}'
224+
- '{"index": {}}'
225+
- '{"date": "2016-03-01"}'
226+
227+
- do:
228+
search:
229+
body:
230+
size: 0
231+
aggs:
232+
histo:
233+
date_histogram:
234+
field: date
235+
calendar_interval: month
236+
offset: +1d
237+
238+
- match: { hits.total.value: 4 }
239+
- length: { aggregations.histo.buckets: 3 }
240+
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
241+
- match: { aggregations.histo.buckets.0.doc_count: 1 }
242+
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
243+
- match: { aggregations.histo.buckets.1.doc_count: 2 }
244+
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
245+
- match: { aggregations.histo.buckets.2.doc_count: 1 }
246+
142247

248+
---
249+
"date_histogram on range":
250+
- skip:
251+
version: " - 7.1.99"
252+
reason: calendar_interval introduced in 7.2.0
253+
254+
- do:
255+
indices.create:
256+
index: test_2
257+
body:
258+
settings:
259+
# There was a BWC issue that only showed up on empty shards. This
260+
# test has 4 docs and 5 shards makes sure we get one empty.
261+
number_of_shards: 5
262+
mappings:
263+
properties:
264+
range:
265+
type : date_range
266+
267+
- do:
268+
bulk:
269+
index: test_2
270+
refresh: true
271+
body:
272+
- '{"index": {}}'
273+
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
274+
- '{"index": {}}'
275+
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
276+
- '{"index": {}}'
277+
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
278+
- '{"index": {}}'
279+
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'
280+
281+
- do:
282+
search:
283+
body:
284+
size: 0
285+
aggs:
286+
histo:
287+
date_histogram:
288+
field: range
289+
calendar_interval: month
290+
291+
- match: { hits.total.value: 4 }
292+
- length: { aggregations.histo.buckets: 3 }
293+
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
294+
- match: { aggregations.histo.buckets.0.doc_count: 2 }
295+
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
296+
- match: { aggregations.histo.buckets.1.doc_count: 1 }
297+
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
298+
- match: { aggregations.histo.buckets.2.doc_count: 1 }
299+
300+
---
301+
"date_histogram on range with offset":
302+
- skip:
303+
version: " - 7.1.99"
304+
reason: calendar_interval introduced in 7.2.0
305+
306+
- do:
307+
indices.create:
308+
index: test_2
309+
body:
310+
settings:
311+
# There was a BWC issue that only showed up on empty shards. This
312+
# test has 4 docs and 5 shards makes sure we get one empty.
313+
number_of_shards: 5
314+
mappings:
315+
properties:
316+
range:
317+
type : date_range
318+
319+
- do:
320+
bulk:
321+
index: test_2
322+
refresh: true
323+
body:
324+
- '{"index": {}}'
325+
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
326+
- '{"index": {}}'
327+
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
328+
- '{"index": {}}'
329+
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
330+
- '{"index": {}}'
331+
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'
332+
333+
- do:
334+
search:
335+
body:
336+
size: 0
337+
aggs:
338+
histo:
339+
date_histogram:
340+
field: range
341+
calendar_interval: month
342+
offset: +1d
343+
344+
- match: { hits.total.value: 4 }
345+
- length: { aggregations.histo.buckets: 3 }
346+
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
347+
- match: { aggregations.histo.buckets.0.doc_count: 1 }
348+
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
349+
- match: { aggregations.histo.buckets.1.doc_count: 2 }
350+
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
351+
- match: { aggregations.histo.buckets.2.doc_count: 1 }

server/src/main/java/org/elasticsearch/common/Rounding.java

+44-2
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,19 @@ public void writeTo(StreamOutput out) throws IOException {
164164
*/
165165
public abstract long nextRoundingValue(long value);
166166

167+
/**
168+
* How "offset" this rounding is from the traditional "start" of the period.
169+
* @deprecated We're in the process of abstracting offset *into* Rounding
170+
* so keep any usage to migratory shims
171+
*/
172+
@Deprecated
173+
public abstract long offset();
174+
175+
/**
176+
* Strip the {@code offset} from these bounds.
177+
*/
178+
public abstract Rounding withoutOffset();
179+
167180
@Override
168181
public abstract boolean equals(Object obj);
169182

@@ -420,6 +433,16 @@ public long nextRoundingValue(long utcMillis) {
420433
}
421434
}
422435

436+
@Override
437+
public long offset() {
438+
return 0;
439+
}
440+
441+
@Override
442+
public Rounding withoutOffset() {
443+
return this;
444+
}
445+
423446
@Override
424447
public int hashCode() {
425448
return Objects.hash(unit, timeZone);
@@ -546,6 +569,16 @@ public long nextRoundingValue(long time) {
546569
.toInstant().toEpochMilli();
547570
}
548571

572+
@Override
573+
public long offset() {
574+
return 0;
575+
}
576+
577+
@Override
578+
public Rounding withoutOffset() {
579+
return this;
580+
}
581+
549582
@Override
550583
public int hashCode() {
551584
return Objects.hash(interval, timeZone);
@@ -607,8 +640,17 @@ public long round(long value) {
607640

608641
@Override
609642
public long nextRoundingValue(long value) {
610-
// This isn't needed by the current users. We'll implement it when we migrate other users to it.
611-
throw new UnsupportedOperationException("not yet supported");
643+
return delegate.nextRoundingValue(value - offset) + offset;
644+
}
645+
646+
@Override
647+
public long offset() {
648+
return offset;
649+
}
650+
651+
@Override
652+
public Rounding withoutOffset() {
653+
return delegate;
612654
}
613655

614656
@Override

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java

+3-4
Original file line numberDiff line numberDiff line change
@@ -493,22 +493,21 @@ protected ValuesSourceAggregatorFactory<ValuesSource> innerBuild(QueryShardConte
493493
AggregatorFactory parent,
494494
Builder subFactoriesBuilder) throws IOException {
495495
final ZoneId tz = timeZone();
496-
// TODO use offset here rather than explicitly in the aggregation
497-
final Rounding rounding = dateHistogramInterval.createRounding(tz, 0);
496+
final Rounding rounding = dateHistogramInterval.createRounding(tz, offset);
498497
final ZoneId rewrittenTimeZone = rewriteTimeZone(queryShardContext);
499498
final Rounding shardRounding;
500499
if (tz == rewrittenTimeZone) {
501500
shardRounding = rounding;
502501
} else {
503-
shardRounding = dateHistogramInterval.createRounding(rewrittenTimeZone, 0);
502+
shardRounding = dateHistogramInterval.createRounding(rewrittenTimeZone, offset);
504503
}
505504

506505
ExtendedBounds roundedBounds = null;
507506
if (this.extendedBounds != null) {
508507
// parse any string bounds to longs and round
509508
roundedBounds = this.extendedBounds.parseAndValidate(name, queryShardContext, config.format()).round(rounding);
510509
}
511-
return new DateHistogramAggregatorFactory(name, config, offset, order, keyed, minDocCount,
510+
return new DateHistogramAggregatorFactory(name, config, order, keyed, minDocCount,
512511
rounding, shardRounding, roundedBounds, queryShardContext, parent, subFactoriesBuilder, metaData);
513512
}
514513

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java

+6-8
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,16 @@ class DateHistogramAggregator extends BucketsAggregator {
6464
private final ExtendedBounds extendedBounds;
6565

6666
private final LongHash bucketOrds;
67-
private long offset;
6867

6968
DateHistogramAggregator(String name, AggregatorFactories factories, Rounding rounding, Rounding shardRounding,
70-
long offset, BucketOrder order, boolean keyed,
69+
BucketOrder order, boolean keyed,
7170
long minDocCount, @Nullable ExtendedBounds extendedBounds, @Nullable ValuesSource.Numeric valuesSource,
7271
DocValueFormat formatter, SearchContext aggregationContext,
7372
Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
7473

7574
super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
7675
this.rounding = rounding;
7776
this.shardRounding = shardRounding;
78-
this.offset = offset;
7977
this.order = InternalOrder.validate(order, this);
8078
this.keyed = keyed;
8179
this.minDocCount = minDocCount;
@@ -113,7 +111,7 @@ public void collect(int doc, long bucket) throws IOException {
113111
long value = values.nextValue();
114112
// We can use shardRounding here, which is sometimes more efficient
115113
// if daylight saving times are involved.
116-
long rounded = shardRounding.round(value - offset) + offset;
114+
long rounded = shardRounding.round(value);
117115
assert rounded >= previousRounded;
118116
if (rounded == previousRounded) {
119117
continue;
@@ -148,9 +146,9 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOE
148146
// value source will be null for unmapped fields
149147
// Important: use `rounding` here, not `shardRounding`
150148
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
151-
? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
149+
? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
152150
: null;
153-
return new InternalDateHistogram(name, buckets, order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
151+
return new InternalDateHistogram(name, buckets, order, minDocCount, rounding.offset(), emptyBucketInfo, formatter, keyed,
154152
pipelineAggregators(), metaData());
155153
}
156154

@@ -159,8 +157,8 @@ public InternalAggregation buildEmptyAggregation() {
159157
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
160158
? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
161159
: null;
162-
return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
163-
pipelineAggregators(), metaData());
160+
return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, rounding.offset(), emptyBucketInfo, formatter,
161+
keyed, pipelineAggregators(), metaData());
164162
}
165163

166164
@Override

0 commit comments

Comments
 (0)