Skip to content

Commit 5f63ab8

Browse files
committed
Adds hard_bounds to histogram aggregations
Adds a hard_bounds parameter to explicitly limit the buckets that a histogram can generate. This is especially useful for open-ended ranges, which can otherwise produce a very large number of buckets. Closes elastic#50109
1 parent bee43b9 commit 5f63ab8

26 files changed

+808
-118
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml

+45
Original file line numberDiff line numberDiff line change
@@ -528,3 +528,48 @@ setup:
528528
- match: { profile.shards.0.aggregations.0.description: histo }
529529
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
530530
- match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 }
531+
532+
---
533+
"histogram with hard bounds":
534+
- skip:
535+
version: " - 7.9.99"
536+
reason: hard_bounds were introduced in 8.0.0
537+
538+
- do:
539+
indices.create:
540+
index: test_3
541+
body:
542+
mappings:
543+
properties:
544+
range:
545+
type: long_range
546+
547+
- do:
548+
bulk:
549+
index: test_3
550+
refresh: true
551+
body:
552+
- '{"index": {}}'
553+
- '{"range": {"lte": 10}}'
554+
- '{"index": {}}'
555+
- '{"range": {"gte": 15}}'
556+
557+
- do:
558+
search:
559+
index: test_3
560+
body:
561+
size: 0
562+
aggs:
563+
histo:
564+
histogram:
565+
field: range
566+
interval: 1
567+
hard_bounds:
568+
min: 0
569+
max: 20
570+
- match: { hits.total.value: 2 }
571+
- length: { aggregations.histo.buckets: 21 }
572+
- match: { aggregations.histo.buckets.0.key: 0 }
573+
- match: { aggregations.histo.buckets.0.doc_count: 1 }
574+
- match: { aggregations.histo.buckets.20.key: 20 }
575+
- match: { aggregations.histo.buckets.20.doc_count: 1 }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
setup:
2+
- skip:
3+
version: " - 7.1.99"
4+
reason: calendar_interval introduced in 7.2.0
5+
6+
- do:
7+
indices.create:
8+
index: test_date_hist
9+
body:
10+
settings:
11+
# There was a BWC issue that only showed up on empty shards. This
12+
# test has 6 docs and 5 shards, so that at least one shard is likely to be empty.
13+
number_of_shards: 5
14+
mappings:
15+
properties:
16+
range:
17+
type: date_range
18+
19+
- do:
20+
bulk:
21+
index: test_date_hist
22+
refresh: true
23+
body:
24+
- '{"index": {}}'
25+
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
26+
- '{"index": {}}'
27+
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
28+
- '{"index": {}}'
29+
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
30+
- '{"index": {}}'
31+
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'
32+
- '{"index": {}}'
33+
- '{"range": {"gte": "2016-04-01"}}'
34+
- '{"index": {}}'
35+
- '{"range": {"lt": "2016-02-01"}}'
36+
37+
---
38+
"date_histogram on range with hard bounds":
39+
- skip:
40+
version: " - 7.9.99"
41+
reason: hard_bounds introduced in 8.0.0
42+
43+
- do:
44+
search:
45+
body:
46+
size: 0
47+
aggs:
48+
histo:
49+
date_histogram:
50+
field: range
51+
calendar_interval: month
52+
hard_bounds:
53+
"min": "2015-06-01"
54+
"max": "2016-06-01"
55+
56+
- match: { hits.total.value: 6 }
57+
- length: { aggregations.histo.buckets: 13 }
58+
- match: { aggregations.histo.buckets.0.key_as_string: "2015-06-01T00:00:00.000Z" }
59+
- match: { aggregations.histo.buckets.0.doc_count: 1 }
60+
- match: { aggregations.histo.buckets.8.key_as_string: "2016-02-01T00:00:00.000Z" }
61+
- match: { aggregations.histo.buckets.8.doc_count: 1 }
62+
- match: { aggregations.histo.buckets.12.key_as_string: "2016-06-01T00:00:00.000Z" }
63+
- match: { aggregations.histo.buckets.12.doc_count: 1 }

server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java

+26-7
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
import org.elasticsearch.search.aggregations.BucketOrder;
3939
import org.elasticsearch.search.aggregations.InternalAggregation;
4040
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
41-
import org.elasticsearch.search.aggregations.bucket.histogram.ExtendedBounds;
41+
import org.elasticsearch.search.aggregations.bucket.histogram.LongBounds;
4242
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
4343
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket;
4444
import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram;
@@ -1084,7 +1084,7 @@ public void testSingleValueFieldWithExtendedBounds() throws Exception {
10841084
.dateHistogramInterval(DateHistogramInterval.days(interval))
10851085
.minDocCount(0)
10861086
// when explicitly specifying a format, the extended bounds should be defined by the same format
1087-
.extendedBounds(new ExtendedBounds(format(boundsMin, pattern), format(boundsMax, pattern)))
1087+
.extendedBounds(new LongBounds(format(boundsMin, pattern), format(boundsMax, pattern)))
10881088
.format(pattern))
10891089
.get();
10901090

@@ -1152,7 +1152,7 @@ public void testSingleValueFieldWithExtendedBoundsTimezone() throws Exception {
11521152
.from("now/d").to("now/d").includeLower(true).includeUpper(true).timeZone(timezone.getId()))
11531153
.addAggregation(
11541154
dateHistogram("histo").field("date").dateHistogramInterval(DateHistogramInterval.hours(1))
1155-
.timeZone(timezone).minDocCount(0).extendedBounds(new ExtendedBounds("now/d", "now/d+23h"))
1155+
.timeZone(timezone).minDocCount(0).extendedBounds(new LongBounds("now/d", "now/d+23h"))
11561156
).get();
11571157
assertSearchResponse(response);
11581158

@@ -1205,7 +1205,7 @@ public void testSingleValueFieldWithExtendedBoundsOffset() throws Exception {
12051205
.addAggregation(
12061206
dateHistogram("histo").field("date").dateHistogramInterval(DateHistogramInterval.days(1))
12071207
.offset("+6h").minDocCount(0)
1208-
.extendedBounds(new ExtendedBounds("2016-01-01T06:00:00Z", "2016-01-08T08:00:00Z"))
1208+
.extendedBounds(new LongBounds("2016-01-01T06:00:00Z", "2016-01-08T08:00:00Z"))
12091209
).get();
12101210
assertSearchResponse(response);
12111211

@@ -1377,7 +1377,7 @@ public void testFormatIndexUnmapped() throws InterruptedException, ExecutionExce
13771377
SearchResponse response = client().prepareSearch(indexDateUnmapped)
13781378
.addAggregation(
13791379
dateHistogram("histo").field("dateField").dateHistogramInterval(DateHistogramInterval.MONTH).format("yyyy-MM")
1380-
.minDocCount(0).extendedBounds(new ExtendedBounds("2018-01", "2018-01")))
1380+
.minDocCount(0).extendedBounds(new LongBounds("2018-01", "2018-01")))
13811381
.get();
13821382
assertSearchResponse(response);
13831383
Histogram histo = response.getAggregations().get("histo");
@@ -1433,7 +1433,7 @@ public void testDSTEndTransition() throws Exception {
14331433
.setQuery(new MatchNoneQueryBuilder())
14341434
.addAggregation(dateHistogram("histo").field("date").timeZone(ZoneId.of("Europe/Oslo"))
14351435
.calendarInterval(DateHistogramInterval.HOUR).minDocCount(0).extendedBounds(
1436-
new ExtendedBounds("2015-10-25T02:00:00.000+02:00", "2015-10-25T04:00:00.000+01:00")))
1436+
new LongBounds("2015-10-25T02:00:00.000+02:00", "2015-10-25T04:00:00.000+01:00")))
14371437
.get();
14381438

14391439
Histogram histo = response.getAggregations().get("histo");
@@ -1450,7 +1450,7 @@ public void testDSTEndTransition() throws Exception {
14501450
.setQuery(new MatchNoneQueryBuilder())
14511451
.addAggregation(dateHistogram("histo").field("date").timeZone(ZoneId.of("Europe/Oslo"))
14521452
.dateHistogramInterval(DateHistogramInterval.HOUR).minDocCount(0).extendedBounds(
1453-
new ExtendedBounds("2015-10-25T02:00:00.000+02:00", "2015-10-25T04:00:00.000+01:00")))
1453+
new LongBounds("2015-10-25T02:00:00.000+02:00", "2015-10-25T04:00:00.000+01:00")))
14541454
.get();
14551455

14561456
histo = response.getAggregations().get("histo");
@@ -1647,4 +1647,23 @@ public void testDateKeyFormatting() {
16471647
assertThat(buckets.get(1).getKeyAsString(), equalTo("2012-02-01T00:00:00.000-07:00"));
16481648
assertThat(buckets.get(2).getKeyAsString(), equalTo("2012-03-01T00:00:00.000-07:00"));
16491649
}
1650+
1651+
public void testHardBoundsOnDates() {
1652+
SearchResponse response = client().prepareSearch("idx")
1653+
.addAggregation(dateHistogram("histo")
1654+
.field("date")
1655+
.calendarInterval(DateHistogramInterval.DAY)
1656+
.hardBounds(new LongBounds("2012-02-01T00:00:00.000", "2012-03-03T00:00:00.000"))
1657+
)
1658+
.get();
1659+
1660+
assertSearchResponse(response);
1661+
1662+
InternalDateHistogram histogram = response.getAggregations().get("histo");
1663+
List<InternalDateHistogram.Bucket> buckets = histogram.getBuckets();
1664+
assertThat(buckets.size(), equalTo(30));
1665+
assertThat(buckets.get(1).getKeyAsString(), equalTo("2012-02-03T00:00:00.000Z"));
1666+
assertThat(buckets.get(29).getKeyAsString(), equalTo("2012-03-02T00:00:00.000Z"));
1667+
}
1668+
16501669
}

server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/HistogramIT.java

+53
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.elasticsearch.search.aggregations.BucketOrder;
3535
import org.elasticsearch.search.aggregations.InternalAggregation;
3636
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
37+
import org.elasticsearch.search.aggregations.bucket.histogram.DoubleBounds;
3738
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
3839
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket;
3940
import org.elasticsearch.search.aggregations.metrics.Avg;
@@ -1195,6 +1196,58 @@ public void testSingleValuedFieldOrderedBySingleValueSubAggregationAscAsCompound
11951196
assertMultiSortResponse(expectedKeys, BucketOrder.aggregation("avg_l", true));
11961197
}
11971198

1199+
public void testInvalidBounds() {
1200+
SearchPhaseExecutionException e = expectThrows(SearchPhaseExecutionException.class, () -> client().prepareSearch("empty_bucket_idx")
1201+
.addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).hardBounds(new DoubleBounds(0.0, 10.0))
1202+
.extendedBounds(3, 20)).get());
1203+
assertThat(e.toString(), containsString("Extended bounds have to be inside hard bounds, hard bounds"));
1204+
1205+
e = expectThrows(SearchPhaseExecutionException.class, () -> client().prepareSearch("empty_bucket_idx")
1206+
.addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).hardBounds(new DoubleBounds(3.0, null))
1207+
.extendedBounds(0, 20)).get());
1208+
assertThat(e.toString(), containsString("Extended bounds have to be inside hard bounds, hard bounds"));
1209+
}
1210+
1211+
public void testHardBounds() throws Exception {
1212+
assertAcked(prepareCreate("test").setMapping("d", "type=double").get());
1213+
indexRandom(true,
1214+
client().prepareIndex("test").setId("1").setSource("d", -0.6),
1215+
client().prepareIndex("test").setId("2").setSource("d", 0.5),
1216+
client().prepareIndex("test").setId("3").setSource("d", 0.1));
1217+
1218+
SearchResponse r = client().prepareSearch("test")
1219+
.addAggregation(histogram("histo").field("d").interval(0.1).hardBounds(new DoubleBounds(0.0, null)))
1220+
.get();
1221+
assertSearchResponse(r);
1222+
1223+
Histogram histogram = r.getAggregations().get("histo");
1224+
List<? extends Bucket> buckets = histogram.getBuckets();
1225+
assertEquals(5, buckets.size());
1226+
assertEquals(0.1, (double) buckets.get(0).getKey(), 0.01d);
1227+
assertEquals(0.5, (double) buckets.get(4).getKey(), 0.01d);
1228+
1229+
r = client().prepareSearch("test")
1230+
.addAggregation(histogram("histo").field("d").interval(0.1).hardBounds(new DoubleBounds(null, 0.0)))
1231+
.get();
1232+
assertSearchResponse(r);
1233+
1234+
histogram = r.getAggregations().get("histo");
1235+
buckets = histogram.getBuckets();
1236+
assertEquals(1, buckets.size());
1237+
assertEquals(-0.6, (double) buckets.get(0).getKey(), 0.01d);
1238+
1239+
r = client().prepareSearch("test")
1240+
.addAggregation(histogram("histo").field("d").interval(0.1).hardBounds(new DoubleBounds(0.0, 0.3)))
1241+
.get();
1242+
assertSearchResponse(r);
1243+
1244+
histogram = r.getAggregations().get("histo");
1245+
buckets = histogram.getBuckets();
1246+
assertEquals(1, buckets.size());
1247+
assertEquals(0.1, (double) buckets.get(0).getKey(), 0.01d);
1248+
1249+
}
1250+
11981251
private void assertMultiSortResponse(long[] expectedKeys, BucketOrder... order) {
11991252
SearchResponse response = client()
12001253
.prepareSearch("sort_idx")

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java

+3
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public abstract class AbstractHistogramAggregator extends BucketsAggregator {
5050
protected final long minDocCount;
5151
protected final double minBound;
5252
protected final double maxBound;
53+
protected final DoubleBounds hardBounds;
5354
protected final LongKeyedBucketOrds bucketOrds;
5455

5556
public AbstractHistogramAggregator(
@@ -62,6 +63,7 @@ public AbstractHistogramAggregator(
6263
long minDocCount,
6364
double minBound,
6465
double maxBound,
66+
DoubleBounds hardBounds,
6567
DocValueFormat formatter,
6668
SearchContext context,
6769
Aggregator parent,
@@ -80,6 +82,7 @@ public AbstractHistogramAggregator(
8082
this.minDocCount = minDocCount;
8183
this.minBound = minBound;
8284
this.maxBound = maxBound;
85+
this.hardBounds = hardBounds;
8386
this.formatter = formatter;
8487
bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), cardinalityUpperBound);
8588
}

0 commit comments

Comments
 (0)