Skip to content

Commit 7c7fe01

Browse files
authored
Save memory when auto_date_histogram is not on top (#57304)
This builds an `auto_date_histogram` aggregator that natively aggregates from many buckets and uses it when the `auto_date_histogram` used to use `asMultiBucketAggregator` which should save a significant amount of memory in those cases. In particular, this happens when `auto_date_histogram` is a sub-aggregator of a multi-bucketing aggregator like `terms` or `histogram` or `filters`. For the most part we preserve the original implementation when `auto_date_histogram` only collects from a single bucket. It isn't possible to "just port the aggregator" without taking a pretty significant performance hit because we used to rewrite all of the buckets every time we switched to a coarser and coarser rounding configuration. Without some major surgery to how to delay sub-aggs we'd end up rewriting the delay list zillions of time if there are many buckets. The multi-bucket version of the aggregator has a "budget" of "wasted" buckets and only rewrites all of the buckets when we exceed that budget. Now that we don't rebucket every time we increase the rounding we can no longer get an accurate count of the number of buckets! So instead the aggregator uses an estimate of the number of buckets to trigger switching to a coarser rounding. This estimate is likely to be *terrible* when buckets are far apart compared to the rounding. So it also uses the difference between the first and last bucket to trigger switching to a coarser rounding. Which covers for the shortcomings of the bucket estimation technique pretty well. It also causes the aggregator to emit fewer buckets in cases where they'd be reduced together on the coordinating node. This is wonderful! But probably fairly rare. All of that does buy us some speed improvements when the aggregator is a child of multi-bucket aggregator: Without metrics or time zone: 25% faster With metrics: 15% faster With time zone: 22% faster Relates to #56487
1 parent 793b571 commit 7c7fe01

16 files changed

+1049
-359
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ setup:
44
index: test
55
body:
66
settings:
7+
number_of_shards: 1
78
number_of_replicas: 0
89
mappings:
910
properties:
@@ -75,3 +76,25 @@ setup:
7576
- match: { aggregations.histo.buckets.1.doc_count: 2 }
7677
- match: { aggregations.histo.buckets.1.v.value: 7 }
7778
- match: { aggregations.histo_avg_v.value: 5 }
79+
80+
---
81+
"profile at top level":
82+
- skip:
83+
version: " - 7.99.99"
84+
reason: Debug information added in 8.0.0 (to be backported to 7.9.0)
85+
86+
- do:
87+
search:
88+
body:
89+
profile: true
90+
size: 0
91+
aggs:
92+
histo:
93+
auto_date_histogram:
94+
field: date
95+
buckets: 2
96+
97+
- match: { hits.total.value: 4 }
98+
- length: { aggregations.histo.buckets: 2 }
99+
- match: { profile.shards.0.aggregations.0.type: AutoDateHistogramAggregator.FromSingle }
100+
- match: { profile.shards.0.aggregations.0.debug.surviving_buckets: 4 }

server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.elasticsearch.search.aggregations.InternalAggregation;
3131
import org.elasticsearch.search.aggregations.InternalAggregations;
3232
import org.elasticsearch.search.aggregations.LeafBucketCollector;
33+
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregator;
3334
import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
3435
import org.elasticsearch.search.aggregations.support.AggregationPath;
3536
import org.elasticsearch.search.internal.SearchContext;
@@ -329,7 +330,7 @@ protected final <B> InternalAggregation[] buildAggregationsForVariableBuckets(lo
329330
* @param bucketOrds hash of values to the bucket ordinal
330331
*/
331332
protected final <B> InternalAggregation[] buildAggregationsForVariableBuckets(long[] owningBucketOrds, LongKeyedBucketOrds bucketOrds,
332-
BucketBuilderForVariable<B> bucketBuilder, Function<List<B>, InternalAggregation> resultBuilder) throws IOException {
333+
BucketBuilderForVariable<B> bucketBuilder, ResultBuilderForVariable<B> resultBuilder) throws IOException {
333334
long totalOrdsToCollect = 0;
334335
for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) {
335336
totalOrdsToCollect += bucketOrds.bucketsInOrd(owningBucketOrds[ordIdx]);
@@ -360,14 +361,18 @@ protected final <B> InternalAggregation[] buildAggregationsForVariableBuckets(lo
360361
}
361362
buckets.add(bucketBuilder.build(ordsEnum.value(), bucketDocCount(ordsEnum.ord()), subAggregationResults[b++]));
362363
}
363-
results[ordIdx] = resultBuilder.apply(buckets);
364+
results[ordIdx] = resultBuilder.build(owningBucketOrds[ordIdx], buckets);
364365
}
365366
return results;
366367
}
367368
@FunctionalInterface
368369
protected interface BucketBuilderForVariable<B> {
369370
B build(long bucketValue, int docCount, InternalAggregations subAggregationResults);
370371
}
372+
@FunctionalInterface
373+
protected interface ResultBuilderForVariable<B> {
374+
InternalAggregation build(long owninigBucketOrd, List<B> buckets);
375+
}
371376

372377
/**
373378
* Utility method to build empty aggregations of the sub aggregators.
@@ -407,4 +412,15 @@ public BucketComparator bucketComparator(String key, SortOrder order) {
407412
"Either drop the key (a la \"" + name() + "\") or change it to \"doc_count\" (a la \"" + name() +
408413
".doc_count\") or \"key\".");
409414
}
415+
416+
public static boolean descendsFromGlobalAggregator(Aggregator parent) {
417+
while (parent != null) {
418+
if (parent.getClass() == GlobalAggregator.class) {
419+
return true;
420+
}
421+
parent = parent.parent();
422+
}
423+
return false;
424+
}
425+
410426
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferableBucketAggregator.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import org.elasticsearch.search.aggregations.AggregatorFactories;
2424
import org.elasticsearch.search.aggregations.BucketCollector;
2525
import org.elasticsearch.search.aggregations.MultiBucketCollector;
26-
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregator;
2726
import org.elasticsearch.search.internal.SearchContext;
2827

2928
import java.io.IOException;
@@ -70,16 +69,6 @@ protected void doPreCollection() throws IOException {
7069
collectableSubAggregators = MultiBucketCollector.wrap(collectors);
7170
}
7271

73-
public static boolean descendsFromGlobalAggregator(Aggregator parent) {
74-
while (parent != null) {
75-
if (parent.getClass() == GlobalAggregator.class) {
76-
return true;
77-
}
78-
parent = parent.parent();
79-
}
80-
return false;
81-
}
82-
8372
public DeferringBucketCollector getDeferringCollector() {
8473
// Default impl is a collector that selects the best buckets
8574
// but an alternative defer policy may be based on best docs.

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I
9090
double roundKey = Double.longBitsToDouble(bucketValue);
9191
double key = roundKey * interval + offset;
9292
return new InternalHistogram.Bucket(key, docCount, keyed, formatter, subAggregationResults);
93-
}, buckets -> {
93+
}, (owningBucketOrd, buckets) -> {
9494
// the contract of the histogram aggregation is that shards must return buckets ordered by key in ascending order
9595
CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator());
9696

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregationBuilder.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,10 @@ public long getRoughEstimateDurationMillis() {
290290
return roughEstimateDurationMillis;
291291
}
292292

293+
public long getMaximumRoughEstimateDurationMillis() {
294+
return getRoughEstimateDurationMillis() * getMaximumInnerInterval();
295+
}
296+
293297
@Override
294298
public int hashCode() {
295299
return Objects.hash(rounding, Arrays.hashCode(innerIntervals), dateTimeUnit);

0 commit comments

Comments
 (0)