From 906f3a40b02ca64f834b42c869ebdc8337381606 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 2 Aug 2017 11:54:11 +0200 Subject: [PATCH] Use `global_ordinals_hash` execution mode when sorting by sub aggregations. This is a safer default since sorting by sub aggregations prevents these aggregations from being deferred. `global_ordinals_hash` will at least make sure that we do not use memory for buckets that are not collected. Closes #24359 --- .../bucket/terms/TermsAggregatorFactory.java | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index b78634357a5aa..d2c124529204e 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -30,6 +30,7 @@ import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalOrder; +import org.elasticsearch.search.aggregations.InternalOrder.CompoundOrder; import org.elasticsearch.search.aggregations.NonCollectingAggregator; import org.elasticsearch.search.aggregations.bucket.BucketUtils; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; @@ -93,6 +94,17 @@ public InternalAggregation buildEmptyAggregation() { }; } + private static boolean isAggregationSort(BucketOrder order) { + if (order instanceof InternalOrder.Aggregation) { + return true; + } else if (order instanceof InternalOrder.CompoundOrder) { + InternalOrder.CompoundOrder compoundOrder = (CompoundOrder) order; + return compoundOrder.orderElements().stream().anyMatch(TermsAggregatorFactory::isAggregationSort); + } else { + return false; + } + } + @Override protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List pipelineAggregators, Map metaData) throws IOException { @@ -139,10 +151,17 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare // to be unbounded and most instances may only aggregate few // documents, so use hashed based // global ordinals to keep the bucket ords dense. + // Additionally, if using partitioned terms the regular global // ordinals would be sparse so we opt for hash + + // Finally if we are sorting by sub aggregations, then these + // aggregations cannot be deferred, so global_ordinals_hash is + // a safer choice as we won't use memory for sub aggregations + // for buckets that are not collected. if (Aggregator.descendsFromBucketAggregator(parent) || - (includeExclude != null && includeExclude.isPartitionBased())) { + (includeExclude != null && includeExclude.isPartitionBased()) || + isAggregationSort(order)) { execution = ExecutionMode.GLOBAL_ORDINALS_HASH; } else { if (factories == AggregatorFactories.EMPTY) {