Skip to content

Commit b7308aa

Browse files
authored
Don't load global ordinals with the map execution_hint (#37833)
The terms aggregator loads the global ordinals to retrieve the cardinality of the field to aggregate on. This information is then used to select the strategy to use for the aggregation (breadth_first or depth_first). However this should be avoided if the execution_hint is explicitly set to map since this mode doesn't really need the global ordinals. Since we still need the cardinality of the field this change picks the maximum cardinality in the segments as an estimation of the total cardinality to select the strategy to use (breadth_first or depth_first). This estimation is only used if the execution hint is set to map, otherwise the global ordinals are still used to retrieve the accurate cardinality. Closes #37705
1 parent 23f00e3 commit b7308aa

File tree

2 files changed

+87
-5
lines changed

2 files changed

+87
-5
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml

+71
Original file line numberDiff line numberDiff line change
@@ -700,3 +700,74 @@ setup:
700700
- is_false: aggregations.str_terms.buckets.1.key_as_string
701701

702702
- match: { aggregations.str_terms.buckets.1.doc_count: 2 }
703+
704+
---
705+
"Global ordinals are not loaded with the map execution hint":
706+
707+
- skip:
708+
version: " - 6.99.99"
709+
reason: bug fixed in 7.0
710+
711+
- do:
712+
index:
713+
refresh: true
714+
index: test_1
715+
id: 1
716+
routing: 1
717+
body: { "str": "abc" }
718+
719+
- do:
720+
index:
721+
refresh: true
722+
index: test_1
723+
id: 2
724+
routing: 1
725+
body: { "str": "abc" }
726+
727+
- do:
728+
index:
729+
refresh: true
730+
index: test_1
731+
id: 3
732+
routing: 1
733+
body: { "str": "bcd" }
734+
735+
- do:
736+
indices.refresh: {}
737+
738+
- do:
739+
search:
740+
index: test_1
741+
body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "map" } } } }
742+
743+
- match: { hits.total.value: 3}
744+
- length: { aggregations.str_terms.buckets: 2 }
745+
746+
- do:
747+
indices.stats:
748+
index: test_1
749+
metric: fielddata
750+
fielddata_fields: str
751+
752+
- match: { indices.test_1.total.fielddata.memory_size_in_bytes: 0}
753+
754+
- do:
755+
search:
756+
index: test_1
757+
body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "global_ordinals" } } } }
758+
759+
- match: { hits.total.value: 3}
760+
- length: { aggregations.str_terms.buckets: 2 }
761+
762+
- do:
763+
indices.stats:
764+
index: test_1
765+
metric: fielddata
766+
fielddata_fields: str
767+
768+
- gt: { indices.test_1.total.fielddata.memory_size_in_bytes: 0}
769+
770+
771+
772+
773+

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

+16-5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.elasticsearch.search.aggregations.bucket.terms;
2121

2222
import org.apache.logging.log4j.LogManager;
23+
import org.apache.lucene.index.LeafReaderContext;
2324
import org.apache.lucene.search.IndexSearcher;
2425
import org.elasticsearch.common.ParseField;
2526
import org.elasticsearch.common.logging.DeprecationLogger;
@@ -133,7 +134,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
133134
if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) {
134135
execution = ExecutionMode.MAP;
135136
}
136-
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
137+
final long maxOrd = getMaxOrd(context.searcher(), valuesSource, execution);
137138
if (execution == null) {
138139
execution = ExecutionMode.GLOBAL_ORDINALS;
139140
}
@@ -207,13 +208,23 @@ static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd)
207208
}
208209

209210
/**
210-
* Get the maximum global ordinal value for the provided {@link ValuesSource} or -1
211+
* Get the maximum ordinal value for the provided {@link ValuesSource} or -1
211212
* if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}.
212213
*/
213-
static long getMaxOrd(ValuesSource source, IndexSearcher searcher) throws IOException {
214+
static long getMaxOrd(IndexSearcher searcher, ValuesSource source, ExecutionMode executionMode) throws IOException {
214215
if (source instanceof ValuesSource.Bytes.WithOrdinals) {
215216
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) source;
216-
return valueSourceWithOrdinals.globalMaxOrd(searcher);
217+
if (executionMode == ExecutionMode.MAP) {
218+
// global ordinals are not requested so we don't load them
219+
// and return the biggest cardinality per segment instead.
220+
long maxOrd = -1;
221+
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
222+
maxOrd = Math.max(maxOrd, valueSourceWithOrdinals.ordinalsValues(leaf).getValueCount());
223+
}
224+
return maxOrd;
225+
} else {
226+
return valueSourceWithOrdinals.globalMaxOrd(searcher);
227+
}
217228
} else {
218229
return -1;
219230
}
@@ -258,7 +269,7 @@ Aggregator create(String name,
258269
List<PipelineAggregator> pipelineAggregators,
259270
Map<String, Object> metaData) throws IOException {
260271

261-
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
272+
final long maxOrd = getMaxOrd(context.searcher(), valuesSource, ExecutionMode.GLOBAL_ORDINALS);
262273
assert maxOrd != -1;
263274
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
264275

0 commit comments

Comments
 (0)