Skip to content

Commit b9dc491

Browse files
authored
Speed up aggs with sub-aggregations (backport of #69806) (#69940)
This allows many of the optimizations added in #63643 and #68871 to run on aggregations with sub-aggregations. This should: * Speed up `terms` aggregations on fields with less than 1000 values that also have sub-aggregations. Locally I see 2 second searches run in 1.2 seconds. * Applies that same speedup to `range` and `date_histogram` aggregations but it feels less impressive because the point range queries are a little slower to get up and go. * Massively speed up `filters` aggregations with sub-aggregations that don't have a `parent` aggregation or collect "other" buckets. Also save a ton of memory while collecting them.
1 parent 785a17c commit b9dc491

File tree

17 files changed

+603
-98
lines changed

17 files changed

+603
-98
lines changed

modules/parent-join/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
2727
import org.elasticsearch.index.Index;
2828
import org.elasticsearch.index.mapper.IdFieldMapper;
29+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
2930
import org.elasticsearch.index.mapper.MappedFieldType;
3031
import org.elasticsearch.index.mapper.NumberFieldMapper;
3132
import org.elasticsearch.index.mapper.Uid;
@@ -108,6 +109,7 @@ public void testParentChild() throws IOException {
108109
}
109110

110111
public void testParentChildAsSubAgg() throws IOException {
112+
MappedFieldType kwd = new KeywordFieldMapper.KeywordFieldType("kwd", randomBoolean(), true, null);
111113
try (Directory directory = newDirectory()) {
112114
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
113115

@@ -146,7 +148,7 @@ public void testParentChildAsSubAgg() throws IOException {
146148
indexSearcher,
147149
new MatchAllDocsQuery(),
148150
request,
149-
withJoinFields(longField("number"), keywordField("kwd"))
151+
withJoinFields(longField("number"), kwd)
150152
);
151153

152154
StringTerms.Bucket evenBucket = result.getBucketByKey("even");
@@ -190,6 +192,7 @@ private static List<Field> createParentDocument(String id, String kwd) {
190192
return Arrays.asList(
191193
new StringField(IdFieldMapper.NAME, Uid.encodeId(id), Field.Store.NO),
192194
new SortedSetDocValuesField("kwd", new BytesRef(kwd)),
195+
new Field("kwd", new BytesRef(kwd), KeywordFieldMapper.Defaults.FIELD_TYPE),
193196
new StringField("join_field", PARENT_TYPE, Field.Store.NO),
194197
createJoinField(PARENT_TYPE, id)
195198
);

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml

Lines changed: 117 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -820,27 +820,43 @@ setup:
820820
body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } }
821821

822822
---
823-
"string profiler via global ordinals":
823+
"string profiler via global ordinals filters implementation":
824824
- skip:
825-
version: " - 7.8.99"
826-
reason: debug information added in 7.9.0
825+
version: " - 7.12.99"
826+
reason: filters implementation first supported with sub-aggregators in 7.13.0
827+
- do:
828+
indices.create:
829+
index: test_3
830+
body:
831+
settings:
832+
number_of_shards: 1
833+
number_of_replicas: 0
834+
mappings:
835+
properties:
836+
str:
837+
type: keyword
838+
boolean:
839+
type: boolean
840+
number:
841+
type: long
842+
827843
- do:
828844
bulk:
829-
index: test_1
845+
index: test_3
830846
refresh: true
831847
body: |
832848
{ "index": {} }
833-
{ "str": "sheep", "number": 1 }
849+
{ "boolean": true, "str": "sheep", "number": 1 }
834850
{ "index": {} }
835-
{ "str": "sheep", "number": 3 }
851+
{ "boolean": true, "str": "sheep", "number": 3 }
836852
{ "index": {} }
837-
{ "str": "cow", "number": 1 }
853+
{ "boolean": true, "str": "cow", "number": 1 }
838854
{ "index": {} }
839-
{ "str": "pig", "number": 1 }
855+
{ "boolean": true, "str": "pig", "number": 1 }
840856
841857
- do:
842858
search:
843-
index: test_1
859+
index: test_3
844860
body:
845861
profile: true
846862
size: 0
@@ -860,17 +876,73 @@ setup:
860876
- match: { aggregations.str_terms.buckets.1.max_number.value: 1 }
861877
- match: { aggregations.str_terms.buckets.2.key: pig }
862878
- match: { aggregations.str_terms.buckets.2.max_number.value: 1 }
863-
- match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
879+
- match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters }
864880
- match: { profile.shards.0.aggregations.0.description: str_terms }
865-
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
866-
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
867-
- match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
868-
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
869-
- gt: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
870-
- match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
871-
- match: { profile.shards.0.aggregations.0.debug.has_filter: false }
881+
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 }
882+
- match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter }
883+
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: str:cow }
884+
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: str:pig }
885+
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: str:sheep }
872886
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
873887
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
888+
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
889+
890+
---
891+
"string profiler via global ordinals native implementation":
892+
- skip:
893+
version: " - 7.8.99"
894+
reason: debug information added in 7.9.0
895+
- do:
896+
bulk:
897+
index: test_1
898+
refresh: true
899+
body: |
900+
{ "index": {} }
901+
{ "boolean": true, "str": "sheep", "number": 1 }
902+
{ "index": {} }
903+
{ "boolean": true, "str": "sheep", "number": 3 }
904+
{ "index": {} }
905+
{ "boolean": true, "str": "cow", "number": 1 }
906+
{ "index": {} }
907+
{ "boolean": true, "str": "pig", "number": 1 }
908+
909+
- do:
910+
search:
911+
index: test_1
912+
body:
913+
profile: true
914+
size: 0
915+
aggs:
916+
bool: # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
917+
terms:
918+
field: boolean
919+
aggs:
920+
str_terms:
921+
terms:
922+
field: str
923+
collect_mode: breadth_first
924+
execution_hint: global_ordinals
925+
aggs:
926+
max_number:
927+
max:
928+
field: number
929+
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: sheep }
930+
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 3 }
931+
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: cow }
932+
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 1 }
933+
- match: { aggregations.bool.buckets.0.str_terms.buckets.2.key: pig }
934+
- match: { aggregations.bool.buckets.0.str_terms.buckets.2.max_number.value: 1 }
935+
- match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
936+
- match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
937+
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
938+
- match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
939+
- match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: '/remap( using many bucket ords)?/' } # older versions just say "remap"
940+
- match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
941+
- gt: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
942+
- match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
943+
- match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
944+
- match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
945+
- match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }
874946

875947
- do:
876948
indices.create:
@@ -889,7 +961,7 @@ setup:
889961
refresh: true
890962
body: |
891963
{ "index": {} }
892-
{ "str": ["pig", "sheep"], "number": 100 }
964+
{ "boolean": true, "str": ["pig", "sheep"], "number": 100 }
893965
894966
- do:
895967
search:
@@ -898,30 +970,35 @@ setup:
898970
profile: true
899971
size: 0
900972
aggs:
901-
str_terms:
973+
bool: # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
902974
terms:
903-
field: str
904-
collect_mode: breadth_first
905-
execution_hint: global_ordinals
975+
field: boolean
906976
aggs:
907-
max_number:
908-
max:
909-
field: number
910-
- match: { aggregations.str_terms.buckets.0.key: pig }
911-
- match: { aggregations.str_terms.buckets.0.max_number.value: 100 }
912-
- match: { aggregations.str_terms.buckets.1.key: sheep }
913-
- match: { aggregations.str_terms.buckets.1.max_number.value: 100 }
914-
- match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
915-
- match: { profile.shards.0.aggregations.0.description: str_terms }
916-
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 }
917-
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
918-
- match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
919-
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
920-
- match: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
921-
- gt: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
922-
- match: { profile.shards.0.aggregations.0.debug.has_filter: false }
923-
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
924-
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
977+
str_terms:
978+
terms:
979+
field: str
980+
collect_mode: breadth_first
981+
execution_hint: global_ordinals
982+
aggs:
983+
max_number:
984+
max:
985+
field: number
986+
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: pig }
987+
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 100 }
988+
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: sheep }
989+
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 100 }
990+
- match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
991+
- match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
992+
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 1 }
993+
- match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
994+
- match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: '/remap( using many bucket ords)?/' } # older versions just say "remap"
995+
- match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
996+
- match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
997+
- gt: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
998+
- match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
999+
- match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
1000+
- match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }
1001+
9251002

9261003
---
9271004
"string profiler via map":

server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,12 @@ public void testSingleValuedFieldOrderedByIllegalAgg() throws Exception {
585585
} else {
586586
throw e;
587587
}
588+
} else if (e.getCause() instanceof IllegalArgumentException) {
589+
// Thrown when the terms agg runs as a filters agg
590+
assertThat(
591+
e.getCause().getMessage(),
592+
equalTo("Invalid aggregation order path [inner_terms>avg]. Can't sort by a descendant of a [sterms] aggregation [avg]")
593+
);
588594
} else {
589595
throw e;
590596
}

0 commit comments

Comments
 (0)