Skip to content

Commit b5b270b

Browse files
committed
Speed up aggs with sub-aggregations (backport of elastic#69806)
This allows many of the optimizations added in elastic#63643 and elastic#68871 to run on aggregations with sub-aggregations. This should:
* Speed up `terms` aggregations on fields with fewer than 1000 values that also have sub-aggregations. Locally I see 2 second searches run in 1.2 seconds.
* Apply that same speedup to `range` and `date_histogram` aggregations, although it feels less impressive there because the point range queries are a little slower to get up and go.
* Massively speed up `filters` aggregations with sub-aggregations that don't have a `parent` aggregation or collect "other" buckets. Also save a ton of memory while collecting them.
1 parent f7b2638 commit b5b270b

File tree

17 files changed

+585
-96
lines changed

17 files changed

+585
-96
lines changed

modules/parent-join/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
2727
import org.elasticsearch.index.Index;
2828
import org.elasticsearch.index.mapper.IdFieldMapper;
29+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
2930
import org.elasticsearch.index.mapper.MappedFieldType;
3031
import org.elasticsearch.index.mapper.NumberFieldMapper;
3132
import org.elasticsearch.index.mapper.Uid;
@@ -108,6 +109,7 @@ public void testParentChild() throws IOException {
108109
}
109110

110111
public void testParentChildAsSubAgg() throws IOException {
112+
MappedFieldType kwd = new KeywordFieldMapper.KeywordFieldType("kwd", randomBoolean(), true, null);
111113
try (Directory directory = newDirectory()) {
112114
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
113115

@@ -146,7 +148,7 @@ public void testParentChildAsSubAgg() throws IOException {
146148
indexSearcher,
147149
new MatchAllDocsQuery(),
148150
request,
149-
withJoinFields(longField("number"), keywordField("kwd"))
151+
withJoinFields(longField("number"), kwd)
150152
);
151153

152154
StringTerms.Bucket evenBucket = result.getBucketByKey("even");
@@ -190,6 +192,7 @@ private static List<Field> createParentDocument(String id, String kwd) {
190192
return Arrays.asList(
191193
new StringField(IdFieldMapper.NAME, Uid.encodeId(id), Field.Store.NO),
192194
new SortedSetDocValuesField("kwd", new BytesRef(kwd)),
195+
new Field("kwd", new BytesRef(kwd), KeywordFieldMapper.Defaults.FIELD_TYPE),
193196
new StringField("join_field", PARENT_TYPE, Field.Store.NO),
194197
createJoinField(PARENT_TYPE, id)
195198
);

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml

Lines changed: 99 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -820,23 +820,23 @@ setup:
820820
body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } }
821821

822822
---
823-
"string profiler via global ordinals":
823+
"string profiler via global ordinals filters implementation":
824824
- skip:
825-
version: " - 7.8.99"
826-
reason: debug information added in 7.9.0
825+
version: " - 7.99.99"
826+
reason: filters implementation first supported with sub-aggregators in 8.0.0, being backported to 7.13.0
827827
- do:
828828
bulk:
829829
index: test_1
830830
refresh: true
831831
body: |
832832
{ "index": {} }
833-
{ "str": "sheep", "number": 1 }
833+
{ "boolean": true, "str": "sheep", "number": 1 }
834834
{ "index": {} }
835-
{ "str": "sheep", "number": 3 }
835+
{ "boolean": true, "str": "sheep", "number": 3 }
836836
{ "index": {} }
837-
{ "str": "cow", "number": 1 }
837+
{ "boolean": true, "str": "cow", "number": 1 }
838838
{ "index": {} }
839-
{ "str": "pig", "number": 1 }
839+
{ "boolean": true, "str": "pig", "number": 1 }
840840
841841
- do:
842842
search:
@@ -860,17 +860,73 @@ setup:
860860
- match: { aggregations.str_terms.buckets.1.max_number.value: 1 }
861861
- match: { aggregations.str_terms.buckets.2.key: pig }
862862
- match: { aggregations.str_terms.buckets.2.max_number.value: 1 }
863-
- match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
863+
- match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters }
864864
- match: { profile.shards.0.aggregations.0.description: str_terms }
865-
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
866-
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
867-
- match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
868-
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
869-
- gt: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
870-
- match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
871-
- match: { profile.shards.0.aggregations.0.debug.has_filter: false }
865+
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 }
866+
- match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter }
867+
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: str:cow }
868+
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: str:pig }
869+
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: str:sheep }
872870
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
873871
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
872+
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
873+
874+
---
875+
"string profiler via global ordinals native implementation":
876+
- skip:
877+
version: " - 7.8.99"
878+
reason: debug information added in 7.9.0
879+
- do:
880+
bulk:
881+
index: test_1
882+
refresh: true
883+
body: |
884+
{ "index": {} }
885+
{ "boolean": true, "str": "sheep", "number": 1 }
886+
{ "index": {} }
887+
{ "boolean": true, "str": "sheep", "number": 3 }
888+
{ "index": {} }
889+
{ "boolean": true, "str": "cow", "number": 1 }
890+
{ "index": {} }
891+
{ "boolean": true, "str": "pig", "number": 1 }
892+
893+
- do:
894+
search:
895+
index: test_1
896+
body:
897+
profile: true
898+
size: 0
899+
aggs:
900+
bool: # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
901+
terms:
902+
field: boolean
903+
aggs:
904+
str_terms:
905+
terms:
906+
field: str
907+
collect_mode: breadth_first
908+
execution_hint: global_ordinals
909+
aggs:
910+
max_number:
911+
max:
912+
field: number
913+
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: sheep }
914+
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 3 }
915+
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: cow }
916+
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 1 }
917+
- match: { aggregations.bool.buckets.0.str_terms.buckets.2.key: pig }
918+
- match: { aggregations.bool.buckets.0.str_terms.buckets.2.max_number.value: 1 }
919+
- match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
920+
- match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
921+
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
922+
- match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
923+
- match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: remap using many bucket ords }
924+
- match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
925+
- gt: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
926+
- match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
927+
- match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
928+
- match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
929+
- match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }
874930

875931
- do:
876932
indices.create:
@@ -889,7 +945,7 @@ setup:
889945
refresh: true
890946
body: |
891947
{ "index": {} }
892-
{ "str": ["pig", "sheep"], "number": 100 }
948+
{ "boolean": true, "str": ["pig", "sheep"], "number": 100 }
893949
894950
- do:
895951
search:
@@ -898,30 +954,35 @@ setup:
898954
profile: true
899955
size: 0
900956
aggs:
901-
str_terms:
957+
bool: # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
902958
terms:
903-
field: str
904-
collect_mode: breadth_first
905-
execution_hint: global_ordinals
959+
field: boolean
906960
aggs:
907-
max_number:
908-
max:
909-
field: number
910-
- match: { aggregations.str_terms.buckets.0.key: pig }
911-
- match: { aggregations.str_terms.buckets.0.max_number.value: 100 }
912-
- match: { aggregations.str_terms.buckets.1.key: sheep }
913-
- match: { aggregations.str_terms.buckets.1.max_number.value: 100 }
914-
- match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
915-
- match: { profile.shards.0.aggregations.0.description: str_terms }
916-
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 }
917-
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
918-
- match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
919-
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
920-
- match: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
921-
- gt: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
922-
- match: { profile.shards.0.aggregations.0.debug.has_filter: false }
923-
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
924-
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
961+
str_terms:
962+
terms:
963+
field: str
964+
collect_mode: breadth_first
965+
execution_hint: global_ordinals
966+
aggs:
967+
max_number:
968+
max:
969+
field: number
970+
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: pig }
971+
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 100 }
972+
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: sheep }
973+
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 100 }
974+
- match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
975+
- match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
976+
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 1 }
977+
- match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
978+
- match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: remap using many bucket ords }
979+
- match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
980+
- match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
981+
- gt: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
982+
- match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
983+
- match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
984+
- match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }
985+
925986

926987
---
927988
"string profiler via map":

server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,12 @@ public void testSingleValuedFieldOrderedByIllegalAgg() throws Exception {
585585
} else {
586586
throw e;
587587
}
588+
} else if (e.getCause() instanceof IllegalArgumentException) {
589+
// Thrown when the terms agg runs as a filters agg
590+
assertThat(
591+
e.getCause().getMessage(),
592+
equalTo("Invalid aggregation order path [inner_terms>avg]. Can't sort by a descendant of a [sterms] aggregation [avg]")
593+
);
588594
} else {
589595
throw e;
590596
}

0 commit comments

Comments
 (0)