Skip to content

Commit 4ffdad3

Browse files
authored
Speed up terms agg when alone (#69377)
This speeds up the `terms` agg in a very specific case: 1. It has no child aggregations 2. It has no parent aggregations 3. There are no deleted documents 4. You are not using document level security 5. There is no top level query 6. The field has global ordinals 7. There are fewer than one thousand distinct terms. That is a lot of restrictions! But the speed up is pretty substantial because in those cases we can serve the entire aggregation using metadata that Lucene precomputes while it builds the index. In a real rally track we have, we get a 92% speed improvement, but the index isn't *that* big: ``` | 90th percentile service time | keyword-terms-low-cardinality | 446.031 | 36.7677 | -409.263 | ms | ``` In a rally track with a larger index I ran some tests by hand and the aggregation went from 2200ms to 8ms. Even though there are 7 restrictions on this, I expect it to come into play enough to matter. Restriction 6 just means you are aggregating on a `keyword` field. Or an `ip`. And it's fairly common for `keyword`s to have fewer than a thousand distinct values. Certainly not everywhere, but some places. I expect "cold tier" indices are very, very likely not to have deleted documents at all. And the optimization works segment by segment - so it'll save some time on each segment without deleted documents. But more time if the entire index doesn't have any. The optimization builds on #68871 which translates `terms` aggregations against low cardinality fields with global ordinals into a `filters` aggregation. This teaches the `filters` aggregation to recognize when it can get its results from the index metadata. Rather, it creates the infrastructure to make that fairly simple and applies it in the case of the queries generated by the terms aggregation.
1 parent 2fd6337 commit 4ffdad3

File tree

11 files changed

+759
-223
lines changed

11 files changed

+759
-223
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,4 @@ setup:
178178
- match: { aggregations.f.buckets.foo.doc_count: 8 }
179179
- match: { aggregations.f.buckets.xyz.doc_count: 5 }
180180
- match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter }
181-
- gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count: 1 }
181+
- gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count_field: 1 }

server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
import static org.hamcrest.Matchers.greaterThan;
4242
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
4343
import static org.hamcrest.Matchers.hasEntry;
44+
import static org.hamcrest.Matchers.hasKey;
45+
import static org.hamcrest.Matchers.hasSize;
4446
import static org.hamcrest.Matchers.notNullValue;
4547

4648
@ESIntegTestCase.SuiteScopeTestCase
@@ -633,10 +635,16 @@ public void testFilterByFilter() throws InterruptedException, IOException {
633635
assertThat(delegate.get("delegate"), equalTo("FiltersAggregator.FilterByFilter"));
634636
Map<?, ?> delegateDebug = (Map<?, ?>) delegate.get("delegate_debug");
635637
assertThat(delegateDebug, hasEntry("segments_with_deleted_docs", 0));
636-
assertThat(delegateDebug, hasEntry("segments_with_doc_count", 0));
638+
assertThat(delegateDebug, hasEntry("segments_with_doc_count_field", 0));
637639
assertThat(delegateDebug, hasEntry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
638640
assertThat(delegateDebug, hasEntry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
639641
assertThat((long) delegateDebug.get("estimate_cost_time"), greaterThanOrEqualTo(0L)); // ~1,276,734 nanos is normal
642+
List<?> filtersDebug = (List<?>) delegateDebug.get("filters");
643+
assertThat(filtersDebug, hasSize(1));
644+
Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
645+
assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
646+
assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
647+
assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
640648
}
641649
}
642650
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java

Lines changed: 122 additions & 175 deletions
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
package org.elasticsearch.search.aggregations.bucket.filter;
1010

11-
import org.apache.lucene.search.Query;
1211
import org.elasticsearch.search.aggregations.Aggregator;
1312
import org.elasticsearch.search.aggregations.AggregatorFactories;
1413
import org.elasticsearch.search.aggregations.AggregatorFactory;
@@ -17,13 +16,13 @@
1716
import org.elasticsearch.search.aggregations.support.AggregationContext;
1817

1918
import java.io.IOException;
19+
import java.util.ArrayList;
2020
import java.util.List;
2121
import java.util.Map;
2222

2323
public class FiltersAggregatorFactory extends AggregatorFactory {
2424

25-
private final String[] keys;
26-
private final Query[] filters;
25+
private final List<QueryToFilterAdapter<?>> filters;
2726
private final boolean keyed;
2827
private final boolean otherBucket;
2928
private final String otherBucketKey;
@@ -35,20 +34,17 @@ public FiltersAggregatorFactory(String name, List<KeyedFilter> filters, boolean
3534
this.keyed = keyed;
3635
this.otherBucket = otherBucket;
3736
this.otherBucketKey = otherBucketKey;
38-
keys = new String[filters.size()];
39-
this.filters = new Query[filters.size()];
40-
for (int i = 0; i < filters.size(); ++i) {
41-
KeyedFilter keyedFilter = filters.get(i);
42-
this.keys[i] = keyedFilter.key();
43-
this.filters[i] = context.buildQuery(keyedFilter.filter());
37+
this.filters = new ArrayList<>(filters.size());
38+
for (KeyedFilter f : filters) {
39+
this.filters.add(QueryToFilterAdapter.build(context.searcher(), f.key(), context.buildQuery(f.filter())));
4440
}
4541
}
4642

4743
@Override
4844
public Aggregator createInternal(Aggregator parent,
4945
CardinalityUpperBound cardinality,
5046
Map<String, Object> metadata) throws IOException {
51-
return FiltersAggregator.build(name, factories, keys, filters, keyed,
47+
return FiltersAggregator.build(name, factories, filters, keyed,
5248
otherBucket ? otherBucketKey : null, context, parent, cardinality, metadata);
5349
}
5450
}

0 commit comments

Comments
 (0)