Skip to content

Commit a795134

Browse files
committed
Speed up date_histogram with children
This allows us to run the optimization introduced in elastic#63643 when the `date_histogram` has children. It isn't a revolutionary performance improvement though because children tend to be a lot heavier than the `date_histogram`. It is faster, but only by a couple of percentage points.
1 parent 1e3618b commit a795134

File tree

4 files changed

+134
-34
lines changed

4 files changed

+134
-34
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,11 @@ public AdaptingAggregator(
5151
* agg tree. This is how it has always been and some aggs rely on it.
5252
*/
5353
this.delegate = delegate.apply(subAggregators.fixParent(this));
54-
assert this.delegate.parent() == parent : "invalid parent set on delegate";
54+
/*
55+
* Sometimes the delegateBuilder will return `null` to signal that this
56+
* strategy is not valid.
57+
*/
58+
assert this.delegate == null || this.delegate.parent() == parent : "invalid parent set on delegate";
5559
}
5660

5761
/**

server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@
2626
import org.apache.lucene.search.CollectionTerminatedException;
2727
import org.apache.lucene.search.IndexOrDocValuesQuery;
2828
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
29+
import org.apache.lucene.search.LeafCollector;
2930
import org.apache.lucene.search.MatchAllDocsQuery;
3031
import org.apache.lucene.search.PointRangeQuery;
3132
import org.apache.lucene.search.Query;
33+
import org.apache.lucene.search.Scorable;
3234
import org.apache.lucene.search.ScoreMode;
3335
import org.apache.lucene.search.TotalHitCountCollector;
3436
import org.apache.lucene.search.Weight;
@@ -203,19 +205,16 @@ public static FilterByFilter buildFilterOrderOrNull(
203205
if (parent != null) {
204206
return null;
205207
}
206-
if (factories.countAggregators() != 0) {
207-
return null;
208-
}
209208
if (otherBucketKey != null) {
210209
return null;
211210
}
212211
return new FiltersAggregator.FilterByFilter(
213212
name,
213+
factories,
214214
keys,
215215
filters,
216216
keyed,
217217
context,
218-
parent,
219218
cardinality,
220219
metadata
221220
);
@@ -289,15 +288,15 @@ public static class FilterByFilter extends FiltersAggregator {
289288

290289
private FilterByFilter(
291290
String name,
291+
AggregatorFactories factories,
292292
String[] keys,
293293
Query[] filters,
294294
boolean keyed,
295295
AggregationContext context,
296-
Aggregator parent,
297296
CardinalityUpperBound cardinality,
298297
Map<String, Object> metadata
299298
) throws IOException {
300-
super(name, AggregatorFactories.EMPTY, keys, keyed, null, context, parent, cardinality, metadata);
299+
super(name, factories, keys, keyed, null, context, null, cardinality, metadata);
301300
this.filters = filters;
302301
this.profiling = context.profiling();
303302
}
@@ -378,9 +377,22 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket
378377
// the filter doesn't match any docs
379378
continue;
380379
}
381-
TotalHitCountCollector collector = new TotalHitCountCollector();
382-
scorer.score(collector, live);
383-
incrementBucketDocCount(filterOrd, collector.getTotalHits());
380+
if (sub == LeafBucketCollector.NO_OP_COLLECTOR) {
381+
TotalHitCountCollector collector = new TotalHitCountCollector();
382+
scorer.score(collector, live);
383+
incrementBucketDocCount(filterOrd, collector.getTotalHits());
384+
} else {
385+
/*
386+
* We can use the pre-constructed leaf collector for the first
387+
* filter. But it's almost certainly not going to work for the
388+
* second one because it'll try to "go backwards". So we build
389+
* a new one for each subsequent filter.
390+
*/
391+
LeafBucketCollector filterLeafCollector = filterOrd == 0 ? sub : collectableSubAggregators.getLeafCollector(ctx);
392+
SubCollector collector = new SubCollector(filterOrd, filterLeafCollector);
393+
scorer.score(collector, live);
394+
incrementBucketDocCount(filterOrd, collector.total);
395+
}
384396
}
385397
// Throwing this exception is how we communicate to the collection mechanism that we don't need the segment.
386398
throw new CollectionTerminatedException();
@@ -397,6 +409,31 @@ public void collectDebugInfo(BiConsumer<String, Object> add) {
397409
add.accept("estimate_cost_time", estimateCostTime);
398410
}
399411
}
412+
413+
/**
414+
* Adapts filter-by-filter hit collection into sub-aggregations.
415+
*/
416+
private static class SubCollector implements LeafCollector {
417+
private final int filterOrd;
418+
private final LeafBucketCollector sub;
419+
private int total;
420+
421+
SubCollector(int filterOrd, LeafBucketCollector sub) {
422+
this.filterOrd = filterOrd;
423+
this.sub = sub;
424+
}
425+
426+
@Override
427+
public void setScorer(Scorable scorer) throws IOException {
428+
sub.setScorer(scorer);
429+
}
430+
431+
@Override
432+
public void collect(int doc) throws IOException {
433+
total++;
434+
sub.collect(doc, filterOrd);
435+
}
436+
}
400437
}
401438

402439
/**

server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ public static Aggregator build(
312312
/*
313313
* Looks like it'd be more expensive to use the filter-by-filter
314314
* aggregator. Oh well. Snapshot the filter-by-filter
315-
* aggregator's debug information if we're profiling bececause it
315+
* aggregator's debug information if we're profiling because it
316316
* is useful even if the aggregator isn't.
317317
*/
318318
if (context.profiling()) {
@@ -356,7 +356,6 @@ public static FromFilters<?> adaptIntoFiltersOrNull(
356356
if (averageDocsPerRange < DOCS_PER_RANGE_TO_USE_FILTERS) {
357357
return null;
358358
}
359-
// TODO bail here for runtime fields. We should check the cost estimates on the Scorer.
360359
if (valuesSourceConfig.fieldType() instanceof DateFieldType
361360
&& ((DateFieldType) valuesSourceConfig.fieldType()).resolution() == Resolution.NANOSECONDS) {
362361
// We don't generate sensible Queries for nanoseconds.
@@ -393,37 +392,33 @@ public static FromFilters<?> adaptIntoFiltersOrNull(
393392
builder.to(ranges[i].to == Double.POSITIVE_INFINITY ? null : format.format(ranges[i].to)).includeUpper(false);
394393
filters[i] = context.buildQuery(builder);
395394
}
396-
FiltersAggregator.FilterByFilter delegate = FiltersAggregator.buildFilterOrderOrNull(
397-
name,
398-
factories,
399-
keys,
400-
filters,
401-
false,
402-
null,
403-
context,
404-
parent,
405-
cardinality,
406-
metadata
407-
);
408-
if (delegate == null) {
409-
return null;
410-
}
411395
RangeAggregator.FromFilters<?> fromFilters = new RangeAggregator.FromFilters<>(
412396
parent,
413397
factories,
414-
subAggregators -> {
415-
if (subAggregators.countAggregators() > 0) {
416-
throw new IllegalStateException("didn't expect to have a delegate if there are child aggs");
417-
}
418-
return delegate;
419-
},
398+
subAggregators -> FiltersAggregator.buildFilterOrderOrNull(
399+
name,
400+
subAggregators,
401+
keys,
402+
filters,
403+
false,
404+
null,
405+
context,
406+
parent,
407+
cardinality,
408+
metadata
409+
),
420410
valuesSourceConfig.format(),
421411
ranges,
422412
keyed,
423413
rangeFactory,
424414
averageDocsPerRange
425415
);
426-
return fromFilters;
416+
/*
417+
* A null delegate means we weren't able to run the aggregation
418+
* filter by filter so we have to give up and go back to the
419+
* standard range aggregator.
420+
*/
421+
return fromFilters.delegate() == null ? null : fromFilters;
427422
}
428423

429424
public static Aggregator buildWithoutAttemptedToAdaptToFilters(

server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
import org.apache.lucene.document.Document;
2222
import org.apache.lucene.document.Field;
2323
import org.apache.lucene.document.LongPoint;
24+
import org.apache.lucene.document.SortedNumericDocValuesField;
2425
import org.apache.lucene.index.DirectoryReader;
2526
import org.apache.lucene.index.IndexReader;
27+
import org.apache.lucene.index.IndexableField;
2628
import org.apache.lucene.index.RandomIndexWriter;
2729
import org.apache.lucene.search.IndexOrDocValuesQuery;
2830
import org.apache.lucene.search.IndexSearcher;
@@ -33,16 +35,22 @@
3335
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
3436
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3537
import org.elasticsearch.index.mapper.MappedFieldType;
38+
import org.elasticsearch.index.mapper.NumberFieldMapper;
39+
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
3640
import org.elasticsearch.index.query.QueryBuilder;
3741
import org.elasticsearch.index.query.QueryBuilders;
3842
import org.elasticsearch.index.query.RangeQueryBuilder;
3943
import org.elasticsearch.search.aggregations.AggregationBuilder;
4044
import org.elasticsearch.search.aggregations.AggregatorTestCase;
4145
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter;
46+
import org.elasticsearch.search.aggregations.metrics.InternalMax;
47+
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
4248
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
4349
import org.junit.Before;
4450

4551
import java.io.IOException;
52+
import java.util.ArrayList;
53+
import java.util.Collections;
4654
import java.util.HashMap;
4755
import java.util.HashSet;
4856
import java.util.List;
@@ -289,4 +297,60 @@ public void testFilterByFilterCost() throws IOException {
289297
ft
290298
);
291299
}
300+
301+
public void testSubAggs() throws IOException {
302+
MappedFieldType dateFt = new DateFieldMapper.DateFieldType(
303+
"test",
304+
true,
305+
false,
306+
false,
307+
DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER,
308+
Resolution.MILLISECONDS,
309+
null,
310+
null
311+
);
312+
MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER);
313+
AggregationBuilder builder = new FiltersAggregationBuilder(
314+
"test",
315+
new KeyedFilter("q1", new RangeQueryBuilder("test").from("2010-01-01").to("2010-03-01").includeUpper(false)),
316+
new KeyedFilter("q2", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false))
317+
).subAggregation(new MaxAggregationBuilder("m").field("int"));
318+
List<List<IndexableField>> docs = new ArrayList<>();
319+
docs.add(
320+
List.of(
321+
new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02")),
322+
new SortedNumericDocValuesField("int", 100)
323+
)
324+
);
325+
docs.add(
326+
List.of(
327+
new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02")),
328+
new SortedNumericDocValuesField("int", 5)
329+
)
330+
);
331+
docs.add(
332+
List.of(
333+
new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-03")),
334+
new SortedNumericDocValuesField("int", 10)
335+
)
336+
);
337+
/*
338+
* Shuffle the docs so we collect them in a random order which causes
339+
* bad implementations of filter-by-filter aggregation to fail with
340+
* assertion errors while executing.
341+
*/
342+
Collections.shuffle(docs, random());
343+
testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> {
344+
InternalFilters filters = (InternalFilters) result;
345+
assertThat(filters.getBuckets(), hasSize(2));
346+
347+
assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(1L));
348+
InternalMax max = filters.getBucketByKey("q1").getAggregations().get("m");
349+
assertThat(max.getValue(), equalTo(100.0));
350+
351+
assertThat(filters.getBucketByKey("q2").getDocCount(), equalTo(2L));
352+
max = filters.getBucketByKey("q2").getAggregations().get("m");
353+
assertThat(max.getValue(), equalTo(10.0));
354+
}, dateFt, intFt);
355+
}
292356
}

0 commit comments

Comments
 (0)