Skip to content

Commit 3d19468

Browse files
authored
Optimize lone single bucket date_histogram (backport of #71180) (#72989)
This optimizes the `date_histogram` agg when there is a single bucket and no sub-aggregations. We expect this to happen from time to time when the buckets are larger than a day because folks often use "daily" indices. This was already fairly fast, but answering the count from index metadata makes it about 10x faster: something like 98ms becomes 7.5ms. Nice if you can get it! Like #69377, this optimization will disable itself if you have document level security enabled or are querying a rollup index. Also like #69377, it won't do anything if there is a top level query.
1 parent f02d601 commit 3d19468

File tree

5 files changed

+318
-16
lines changed

5 files changed

+318
-16
lines changed

server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
1818
import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedOrdinalsSamplerAggregator;
1919
import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator;
20+
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
2021
import org.elasticsearch.search.profile.ProfileResult;
2122
import org.elasticsearch.search.profile.ProfileShardResult;
2223
import org.elasticsearch.test.ESIntegTestCase;
@@ -591,7 +592,9 @@ public void testFilterByFilter() throws InterruptedException, IOException {
591592

592593
SearchResponse response = client().prepareSearch("dateidx")
593594
.setProfile(true)
594-
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH))
595+
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH)
596+
// Add a sub-agg so we don't get to use metadata. That's great and all, but it outputs less debugging info for us to verify.
597+
.subAggregation(new MaxAggregationBuilder("m").field("date")))
595598
.get();
596599
assertSearchResponse(response);
597600
Map<String, ProfileShardResult> profileResults = response.getProfileResults();
@@ -608,7 +611,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
608611
assertThat(histoAggResult, notNullValue());
609612
assertThat(histoAggResult.getQueryName(), equalTo("DateHistogramAggregator.FromDateRange"));
610613
assertThat(histoAggResult.getLuceneDescription(), equalTo("histo"));
611-
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(0));
614+
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(1));
612615
assertThat(histoAggResult.getTime(), greaterThan(0L));
613616
Map<String, Long> breakdown = histoAggResult.getTimeBreakdown();
614617
assertThat(breakdown, notNullValue());
@@ -643,7 +646,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
643646
Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
644647
assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
645648
assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
646-
assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
649+
assertThat(queryDebug, hasEntry("query", "DocValuesFieldExistsQuery [field=date]"));
647650
}
648651
}
649652
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,15 @@
88

99
package org.elasticsearch.search.aggregations.bucket.filter;
1010

11+
import org.apache.lucene.index.FieldInfo;
1112
import org.apache.lucene.index.IndexReader;
1213
import org.apache.lucene.index.LeafReaderContext;
14+
import org.apache.lucene.index.PointValues;
1315
import org.apache.lucene.search.BooleanClause;
1416
import org.apache.lucene.search.BooleanQuery;
1517
import org.apache.lucene.search.BulkScorer;
1618
import org.apache.lucene.search.ConstantScoreQuery;
19+
import org.apache.lucene.search.DocValuesFieldExistsQuery;
1720
import org.apache.lucene.search.IndexOrDocValuesQuery;
1821
import org.apache.lucene.search.IndexSearcher;
1922
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
@@ -49,9 +52,21 @@ public class QueryToFilterAdapter<Q extends Query> {
4952
*/
5053
public static QueryToFilterAdapter<?> build(IndexSearcher searcher, String key, Query query) throws IOException {
5154
query = searcher.rewrite(query);
55+
if (query instanceof ConstantScoreQuery) {
56+
/*
57+
* Unwrap constant score because it gets in the way of us
58+
* understanding what the queries are trying to do and we
59+
* don't use the score at all anyway. Effectively we always
60+
* run in constant score mode.
61+
*/
62+
query = ((ConstantScoreQuery) query).getQuery();
63+
}
5264
if (query instanceof TermQuery) {
5365
return new TermQueryToFilterAdapter(searcher, key, (TermQuery) query);
5466
}
67+
if (query instanceof DocValuesFieldExistsQuery) {
68+
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) query);
69+
}
5570
if (query instanceof MatchAllDocsQuery) {
5671
return new MatchAllQueryToFilterAdapter(searcher, key, (MatchAllDocsQuery) query);
5772
}
@@ -386,4 +401,50 @@ void collectDebugInfo(BiConsumer<String, Object> add) {
386401
add.accept("results_from_metadata", resultsFromMetadata);
387402
}
388403
}
404+
405+
/**
 * Adapter specialized for {@link DocValuesFieldExistsQuery}. When allowed, it
 * answers the per-segment count from the field's {@link PointValues} doc count
 * rather than delegating to the generic counting path in the superclass.
 */
private static class DocValuesFieldExistsAdapter extends QueryToFilterAdapter<DocValuesFieldExistsQuery> {
406+
// Number of times count() was answered from metadata; reported by collectDebugInfo.
private int resultsFromMetadata;
407+
408+
private DocValuesFieldExistsAdapter(IndexSearcher searcher, String key, DocValuesFieldExistsQuery query) {
409+
super(searcher, key, query);
410+
}
411+
412+
/**
 * Counts the matching documents in one segment. Uses the points doc count
 * when both countCanUseMetadata and canCountFromMetadata allow it, and
 * falls back to the superclass implementation otherwise.
 */
@Override
413+
long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException {
414+
if (countCanUseMetadata(counter, live) && canCountFromMetadata(ctx)) {
415+
resultsFromMetadata++;
416+
PointValues points = ctx.reader().getPointValues(query().getField());
417+
if (points == null) {
418+
// No points for the field in this segment, so nothing is counted.
return 0;
419+
}
420+
// NOTE(review): assumes every doc with doc values for the field also has points - confirm.
return points.getDocCount();
421+
422+
}
423+
return super.count(ctx, counter, live);
424+
}
425+
426+
/**
 * Estimates the cost of counting in one segment: 0 when the count can come
 * from metadata, otherwise whatever the superclass estimates.
 */
@Override
427+
long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
428+
if (canUseMetadata.get() && canCountFromMetadata(ctx)) {
429+
return 0;
430+
}
431+
return super.estimateCountCost(ctx, canUseMetadata);
432+
}
433+
434+
/**
 * Returns true when the segment either has no {@link FieldInfo} for the
 * field or the field has at least one point dimension.
 */
private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
435+
FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField());
436+
if (info == null) {
437+
// If we don't have any info then there aren't any values anyway.
438+
return true;
439+
}
440+
return info.getPointDimensionCount() > 0;
441+
}
442+
443+
/**
 * Adds this adapter's specialization name and the metadata hit counter on
 * top of the superclass debug info.
 */
@Override
444+
void collectDebugInfo(BiConsumer<String, Object> add) {
445+
super.collectDebugInfo(add);
446+
add.accept("specialized_for", "docvalues_field_exists");
447+
add.accept("results_from_metadata", resultsFromMetadata);
448+
}
449+
}
389450
}

server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java

Lines changed: 184 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
import org.apache.lucene.document.Document;
1111
import org.apache.lucene.document.Field;
1212
import org.apache.lucene.document.LongPoint;
13+
import org.apache.lucene.document.NumericDocValuesField;
1314
import org.apache.lucene.document.SortedNumericDocValuesField;
15+
import org.apache.lucene.document.SortedSetDocValuesField;
1416
import org.apache.lucene.index.DirectoryReader;
1517
import org.apache.lucene.index.IndexReader;
1618
import org.apache.lucene.index.IndexableField;
@@ -33,12 +35,14 @@
3335
import org.elasticsearch.index.mapper.DateFieldMapper;
3436
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
3537
import org.elasticsearch.index.mapper.DocCountFieldMapper;
38+
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
3639
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3740
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
3841
import org.elasticsearch.index.mapper.MappedFieldType;
3942
import org.elasticsearch.index.mapper.NumberFieldMapper;
4043
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
4144
import org.elasticsearch.index.mapper.ObjectMapper;
45+
import org.elasticsearch.index.query.ExistsQueryBuilder;
4246
import org.elasticsearch.index.query.MatchAllQueryBuilder;
4347
import org.elasticsearch.index.query.MatchQueryBuilder;
4448
import org.elasticsearch.index.query.QueryBuilder;
@@ -62,7 +66,6 @@
6266
import org.elasticsearch.search.aggregations.support.AggregationContext;
6367
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
6468
import org.elasticsearch.search.internal.ContextIndexSearcherTests.DocumentSubsetDirectoryReader;
65-
import org.junit.Before;
6669

6770
import java.io.IOException;
6871
import java.util.ArrayList;
@@ -73,6 +76,7 @@
7376
import java.util.Map;
7477
import java.util.Set;
7578
import java.util.concurrent.TimeUnit;
79+
import java.util.function.IntFunction;
7680

7781
import static org.hamcrest.Matchers.both;
7882
import static org.hamcrest.Matchers.equalTo;
@@ -86,14 +90,6 @@
8690
import static org.mockito.Mockito.mock;
8791

8892
public class FiltersAggregatorTests extends AggregatorTestCase {
89-
private MappedFieldType fieldType;
90-
91-
@Before
92-
public void setUpTest() throws Exception {
93-
super.setUp();
94-
fieldType = new KeywordFieldMapper.KeywordFieldType("field");
95-
}
96-
9793
public void testEmpty() throws Exception {
9894
Directory directory = newDirectory();
9995
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
@@ -107,7 +103,12 @@ public void testEmpty() throws Exception {
107103
}
108104
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
109105
builder.otherBucketKey("other");
110-
InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
106+
InternalFilters response = searchAndReduce(
107+
indexSearcher,
108+
new MatchAllDocsQuery(),
109+
builder,
110+
new KeywordFieldMapper.KeywordFieldType("field")
111+
);
111112
assertEquals(response.getBuckets().size(), numFilters);
112113
for (InternalFilters.InternalBucket filter : response.getBuckets()) {
113114
assertEquals(filter.getDocCount(), 0);
@@ -207,7 +208,12 @@ public void testKeyedFilter() throws Exception {
207208
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
208209
builder.otherBucket(true);
209210
builder.otherBucketKey("other");
210-
final InternalFilters filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
211+
final InternalFilters filters = searchAndReduce(
212+
indexSearcher,
213+
new MatchAllDocsQuery(),
214+
builder,
215+
new KeywordFieldMapper.KeywordFieldType("field")
216+
);
211217
assertEquals(filters.getBuckets().size(), 7);
212218
assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
213219
assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
@@ -262,7 +268,12 @@ public void testRandom() throws Exception {
262268
builder.otherBucket(true);
263269
builder.otherBucketKey("other");
264270

265-
final InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
271+
final InternalFilters response = searchAndReduce(
272+
indexSearcher,
273+
new MatchAllDocsQuery(),
274+
builder,
275+
new KeywordFieldMapper.KeywordFieldType("field")
276+
);
266277
List<InternalFilters.InternalBucket> buckets = response.getBuckets();
267278
assertEquals(buckets.size(), filters.length + 1);
268279

@@ -822,6 +833,167 @@ public void testSubAggsManyFilters() throws IOException {
822833
}, dateFt, intFt);
823834
}
824835

836+
/**
 * Runs the exists-filter test case against a date field indexed with both a
 * point and a numeric doc value, expecting metadata to be usable. The filter
 * is randomly either an exists query or a range query over the indexed dates.
 */
public void testDocValuesFieldExistsForDate() throws IOException {
837+
DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f");
838+
QueryBuilder exists;
839+
if (randomBoolean()) {
840+
exists = new ExistsQueryBuilder("f");
841+
} else {
842+
// Range query covering all values in the index is rewritten to exists
843+
exists = new RangeQueryBuilder("f").gte("2020-01-01").lt("2020-01-02");
844+
}
845+
long start = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
846+
docValuesFieldExistsTestCase(exists, ft, true, i -> {
847+
// The shared test case calls this with i in 0..9, so each date is start plus i hours.
long date = start + TimeUnit.HOURS.toMillis(i);
848+
return org.elasticsearch.common.collect.List.of(new LongPoint("f", date), new NumericDocValuesField("f", date));
849+
});
850+
}
851+
852+
/**
 * Same as the single-valued date test but each document carries two points
 * and two sorted numeric doc values for the field; metadata is still expected
 * to be usable.
 */
public void testDocValuesFieldExistsForDateWithMultiValuedFields() throws IOException {
853+
DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f");
854+
long start = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
855+
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, true, i -> {
856+
long date = start + TimeUnit.HOURS.toMillis(i);
857+
// Two values per document: date and date + 10.
return org.elasticsearch.common.collect.List.of(
858+
new LongPoint("f", date),
859+
new LongPoint("f", date + 10),
860+
new SortedNumericDocValuesField("f", date),
861+
new SortedNumericDocValuesField("f", date + 10)
862+
);
863+
});
864+
}
865+
866+
/**
 * Runs the no-data test case for a date field named "f".
 */
public void testDocValuesFieldExistsForDateWithoutData() throws IOException {
867+
docValuesFieldExistsNoDataTestCase(new DateFieldMapper.DateFieldType("f"));
868+
}
869+
870+
/**
 * Runs the exists-filter test case against a number field of a randomly
 * chosen {@link NumberFieldMapper.NumberType}, expecting metadata to be usable.
 */
public void testDocValuesFieldExistsForNumber() throws IOException {
871+
NumberFieldMapper.NumberType numberType = randomFrom(NumberFieldMapper.NumberType.values());
872+
// NOTE(review): the meaning of the positional flags below is not visible here - confirm against the NumberFieldType constructor.
NumberFieldMapper.NumberFieldType ft = new NumberFieldMapper.NumberFieldType(
873+
"f",
874+
numberType,
875+
true,
876+
false,
877+
true,
878+
true,
879+
null,
880+
org.elasticsearch.common.collect.Map.of(),
881+
null
882+
);
883+
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, true, i -> {
884+
// Uses the document index i as the field value.
return numberType.createFields("f", i, true, true, false);
885+
});
886+
}
887+
888+
/**
 * Runs the no-data test case for a number field "f" of a randomly chosen
 * {@link NumberFieldMapper.NumberType}.
 */
public void testDocValuesFieldExistsForNumberWithoutData() throws IOException {
889+
docValuesFieldExistsNoDataTestCase(new NumberFieldMapper.NumberFieldType(
890+
"f",
891+
randomFrom(NumberFieldMapper.NumberType.values()),
892+
true,
893+
false,
894+
true,
895+
true,
896+
null,
897+
org.elasticsearch.common.collect.Map.of(),
898+
null
899+
));
900+
}
901+
902+
/**
 * Runs the exists-filter test case against a keyword field. The documents
 * carry no points for the field, and the test case is told not to expect
 * results from metadata.
 */
public void testDocValuesFieldExistsForKeyword() throws IOException {
903+
KeywordFieldMapper.KeywordFieldType ft = new KeywordFieldMapper.KeywordFieldType(
904+
"f",
905+
true,
906+
true,
907+
org.elasticsearch.common.collect.Map.of()
908+
);
909+
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, false, i -> {
910+
// Random five-character value; the index i is not used.
BytesRef text = new BytesRef(randomAlphaOfLength(5));
911+
return org.elasticsearch.common.collect.List.of(
912+
new Field("f", text, KeywordFieldMapper.Defaults.FIELD_TYPE),
913+
new SortedSetDocValuesField("f", text)
914+
);
915+
});
916+
}
917+
918+
/**
 * Runs the no-data test case for a keyword field named "f".
 */
public void testDocValuesFieldExistsForKeywordWithoutData() throws IOException {
919+
docValuesFieldExistsNoDataTestCase(
920+
new KeywordFieldMapper.KeywordFieldType("f", true, true, org.elasticsearch.common.collect.Map.of())
921+
);
922+
}
923+
924+
/**
 * Shared test case: indexes 10 documents with the field and 10 empty
 * documents, runs a single-filter "filters" aggregation, and checks the doc
 * count, the aggregator implementation, and the filter's debug info.
 *
 * @param exists the filter query placed under key "q1"
 * @param fieldType mapped field type handed to the test harness
 * @param canUseMetadata whether counts and cost estimates are expected to come from metadata
 * @param buildDocWithField builds the fields of the i-th document that has the field
 */
private void docValuesFieldExistsTestCase(
925+
QueryBuilder exists,
926+
MappedFieldType fieldType,
927+
boolean canUseMetadata,
928+
IntFunction<List<? extends IndexableField>> buildDocWithField
929+
) throws IOException {
930+
AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
931+
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
932+
// Ten documents that have the field...
for (int i = 0; i < 10; i++) {
933+
iw.addDocument(buildDocWithField.apply(i));
934+
}
935+
// ...and ten empty documents that must not be counted.
for (int i = 0; i < 10; i++) {
936+
iw.addDocument(org.elasticsearch.common.collect.List.of());
937+
}
938+
};
939+
// Exists queries convert to MatchNone if this isn't defined
940+
FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
941+
debugTestCase(
942+
builder,
943+
new MatchAllDocsQuery(),
944+
buildIndex,
945+
(InternalFilters result, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
946+
assertThat(result.getBuckets(), hasSize(1));
947+
// Only the ten documents with the field match.
assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L));
948+
949+
assertThat(impl, equalTo(FiltersAggregator.FilterByFilter.class));
950+
Map<?, ?> filterAggDebug = debug.get("test");
951+
List<?> filtersDebug = (List<?>) filterAggDebug.get("filters");
952+
Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
953+
assertThat(filterDebug, hasEntry("specialized_for", "docvalues_field_exists"));
954+
assertThat((int) filterDebug.get("results_from_metadata"), canUseMetadata ? greaterThan(0) : equalTo(0));
955+
},
956+
fieldType,
957+
fnft
958+
);
959+
// Second pass: check the cost estimate and how many scorers were prepared while estimating.
withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
960+
long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
961+
Map<String, Object> debug = new HashMap<>();
962+
aggregator.collectDebugInfo(debug::put);
963+
List<?> filtersDebug = (List<?>) debug.get("filters");
964+
Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
965+
assertThat(estimatedCost, canUseMetadata ? equalTo(0L) : greaterThan(0L));
966+
assertThat((int) filterDebug.get("scorers_prepared_while_estimating_cost"), canUseMetadata ? equalTo(0) : greaterThan(0));
967+
}, fieldType, fnft);
968+
}
969+
970+
/**
 * Shared test case for an index whose documents never contain the field:
 * indexes 10 empty documents and expects a zero cost estimate, results from
 * metadata, no scorers prepared, and a doc count of 0 for the exists filter.
 *
 * @param fieldType mapped field type whose name is used for the exists query
 */
private void docValuesFieldExistsNoDataTestCase(
971+
MappedFieldType fieldType
972+
) throws IOException {
973+
QueryBuilder exists = new ExistsQueryBuilder(fieldType.name());
974+
AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
975+
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
976+
// Ten empty documents; none has the field.
for (int i = 0; i < 10; i++) {
977+
iw.addDocument(org.elasticsearch.common.collect.List.of());
978+
}
979+
};
980+
// Exists queries convert to MatchNone if this isn't defined
981+
FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
982+
withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
983+
assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
984+
long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
985+
Map<String, Object> debug = collectAndGetFilterDebugInfo(searcher, aggregator);
986+
assertThat(debug, hasEntry("specialized_for", "docvalues_field_exists"));
987+
assertThat(estimatedCost, equalTo(0L));
988+
assertThat((int) debug.get("results_from_metadata"), greaterThan(0));
989+
assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), equalTo(0));
990+
}, fieldType, fnft);
991+
// Separately verify the aggregation result itself: one bucket with no documents.
testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> {
992+
assertThat(result.getBuckets(), hasSize(1));
993+
assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(0L));
994+
}, fieldType, fnft);
995+
}
996+
825997
@Override
826998
protected List<ObjectMapper> objectMappers() {
827999
return MOCK_OBJECT_MAPPERS;

0 commit comments

Comments
 (0)