Skip to content

Commit 0cf63fc

Browse files
authored
Optimize lone single bucket date_histogram (#71180)
This optimizes the `date_histogram` agg when there is a single bucket and no sub-aggregations. We expect this to happen from time to time when the buckets are larger than a day, because folks often use "daily" indices. This was already fairly fast, but using the metadata makes it roughly 10x faster: something like 98ms becomes 7.5ms. Nice if you can get it! Like #69377, this optimization will disable itself if you have document-level security enabled or are querying a rollup index. Also like #69377, it won't do anything if there is a top-level query.
1 parent dc1bf6e commit 0cf63fc

File tree

5 files changed

+308
-16
lines changed

5 files changed

+308
-16
lines changed

server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
1818
import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedOrdinalsSamplerAggregator;
1919
import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator;
20+
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
2021
import org.elasticsearch.search.profile.ProfileResult;
2122
import org.elasticsearch.search.profile.ProfileShardResult;
2223
import org.elasticsearch.test.ESIntegTestCase;
@@ -590,7 +591,9 @@ public void testFilterByFilter() throws InterruptedException, IOException {
590591

591592
SearchResponse response = client().prepareSearch("dateidx")
592593
.setProfile(true)
593-
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH))
594+
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH)
595+
// Add a sub-agg so we don't get to use metadata. That's great and all, but it outputs less debugging info for us to verify.
596+
.subAggregation(new MaxAggregationBuilder("m").field("date")))
594597
.get();
595598
assertSearchResponse(response);
596599
Map<String, ProfileShardResult> profileResults = response.getProfileResults();
@@ -607,7 +610,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
607610
assertThat(histoAggResult, notNullValue());
608611
assertThat(histoAggResult.getQueryName(), equalTo("DateHistogramAggregator.FromDateRange"));
609612
assertThat(histoAggResult.getLuceneDescription(), equalTo("histo"));
610-
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(0));
613+
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(1));
611614
assertThat(histoAggResult.getTime(), greaterThan(0L));
612615
Map<String, Long> breakdown = histoAggResult.getTimeBreakdown();
613616
assertThat(breakdown, notNullValue());
@@ -639,7 +642,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
639642
Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
640643
assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
641644
assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
642-
assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
645+
assertThat(queryDebug, hasEntry("query", "DocValuesFieldExistsQuery [field=date]"));
643646
}
644647
}
645648
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,15 @@
88

99
package org.elasticsearch.search.aggregations.bucket.filter;
1010

11+
import org.apache.lucene.index.FieldInfo;
1112
import org.apache.lucene.index.IndexReader;
1213
import org.apache.lucene.index.LeafReaderContext;
14+
import org.apache.lucene.index.PointValues;
1315
import org.apache.lucene.search.BooleanClause;
1416
import org.apache.lucene.search.BooleanQuery;
1517
import org.apache.lucene.search.BulkScorer;
1618
import org.apache.lucene.search.ConstantScoreQuery;
19+
import org.apache.lucene.search.DocValuesFieldExistsQuery;
1720
import org.apache.lucene.search.IndexOrDocValuesQuery;
1821
import org.apache.lucene.search.IndexSearcher;
1922
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
@@ -49,9 +52,21 @@ public class QueryToFilterAdapter<Q extends Query> {
4952
*/
5053
public static QueryToFilterAdapter<?> build(IndexSearcher searcher, String key, Query query) throws IOException {
5154
query = searcher.rewrite(query);
55+
if (query instanceof ConstantScoreQuery) {
56+
/*
57+
* Unwrap constant score because it gets in the way of us
58+
* understanding what the queries are trying to do and we
59+
* don't use the score at all anyway. Effectively we always
60+
* run in constant score mode.
61+
*/
62+
query = ((ConstantScoreQuery) query).getQuery();
63+
}
5264
if (query instanceof TermQuery) {
5365
return new TermQueryToFilterAdapter(searcher, key, (TermQuery) query);
5466
}
67+
if (query instanceof DocValuesFieldExistsQuery) {
68+
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) query);
69+
}
5570
if (query instanceof MatchAllDocsQuery) {
5671
return new MatchAllQueryToFilterAdapter(searcher, key, (MatchAllDocsQuery) query);
5772
}
@@ -386,4 +401,50 @@ void collectDebugInfo(BiConsumer<String, Object> add) {
386401
add.accept("results_from_metadata", resultsFromMetadata);
387402
}
388403
}
404+
405+
/**
 * Adapter for {@link DocValuesFieldExistsQuery} that can answer per-segment
 * counts from the field's point-values metadata instead of running the query.
 * Segments where that is not possible are delegated to the superclass.
 */
private static class DocValuesFieldExistsAdapter extends QueryToFilterAdapter<DocValuesFieldExistsQuery> {
406+
// Number of count() calls answered from metadata; reported by collectDebugInfo.
private int resultsFromMetadata;
407+
408+
private DocValuesFieldExistsAdapter(IndexSearcher searcher, String key, DocValuesFieldExistsQuery query) {
409+
super(searcher, key, query);
410+
}
411+
412+
/**
 * Counts the documents matching the exists query in one segment. When both
 * {@code countCanUseMetadata} and {@link #canCountFromMetadata} allow it the
 * count is read from the point values; otherwise the superclass counts.
 */
@Override
413+
long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException {
414+
if (countCanUseMetadata(counter, live) && canCountFromMetadata(ctx)) {
415+
resultsFromMetadata++;
416+
PointValues points = ctx.reader().getPointValues(query().getField());
417+
// No point values for the field in this segment, so nothing matches.
if (points == null) {
418+
return 0;
419+
}
420+
// NOTE(review): relies on every document that has doc values for the
// field also having points for it - confirm for all mapped field types.
return points.getDocCount();
421+
422+
}
423+
return super.count(ctx, counter, live);
424+
}
425+
426+
/**
 * Reports a cost of 0 when this segment's count would come from metadata;
 * otherwise defers to the superclass estimate.
 */
@Override
427+
long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
428+
if (canUseMetadata.get() && canCountFromMetadata(ctx)) {
429+
return 0;
430+
}
431+
return super.estimateCountCost(ctx, canUseMetadata);
432+
}
433+
434+
/**
 * Whether the segment's count can be taken from metadata: true when the
 * segment has no {@link FieldInfo} for the field, or when the field is
 * indexed with points.
 */
private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
435+
FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField());
436+
if (info == null) {
437+
// If we don't have any info then there aren't any values anyway.
438+
return true;
439+
}
440+
return info.getPointDimensionCount() > 0;
441+
}
442+
443+
/**
 * Adds this adapter's marker and the number of metadata-backed counts to
 * the debug output produced by the superclass.
 */
@Override
444+
void collectDebugInfo(BiConsumer<String, Object> add) {
445+
super.collectDebugInfo(add);
446+
add.accept("specialized_for", "docvalues_field_exists");
447+
add.accept("results_from_metadata", resultsFromMetadata);
448+
}
449+
}
389450
}

server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java

Lines changed: 174 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
import org.apache.lucene.document.Document;
1111
import org.apache.lucene.document.Field;
1212
import org.apache.lucene.document.LongPoint;
13+
import org.apache.lucene.document.NumericDocValuesField;
1314
import org.apache.lucene.document.SortedNumericDocValuesField;
15+
import org.apache.lucene.document.SortedSetDocValuesField;
1416
import org.apache.lucene.index.DirectoryReader;
1517
import org.apache.lucene.index.IndexReader;
1618
import org.apache.lucene.index.IndexableField;
@@ -32,12 +34,14 @@
3234
import org.elasticsearch.index.mapper.DateFieldMapper;
3335
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
3436
import org.elasticsearch.index.mapper.DocCountFieldMapper;
37+
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
3538
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3639
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
3740
import org.elasticsearch.index.mapper.MappedFieldType;
3841
import org.elasticsearch.index.mapper.NumberFieldMapper;
3942
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
4043
import org.elasticsearch.index.mapper.ObjectMapper;
44+
import org.elasticsearch.index.query.ExistsQueryBuilder;
4145
import org.elasticsearch.index.query.MatchAllQueryBuilder;
4246
import org.elasticsearch.index.query.MatchQueryBuilder;
4347
import org.elasticsearch.index.query.QueryBuilder;
@@ -61,7 +65,6 @@
6165
import org.elasticsearch.search.aggregations.support.AggregationContext;
6266
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
6367
import org.elasticsearch.search.internal.ContextIndexSearcherTests.DocumentSubsetDirectoryReader;
64-
import org.junit.Before;
6568

6669
import java.io.IOException;
6770
import java.util.ArrayList;
@@ -72,6 +75,7 @@
7275
import java.util.Map;
7376
import java.util.Set;
7477
import java.util.concurrent.TimeUnit;
78+
import java.util.function.IntFunction;
7579

7680
import static org.hamcrest.Matchers.both;
7781
import static org.hamcrest.Matchers.equalTo;
@@ -85,14 +89,6 @@
8589
import static org.mockito.Mockito.mock;
8690

8791
public class FiltersAggregatorTests extends AggregatorTestCase {
88-
private MappedFieldType fieldType;
89-
90-
@Before
91-
public void setUpTest() throws Exception {
92-
super.setUp();
93-
fieldType = new KeywordFieldMapper.KeywordFieldType("field");
94-
}
95-
9692
public void testEmpty() throws Exception {
9793
Directory directory = newDirectory();
9894
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
@@ -106,7 +102,12 @@ public void testEmpty() throws Exception {
106102
}
107103
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
108104
builder.otherBucketKey("other");
109-
InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
105+
InternalFilters response = searchAndReduce(
106+
indexSearcher,
107+
new MatchAllDocsQuery(),
108+
builder,
109+
new KeywordFieldMapper.KeywordFieldType("field")
110+
);
110111
assertEquals(response.getBuckets().size(), numFilters);
111112
for (InternalFilters.InternalBucket filter : response.getBuckets()) {
112113
assertEquals(filter.getDocCount(), 0);
@@ -206,7 +207,12 @@ public void testKeyedFilter() throws Exception {
206207
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
207208
builder.otherBucket(true);
208209
builder.otherBucketKey("other");
209-
final InternalFilters filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
210+
final InternalFilters filters = searchAndReduce(
211+
indexSearcher,
212+
new MatchAllDocsQuery(),
213+
builder,
214+
new KeywordFieldMapper.KeywordFieldType("field")
215+
);
210216
assertEquals(filters.getBuckets().size(), 7);
211217
assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
212218
assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
@@ -261,7 +267,12 @@ public void testRandom() throws Exception {
261267
builder.otherBucket(true);
262268
builder.otherBucketKey("other");
263269

264-
final InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
270+
final InternalFilters response = searchAndReduce(
271+
indexSearcher,
272+
new MatchAllDocsQuery(),
273+
builder,
274+
new KeywordFieldMapper.KeywordFieldType("field")
275+
);
265276
List<InternalFilters.InternalBucket> buckets = response.getBuckets();
266277
assertEquals(buckets.size(), filters.length + 1);
267278

@@ -789,6 +800,157 @@ public void testSubAggsManyFilters() throws IOException {
789800
}, dateFt, intFt);
790801
}
791802

803+
/**
 * Exists-style filter on a date field indexed with both points and doc
 * values; expects the count to be answered from metadata.
 */
public void testDocValuesFieldExistsForDate() throws IOException {
804+
DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f");
805+
QueryBuilder exists;
806+
// Randomly exercise either an explicit exists query or a range that covers every indexed value.
if (randomBoolean()) {
807+
exists = new ExistsQueryBuilder("f");
808+
} else {
809+
// Range query covering all values in the index is rewritten to exists
810+
exists = new RangeQueryBuilder("f").gte("2020-01-01").lt("2020-01-02");
811+
}
812+
long start = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
813+
docValuesFieldExistsTestCase(exists, ft, true, i -> {
814+
// One value per document, spaced an hour apart from the start instant.
long date = start + TimeUnit.HOURS.toMillis(i);
815+
return List.of(new LongPoint("f", date), new NumericDocValuesField("f", date));
816+
});
817+
}
818+
819+
/**
 * Same as the single-valued date case, but each document carries two values
 * for the field; the helper still expects one counted document per indexed
 * document and a metadata-backed result.
 */
public void testDocValuesFieldExistsForDateWithMultiValuedFields() throws IOException {
820+
DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f");
821+
long start = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
822+
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, true, i -> {
823+
long date = start + TimeUnit.HOURS.toMillis(i);
824+
// Two points and two doc values per document: date and date + 10ms.
return List.of(
825+
new LongPoint("f", date),
826+
new LongPoint("f", date + 10),
827+
new SortedNumericDocValuesField("f", date),
828+
new SortedNumericDocValuesField("f", date + 10)
829+
);
830+
});
831+
}
832+
833+
/** Runs the no-data scenario for a date field named "f". */
public void testDocValuesFieldExistsForDateWithoutData() throws IOException {
834+
docValuesFieldExistsNoDataTestCase(new DateFieldMapper.DateFieldType("f"));
835+
}
836+
837+
/**
 * Exists filter on a numeric field of a randomly chosen number type; expects
 * the count to be answered from metadata.
 */
public void testDocValuesFieldExistsForNumber() throws IOException {
838+
NumberFieldMapper.NumberType numberType = randomFrom(NumberFieldMapper.NumberType.values());
839+
// NOTE(review): the positional boolean/null arguments are defined by the
// NumberFieldType constructor, which is not shown here - confirm their meaning there.
NumberFieldMapper.NumberFieldType ft = new NumberFieldMapper.NumberFieldType(
840+
"f",
841+
numberType,
842+
true,
843+
false,
844+
true,
845+
true,
846+
null,
847+
Map.of(),
848+
null
849+
);
850+
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, true, i -> {
851+
// Uses the document index as the field value; presumably the flags
// request indexed + doc values + not stored - TODO confirm against createFields.
return numberType.createFields("f", i, true, true, false);
852+
});
853+
}
854+
855+
/**
 * Runs the no-data scenario for a numeric field "f" of a randomly chosen
 * number type, built with the same constructor arguments as the with-data test.
 */
public void testDocValuesFieldExistsForNumberWithoutData() throws IOException {
856+
docValuesFieldExistsNoDataTestCase(new NumberFieldMapper.NumberFieldType(
857+
"f",
858+
randomFrom(NumberFieldMapper.NumberType.values()),
859+
true,
860+
false,
861+
true,
862+
true,
863+
null,
864+
Map.of(),
865+
null
866+
));
867+
}
868+
869+
/**
 * Exists filter on a keyword field. The documents carry no point fields, so
 * the helper is told not to expect a metadata-backed count
 * ({@code canUseMetadata == false}).
 */
public void testDocValuesFieldExistsForKeyword() throws IOException {
870+
KeywordFieldMapper.KeywordFieldType ft = new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of());
871+
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, false, i -> {
872+
// Random 5-character value; the document index is not used.
BytesRef text = new BytesRef(randomAlphaOfLength(5));
873+
return List.of(new Field("f", text, KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("f", text));
874+
});
875+
}
876+
877+
/** Runs the no-data scenario for a keyword field named "f". */
public void testDocValuesFieldExistsForKeywordWithoutData() throws IOException {
878+
docValuesFieldExistsNoDataTestCase(new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of()));
879+
}
880+
881+
/**
 * Shared scenario: a single-filter {@code filters} aggregation ("q1") over an
 * index with 10 documents that have the field and 10 empty documents.
 * Verifies the bucket count, that the filter was specialized for
 * "docvalues_field_exists", and whether metadata was used.
 *
 * @param exists            the filter query under test
 * @param fieldType         mapping for the field being checked
 * @param canUseMetadata    whether counts and cost estimates are expected to come from metadata
 * @param buildDocWithField builds the fields of the i-th document that has the field
 */
private void docValuesFieldExistsTestCase(
882+
QueryBuilder exists,
883+
MappedFieldType fieldType,
884+
boolean canUseMetadata,
885+
IntFunction<List<? extends IndexableField>> buildDocWithField
886+
) throws IOException {
887+
AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
888+
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
889+
// 10 documents with the field...
for (int i = 0; i < 10; i++) {
890+
iw.addDocument(buildDocWithField.apply(i));
891+
}
892+
// ...and 10 documents without any fields, which must not be counted.
for (int i = 0; i < 10; i++) {
893+
iw.addDocument(List.of());
894+
}
895+
};
896+
// Exists queries convert to MatchNone if this isn't defined
897+
FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
898+
// First pass: run the aggregation and check the result and its debug info.
debugTestCase(
899+
builder,
900+
new MatchAllDocsQuery(),
901+
buildIndex,
902+
(InternalFilters result, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
903+
assertThat(result.getBuckets(), hasSize(1));
904+
assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L));
905+
906+
assertThat(impl, equalTo(FiltersAggregator.FilterByFilter.class));
907+
Map<?, ?> filterAggDebug = debug.get("test");
908+
List<?> filtersDebug = (List<?>) filterAggDebug.get("filters");
909+
Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
910+
assertThat(filterDebug, hasEntry("specialized_for", "docvalues_field_exists"));
911+
assertThat((int) filterDebug.get("results_from_metadata"), canUseMetadata ? greaterThan(0) : equalTo(0));
912+
},
913+
fieldType,
914+
fnft
915+
);
916+
// Second pass: only estimate the cost, then check that estimating needed no scorers when metadata is usable.
withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
917+
long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
918+
Map<String, Object> debug = new HashMap<>();
919+
aggregator.collectDebugInfo(debug::put);
920+
List<?> filtersDebug = (List<?>) debug.get("filters");
921+
Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
922+
assertThat(estimatedCost, canUseMetadata ? equalTo(0L) : greaterThan(0L));
923+
assertThat((int) filterDebug.get("scorers_prepared_while_estimating_cost"), canUseMetadata ? equalTo(0) : greaterThan(0));
924+
}, fieldType, fnft);
925+
}
926+
927+
/**
 * Shared scenario for an index in which no document has the field: 10 empty
 * documents are indexed and an exists filter on {@code fieldType.name()} is
 * run. Expects a zero-cost estimate, a metadata-backed result, and a bucket
 * with a doc count of 0.
 *
 * @param fieldType mapping for the (absent) field being checked
 */
private void docValuesFieldExistsNoDataTestCase(
928+
MappedFieldType fieldType
929+
) throws IOException {
930+
QueryBuilder exists = new ExistsQueryBuilder(fieldType.name());
931+
AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
932+
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
933+
// Only empty documents, so the field never appears in the index.
for (int i = 0; i < 10; i++) {
934+
iw.addDocument(List.of());
935+
}
936+
};
937+
// Exists queries convert to MatchNone if this isn't defined
938+
FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
939+
// Check the aggregator implementation, its cost estimate and its debug info.
withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
940+
assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
941+
long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
942+
Map<String, Object> debug = collectAndGetFilterDebugInfo(searcher, aggregator);
943+
assertThat(debug, hasEntry("specialized_for", "docvalues_field_exists"));
944+
assertThat(estimatedCost, equalTo(0L));
945+
assertThat((int) debug.get("results_from_metadata"), greaterThan(0));
946+
assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), equalTo(0));
947+
}, fieldType, fnft);
948+
// Check the reduced result: one bucket, no matching documents.
testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> {
949+
assertThat(result.getBuckets(), hasSize(1));
950+
assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(0L));
951+
}, fieldType, fnft);
952+
}
953+
792954
@Override
793955
protected List<ObjectMapper> objectMappers() {
794956
return MOCK_OBJECT_MAPPERS;

0 commit comments

Comments
 (0)