Skip to content

Optimize lone single bucket date_histogram (backport of #71180) #72989

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedOrdinalsSamplerAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator;
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.elasticsearch.search.profile.ProfileResult;
import org.elasticsearch.search.profile.ProfileShardResult;
import org.elasticsearch.test.ESIntegTestCase;
Expand Down Expand Up @@ -591,7 +592,9 @@ public void testFilterByFilter() throws InterruptedException, IOException {

SearchResponse response = client().prepareSearch("dateidx")
.setProfile(true)
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH))
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH)
// Add a sub-agg so we don't get to use metadata. That's great and all, but it outputs less debugging info for us to verify.
.subAggregation(new MaxAggregationBuilder("m").field("date")))
.get();
assertSearchResponse(response);
Map<String, ProfileShardResult> profileResults = response.getProfileResults();
Expand All @@ -608,7 +611,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
assertThat(histoAggResult, notNullValue());
assertThat(histoAggResult.getQueryName(), equalTo("DateHistogramAggregator.FromDateRange"));
assertThat(histoAggResult.getLuceneDescription(), equalTo("histo"));
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(0));
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(1));
assertThat(histoAggResult.getTime(), greaterThan(0L));
Map<String, Long> breakdown = histoAggResult.getTimeBreakdown();
assertThat(breakdown, notNullValue());
Expand Down Expand Up @@ -643,7 +646,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
assertThat(queryDebug, hasEntry("query", "DocValuesFieldExistsQuery [field=date]"));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@

package org.elasticsearch.search.aggregations.bucket.filter;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
Expand Down Expand Up @@ -49,9 +52,21 @@ public class QueryToFilterAdapter<Q extends Query> {
*/
public static QueryToFilterAdapter<?> build(IndexSearcher searcher, String key, Query query) throws IOException {
query = searcher.rewrite(query);
if (query instanceof ConstantScoreQuery) {
/*
* Unwrap constant score because it gets in the way of us
* understanding what the queries are trying to do and we
* don't use the score at all anyway. Effectively we always
* run in constant score mode.
*/
query = ((ConstantScoreQuery) query).getQuery();
}
if (query instanceof TermQuery) {
return new TermQueryToFilterAdapter(searcher, key, (TermQuery) query);
}
if (query instanceof DocValuesFieldExistsQuery) {
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) query);
}
if (query instanceof MatchAllDocsQuery) {
return new MatchAllQueryToFilterAdapter(searcher, key, (MatchAllDocsQuery) query);
}
Expand Down Expand Up @@ -386,4 +401,50 @@ void collectDebugInfo(BiConsumer<String, Object> add) {
add.accept("results_from_metadata", resultsFromMetadata);
}
}

/**
 * Adapter that counts hits for a {@link DocValuesFieldExistsQuery} and,
 * when the field is also indexed with points, answers the count straight
 * from segment metadata via {@link PointValues#getDocCount()} instead of
 * running the query.
 */
private static class DocValuesFieldExistsAdapter extends QueryToFilterAdapter<DocValuesFieldExistsQuery> {
    // Number of segments whose count was served from points metadata;
    // surfaced through debug info so tests can assert the fast path ran.
    private int resultsFromMetadata;

    private DocValuesFieldExistsAdapter(IndexSearcher searcher, String key, DocValuesFieldExistsQuery query) {
        super(searcher, key, query);
    }

    @Override
    long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException {
        if (countCanUseMetadata(counter, live) == false || canCountFromMetadata(ctx) == false) {
            // Deleted docs or other constraints force the slow path.
            return super.count(ctx, counter, live);
        }
        resultsFromMetadata++;
        PointValues points = ctx.reader().getPointValues(query().getField());
        // No points in this segment means no values at all.
        return points == null ? 0 : points.getDocCount();
    }

    @Override
    long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
        // Metadata lookups are effectively free; otherwise defer to the
        // superclass which may need to prepare a scorer.
        return canUseMetadata.get() && canCountFromMetadata(ctx) ? 0 : super.estimateCountCost(ctx, canUseMetadata);
    }

    private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
        FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField());
        // A missing FieldInfo means the segment has no values for the field,
        // which metadata trivially "counts" as zero. Otherwise we need the
        // field to be indexed with points.
        return info == null || info.getPointDimensionCount() > 0;
    }

    @Override
    void collectDebugInfo(BiConsumer<String, Object> add) {
        super.collectDebugInfo(add);
        add.accept("specialized_for", "docvalues_field_exists");
        add.accept("results_from_metadata", resultsFromMetadata);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
Expand All @@ -33,12 +35,14 @@
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
import org.elasticsearch.index.mapper.DocCountFieldMapper;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.query.ExistsQueryBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
Expand All @@ -62,7 +66,6 @@
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.internal.ContextIndexSearcherTests.DocumentSubsetDirectoryReader;
import org.junit.Before;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -73,6 +76,7 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.IntFunction;

import static org.hamcrest.Matchers.both;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -86,14 +90,6 @@
import static org.mockito.Mockito.mock;

public class FiltersAggregatorTests extends AggregatorTestCase {
private MappedFieldType fieldType;

@Before
public void setUpTest() throws Exception {
super.setUp();
fieldType = new KeywordFieldMapper.KeywordFieldType("field");
}

public void testEmpty() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
Expand All @@ -107,7 +103,12 @@ public void testEmpty() throws Exception {
}
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
builder.otherBucketKey("other");
InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
InternalFilters response = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
assertEquals(response.getBuckets().size(), numFilters);
for (InternalFilters.InternalBucket filter : response.getBuckets()) {
assertEquals(filter.getDocCount(), 0);
Expand Down Expand Up @@ -207,7 +208,12 @@ public void testKeyedFilter() throws Exception {
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
builder.otherBucket(true);
builder.otherBucketKey("other");
final InternalFilters filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
final InternalFilters filters = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
assertEquals(filters.getBuckets().size(), 7);
assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
Expand Down Expand Up @@ -262,7 +268,12 @@ public void testRandom() throws Exception {
builder.otherBucket(true);
builder.otherBucketKey("other");

final InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
final InternalFilters response = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
List<InternalFilters.InternalBucket> buckets = response.getBuckets();
assertEquals(buckets.size(), filters.length + 1);

Expand Down Expand Up @@ -822,6 +833,167 @@ public void testSubAggsManyFilters() throws IOException {
}, dateFt, intFt);
}

public void testDocValuesFieldExistsForDate() throws IOException {
    DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType("f");
    // Either an explicit exists query or a range query wide enough to cover
    // every indexed value - the latter is rewritten into an exists query, so
    // both must take the specialized path.
    QueryBuilder exists = randomBoolean()
        ? new ExistsQueryBuilder("f")
        : new RangeQueryBuilder("f").gte("2020-01-01").lt("2020-01-02");
    long firstDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
    docValuesFieldExistsTestCase(exists, fieldType, true, docId -> {
        long date = firstDate + TimeUnit.HOURS.toMillis(docId);
        return org.elasticsearch.common.collect.List.of(new LongPoint("f", date), new NumericDocValuesField("f", date));
    });
}

public void testDocValuesFieldExistsForDateWithMultiValuedFields() throws IOException {
    DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType("f");
    long firstDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
    docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), fieldType, true, docId -> {
        // Two values per document to exercise multi-valued points and doc values.
        long lower = firstDate + TimeUnit.HOURS.toMillis(docId);
        long upper = lower + 10;
        return org.elasticsearch.common.collect.List.of(
            new LongPoint("f", lower),
            new LongPoint("f", upper),
            new SortedNumericDocValuesField("f", lower),
            new SortedNumericDocValuesField("f", upper)
        );
    });
}

public void testDocValuesFieldExistsForDateWithoutData() throws IOException {
    // A date field type with no documents carrying the field at all.
    DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType("f");
    docValuesFieldExistsNoDataTestCase(fieldType);
}

public void testDocValuesFieldExistsForNumber() throws IOException {
    // Any numeric type should support metadata-backed exists counts.
    NumberFieldMapper.NumberType numberType = randomFrom(NumberFieldMapper.NumberType.values());
    NumberFieldMapper.NumberFieldType fieldType = new NumberFieldMapper.NumberFieldType(
        "f",
        numberType,
        true,
        false,
        true,
        true,
        null,
        org.elasticsearch.common.collect.Map.of(),
        null
    );
    docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), fieldType, true, docId -> numberType.createFields("f", docId, true, true, false));
}

public void testDocValuesFieldExistsForNumberWithoutData() throws IOException {
    // A random numeric field type with no indexed data.
    NumberFieldMapper.NumberFieldType fieldType = new NumberFieldMapper.NumberFieldType(
        "f",
        randomFrom(NumberFieldMapper.NumberType.values()),
        true,
        false,
        true,
        true,
        null,
        org.elasticsearch.common.collect.Map.of(),
        null
    );
    docValuesFieldExistsNoDataTestCase(fieldType);
}

public void testDocValuesFieldExistsForKeyword() throws IOException {
    KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType(
        "f",
        true,
        true,
        org.elasticsearch.common.collect.Map.of()
    );
    // Keyword fields are indexed without points, so counts can't be served
    // from points metadata - hence canUseMetadata is false here.
    docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), fieldType, false, docId -> {
        BytesRef value = new BytesRef(randomAlphaOfLength(5));
        return org.elasticsearch.common.collect.List.of(
            new Field("f", value, KeywordFieldMapper.Defaults.FIELD_TYPE),
            new SortedSetDocValuesField("f", value)
        );
    });
}

public void testDocValuesFieldExistsForKeywordWithoutData() throws IOException {
    // A keyword field type with no documents carrying the field at all.
    KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType(
        "f",
        true,
        true,
        org.elasticsearch.common.collect.Map.of()
    );
    docValuesFieldExistsNoDataTestCase(fieldType);
}

/**
 * Shared fixture for the {@code testDocValuesFieldExistsFor*} tests: indexes
 * ten documents that contain the field and ten that don't, runs a keyed
 * filters aggregation over {@code exists}, and checks both the bucket count
 * and the debug output of the specialized docvalues-exists filter.
 *
 * @param exists            the query expected to rewrite to a doc-values exists filter
 * @param fieldType         mapped type for the field under test
 * @param canUseMetadata    whether counts/estimates should come from points metadata
 * @param buildDocWithField builds the indexable fields for document {@code i}
 */
private void docValuesFieldExistsTestCase(
    QueryBuilder exists,
    MappedFieldType fieldType,
    boolean canUseMetadata,
    IntFunction<List<? extends IndexableField>> buildDocWithField
) throws IOException {
    AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
    CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
        // First ten documents have the field, the next ten are empty.
        for (int doc = 0; doc < 10; doc++) {
            iw.addDocument(buildDocWithField.apply(doc));
        }
        for (int doc = 0; doc < 10; doc++) {
            iw.addDocument(org.elasticsearch.common.collect.List.of());
        }
    };
    // Exists queries convert to MatchNone if this isn't defined
    FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
    debugTestCase(
        builder,
        new MatchAllDocsQuery(),
        buildIndex,
        (InternalFilters result, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
            // Only the ten documents carrying the field land in the bucket.
            assertThat(result.getBuckets(), hasSize(1));
            assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L));

            // The specialized filter-by-filter implementation must have run.
            assertThat(impl, equalTo(FiltersAggregator.FilterByFilter.class));
            List<?> allFiltersDebug = (List<?>) debug.get("test").get("filters");
            Map<?, ?> firstFilterDebug = (Map<?, ?>) allFiltersDebug.get(0);
            assertThat(firstFilterDebug, hasEntry("specialized_for", "docvalues_field_exists"));
            assertThat((int) firstFilterDebug.get("results_from_metadata"), canUseMetadata ? greaterThan(0) : equalTo(0));
        },
        fieldType,
        fnft
    );
    withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
        long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
        Map<String, Object> debug = new HashMap<>();
        aggregator.collectDebugInfo(debug::put);
        Map<?, ?> firstFilterDebug = (Map<?, ?>) ((List<?>) debug.get("filters")).get(0);
        // When metadata can answer the count, estimating is free and no
        // scorers need to be prepared along the way.
        assertThat(estimatedCost, canUseMetadata ? equalTo(0L) : greaterThan(0L));
        assertThat((int) firstFilterDebug.get("scorers_prepared_while_estimating_cost"), canUseMetadata ? equalTo(0) : greaterThan(0));
    }, fieldType, fnft);
}

/**
 * Shared fixture for the {@code *WithoutData} tests: indexes only documents
 * that lack the field and verifies the specialized exists filter reports a
 * zero-cost, metadata-backed count of zero.
 */
private void docValuesFieldExistsNoDataTestCase(MappedFieldType fieldType) throws IOException {
    QueryBuilder exists = new ExistsQueryBuilder(fieldType.name());
    AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
    // Every indexed document is empty so the filter can never match.
    CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
        for (int doc = 0; doc < 10; doc++) {
            iw.addDocument(org.elasticsearch.common.collect.List.of());
        }
    };
    // Exists queries convert to MatchNone if this isn't defined
    FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
    withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
        assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
        long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
        Map<String, Object> debug = collectAndGetFilterDebugInfo(searcher, aggregator);
        assertThat(debug, hasEntry("specialized_for", "docvalues_field_exists"));
        // With no data at all, metadata answers everything for free.
        assertThat(estimatedCost, equalTo(0L));
        assertThat((int) debug.get("results_from_metadata"), greaterThan(0));
        assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), equalTo(0));
    }, fieldType, fnft);
    testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> {
        assertThat(result.getBuckets(), hasSize(1));
        assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(0L));
    }, fieldType, fnft);
}

@Override
protected List<ObjectMapper> objectMappers() {
return MOCK_OBJECT_MAPPERS;
Expand Down
Loading