Skip to content

Commit 45ef685

Browse files
authored
Fix bug with nested and filters agg (backport of #67043) (#67171)
Fixes a bug where nested documents that match a filter in the `filters` agg will be counted as matching the filter. Usually nested documents only match if you explicitly ask to match them. Worse, we only mach them in the "filter by filter" mode that we wrote to speed up date_histogram. The `filters` agg is fairly rare, but with #63643 we run `date_histogram` and `range` aggregations using `filters`.
1 parent f3b47b5 commit 45ef685

File tree

7 files changed

+168
-6
lines changed

7 files changed

+168
-6
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/220_filters_bucket.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,3 +341,55 @@ setup:
341341
- length: { aggregations.f.buckets: 2 }
342342
- match: { aggregations.f.buckets.foo.doc_count: 4 }
343343
- match: { aggregations.f.buckets.foo_bar.doc_count: 4 }
344+
345+
---
346+
347+
nested:
348+
# Tests that we don't accidentally match nested documents when the filter
349+
# matches it.
350+
- skip:
351+
version: " - 7.10.99"
352+
reason: fixed in 7.11.0
353+
354+
- do:
355+
indices.create:
356+
index: test_nested
357+
body:
358+
settings:
359+
number_of_replicas: 0
360+
mappings:
361+
properties:
362+
i:
363+
type: integer
364+
nested:
365+
type: nested
366+
properties:
367+
j:
368+
type: integer
369+
370+
- do:
371+
bulk:
372+
refresh: true
373+
index: test_nested
374+
body:
375+
- index: {}
376+
- i: 1
377+
nested:
378+
- j: 2
379+
- j: 3
380+
- j: 4
381+
382+
- do:
383+
search:
384+
index: test_nested
385+
body:
386+
size: 0
387+
aggs:
388+
f:
389+
filters:
390+
filters:
391+
foo:
392+
match_all: {}
393+
- match: { hits.total.value: 1 }
394+
- length: { aggregations.f.buckets: 1 }
395+
- match: { aggregations.f.buckets.foo.doc_count: 1 }

server/src/main/java/org/elasticsearch/search/SearchService.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -938,7 +938,12 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc
938938
if (source.aggregations() != null && includeAggregations) {
939939
AggregationContext aggContext = new ProductionAggregationContext(
940940
context.getQueryShardContext(),
941-
context.query() == null ? new MatchAllDocsQuery() : context.query(),
941+
/*
942+
* The query on the search context right now doesn't include
943+
* the filter for nested documents or slicing so we have to
944+
* delay reading it until the aggs ask for it.
945+
*/
946+
() -> context.query() == null ? new MatchAllDocsQuery() : context.query(),
942947
context.getProfilers() == null ? null : context.getProfilers().getAggregationProfiler(),
943948
multiBucketConsumerService.create(),
944949
() -> new SubSearchContext(context).parsedQuery(context.parsedQuery()).fetchFieldsContext(context.fetchFieldsContext()),

server/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ public final AggregationUsageService getUsageService() {
247247
public static class ProductionAggregationContext extends AggregationContext {
248248
private final QueryShardContext context;
249249
private final BigArrays bigArrays;
250-
private final Query topLevelQuery;
250+
private final Supplier<Query> topLevelQuery;
251251
private final AggregationProfiler profiler;
252252
private final MultiBucketConsumer multiBucketConsumer;
253253
private final Supplier<SubSearchContext> subSearchContextBuilder;
@@ -259,7 +259,7 @@ public static class ProductionAggregationContext extends AggregationContext {
259259

260260
public ProductionAggregationContext(
261261
QueryShardContext context,
262-
Query topLevelQuery,
262+
Supplier<Query> topLevelQuery,
263263
@Nullable AggregationProfiler profiler,
264264
MultiBucketConsumer multiBucketConsumer,
265265
Supplier<SubSearchContext> subSearchContextBuilder,
@@ -284,7 +284,7 @@ public ProductionAggregationContext(
284284

285285
@Override
286286
public Query query() {
287-
return topLevelQuery;
287+
return topLevelQuery.get();
288288
}
289289

290290
@Override

server/src/test/java/org/elasticsearch/search/SearchServiceTests.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,12 @@
2323
import org.apache.lucene.index.DirectoryReader;
2424
import org.apache.lucene.index.FilterDirectoryReader;
2525
import org.apache.lucene.index.LeafReader;
26+
import org.apache.lucene.search.ConstantScoreQuery;
27+
import org.apache.lucene.search.MatchAllDocsQuery;
2628
import org.apache.lucene.search.Query;
2729
import org.apache.lucene.store.AlreadyClosedException;
2830
import org.elasticsearch.ElasticsearchException;
31+
import org.elasticsearch.Version;
2932
import org.elasticsearch.action.ActionListener;
3033
import org.elasticsearch.action.OriginalIndices;
3134
import org.elasticsearch.action.index.IndexResponse;
@@ -42,9 +45,11 @@
4245
import org.elasticsearch.common.UUIDs;
4346
import org.elasticsearch.common.io.stream.StreamInput;
4447
import org.elasticsearch.common.io.stream.StreamOutput;
48+
import org.elasticsearch.common.lucene.search.Queries;
4549
import org.elasticsearch.common.settings.Settings;
4650
import org.elasticsearch.common.unit.TimeValue;
4751
import org.elasticsearch.common.xcontent.XContentBuilder;
52+
import org.elasticsearch.common.xcontent.json.JsonXContent;
4853
import org.elasticsearch.index.Index;
4954
import org.elasticsearch.index.IndexModule;
5055
import org.elasticsearch.index.IndexNotFoundException;
@@ -74,8 +79,10 @@
7479
import org.elasticsearch.search.aggregations.AggregationBuilders;
7580
import org.elasticsearch.search.aggregations.InternalAggregation;
7681
import org.elasticsearch.search.aggregations.MultiBucketConsumerService;
82+
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregationBuilder;
7783
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
7884
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
85+
import org.elasticsearch.search.aggregations.support.AggregationContext;
7986
import org.elasticsearch.search.aggregations.support.ValueType;
8087
import org.elasticsearch.search.builder.SearchSourceBuilder;
8188
import org.elasticsearch.search.fetch.FetchSearchResult;
@@ -104,6 +111,7 @@
104111
import java.util.concurrent.Semaphore;
105112
import java.util.concurrent.atomic.AtomicBoolean;
106113
import java.util.concurrent.atomic.AtomicInteger;
114+
import java.util.function.Consumer;
107115
import java.util.function.Function;
108116

109117
import static java.util.Collections.singletonList;
@@ -824,6 +832,52 @@ public void testSetSearchThrottled() {
824832
service.canMatch(req, ActionListener.wrap(r -> assertSame(Thread.currentThread(), currentThread), e -> fail("unexpected")));
825833
}
826834

835+
public void testAggContextGetsMatchAll() throws IOException {
836+
createIndex("test");
837+
withAggregationContext("test", context -> assertThat(context.query(), equalTo(new MatchAllDocsQuery())));
838+
}
839+
840+
public void testAggContextGetsNestedFilter() throws IOException {
841+
XContentBuilder mapping = JsonXContent.contentBuilder().startObject().startObject("properties");
842+
mapping.startObject("nested").field("type", "nested").endObject();
843+
mapping.endObject().endObject();
844+
845+
createIndex("test", Settings.EMPTY, "test", mapping);
846+
withAggregationContext(
847+
"test",
848+
context -> assertThat(context.query(), equalTo(new ConstantScoreQuery(Queries.newNonNestedFilter(Version.CURRENT))))
849+
);
850+
}
851+
852+
/**
853+
* Build an {@link AggregationContext} with the named index.
854+
*/
855+
private void withAggregationContext(String index, Consumer<AggregationContext> check) throws IOException {
856+
IndexService indexService = getInstanceFromNode(IndicesService.class).indexServiceSafe(resolveIndex(index));
857+
ShardId shardId = new ShardId(indexService.index(), 0);
858+
859+
SearchRequest request = new SearchRequest().indices(index)
860+
.source(new SearchSourceBuilder().aggregation(new FiltersAggregationBuilder("test", new MatchAllQueryBuilder())))
861+
.allowPartialSearchResults(false);
862+
ShardSearchRequest shardRequest = new ShardSearchRequest(
863+
OriginalIndices.NONE,
864+
request,
865+
shardId,
866+
0,
867+
1,
868+
AliasFilter.EMPTY,
869+
1,
870+
0,
871+
null
872+
);
873+
874+
try (ReaderContext readerContext = createReaderContext(indexService, indexService.getShard(0))) {
875+
try (SearchContext context = getInstanceFromNode(SearchService.class).createContext(readerContext, shardRequest, null, true)) {
876+
check.accept(context.aggregations().factories().context());
877+
}
878+
}
879+
}
880+
827881
public void testExpandSearchThrottled() {
828882
createIndex("throttled_threadpool_index");
829883
client().execute(

server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,24 @@
2929
import org.apache.lucene.search.MatchAllDocsQuery;
3030
import org.apache.lucene.search.Query;
3131
import org.apache.lucene.store.Directory;
32+
import org.elasticsearch.Version;
33+
import org.elasticsearch.common.CheckedConsumer;
34+
import org.elasticsearch.common.lucene.search.Queries;
3235
import org.elasticsearch.index.mapper.DateFieldMapper;
3336
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
3437
import org.elasticsearch.index.mapper.KeywordFieldMapper;
38+
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
3539
import org.elasticsearch.index.mapper.MappedFieldType;
40+
import org.elasticsearch.index.mapper.ObjectMapper;
41+
import org.elasticsearch.index.query.MatchAllQueryBuilder;
3642
import org.elasticsearch.index.query.QueryBuilder;
3743
import org.elasticsearch.index.query.QueryBuilders;
3844
import org.elasticsearch.index.query.RangeQueryBuilder;
45+
import org.elasticsearch.index.query.TermQueryBuilder;
3946
import org.elasticsearch.search.aggregations.AggregationBuilder;
4047
import org.elasticsearch.search.aggregations.AggregatorTestCase;
4148
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter;
49+
import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregatorTests;
4250
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
4351
import org.junit.Before;
4452

@@ -296,4 +304,46 @@ public void testFilterByFilterCost() throws IOException {
296304
ft
297305
);
298306
}
307+
308+
/**
309+
* Check that we don't accidentally find nested documents when the filter
310+
* matches it.
311+
*/
312+
public void testNested() throws IOException {
313+
KeywordFieldType ft = new KeywordFieldType("author");
314+
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> iw.addDocuments(
315+
NestedAggregatorTests.generateBook("test", new String[] { "foo", "bar" }, new int[] { 5, 10, 15, 20 })
316+
);
317+
testCase(
318+
new FiltersAggregationBuilder("test", new KeyedFilter("q1", new TermQueryBuilder("author", "foo"))),
319+
Queries.newNonNestedFilter(Version.CURRENT),
320+
buildIndex,
321+
result -> {
322+
InternalFilters filters = (InternalFilters) result;
323+
assertThat(filters.getBuckets(), hasSize(1));
324+
assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(1L));
325+
},
326+
ft
327+
);
328+
testCase(
329+
new FiltersAggregationBuilder("test", new KeyedFilter("q1", new MatchAllQueryBuilder())),
330+
Queries.newNonNestedFilter(Version.CURRENT),
331+
buildIndex,
332+
result -> {
333+
InternalFilters filters = (InternalFilters) result;
334+
assertThat(filters.getBuckets(), hasSize(1));
335+
assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(1L));
336+
},
337+
ft
338+
);
339+
}
340+
341+
@Override
342+
protected List<ObjectMapper> objectMappers() {
343+
return MOCK_OBJECT_MAPPERS;
344+
}
345+
346+
static final List<ObjectMapper> MOCK_OBJECT_MAPPERS = org.elasticsearch.common.collect.List.of(
347+
NestedAggregatorTests.nestedObject("nested_chapters")
348+
);
299349
}

server/src/test/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregatorTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ private static double[] generateDocuments(List<Document> documents, int numNeste
843843
return values;
844844
}
845845

846-
private List<Document> generateBook(String id, String[] authors, int[] numPages) {
846+
public static List<Document> generateBook(String id, String[] authors, int[] numPages) {
847847
List<Document> documents = new ArrayList<>();
848848

849849
for (int numPage : numPages) {
@@ -858,6 +858,7 @@ private List<Document> generateBook(String id, String[] authors, int[] numPages)
858858
document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE));
859859
document.add(sequenceIDFields.primaryTerm);
860860
for (String author : authors) {
861+
document.add(new Field("author", author, KeywordFieldMapper.Defaults.FIELD_TYPE));
861862
document.add(new SortedSetDocValuesField("author", new BytesRef(author)));
862863
}
863864
documents.add(document);

test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ public void onCache(ShardId shardId, Accountable accountable) {}
280280
MultiBucketConsumer consumer = new MultiBucketConsumer(maxBucket, breakerService.getBreaker(CircuitBreaker.REQUEST));
281281
return new ProductionAggregationContext(
282282
queryShardContext,
283-
query,
283+
() -> query,
284284
null,
285285
consumer,
286286
() -> buildSubSearchContext(indexSettings, queryShardContext, bitsetFilterCache),

0 commit comments

Comments
 (0)