Skip to content

Commit e82818a

Browse files
authored
Fix aggregators early termination with breadth-first mode (#44963)
This commit fixes a bug that occurs when a deferred aggregator tries to terminate the collection early. In such a case the CollectionTerminatedException is not caught and the search fails on the shard. This change makes sure that we catch the exception in order to continue the deferred collection on the next leaf. Fixes #44909
1 parent 11b3365 commit e82818a

File tree

4 files changed

+112
-40
lines changed

4 files changed

+112
-40
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollector.java

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.elasticsearch.search.aggregations.bucket;
2121

2222
import org.apache.lucene.index.LeafReaderContext;
23+
import org.apache.lucene.search.CollectionTerminatedException;
2324
import org.apache.lucene.search.DocIdSetIterator;
2425
import org.apache.lucene.search.MatchAllDocsQuery;
2526
import org.apache.lucene.search.Query;
@@ -168,32 +169,37 @@ public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
168169

169170
for (Entry entry : entries) {
170171
assert entry.docDeltas.size() > 0 : "segment should have at least one document to replay, got 0";
171-
final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
172-
DocIdSetIterator scoreIt = null;
173-
if (needsScores) {
174-
Scorer scorer = weight.scorer(entry.context);
175-
// We don't need to check if the scorer is null
176-
// since we are sure that there are documents to replay (entry.docDeltas is not empty).
177-
scoreIt = scorer.iterator();
178-
leafCollector.setScorer(scorer);
179-
}
180-
final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
181-
final PackedLongValues.Iterator buckets = entry.buckets.iterator();
182-
int doc = 0;
183-
for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
184-
doc += docDeltaIterator.next();
185-
final long bucket = buckets.next();
186-
final long rebasedBucket = hash.find(bucket);
187-
if (rebasedBucket != -1) {
188-
if (needsScores) {
189-
if (scoreIt.docID() < doc) {
190-
scoreIt.advance(doc);
172+
try {
173+
final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
174+
DocIdSetIterator scoreIt = null;
175+
if (needsScores) {
176+
Scorer scorer = weight.scorer(entry.context);
177+
// We don't need to check if the scorer is null
178+
// since we are sure that there are documents to replay (entry.docDeltas is not empty).
179+
scoreIt = scorer.iterator();
180+
leafCollector.setScorer(scorer);
181+
}
182+
final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
183+
final PackedLongValues.Iterator buckets = entry.buckets.iterator();
184+
int doc = 0;
185+
for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
186+
doc += docDeltaIterator.next();
187+
final long bucket = buckets.next();
188+
final long rebasedBucket = hash.find(bucket);
189+
if (rebasedBucket != -1) {
190+
if (needsScores) {
191+
if (scoreIt.docID() < doc) {
192+
scoreIt.advance(doc);
193+
}
194+
// aggregations should only be replayed on matching documents
195+
assert scoreIt.docID() == doc;
191196
}
192-
// aggregations should only be replayed on matching documents
193-
assert scoreIt.docID() == doc;
197+
leafCollector.collect(doc, rebasedBucket);
194198
}
195-
leafCollector.collect(doc, rebasedBucket);
196199
}
200+
} catch (CollectionTerminatedException e) {
201+
// collection was terminated prematurely
202+
// continue with the following leaf
197203
}
198204
}
199205
collector.postCollection();

server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.elasticsearch.search.aggregations.bucket.sampler;
2020

2121
import org.apache.lucene.index.LeafReaderContext;
22+
import org.apache.lucene.search.CollectionTerminatedException;
2223
import org.apache.lucene.search.LeafCollector;
2324
import org.apache.lucene.search.Scorable;
2425
import org.apache.lucene.search.ScoreDoc;
@@ -254,25 +255,30 @@ public void setScorer(Scorable scorer) throws IOException {
254255
}
255256

256257
public void replayRelatedMatches(List<ScoreDoc> sd) throws IOException {
257-
final LeafBucketCollector leafCollector = deferred.getLeafCollector(readerContext);
258-
leafCollector.setScorer(this);
258+
try {
259+
final LeafBucketCollector leafCollector = deferred.getLeafCollector(readerContext);
260+
leafCollector.setScorer(this);
259261

260-
currentScore = 0;
261-
currentDocId = -1;
262-
if (maxDocId < 0) {
263-
return;
264-
}
265-
for (ScoreDoc scoreDoc : sd) {
266-
// Doc ids from TopDocCollector are root-level Reader so
267-
// need rebasing
268-
int rebased = scoreDoc.doc - readerContext.docBase;
269-
if ((rebased >= 0) && (rebased <= maxDocId)) {
270-
currentScore = scoreDoc.score;
271-
currentDocId = rebased;
272-
// We stored the bucket ID in Lucene's shardIndex property
273-
// for convenience.
274-
leafCollector.collect(rebased, scoreDoc.shardIndex);
262+
currentScore = 0;
263+
currentDocId = -1;
264+
if (maxDocId < 0) {
265+
return;
266+
}
267+
for (ScoreDoc scoreDoc : sd) {
268+
// Doc ids from TopDocCollector are root-level Reader so
269+
// need rebasing
270+
int rebased = scoreDoc.doc - readerContext.docBase;
271+
if ((rebased >= 0) && (rebased <= maxDocId)) {
272+
currentScore = scoreDoc.score;
273+
currentDocId = rebased;
274+
// We stored the bucket ID in Lucene's shardIndex property
275+
// for convenience.
276+
leafCollector.collect(rebased, scoreDoc.shardIndex);
277+
}
275278
}
279+
} catch (CollectionTerminatedException e) {
280+
// collection was terminated prematurely
281+
// continue with the following leaf
276282
}
277283
}
278284

server/src/test/java/org/elasticsearch/search/aggregations/metrics/MaxIT.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.script.Script;
2525
import org.elasticsearch.script.ScriptType;
2626
import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin;
27+
import org.elasticsearch.search.aggregations.Aggregator;
2728
import org.elasticsearch.search.aggregations.InternalAggregation;
2829
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
2930
import org.elasticsearch.search.aggregations.bucket.global.Global;
@@ -411,4 +412,34 @@ public void testEarlyTermination() throws Exception {
411412
assertThat(count.getName(), equalTo("count"));
412413
assertThat(count.getValue(), equalTo(20L));
413414
}
415+
416+
public void testNestedEarlyTermination() throws Exception {
417+
for (Aggregator.SubAggCollectionMode collectionMode : Aggregator.SubAggCollectionMode.values()) {
418+
SearchResponse searchResponse = client().prepareSearch("idx")
419+
.setTrackTotalHits(false)
420+
.setQuery(matchAllQuery())
421+
.addAggregation(max("max").field("values"))
422+
.addAggregation(count("count").field("values"))
423+
.addAggregation(terms("terms").field("value")
424+
.collectMode(collectionMode)
425+
.subAggregation(max("sub_max").field("invalid")))
426+
.get();
427+
428+
Max max = searchResponse.getAggregations().get("max");
429+
assertThat(max, notNullValue());
430+
assertThat(max.getName(), equalTo("max"));
431+
assertThat(max.getValue(), equalTo(12.0));
432+
433+
ValueCount count = searchResponse.getAggregations().get("count");
434+
assertThat(count.getName(), equalTo("count"));
435+
assertThat(count.getValue(), equalTo(20L));
436+
437+
Terms terms = searchResponse.getAggregations().get("terms");
438+
assertThat(terms.getBuckets().size(), equalTo(10));
439+
for (Terms.Bucket b : terms.getBuckets()) {
440+
InternalMax subMax = b.getAggregations().get("sub_max");
441+
assertThat(subMax.getValue(), equalTo(Double.NEGATIVE_INFINITY));
442+
}
443+
}
444+
}
414445
}

server/src/test/java/org/elasticsearch/search/aggregations/metrics/MinIT.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.script.Script;
2525
import org.elasticsearch.script.ScriptType;
2626
import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin;
27+
import org.elasticsearch.search.aggregations.Aggregator;
2728
import org.elasticsearch.search.aggregations.InternalAggregation;
2829
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
2930
import org.elasticsearch.search.aggregations.bucket.global.Global;
@@ -423,4 +424,32 @@ public void testEarlyTermination() throws Exception {
423424
assertThat(count.getName(), equalTo("count"));
424425
assertThat(count.getValue(), equalTo(20L));
425426
}
427+
428+
public void testNestedEarlyTermination() throws Exception {
429+
SearchResponse searchResponse = client().prepareSearch("idx")
430+
.setTrackTotalHits(false)
431+
.setQuery(matchAllQuery())
432+
.addAggregation(min("min").field("values"))
433+
.addAggregation(count("count").field("values"))
434+
.addAggregation(terms("terms").field("value")
435+
.collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST)
436+
.subAggregation(min("sub_min").field("invalid")))
437+
.get();
438+
439+
Min min = searchResponse.getAggregations().get("min");
440+
assertThat(min, notNullValue());
441+
assertThat(min.getName(), equalTo("min"));
442+
assertThat(min.getValue(), equalTo(2.0));
443+
444+
ValueCount count = searchResponse.getAggregations().get("count");
445+
assertThat(count.getName(), equalTo("count"));
446+
assertThat(count.getValue(), equalTo(20L));
447+
448+
Terms terms = searchResponse.getAggregations().get("terms");
449+
assertThat(terms.getBuckets().size(), equalTo(10));
450+
for (Terms.Bucket b : terms.getBuckets()) {
451+
InternalMin subMin = b.getAggregations().get("sub_min");
452+
assertThat(subMin.getValue(), equalTo(Double.POSITIVE_INFINITY));
453+
}
454+
}
426455
}

0 commit comments

Comments
 (0)