-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Small speed up of date_histogram with children #67012
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,9 +26,11 @@ | |
import org.apache.lucene.search.CollectionTerminatedException; | ||
import org.apache.lucene.search.IndexOrDocValuesQuery; | ||
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; | ||
import org.apache.lucene.search.LeafCollector; | ||
import org.apache.lucene.search.MatchAllDocsQuery; | ||
import org.apache.lucene.search.PointRangeQuery; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.Scorable; | ||
import org.apache.lucene.search.ScoreMode; | ||
import org.apache.lucene.search.TotalHitCountCollector; | ||
import org.apache.lucene.search.Weight; | ||
|
@@ -203,19 +205,16 @@ public static FilterByFilter buildFilterOrderOrNull( | |
if (parent != null) { | ||
return null; | ||
} | ||
if (factories.countAggregators() != 0) { | ||
return null; | ||
} | ||
if (otherBucketKey != null) { | ||
return null; | ||
} | ||
return new FiltersAggregator.FilterByFilter( | ||
name, | ||
factories, | ||
keys, | ||
filters, | ||
keyed, | ||
context, | ||
parent, | ||
cardinality, | ||
metadata | ||
); | ||
|
@@ -289,15 +288,15 @@ public static class FilterByFilter extends FiltersAggregator { | |
|
||
private FilterByFilter( | ||
String name, | ||
AggregatorFactories factories, | ||
String[] keys, | ||
Query[] filters, | ||
boolean keyed, | ||
AggregationContext context, | ||
Aggregator parent, | ||
CardinalityUpperBound cardinality, | ||
Map<String, Object> metadata | ||
) throws IOException { | ||
super(name, AggregatorFactories.EMPTY, keys, keyed, null, context, parent, cardinality, metadata); | ||
super(name, factories, keys, keyed, null, context, null, cardinality, metadata); | ||
this.filters = filters; | ||
this.profiling = context.profiling(); | ||
} | ||
|
@@ -378,9 +377,26 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket | |
// the filter doesn't match any docs | ||
continue; | ||
} | ||
TotalHitCountCollector collector = new TotalHitCountCollector(); | ||
scorer.score(collector, live); | ||
incrementBucketDocCount(filterOrd, collector.getTotalHits()); | ||
if (sub == LeafBucketCollector.NO_OP_COLLECTOR) { | ||
TotalHitCountCollector collector = new TotalHitCountCollector(); | ||
scorer.score(collector, live); | ||
incrementBucketDocCount(filterOrd, collector.getTotalHits()); | ||
} else { | ||
/* | ||
* We can use the pre-constructed leaf collector for the first | ||
* filter. But it is almost certainly not going to work for the | ||
* second one because it'll try to "go backwards". So we build | ||
* a new one for each subsequent filter. | ||
*/ | ||
// NOCOMMIT switch to the block collection mechanism if we have more than a single sub-agg instead of this. | ||
/* | ||
* The switch is better because we can better estimate the costs. | ||
*/ | ||
LeafBucketCollector filterLeafCollector = filterOrd == 0 ? sub : collectableSubAggregators.getLeafCollector(ctx); | ||
SubCollector collector = new SubCollector(filterOrd, filterLeafCollector); | ||
scorer.score(collector, live); | ||
incrementBucketDocCount(filterOrd, collector.total); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's possible that this could actually be slower than the standard execution mechanism. I wonder if we need an escape hatch so folks can dodge this mechanism if it proves a bad idea. Also: there is another possible implementation here that involves collecting a block of matches for each filter and then running all of the children in parallel. I'm not sure if it'll be faster or not. It kind of depends on the speed of iterating the doc values. It is a little more complex so I didn't do it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After taking a couple of days away I think the block matching mechanism is probably better here. It's much simpler to estimate the cost. Also - I'd love to know why the old way is so slow - the block-based mechanism feels like it'd be fast and it reads quite similarly to the Compatible mechanism. I think the big difference is that we don't get to join the main query with the filter query. So it can't skip matches effectively. Maybe. I've got to play. |
||
} | ||
// Throwing this exception is how we communicate to the collection mechanism that we don't need the segment. | ||
throw new CollectionTerminatedException(); | ||
|
@@ -397,6 +413,31 @@ public void collectDebugInfo(BiConsumer<String, Object> add) { | |
add.accept("estimate_cost_time", estimateCostTime); | ||
} | ||
} | ||
|
||
/** | ||
* Adapts filter-by-filter hit collection into sub-aggregations. | ||
*/ | ||
private static class SubCollector implements LeafCollector { | ||
// Ordinal passed to the sub-collector as the second argument of collect(doc, filterOrd) for every hit. | ||
private final int filterOrd; | ||
// Collector that every scorer and every collected document is forwarded to. | ||
private final LeafBucketCollector sub; | ||
// Number of documents seen by collect(int); incremented once per call and never reset. | ||
private int total; | ||
|
||
// Stores the filter ordinal and the delegate collector; performs no other work. | ||
SubCollector(int filterOrd, LeafBucketCollector sub) { | ||
this.filterOrd = filterOrd; | ||
this.sub = sub; | ||
} | ||
|
||
// Hands the scorer straight through to the delegate collector. | ||
@Override | ||
public void setScorer(Scorable scorer) throws IOException { | ||
sub.setScorer(scorer); | ||
} | ||
|
||
// Counts the hit, then forwards the document to the delegate under this collector's filter ordinal. | ||
@Override | ||
public void collect(int doc) throws IOException { | ||
total++; | ||
sub.collect(doc, filterOrd); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The equality check feels brittle to me here. I wonder if we should put a method on
LeafBucketCollector
to return a boolean if it's going to do any work, and check that. Might be premature abstraction on my part though, what do you think?