Commit 7cf1708
Optimize sort on numeric long and date fields. (#49732)
This rewrites a sort on a numeric long or date field as a `DistanceFeatureQuery`, which can efficiently skip non-competitive blocks and segments of documents. Depending on the dataset, the speedup can be 2-10x. The optimization can be disabled by setting the system property `es.search.rewrite_sort` to `false`. The optimization is skipped when 50% or more of an index's documents share the same value in the sort field.

The optimization is achieved through:

1. Rewriting the sort as a `DistanceFeatureQuery`, which can efficiently skip non-competitive blocks and segments of documents (a sketch of this rewrite follows below).
2. Sorting segments according to the primary numeric sort field (#44021), which allows skipping non-competitive segments entirely.
3. Using a collector manager. When we optimize the sort, we order segments by their min/max values. As a single collector expects segments in index order, we cannot use one collector across the reordered segments; with a collector manager, a dedicated collector is created for every segment.
4. Using Lucene's shared TopFieldCollector manager. This collector manager is able to exchange the minimum competitive score between collectors, which allows us to efficiently skip whole segments that don't contain competitive scores.
5. When an index is force-merged to a single segment, interleaving old and new segments (#48533) allows for this optimization as well, as blocks with non-competitive docs can be skipped.

Backport of #48804

Co-authored-by: Jim Ferenczi <[email protected]>
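To make step 1 concrete, here is a minimal sketch of how an ascending sort on a long field maps onto Lucene's distance-feature query. This is not the commit's actual code: the helper name and pivot distance are illustrative, and the field must be indexed with both points and doc values for the query to work.

    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.search.Query;

    // Sketch: anchoring the query at Long.MIN_VALUE makes the smallest field
    // values score highest, so collecting the top-N by score returns the first
    // N documents in ascending field order, while Lucene skips blocks whose
    // value ranges cannot produce a competitive hit.
    static Query rewriteAscendingLongSort(String field) {
        // weight 1.0f leaves scores unscaled; pivotDistance 1 makes the score
        // strictly decrease as values move away from the origin
        return LongPoint.newDistanceFeatureQuery(field, 1.0f, Long.MIN_VALUE, 1L);
    }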
1 parent 27d45c9 commit 7cf1708

File tree

12 files changed: +769 −297 lines changed

buildSrc/src/main/groovy/org/elasticsearch/gradle/BuildPlugin.groovy

+3

@@ -728,6 +728,9 @@ class BuildPlugin implements Plugin<Project> {
         // TODO: remove this once ctx isn't added to update script params in 7.0
         test.systemProperty 'es.scripting.update.ctx_in_params', 'false'

+        // TODO: remove this property in 8.0
+        test.systemProperty 'es.search.rewrite_sort', 'true'
+
         // TODO: remove this once cname is prepended to transport.publish_address by default in 8.0
         test.systemProperty 'es.transport.cname_in_publish_address', 'true'
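The Gradle change pins the property to `true` for tests; at runtime the optimization can be switched off by starting the node with `-Des.search.rewrite_sort=false`. A hedged sketch of reading such a kill switch once at class-load time (the constant name is ours, not from the commit):

    // Defaults to true, so the optimization stays on unless the JVM is
    // started with -Des.search.rewrite_sort=false
    private static final boolean SYS_PROP_REWRITE_SORT =
        Boolean.parseBoolean(System.getProperty("es.search.rewrite_sort", "true"));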

docs/reference/search/profile.asciidoc

+20 −41

@@ -153,16 +153,9 @@ The API returns the following result:
        "rewrite_time": 51443,
        "collector": [
           {
-             "name": "CancellableCollector",
-             "reason": "search_cancelled",
-             "time_in_nanos": "304311",
-             "children": [
-                {
-                   "name": "SimpleTopScoreDocCollector",
-                   "reason": "search_top_hits",
-                   "time_in_nanos": "32273"
-                }
-             ]
+             "name": "SimpleTopScoreDocCollector",
+             "reason": "search_top_hits",
+             "time_in_nanos": "32273"
           }
        ]
     }
@@ -445,16 +438,9 @@ Looking at the previous example:
 --------------------------------------------------
 "collector": [
     {
-       "name": "CancellableCollector",
-       "reason": "search_cancelled",
-       "time_in_nanos": "304311",
-       "children": [
-          {
-             "name": "SimpleTopScoreDocCollector",
-             "reason": "search_top_hits",
-             "time_in_nanos": "32273"
-          }
-       ]
+       "name": "SimpleTopScoreDocCollector",
+       "reason": "search_top_hits",
+       "time_in_nanos": "32273"
    }
 ]
 --------------------------------------------------
@@ -657,33 +643,26 @@ The API returns the following result:
        "rewrite_time": 7208,
        "collector": [
           {
-             "name": "CancellableCollector",
-             "reason": "search_cancelled",
-             "time_in_nanos": 2390,
+             "name": "MultiCollector",
+             "reason": "search_multi",
+             "time_in_nanos": 1820,
              "children": [
                 {
-                   "name": "MultiCollector",
-                   "reason": "search_multi",
-                   "time_in_nanos": 1820,
+                   "name": "FilteredCollector",
+                   "reason": "search_post_filter",
+                   "time_in_nanos": 7735,
                    "children": [
                       {
-                         "name": "FilteredCollector",
-                         "reason": "search_post_filter",
-                         "time_in_nanos": 7735,
-                         "children": [
-                            {
-                               "name": "SimpleTopScoreDocCollector",
-                               "reason": "search_top_hits",
-                               "time_in_nanos": 1328
-                            }
-                         ]
-                      },
-                      {
-                         "name": "MultiBucketCollector: [[my_scoped_agg, my_global_agg]]",
-                         "reason": "aggregation",
-                         "time_in_nanos": 8273
+                         "name": "SimpleTopScoreDocCollector",
+                         "reason": "search_top_hits",
+                         "time_in_nanos": 1328
                       }
                    ]
+                },
+                {
+                   "name": "MultiBucketCollector: [[my_scoped_agg, my_global_agg]]",
+                   "reason": "aggregation",
+                   "time_in_nanos": 8273
                 }
              ]
           }

server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java

+86 −42

@@ -27,6 +27,7 @@
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.CollectorManager;
 import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
@@ -35,24 +36,31 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryCache;
 import org.apache.lucene.search.QueryCachingPolicy;
+import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.CombinedBitSet;
 import org.apache.lucene.util.SparseFixedBitSet;
+import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
+import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.dfs.AggregatedDfs;
 import org.elasticsearch.search.profile.Timer;
 import org.elasticsearch.search.profile.query.ProfileWeight;
 import org.elasticsearch.search.profile.query.QueryProfileBreakdown;
 import org.elasticsearch.search.profile.query.QueryProfiler;
 import org.elasticsearch.search.profile.query.QueryTimingType;
+import org.elasticsearch.search.query.QuerySearchResult;

 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Set;
@@ -131,12 +139,86 @@ public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws
         }
     }

+    private void checkCancelled() {
+        if (checkCancelled != null) {
+            checkCancelled.run();
+        }
+    }
+
+    public void search(List<LeafReaderContext> leaves, Weight weight, CollectorManager manager,
+                       QuerySearchResult result, DocValueFormat[] formats, TotalHits totalHits) throws IOException {
+        final List<Collector> collectors = new ArrayList<>(leaves.size());
+        for (LeafReaderContext ctx : leaves) {
+            final Collector collector = manager.newCollector();
+            searchLeaf(ctx, weight, collector);
+            collectors.add(collector);
+        }
+        TopFieldDocs mergedTopDocs = (TopFieldDocs) manager.reduce(collectors);
+        // Lucene sets shards indexes during merging of topDocs from different collectors
+        // We need to reset shard index; ES will set shard index later during reduce stage
+        for (ScoreDoc scoreDoc : mergedTopDocs.scoreDocs) {
+            scoreDoc.shardIndex = -1;
+        }
+        if (totalHits != null) { // we have already precalculated totalHits for the whole index
+            mergedTopDocs = new TopFieldDocs(totalHits, mergedTopDocs.scoreDocs, mergedTopDocs.fields);
+        }
+        result.topDocs(new TopDocsAndMaxScore(mergedTopDocs, Float.NaN), formats);
+    }
+
     @Override
     protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
-        final Weight cancellableWeight;
-        if (checkCancelled != null) {
-            cancellableWeight = new Weight(weight.getQuery()) {
+        for (LeafReaderContext ctx : leaves) { // search each subreader
+            searchLeaf(ctx, weight, collector);
+        }
+    }
+
+    /**
+     * Lower-level search API.
+     *
+     * {@link LeafCollector#collect(int)} is called for every matching document in
+     * the provided <code>ctx</code>.
+     */
+    private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector) throws IOException {
+        checkCancelled();
+        weight = wrapWeight(weight);
+        final LeafCollector leafCollector;
+        try {
+            leafCollector = collector.getLeafCollector(ctx);
+        } catch (CollectionTerminatedException e) {
+            // there is no doc of interest in this reader context
+            // continue with the following leaf
+            return;
+        }
+        Bits liveDocs = ctx.reader().getLiveDocs();
+        BitSet liveDocsBitSet = getSparseBitSetOrNull(liveDocs);
+        if (liveDocsBitSet == null) {
+            BulkScorer bulkScorer = weight.bulkScorer(ctx);
+            if (bulkScorer != null) {
+                try {
+                    bulkScorer.score(leafCollector, liveDocs);
+                } catch (CollectionTerminatedException e) {
+                    // collection was terminated prematurely
+                    // continue with the following leaf
+                }
+            }
+        } else {
+            // if the role query result set is sparse then we should use the SparseFixedBitSet for advancing:
+            Scorer scorer = weight.scorer(ctx);
+            if (scorer != null) {
+                try {
+                    intersectScorerAndBitSet(scorer, liveDocsBitSet, leafCollector,
+                        checkCancelled == null ? () -> { } : checkCancelled);
+                } catch (CollectionTerminatedException e) {
+                    // collection was terminated prematurely
+                    // continue with the following leaf
+                }
+            }
+        }
+    }

+    private Weight wrapWeight(Weight weight) {
+        if (checkCancelled != null) {
+            return new Weight(weight.getQuery()) {
                 @Override
                 public void extractTerms(Set<Term> terms) {
                     throw new UnsupportedOperationException();
@@ -168,48 +250,10 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
                 }
             };
         } else {
-            cancellableWeight = weight;
+            return weight;
         }
-        searchInternal(leaves, cancellableWeight, collector);
     }

-    private void searchInternal(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
-        for (LeafReaderContext ctx : leaves) { // search each subreader
-            final LeafCollector leafCollector;
-            try {
-                leafCollector = collector.getLeafCollector(ctx);
-            } catch (CollectionTerminatedException e) {
-                // there is no doc of interest in this reader context
-                // continue with the following leaf
-                continue;
-            }
-            Bits liveDocs = ctx.reader().getLiveDocs();
-            BitSet liveDocsBitSet = getSparseBitSetOrNull(liveDocs);
-            if (liveDocsBitSet == null) {
-                BulkScorer bulkScorer = weight.bulkScorer(ctx);
-                if (bulkScorer != null) {
-                    try {
-                        bulkScorer.score(leafCollector, liveDocs);
-                    } catch (CollectionTerminatedException e) {
-                        // collection was terminated prematurely
-                        // continue with the following leaf
-                    }
-                }
-            } else {
-                // if the role query result set is sparse then we should use the SparseFixedBitSet for advancing:
-                Scorer scorer = weight.scorer(ctx);
-                if (scorer != null) {
-                    try {
-                        intersectScorerAndBitSet(scorer, liveDocsBitSet, leafCollector,
-                            checkCancelled == null ? () -> {} : checkCancelled);
-                    } catch (CollectionTerminatedException e) {
-                        // collection was terminated prematurely
-                        // continue with the following leaf
-                    }
-                }
-            }
-        }
-    }

     private static BitSet getSparseBitSetOrNull(Bits liveDocs) {
         if (liveDocs instanceof SparseFixedBitSet) {
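The new `search(List<LeafReaderContext>, Weight, CollectorManager, ...)` overload above creates one collector per segment and merges them in `reduce`. A minimal sketch of building a manager a caller could pass in, using Lucene's shared top-field collector manager (Lucene 8.x API; the sort field and sizes are illustrative, not taken from this commit):

    import org.apache.lucene.search.CollectorManager;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.TopFieldCollector;
    import org.apache.lucene.search.TopFieldDocs;

    // A shared manager lets the per-segment collectors exchange the current
    // "bottom" competitive sort value, so a segment whose value range is
    // entirely non-competitive can be skipped once the top-N queue is full.
    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    CollectorManager<TopFieldCollector, TopFieldDocs> manager =
        TopFieldCollector.createSharedManager(
            sort,  // order to collect by
            10,    // numHits: size of the top-N queue
            null,  // after: no search_after cursor
            10);   // totalHitsThreshold: allow skipping once this many hits are counted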

server/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java

−2

@@ -49,8 +49,6 @@ public class CollectorResult implements ToXContentObject, Writeable {
     public static final String REASON_SEARCH_POST_FILTER = "search_post_filter";
     public static final String REASON_SEARCH_MIN_SCORE = "search_min_score";
     public static final String REASON_SEARCH_MULTI = "search_multi";
-    public static final String REASON_SEARCH_TIMEOUT = "search_timeout";
-    public static final String REASON_SEARCH_CANCELLED = "search_cancelled";
     public static final String REASON_AGGREGATION = "aggregation";
     public static final String REASON_AGGREGATION_GLOBAL = "aggregation_global";

server/src/main/java/org/elasticsearch/search/query/CancellableCollector.java

−53
This file was deleted.
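CancellableCollector wrapped the other collectors and checked for cancellation whenever a new leaf was pulled. With this commit the check moves into `ContextIndexSearcher#searchLeaf` (the `checkCancelled()` call in the diff above), so cancellation no longer needs a collector of its own and no longer appears in profile output. A hedged sketch of wiring the callback; the setter exists on `ContextIndexSearcher`, while the task check shown is illustrative:

    // The searcher runs this callback before processing each leaf and
    // aborts the whole search by throwing, replacing the collector wrapper
    searcher.setCheckCancelled(() -> {
        if (task.isCancelled()) {
            throw new TaskCancelledException("cancelled");
        }
    });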

server/src/main/java/org/elasticsearch/search/query/QueryCollectorContext.java

−15

@@ -28,16 +28,13 @@
 import org.elasticsearch.common.lucene.MinimumScoreCollector;
 import org.elasticsearch.common.lucene.search.FilteredCollector;
 import org.elasticsearch.search.profile.query.InternalProfileCollector;
-import org.elasticsearch.tasks.TaskCancelledException;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import java.util.function.BooleanSupplier;

-import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_CANCELLED;
 import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_MIN_SCORE;
 import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_MULTI;
 import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_POST_FILTER;
@@ -150,18 +147,6 @@ protected InternalProfileCollector createWithProfiler(InternalProfileCollector i
         };
     }

-    /**
-     * Creates a collector that throws {@link TaskCancelledException} if the search is cancelled
-     */
-    static QueryCollectorContext createCancellableCollectorContext(BooleanSupplier cancelled) {
-        return new QueryCollectorContext(REASON_SEARCH_CANCELLED) {
-            @Override
-            Collector create(Collector in) throws IOException {
-                return new CancellableCollector(cancelled, in);
-            }
-        };
-    }
-
     /**
      * Creates collector limiting the collection to the first <code>numHits</code> documents
      */
