Skip to content

Commit 567a739

Browse files
Optimize sort on numeric long and date fields (#39770)
Optimize sort on numeric long and date fields, when the system property `es.search.long_sort_optimized` is true.
1 parent ad2e9fa commit 567a739

File tree

4 files changed

+387
-1
lines changed

4 files changed

+387
-1
lines changed

buildSrc/src/main/groovy/org/elasticsearch/gradle/BuildPlugin.groovy

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,9 @@ class BuildPlugin implements Plugin<Project> {
872872
// TODO: remove this once ctx isn't added to update script params in 7.0
873873
test.systemProperty 'es.scripting.update.ctx_in_params', 'false'
874874

875+
// TODO: remove when sort optimization is merged
876+
test.systemProperty 'es.search.long_sort_optimized', 'true'
877+
875878
test.testLogging { TestLoggingContainer logging ->
876879
logging.showExceptions = true
877880
logging.showCauses = true

server/src/main/java/org/elasticsearch/search/query/QueryPhase.java

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,26 +21,36 @@
2121

2222
import org.apache.logging.log4j.LogManager;
2323
import org.apache.logging.log4j.Logger;
24+
import org.apache.lucene.document.LongPoint;
25+
import org.apache.lucene.index.IndexOptions;
2426
import org.apache.lucene.index.IndexReader;
2527
import org.apache.lucene.index.LeafReaderContext;
28+
import org.apache.lucene.index.PointValues;
2629
import org.apache.lucene.queries.MinDocQuery;
2730
import org.apache.lucene.queries.SearchAfterSortedDocQuery;
2831
import org.apache.lucene.search.BooleanClause;
2932
import org.apache.lucene.search.BooleanQuery;
3033
import org.apache.lucene.search.Collector;
3134
import org.apache.lucene.search.ConstantScoreQuery;
35+
import org.apache.lucene.search.DocValuesFieldExistsQuery;
3236
import org.apache.lucene.search.FieldDoc;
3337
import org.apache.lucene.search.IndexSearcher;
3438
import org.apache.lucene.search.MatchAllDocsQuery;
3539
import org.apache.lucene.search.Query;
3640
import org.apache.lucene.search.ScoreDoc;
3741
import org.apache.lucene.search.Sort;
42+
import org.apache.lucene.search.SortField;
3843
import org.apache.lucene.search.TopDocs;
44+
import org.apache.lucene.search.TopFieldDocs;
3945
import org.apache.lucene.search.TotalHits;
4046
import org.elasticsearch.action.search.SearchTask;
47+
import org.elasticsearch.common.Booleans;
4148
import org.elasticsearch.common.lucene.Lucene;
4249
import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
4350
import org.elasticsearch.common.util.concurrent.QueueResizingEsThreadPoolExecutor;
51+
import org.elasticsearch.index.IndexSortConfig;
52+
import org.elasticsearch.index.mapper.MappedFieldType;
53+
import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
4454
import org.elasticsearch.search.DocValueFormat;
4555
import org.elasticsearch.search.SearchPhase;
4656
import org.elasticsearch.search.SearchService;
@@ -57,6 +67,8 @@
5767
import org.elasticsearch.tasks.TaskCancelledException;
5868
import org.elasticsearch.threadpool.ThreadPool;
5969

70+
import java.io.IOException;
71+
import java.util.Arrays;
6072
import java.util.LinkedList;
6173
import java.util.concurrent.ExecutorService;
6274
import java.util.function.Consumer;
@@ -67,6 +79,7 @@
6779
import static org.elasticsearch.search.query.QueryCollectorContext.createMinScoreCollectorContext;
6880
import static org.elasticsearch.search.query.QueryCollectorContext.createMultiCollectorContext;
6981
import static org.elasticsearch.search.query.TopDocsCollectorContext.createTopDocsCollectorContext;
82+
import static org.elasticsearch.search.query.TopDocsCollectorContext.shortcutTotalHitCount;
7083

7184

7285
/**
@@ -75,6 +88,8 @@
7588
*/
7689
public class QueryPhase implements SearchPhase {
7790
private static final Logger LOGGER = LogManager.getLogger(QueryPhase.class);
91+
// Feature flag for the long/date sort rewrite: read once, when this class is initialized, from the
// "es.search.long_sort_optimized" system property; treated as "false" when the property is not set.
public static final boolean SYS_PROP_LONG_SORT_OPTIMIZED =
92+
Booleans.parseBoolean(System.getProperty("es.search.long_sort_optimized", "false"));
7893

7994
private final AggregationPhase aggregationPhase;
8095
private final SuggestPhase suggestPhase;
@@ -133,6 +148,7 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep
133148
static boolean execute(SearchContext searchContext,
134149
final IndexSearcher searcher,
135150
Consumer<Runnable> checkCancellationSetter) throws QueryPhaseExecutionException {
151+
SortAndFormats sortAndFormatsForRewrittenNumericSort = null;
136152
final IndexReader reader = searcher.getIndexReader();
137153
QuerySearchResult queryResult = searchContext.queryResult();
138154
queryResult.searchTimedOut(false);
@@ -204,6 +220,25 @@ static boolean execute(SearchContext searchContext,
204220
hasFilterCollector = true;
205221
}
206222

223+
// try to rewrite numeric or date sort to the optimized distanceFeatureQuery
224+
if ((searchContext.sort() != null) && SYS_PROP_LONG_SORT_OPTIMIZED) {
225+
Query rewrittenQuery = tryRewriteLongSort(searchContext, searcher.getIndexReader(), query, hasFilterCollector);
226+
if (rewrittenQuery != null) {
227+
query = rewrittenQuery;
228+
// modify sorts: add sort on _score as 1st sort, and move the sort on the original field as the 2nd sort
229+
SortField[] oldSortFields = searchContext.sort().sort.getSort();
230+
DocValueFormat[] oldFormats = searchContext.sort().formats;
231+
SortField[] newSortFields = new SortField[oldSortFields.length + 1];
232+
DocValueFormat[] newFormats = new DocValueFormat[oldSortFields.length + 1];
233+
newSortFields[0] = SortField.FIELD_SCORE;
234+
newFormats[0] = DocValueFormat.RAW;
235+
System.arraycopy(oldSortFields, 0, newSortFields, 1, oldSortFields.length);
236+
System.arraycopy(oldFormats, 0, newFormats, 1, oldFormats.length);
237+
sortAndFormatsForRewrittenNumericSort = searchContext.sort(); // stash SortAndFormats to restore it later
238+
searchContext.sort(new SortAndFormats(new Sort(newSortFields), newFormats));
239+
}
240+
}
241+
207242
boolean timeoutSet = scrollContext == null && searchContext.timeout() != null &&
208243
searchContext.timeout().equals(SearchService.NO_TIMEOUT) == false;
209244

@@ -290,6 +325,13 @@ static boolean execute(SearchContext searchContext,
290325
for (QueryCollectorContext ctx : collectors) {
291326
ctx.postProcess(result);
292327
}
328+
329+
// if we rewrote numeric long or date sort, restore fieldDocs based on the original sort
330+
if (sortAndFormatsForRewrittenNumericSort != null) {
331+
searchContext.sort(sortAndFormatsForRewrittenNumericSort); // restore SortAndFormats
332+
restoreTopFieldDocs(result, sortAndFormatsForRewrittenNumericSort);
333+
}
334+
293335
ExecutorService executor = searchContext.indexShard().getThreadPool().executor(ThreadPool.Names.SEARCH);
294336
if (executor instanceof QueueResizingEsThreadPoolExecutor) {
295337
QueueResizingEsThreadPoolExecutor rExecutor = (QueueResizingEsThreadPoolExecutor) executor;
@@ -306,6 +348,92 @@ static boolean execute(SearchContext searchContext,
306348
}
307349
}
308350

351+
/**
 * Tries to build a replacement query for a search whose first sort field is a long or date field.
 *
 * The method returns {@code null} (meaning "do not rewrite") as soon as any of its preconditions fails:
 * search_after, scroll, collapse, score tracking or aggregations are present; the first sort field is not of
 * LONG sort type; the field is unmapped, is neither "long" nor a date field, is not indexed or has no doc values;
 * a secondary sort is neither _doc nor a mapped field; the missing value is unset or does not match the sort
 * direction; the field's point count differs from its doc count; exact total hits are requested but cannot be
 * pre-calculated; or the reader exposes no min/max packed value for the field.
 *
 * Otherwise the result is a boolean query that keeps the original query as a FILTER clause and adds, as a
 * SHOULD clause, either a doc-values-exists query (all values equal) or a distance feature query on the field.
 *
 * @param searchContext      search context providing the sort, the mapper service and the request options checked here
 * @param reader             index reader used for the point statistics and the total-hits shortcut
 * @param query              the original query, wrapped as a filter in the returned query
 * @param hasFilterCollector whether a filter collector is in use; prevents the rewrite when exact total hits are requested
 * @return the rewritten query, or {@code null} when the rewrite is not applicable
 * @throws IOException declared by this method; presumably raised by the reader lookups (TODO confirm)
 */
private static Query tryRewriteLongSort(SearchContext searchContext, IndexReader reader,
352+
Query query, boolean hasFilterCollector) throws IOException {
353+
// bail out for request features that this rewrite does not handle
if (searchContext.searchAfter() != null) return null;
354+
if (searchContext.scrollContext() != null) return null;
355+
if (searchContext.collapse() != null) return null;
356+
if (searchContext.trackScores()) return null;
357+
if (searchContext.aggregations() != null) return null;
358+
Sort sort = searchContext.sort().sort;
359+
// only the first (primary) sort field drives the rewrite
SortField sortField = sort.getSort()[0];
360+
if (SortField.Type.LONG.equals(IndexSortConfig.getSortFieldType(sortField)) == false) return null;
361+
362+
// check if this is a field of type Long or Date, that is indexed and has doc values
363+
String fieldName = sortField.getField();
364+
if (fieldName == null) return null; // happens when _score or _doc is the 1st sort field
365+
if (searchContext.mapperService() == null) return null; // mapperService can be null in tests
366+
final MappedFieldType fieldType = searchContext.mapperService().fullName(fieldName);
367+
if (fieldType == null) return null; // for unmapped fields, default behaviour depending on "unmapped_type" flag
368+
if ((fieldType.typeName().equals("long") == false) && (fieldType instanceof DateFieldType == false)) return null;
369+
if (fieldType.indexOptions() == IndexOptions.NONE) return null; //TODO: change to pointDataDimensionCount() when implemented
370+
if (fieldType.hasDocValues() == false) return null;
371+
372+
// check that all sorts are actual document fields or _doc
373+
for (int i = 1; i < sort.getSort().length; i++) {
374+
SortField sField = sort.getSort()[i];
375+
String sFieldName = sField.getField();
376+
if (sFieldName == null) {
377+
if (SortField.FIELD_DOC.equals(sField) == false) return null;
378+
} else {
379+
if (searchContext.mapperService().fullName(sFieldName) == null) return null; // could be _script field that uses _score
380+
}
381+
}
382+
383+
// check that setting of missing values allows optimization:
// Long.MIN_VALUE is required for a reversed sort and Long.MAX_VALUE for a non-reversed one
// (presumably so that documents without a value sort last - TODO confirm)
384+
if (sortField.getMissingValue() == null) return null;
385+
Long missingValue = (Long) sortField.getMissingValue();
386+
boolean missingValuesAccordingToSort = (sortField.getReverse() && (missingValue == Long.MIN_VALUE)) ||
387+
((sortField.getReverse() == false) && (missingValue == Long.MAX_VALUE));
388+
if (missingValuesAccordingToSort == false) return null;
389+
390+
// check for multiple values: more points than documents means at least one document has several values
391+
if (PointValues.size(reader, fieldName) != PointValues.getDocCount(reader, fieldName)) return null; //TODO: handle multiple values
392+
393+
// check if the optimization makes sense with the track_total_hits setting
394+
if (searchContext.trackTotalHitsUpTo() == Integer.MAX_VALUE) {
395+
// with filter, we can't pre-calculate hitsCount, we need to explicitly calculate them => optimization doesn't make sense
396+
if (hasFilterCollector) return null;
397+
// if we can't pre-calculate hitsCount based on the query type, optimization doesn't make sense
398+
if (shortcutTotalHitCount(reader, query) == -1) return null;
399+
}
400+
401+
// read the smallest and largest indexed values of the field; either is null when none is available
byte[] minValueBytes = PointValues.getMinPackedValue(reader, fieldName);
402+
byte[] maxValueBytes = PointValues.getMaxPackedValue(reader, fieldName);
403+
if ((maxValueBytes == null) || (minValueBytes == null)) return null;
404+
long minValue = LongPoint.decodeDimension(minValueBytes, 0);
405+
long maxValue = LongPoint.decodeDimension(maxValueBytes, 0);
406+
407+
Query rewrittenQuery;
408+
if (minValue == maxValue) {
409+
// every indexed value is the same, so only the presence of a value is matched
rewrittenQuery = new DocValuesFieldExistsQuery(fieldName);
410+
} else {
411+
// the origin is the end of the value range that the sort direction starts from
long origin = (sortField.getReverse()) ? maxValue : minValue;
412+
long pivotDistance = (maxValue - minValue) >>> 1; // division by 2 on the unsigned representation to avoid overflow
413+
if (pivotDistance == 0) { // 0 if maxValue = (minValue + 1)
414+
pivotDistance = 1;
415+
}
416+
rewrittenQuery = LongPoint.newDistanceFeatureQuery(sortField.getField(), 1, origin, pivotDistance);
417+
}
418+
rewrittenQuery = new BooleanQuery.Builder()
419+
.add(query, BooleanClause.Occur.FILTER) // filter for original query
420+
.add(rewrittenQuery, BooleanClause.Occur.SHOULD) //should for rewrittenQuery
421+
.build();
422+
return rewrittenQuery;
423+
}
424+
425+
// Restores the field docs of the result by removing the leading _score sort value from each of them,
426+
// updating in place without creating new FieldDoc objects.
// The top docs are then re-registered on the result with the original sort fields and formats and a NaN max score.
// Every entry of scoreDocs is cast to FieldDoc, so the result must come from a field-sorted search.
427+
static void restoreTopFieldDocs(QuerySearchResult result, SortAndFormats originalSortAndFormats) {
428+
TopDocs topDocs = result.topDocs().topDocs;
429+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
430+
FieldDoc fieldDoc = (FieldDoc) scoreDoc;
431+
// drop element 0 and keep the remaining sort values in their original order
fieldDoc.fields = Arrays.copyOfRange(fieldDoc.fields, 1, fieldDoc.fields.length);
432+
}
433+
// same total hits and same (mutated) scoreDocs array, now described by the original sort fields
TopFieldDocs newTopDocs = new TopFieldDocs(topDocs.totalHits, topDocs.scoreDocs, originalSortAndFormats.sort.getSort());
434+
result.topDocs(new TopDocsAndMaxScore(newTopDocs, Float.NaN), originalSortAndFormats.formats);
435+
}
436+
309437
/**
310438
* Returns true if the provided <code>query</code> returns docs in index order (internal doc ids).
311439
* @param query The query to execute

0 commit comments

Comments
 (0)