Skip to content

Commit 98d9032

Browse files
committed
Optimize numeric sort on match_all queries
This is a follow-up to elastic#48804, where we rewrite numeric sorts to use the DistanceFeatureQuery. This change adds another optimization: if the query is a `match_all`, then instead of using a distance feature query we simply extract the documents directly from the indexed points and terminate early as soon as enough docs have been collected. This optimization has a constant cost, so it can be considerably faster than the other optimization: it only needs to visit the BKD-tree of a field and can terminate as soon as it has collected the number of requested hits. Note that this optimization only works when the query is a `match_all` and the numeric sort order is not reversed. The PR is in a WIP state: it needs more tests and some cleanup, but I wanted to open it early in order to discuss whether we should pursue this path or not.
1 parent 602e589 commit 98d9032

File tree

5 files changed

+444
-106
lines changed

5 files changed

+444
-106
lines changed

server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ public MetadataFieldMapper getDefault(MappedFieldType fieldType, ParserContext c
130130
}
131131
}
132132

133-
static final class SeqNoFieldType extends SimpleMappedFieldType {
133+
public static final class SeqNoFieldType extends SimpleMappedFieldType {
134134

135135
SeqNoFieldType() {
136136
}

server/src/main/java/org/elasticsearch/search/query/QueryPhase.java

Lines changed: 134 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.elasticsearch.index.IndexSortConfig;
5656
import org.elasticsearch.index.mapper.MappedFieldType;
5757
import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
58+
import org.elasticsearch.index.mapper.SeqNoFieldMapper.SeqNoFieldType;
5859
import org.elasticsearch.search.DocValueFormat;
5960
import org.elasticsearch.search.SearchPhase;
6061
import org.elasticsearch.search.SearchService;
@@ -226,22 +227,27 @@ static boolean executeInternal(SearchContext searchContext) throws QueryPhaseExe
226227

227228
CheckedConsumer<List<LeafReaderContext>, IOException> leafSorter = l -> {};
228229
// try to rewrite numeric or date sort to the optimized distanceFeatureQuery
229-
if ((searchContext.sort() != null) && SYS_PROP_REWRITE_SORT) {
230-
Query rewrittenQuery = tryRewriteLongSort(searchContext, searcher.getIndexReader(), query, hasFilterCollector);
231-
if (rewrittenQuery != null) {
232-
query = rewrittenQuery;
233-
// modify sorts: add sort on _score as 1st sort, and move the sort on the original field as the 2nd sort
234-
SortField[] oldSortFields = searchContext.sort().sort.getSort();
235-
DocValueFormat[] oldFormats = searchContext.sort().formats;
236-
SortField[] newSortFields = new SortField[oldSortFields.length + 1];
237-
DocValueFormat[] newFormats = new DocValueFormat[oldSortFields.length + 1];
238-
newSortFields[0] = SortField.FIELD_SCORE;
239-
newFormats[0] = DocValueFormat.RAW;
240-
System.arraycopy(oldSortFields, 0, newSortFields, 1, oldSortFields.length);
241-
System.arraycopy(oldFormats, 0, newFormats, 1, oldFormats.length);
242-
sortAndFormatsForRewrittenNumericSort = searchContext.sort(); // stash SortAndFormats to restore it later
243-
searchContext.sort(new SortAndFormats(new Sort(newSortFields), newFormats));
244-
leafSorter = createLeafSorter(oldSortFields[0]);
230+
if (canOptimizeSort(searchContext, hasFilterCollector) && SYS_PROP_REWRITE_SORT) {
231+
Query matchAllQuery = tryRewriteMatchAllSort(searchContext);
232+
if (matchAllQuery != null) {
233+
query = matchAllQuery;
234+
} else {
235+
Query distanceQuery = tryRewriteLongSort(searchContext);
236+
if (distanceQuery != null) {
237+
query = distanceQuery;
238+
// modify sorts: add sort on _score as 1st sort, and move the sort on the original field as the 2nd sort
239+
SortField[] oldSortFields = searchContext.sort().sort.getSort();
240+
DocValueFormat[] oldFormats = searchContext.sort().formats;
241+
SortField[] newSortFields = new SortField[oldSortFields.length + 1];
242+
DocValueFormat[] newFormats = new DocValueFormat[oldSortFields.length + 1];
243+
newSortFields[0] = SortField.FIELD_SCORE;
244+
newFormats[0] = DocValueFormat.RAW;
245+
System.arraycopy(oldSortFields, 0, newSortFields, 1, oldSortFields.length);
246+
System.arraycopy(oldFormats, 0, newFormats, 1, oldFormats.length);
247+
sortAndFormatsForRewrittenNumericSort = searchContext.sort(); // stash SortAndFormats to restore it later
248+
searchContext.sort(new SortAndFormats(new Sort(newSortFields), newFormats));
249+
leafSorter = createLeafSorter(oldSortFields[0]);
250+
}
245251
}
246252
}
247253

@@ -405,60 +411,129 @@ private static boolean searchWithCollectorManager(SearchContext searchContext, C
405411
return false; // no rescoring when sorting by field
406412
}
407413

408-
private static Query tryRewriteLongSort(SearchContext searchContext, IndexReader reader,
409-
Query query, boolean hasFilterCollector) throws IOException {
410-
if (searchContext.searchAfter() != null) return null; //TODO: handle sort optimization with search after
411-
if (searchContext.scrollContext() != null) return null;
412-
if (searchContext.collapse() != null) return null;
413-
if (searchContext.trackScores()) return null;
414-
if (searchContext.aggregations() != null) return null;
415-
Sort sort = searchContext.sort().sort;
416-
SortField sortField = sort.getSort()[0];
417-
if (SortField.Type.LONG.equals(IndexSortConfig.getSortFieldType(sortField)) == false) return null;
418-
419-
// check if this is a field of type Long or Date, that is indexed and has doc values
420-
String fieldName = sortField.getField();
421-
if (fieldName == null) return null; // happens when _score or _doc is the 1st sort field
422-
if (searchContext.mapperService() == null) return null; // mapperService can be null in tests
423-
final MappedFieldType fieldType = searchContext.mapperService().fullName(fieldName);
424-
if (fieldType == null) return null; // for unmapped fields, default behaviour depending on "unmapped_type" flag
425-
if ((fieldType.typeName().equals("long") == false) && (fieldType instanceof DateFieldType == false)) return null;
426-
if (fieldType.indexOptions() == IndexOptions.NONE) return null; //TODO: change to pointDataDimensionCount() when implemented
427-
if (fieldType.hasDocValues() == false) return null;
414+
/**
415+
* Returns true if the query can be optimized using the primary numeric field sort, false
416+
* otherwise.
417+
*/
418+
private static boolean canOptimizeSort(SearchContext context, boolean hasFilterCollector) throws IOException {
419+
if (context.mapperService() == null
420+
|| context.sort() == null
421+
|| context.collapse() != null
422+
|| context.aggregations() != null) {
423+
return false;
424+
}
425+
426+
final Sort sort = context.sort().sort;
427+
final IndexReader reader = context.searcher().getIndexReader();
428+
final int numDocs = reader.numDocs();
429+
final SortField sortField = sort.getSort()[0];
430+
final MappedFieldType fieldType = context.mapperService().fullName(sortField.getField());
431+
final String fieldName = fieldType.name();
432+
433+
if (canEarlyTerminate(reader, context.sort())) {
434+
// disable this optimization if index sorting matches the query sort since it's already optimized
435+
// by index searcher.
436+
return false;
437+
}
428438

439+
if (SortField.Type.LONG.equals(IndexSortConfig.getSortFieldType(sortField)) == false
440+
|| fieldType == null
441+
|| fieldType.indexOptions() == IndexOptions.NONE) {
442+
// we only handle indexed long field in this optimization
443+
return false;
444+
}
445+
446+
if ((fieldType.typeName().equals("long") == false)
447+
&& (fieldType instanceof DateFieldType == false)
448+
&& fieldType instanceof SeqNoFieldType == false) {
449+
return false;
450+
}
429451

430452
// check that all sorts are actual document fields or _doc
431453
for (int i = 1; i < sort.getSort().length; i++) {
432454
SortField sField = sort.getSort()[i];
433455
String sFieldName = sField.getField();
434456
if (sFieldName == null) {
435-
if (SortField.FIELD_DOC.equals(sField) == false) return null;
457+
if (SortField.FIELD_DOC.equals(sField) == false) {
458+
return false;
459+
}
436460
} else {
437461
//TODO: find out how to cover _script sort that don't use _score
438-
if (searchContext.mapperService().fullName(sFieldName) == null) return null; // could be _script sort that uses _score
462+
if (context.mapperService().fullName(sFieldName) == null) {
463+
return false; // could be _script sort that uses _score
464+
}
439465
}
440466
}
441467

442-
// check that setting of missing values allows optimization
443-
if (sortField.getMissingValue() == null) return null;
444-
Long missingValue = (Long) sortField.getMissingValue();
445-
boolean missingValuesAccordingToSort = (sortField.getReverse() && (missingValue == Long.MIN_VALUE)) ||
446-
((sortField.getReverse() == false) && (missingValue == Long.MAX_VALUE));
447-
if (missingValuesAccordingToSort == false) return null;
448-
449-
int docCount = PointValues.getDocCount(reader, fieldName);
450-
// is not worth to run optimization on small index
451-
if (docCount <= 512) return null;
468+
// check if the optimization makes sense with the track_total_hits setting
469+
if (context.trackTotalHitsUpTo() == Integer.MAX_VALUE) {
470+
// with filter, we can't pre-calculate hitsCount, we need to explicitly calculate them => optimization does't make sense
471+
if (hasFilterCollector) {
472+
return false;
473+
}
474+
// if we can't pre-calculate hitsCount based on the query type, optimization doesn't make sense
475+
if (shortcutTotalHitCount(reader, context.query()) == -1) {
476+
return false;
477+
}
478+
}
452479

453480
// check for multiple values
454-
if (PointValues.size(reader, fieldName) != docCount) return null; //TODO: handle multiple values
481+
if (PointValues.size(reader, fieldName) != numDocs) {
482+
return false; // TODO: handle multiple values
483+
}
455484

456-
// check if the optimization makes sense with the track_total_hits setting
457-
if (searchContext.trackTotalHitsUpTo() == Integer.MAX_VALUE) {
458-
// with filter, we can't pre-calculate hitsCount, we need to explicitly calculate them => optimization does't make sense
459-
if (hasFilterCollector) return null;
460-
// if we can't pre-calculate hitsCount based on the query type, optimization does't make sense
461-
if (shortcutTotalHitCount(reader, query) == -1) return null;
485+
return true;
486+
}
487+
488+
/**
489+
* Return a {@link SortedLongQuery} if the request is a {@link MatchAllDocsQuery}
490+
* sorted by a numeric long field and <code>null</code> otherwise.
491+
*/
492+
private static Query tryRewriteMatchAllSort(SearchContext context) {
493+
final Query query = context.query();
494+
final SortField sortField = context.sort().sort.getSort()[0];
495+
if ((query != null && query instanceof MatchAllDocsQuery == false)
496+
|| context.sort().sort.getSort().length > 1
497+
|| sortField.getReverse()
498+
|| (context.searchAfter() != null && context.searchAfter().fields.length > 1)) {
499+
return null;
500+
}
501+
502+
FieldDoc lastDoc = null;
503+
if (context.searchAfter() != null) {
504+
lastDoc = context.searchAfter();
505+
} else if (context.scrollContext() != null) {
506+
lastDoc = (FieldDoc) context.scrollContext().lastEmittedDoc;
507+
}
508+
long minValue = Long.MIN_VALUE;
509+
int minDoc = Integer.MAX_VALUE;
510+
if (lastDoc != null) {
511+
minValue = (long) lastDoc.fields[0];
512+
minDoc = lastDoc.doc;
513+
}
514+
return new SortedLongQuery(sortField.getField(), context.size(), minValue, minDoc);
515+
}
516+
517+
/**
518+
* Rewrite the query into a {@link LongPoint#newDistanceFeatureQuery(String, float, long, long)}
519+
* if the request is sorted by a numeric long field and <code>null</code> otherwise.
520+
*/
521+
private static Query tryRewriteLongSort(SearchContext context) throws IOException {
522+
final IndexReader reader = context.searcher().getIndexReader();
523+
final SortField sortField = context.sort().sort.getSort()[0];
524+
final MappedFieldType fieldType = context.mapperService().fullName(sortField.getField());
525+
final String fieldName = sortField.getField();
526+
if (fieldType.hasDocValues() == false
527+
|| context.searchAfter() != null // TODO: handle optimization with search after
528+
|| context.scrollContext() != null // TODO: handle optimization with scroll
529+
|| context.trackScores()) {
530+
return null;
531+
}
532+
533+
int docCount = PointValues.getDocCount(reader, fieldType.name());
534+
// it is not worth running the optimization on a small index
535+
if (docCount <= 512) {
536+
return null;
462537
}
463538

464539
byte[] minValueBytes = PointValues.getMinPackedValue(reader, fieldName);
@@ -471,7 +546,9 @@ private static Query tryRewriteLongSort(SearchContext searchContext, IndexReader
471546
if (minValue == maxValue) {
472547
rewrittenQuery = new DocValuesFieldExistsQuery(fieldName);
473548
} else {
474-
if (indexFieldHasDuplicateData(reader, fieldName)) return null;
549+
if (indexFieldHasDuplicateData(reader, fieldName)) {
550+
return null;
551+
}
475552
long origin = (sortField.getReverse()) ? maxValue : minValue;
476553
long pivotDistance = (maxValue - minValue) >>> 1; // division by 2 on the unsigned representation to avoid overflow
477554
if (pivotDistance == 0) { // 0 if maxValue = (minValue + 1)
@@ -480,7 +557,7 @@ private static Query tryRewriteLongSort(SearchContext searchContext, IndexReader
480557
rewrittenQuery = LongPoint.newDistanceFeatureQuery(sortField.getField(), 1, origin, pivotDistance);
481558
}
482559
rewrittenQuery = new BooleanQuery.Builder()
483-
.add(query, BooleanClause.Occur.FILTER) // filter for original query
560+
.add(context.query(), BooleanClause.Occur.FILTER) // filter for original query
484561
.add(rewrittenQuery, BooleanClause.Occur.SHOULD) //should for rewrittenQuery
485562
.build();
486563
return rewrittenQuery;

0 commit comments

Comments
 (0)