21
21
22
22
import org .apache .logging .log4j .LogManager ;
23
23
import org .apache .logging .log4j .Logger ;
24
+ import org .apache .lucene .document .LongPoint ;
25
+ import org .apache .lucene .index .IndexOptions ;
24
26
import org .apache .lucene .index .IndexReader ;
25
27
import org .apache .lucene .index .LeafReaderContext ;
28
+ import org .apache .lucene .index .PointValues ;
26
29
import org .apache .lucene .queries .MinDocQuery ;
27
30
import org .apache .lucene .queries .SearchAfterSortedDocQuery ;
28
31
import org .apache .lucene .search .BooleanClause ;
29
32
import org .apache .lucene .search .BooleanQuery ;
30
33
import org .apache .lucene .search .Collector ;
31
34
import org .apache .lucene .search .ConstantScoreQuery ;
35
+ import org .apache .lucene .search .DocValuesFieldExistsQuery ;
32
36
import org .apache .lucene .search .FieldDoc ;
33
37
import org .apache .lucene .search .IndexSearcher ;
34
38
import org .apache .lucene .search .MatchAllDocsQuery ;
35
39
import org .apache .lucene .search .Query ;
36
40
import org .apache .lucene .search .ScoreDoc ;
37
41
import org .apache .lucene .search .Sort ;
42
+ import org .apache .lucene .search .SortField ;
38
43
import org .apache .lucene .search .TopDocs ;
44
+ import org .apache .lucene .search .TopFieldDocs ;
39
45
import org .apache .lucene .search .TotalHits ;
40
46
import org .elasticsearch .action .search .SearchTask ;
47
+ import org .elasticsearch .common .Booleans ;
41
48
import org .elasticsearch .common .lucene .Lucene ;
42
49
import org .elasticsearch .common .lucene .search .TopDocsAndMaxScore ;
43
50
import org .elasticsearch .common .util .concurrent .QueueResizingEsThreadPoolExecutor ;
51
+ import org .elasticsearch .index .IndexSortConfig ;
52
+ import org .elasticsearch .index .mapper .MappedFieldType ;
53
+ import org .elasticsearch .index .mapper .DateFieldMapper .DateFieldType ;
44
54
import org .elasticsearch .search .DocValueFormat ;
45
55
import org .elasticsearch .search .SearchPhase ;
46
56
import org .elasticsearch .search .SearchService ;
57
67
import org .elasticsearch .tasks .TaskCancelledException ;
58
68
import org .elasticsearch .threadpool .ThreadPool ;
59
69
70
+ import java .io .IOException ;
71
+ import java .util .Arrays ;
60
72
import java .util .LinkedList ;
61
73
import java .util .concurrent .ExecutorService ;
62
74
import java .util .function .Consumer ;
67
79
import static org .elasticsearch .search .query .QueryCollectorContext .createMinScoreCollectorContext ;
68
80
import static org .elasticsearch .search .query .QueryCollectorContext .createMultiCollectorContext ;
69
81
import static org .elasticsearch .search .query .TopDocsCollectorContext .createTopDocsCollectorContext ;
82
+ import static org .elasticsearch .search .query .TopDocsCollectorContext .shortcutTotalHitCount ;
70
83
71
84
72
85
/**
75
88
*/
76
89
public class QueryPhase implements SearchPhase {
77
90
private static final Logger LOGGER = LogManager .getLogger (QueryPhase .class );
91
+ public static final boolean SYS_PROP_LONG_SORT_OPTIMIZED =
92
+ Booleans .parseBoolean (System .getProperty ("es.search.long_sort_optimized" , "false" ));
78
93
79
94
private final AggregationPhase aggregationPhase ;
80
95
private final SuggestPhase suggestPhase ;
@@ -133,6 +148,7 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep
133
148
static boolean execute (SearchContext searchContext ,
134
149
final IndexSearcher searcher ,
135
150
Consumer <Runnable > checkCancellationSetter ) throws QueryPhaseExecutionException {
151
+ SortAndFormats sortAndFormatsForRewrittenNumericSort = null ;
136
152
final IndexReader reader = searcher .getIndexReader ();
137
153
QuerySearchResult queryResult = searchContext .queryResult ();
138
154
queryResult .searchTimedOut (false );
@@ -204,6 +220,25 @@ static boolean execute(SearchContext searchContext,
204
220
hasFilterCollector = true ;
205
221
}
206
222
223
+ // try to rewrite numeric or date sort to the optimized distanceFeatureQuery
224
+ if ((searchContext .sort () != null ) && SYS_PROP_LONG_SORT_OPTIMIZED ) {
225
+ Query rewrittenQuery = tryRewriteLongSort (searchContext , searcher .getIndexReader (), query , hasFilterCollector );
226
+ if (rewrittenQuery != null ) {
227
+ query = rewrittenQuery ;
228
+ // modify sorts: add sort on _score as 1st sort, and move the sort on the original field as the 2nd sort
229
+ SortField [] oldSortFields = searchContext .sort ().sort .getSort ();
230
+ DocValueFormat [] oldFormats = searchContext .sort ().formats ;
231
+ SortField [] newSortFields = new SortField [oldSortFields .length + 1 ];
232
+ DocValueFormat [] newFormats = new DocValueFormat [oldSortFields .length + 1 ];
233
+ newSortFields [0 ] = SortField .FIELD_SCORE ;
234
+ newFormats [0 ] = DocValueFormat .RAW ;
235
+ System .arraycopy (oldSortFields , 0 , newSortFields , 1 , oldSortFields .length );
236
+ System .arraycopy (oldFormats , 0 , newFormats , 1 , oldFormats .length );
237
+ sortAndFormatsForRewrittenNumericSort = searchContext .sort (); // stash SortAndFormats to restore it later
238
+ searchContext .sort (new SortAndFormats (new Sort (newSortFields ), newFormats ));
239
+ }
240
+ }
241
+
207
242
boolean timeoutSet = scrollContext == null && searchContext .timeout () != null &&
208
243
searchContext .timeout ().equals (SearchService .NO_TIMEOUT ) == false ;
209
244
@@ -290,6 +325,13 @@ static boolean execute(SearchContext searchContext,
290
325
for (QueryCollectorContext ctx : collectors ) {
291
326
ctx .postProcess (result );
292
327
}
328
+
329
+ // if we rewrote numeric long or date sort, restore fieldDocs based on the original sort
330
+ if (sortAndFormatsForRewrittenNumericSort != null ) {
331
+ searchContext .sort (sortAndFormatsForRewrittenNumericSort ); // restore SortAndFormats
332
+ restoreTopFieldDocs (result , sortAndFormatsForRewrittenNumericSort );
333
+ }
334
+
293
335
ExecutorService executor = searchContext .indexShard ().getThreadPool ().executor (ThreadPool .Names .SEARCH );
294
336
if (executor instanceof QueueResizingEsThreadPoolExecutor ) {
295
337
QueueResizingEsThreadPoolExecutor rExecutor = (QueueResizingEsThreadPoolExecutor ) executor ;
@@ -306,6 +348,92 @@ static boolean execute(SearchContext searchContext,
306
348
}
307
349
}
308
350
351
+ private static Query tryRewriteLongSort (SearchContext searchContext , IndexReader reader ,
352
+ Query query , boolean hasFilterCollector ) throws IOException {
353
+ if (searchContext .searchAfter () != null ) return null ;
354
+ if (searchContext .scrollContext () != null ) return null ;
355
+ if (searchContext .collapse () != null ) return null ;
356
+ if (searchContext .trackScores ()) return null ;
357
+ if (searchContext .aggregations () != null ) return null ;
358
+ Sort sort = searchContext .sort ().sort ;
359
+ SortField sortField = sort .getSort ()[0 ];
360
+ if (SortField .Type .LONG .equals (IndexSortConfig .getSortFieldType (sortField )) == false ) return null ;
361
+
362
+ // check if this is a field of type Long or Date, that is indexed and has doc values
363
+ String fieldName = sortField .getField ();
364
+ if (fieldName == null ) return null ; // happens when _score or _doc is the 1st sort field
365
+ if (searchContext .mapperService () == null ) return null ; // mapperService can be null in tests
366
+ final MappedFieldType fieldType = searchContext .mapperService ().fullName (fieldName );
367
+ if (fieldType == null ) return null ; // for unmapped fields, default behaviour depending on "unmapped_type" flag
368
+ if ((fieldType .typeName ().equals ("long" ) == false ) && (fieldType instanceof DateFieldType == false )) return null ;
369
+ if (fieldType .indexOptions () == IndexOptions .NONE ) return null ; //TODO: change to pointDataDimensionCount() when implemented
370
+ if (fieldType .hasDocValues () == false ) return null ;
371
+
372
+ // check that all sorts are actual document fields or _doc
373
+ for (int i = 1 ; i < sort .getSort ().length ; i ++) {
374
+ SortField sField = sort .getSort ()[i ];
375
+ String sFieldName = sField .getField ();
376
+ if (sFieldName == null ) {
377
+ if (SortField .FIELD_DOC .equals (sField ) == false ) return null ;
378
+ } else {
379
+ if (searchContext .mapperService ().fullName (sFieldName ) == null ) return null ; // could be _script field that uses _score
380
+ }
381
+ }
382
+
383
+ // check that setting of missing values allows optimization
384
+ if (sortField .getMissingValue () == null ) return null ;
385
+ Long missingValue = (Long ) sortField .getMissingValue ();
386
+ boolean missingValuesAccordingToSort = (sortField .getReverse () && (missingValue == Long .MIN_VALUE )) ||
387
+ ((sortField .getReverse () == false ) && (missingValue == Long .MAX_VALUE ));
388
+ if (missingValuesAccordingToSort == false ) return null ;
389
+
390
+ // check for multiple values
391
+ if (PointValues .size (reader , fieldName ) != PointValues .getDocCount (reader , fieldName )) return null ; //TODO: handle multiple values
392
+
393
+ // check if the optimization makes sense with the track_total_hits setting
394
+ if (searchContext .trackTotalHitsUpTo () == Integer .MAX_VALUE ) {
395
+ // with filter, we can't pre-calculate hitsCount, we need to explicitly calculate them => optimization does't make sense
396
+ if (hasFilterCollector ) return null ;
397
+ // if we can't pre-calculate hitsCount based on the query type, optimization does't make sense
398
+ if (shortcutTotalHitCount (reader , query ) == -1 ) return null ;
399
+ }
400
+
401
+ byte [] minValueBytes = PointValues .getMinPackedValue (reader , fieldName );
402
+ byte [] maxValueBytes = PointValues .getMaxPackedValue (reader , fieldName );
403
+ if ((maxValueBytes == null ) || (minValueBytes == null )) return null ;
404
+ long minValue = LongPoint .decodeDimension (minValueBytes , 0 );
405
+ long maxValue = LongPoint .decodeDimension (maxValueBytes , 0 );
406
+
407
+ Query rewrittenQuery ;
408
+ if (minValue == maxValue ) {
409
+ rewrittenQuery = new DocValuesFieldExistsQuery (fieldName );
410
+ } else {
411
+ long origin = (sortField .getReverse ()) ? maxValue : minValue ;
412
+ long pivotDistance = (maxValue - minValue ) >>> 1 ; // division by 2 on the unsigned representation to avoid overflow
413
+ if (pivotDistance == 0 ) { // 0 if maxValue = (minValue + 1)
414
+ pivotDistance = 1 ;
415
+ }
416
+ rewrittenQuery = LongPoint .newDistanceFeatureQuery (sortField .getField (), 1 , origin , pivotDistance );
417
+ }
418
+ rewrittenQuery = new BooleanQuery .Builder ()
419
+ .add (query , BooleanClause .Occur .FILTER ) // filter for original query
420
+ .add (rewrittenQuery , BooleanClause .Occur .SHOULD ) //should for rewrittenQuery
421
+ .build ();
422
+ return rewrittenQuery ;
423
+ }
424
+
425
+ // Restore fieldsDocs to remove the first _score sort
426
+ // updating in place without creating new FieldDoc objects
427
+ static void restoreTopFieldDocs (QuerySearchResult result , SortAndFormats originalSortAndFormats ) {
428
+ TopDocs topDocs = result .topDocs ().topDocs ;
429
+ for (ScoreDoc scoreDoc : topDocs .scoreDocs ) {
430
+ FieldDoc fieldDoc = (FieldDoc ) scoreDoc ;
431
+ fieldDoc .fields = Arrays .copyOfRange (fieldDoc .fields , 1 , fieldDoc .fields .length );
432
+ }
433
+ TopFieldDocs newTopDocs = new TopFieldDocs (topDocs .totalHits , topDocs .scoreDocs , originalSortAndFormats .sort .getSort ());
434
+ result .topDocs (new TopDocsAndMaxScore (newTopDocs , Float .NaN ), originalSortAndFormats .formats );
435
+ }
436
+
309
437
/**
310
438
* Returns true if the provided <code>query</code> returns docs in index order (internal doc ids).
311
439
* @param query The query to execute
0 commit comments