54
54
import java .util .ArrayList ;
55
55
import java .util .Arrays ;
56
56
import java .util .Collections ;
57
+ import java .util .Comparator ;
57
58
import java .util .HashMap ;
58
59
import java .util .HashSet ;
59
60
import java .util .List ;
60
61
import java .util .Map ;
61
62
import java .util .Objects ;
62
63
import java .util .Set ;
63
64
import java .util .function .BiFunction ;
65
+ import java .util .stream .Collectors ;
64
66
65
67
import static java .util .stream .Collectors .toSet ;
66
68
@@ -366,36 +368,37 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
366
368
Set <QueryExtraction > extractions = new HashSet <>();
367
369
Set <String > seenRangeFields = new HashSet <>();
368
370
for (Result result : results ) {
369
- QueryExtraction [] t = result .extractions .toArray (new QueryExtraction [1 ]);
370
- if (result .extractions .size () == 1 && t [0 ].range != null ) {
371
- // In case of range queries each extraction does not simply increment the minimum_should_match
372
- // for that percolator query like for a term based extraction, so that can lead to more false
373
- // positives for percolator queries with range queries than term based queries.
374
- // The is because the way number fields are extracted from the document to be percolated.
375
- // Per field a single range is extracted and if a percolator query has two or more range queries
376
- // on the same field than the the minimum should match can be higher than clauses in the CoveringQuery.
377
- // Therefore right now the minimum should match is incremented once per number field when processing
378
- // the percolator query at index time.
379
- if (seenRangeFields .add (t [0 ].range .fieldName )) {
380
- msm += 1 ;
381
- }
382
- } else {
383
- // In case that there are duplicate query extractions we need to be careful with incrementing msm,
384
- // because that could lead to valid matches not becoming candidate matches:
385
- // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
386
- // doc: field: val1 val2 val3
387
- // So lets be protective and decrease the msm:
388
- int resultMsm = result .minimumShouldMatch ;
389
- for (QueryExtraction queryExtraction : result .extractions ) {
390
- if (extractions .contains (queryExtraction )) {
391
- // To protect against negative msm:
392
- // (sub results could consist out of disjunction and conjunction and
393
- // then we do not know which extraction contributed to msm)
394
- resultMsm = Math .max (0 , resultMsm - 1 );
371
+ // In case that there are duplicate query extractions we need to be careful with incrementing msm,
372
+ // because that could lead to valid matches not becoming candidate matches:
373
+ // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
374
+ // doc: field: val1 val2 val3
375
+ // So let's be protective and decrease the msm:
376
+ int resultMsm = result .minimumShouldMatch ;
377
+ for (QueryExtraction queryExtraction : result .extractions ) {
378
+ if (queryExtraction .range != null ) {
379
+ // In case of range queries each extraction does not simply increment the minimum_should_match
380
+ // for that percolator query like for a term based extraction, so that can lead to more false
381
+ // positives for percolator queries with range queries than term based queries.
382
+ // This is because of the way number fields are extracted from the document to be percolated.
383
+ // Per field a single range is extracted and if a percolator query has two or more range queries
384
+ // on the same field then the minimum should match can be higher than the number of clauses in the CoveringQuery.
385
+ // Therefore right now the minimum should match is incremented once per number field when processing
386
+ // the percolator query at index time.
387
+ if (seenRangeFields .add (queryExtraction .range .fieldName )) {
388
+ resultMsm = 1 ;
389
+ } else {
390
+ resultMsm = 0 ;
395
391
}
396
392
}
397
- msm += resultMsm ;
393
+
394
+ if (extractions .contains (queryExtraction )) {
395
+ // To protect against negative msm:
396
+ // (sub results could consist of disjunctions and conjunctions and
397
+ // then we do not know which extraction contributed to msm)
398
+ resultMsm = Math .max (0 , resultMsm - 1 );
399
+ }
398
400
}
401
+ msm += resultMsm ;
399
402
verified &= result .verified ;
400
403
matchAllDocs &= result .matchAllDocs ;
401
404
extractions .addAll (result .extractions );
@@ -518,8 +521,7 @@ private static BiFunction<Query, Version, Result> toParentBlockJoinQuery() {
518
521
private static Result handleDisjunction (List <Query > disjunctions , int requiredShouldClauses , boolean otherClauses ,
519
522
Version version ) {
520
523
// Keep track of the msm for each clause:
521
- int [] msmPerClause = new int [disjunctions .size ()];
522
- String [] rangeFieldNames = new String [disjunctions .size ()];
524
+ List <DisjunctionClause > clauses = new ArrayList <>(disjunctions .size ());
523
525
boolean verified = otherClauses == false ;
524
526
if (version .before (Version .V_6_1_0 )) {
525
527
verified &= requiredShouldClauses <= 1 ;
@@ -535,17 +537,14 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
535
537
}
536
538
int resultMsm = subResult .minimumShouldMatch ;
537
539
for (QueryExtraction extraction : subResult .extractions ) {
538
- if (terms .contains (extraction )) {
539
- resultMsm = Math .max (1 , resultMsm - 1 );
540
+ if (terms .add (extraction ) == false ) {
541
+ resultMsm = Math .max (0 , resultMsm - 1 );
540
542
}
541
543
}
542
- msmPerClause [i ] = resultMsm ;
543
- terms .addAll (subResult .extractions );
544
-
545
- QueryExtraction [] t = subResult .extractions .toArray (new QueryExtraction [1 ]);
546
- if (subResult .extractions .size () == 1 && t [0 ].range != null ) {
547
- rangeFieldNames [i ] = t [0 ].range .fieldName ;
548
- }
544
+ clauses .add (new DisjunctionClause (resultMsm , subResult .extractions .stream ()
545
+ .filter (extraction -> extraction .range != null )
546
+ .map (extraction -> extraction .range .fieldName )
547
+ .collect (toSet ())));
549
548
}
550
549
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses ;
551
550
@@ -554,15 +553,20 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
554
553
Set <String > seenRangeFields = new HashSet <>();
555
554
// Figure out what the combined msm is for this disjunction:
556
555
// (sum the lowest required clauses, otherwise we're too strict and queries may not match)
557
- Arrays .sort (msmPerClause );
558
- int limit = Math .min (msmPerClause .length , Math .max (1 , requiredShouldClauses ));
556
+ clauses = clauses .stream ()
557
+ .filter (o -> o .msm > 0 )
558
+ .sorted (Comparator .comparingInt (o -> o .msm ))
559
+ .collect (Collectors .toList ());
560
+ int limit = Math .min (clauses .size (), Math .max (1 , requiredShouldClauses ));
559
561
for (int i = 0 ; i < limit ; i ++) {
560
- if (rangeFieldNames [i ] != null ) {
561
- if (seenRangeFields .add (rangeFieldNames [i ])) {
562
- msm += 1 ;
562
+ if (clauses .get (i ).rangeFieldNames .isEmpty () == false ) {
563
+ for (String rangeField : clauses .get (i ).rangeFieldNames ) {
564
+ if (seenRangeFields .add (rangeField )) {
565
+ msm += 1 ;
566
+ }
563
567
}
564
568
} else {
565
- msm += msmPerClause [ i ] ;
569
+ msm += clauses . get ( i ). msm ;
566
570
}
567
571
}
568
572
} else {
@@ -575,6 +579,17 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
575
579
}
576
580
}
577
581
582
+ static class DisjunctionClause { // Per-clause bookkeeping used by handleDisjunction: one clause's msm together with the range fields it extracted.
583
+
584
+ final int msm ; // Clause msm as computed by the caller; handleDisjunction drops clauses whose msm is 0 and sorts the rest ascending by this value.
585
+ final Set <String > rangeFieldNames ; // Field names of this clause's range extractions; empty when the clause has no range extraction.
586
+
587
+ DisjunctionClause (int msm , Set <String > rangeFieldNames ) { // Plain value holder: both arguments are stored as given, with no validation.
588
+ this .msm = msm ;
589
+ this .rangeFieldNames = rangeFieldNames ; // Kept by reference (no defensive copy).
590
+ }
591
+ }
592
+
578
593
static Set <QueryExtraction > selectBestExtraction (Set <QueryExtraction > extractions1 , Set <QueryExtraction > extractions2 ) {
579
594
assert extractions1 != null || extractions2 != null ;
580
595
if (extractions1 == null ) {
0 commit comments