Skip to content

Commit 204f402

Browse files
committed
percolator: Do not take duplicate query extractions into account for minimum_should_match attribute
If a percolator query contains duplicate query clauses somewhere in the query tree then when these clauses are extracted then they should not affect the msm. This can lead a percolator query that should be a valid match not become a candidate match, because at query time, the msm that is being used by the CoveringQuery would never match with the msm used at index time. Closes #28315
1 parent 1d01bcf commit 204f402

File tree

4 files changed

+207
-2
lines changed

4 files changed

+207
-2
lines changed

modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,21 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
380380
msm += 1;
381381
}
382382
} else {
383-
msm += result.minimumShouldMatch;
383+
// In case that there are duplicate query extractions we need to be careful with incrementing msm,
384+
// because that could lead to valid matches not becoming candidate matches:
385+
// query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
386+
// doc: field: val1 val2 val3
387+
// So lets be protective and decrease the msm:
388+
int resultMsm = result.minimumShouldMatch;
389+
for (QueryExtraction queryExtraction : result.extractions) {
390+
if (extractions.contains(queryExtraction)) {
391+
// To protect against negative msm:
392+
// (sub results could consist out of disjunction and conjunction and
393+
// then we do not know which extraction contributed to msm)
394+
resultMsm = Math.max(0, resultMsm - 1);
395+
}
396+
}
397+
msm += resultMsm;
384398
}
385399
verified &= result.verified;
386400
matchAllDocs &= result.matchAllDocs;
@@ -519,10 +533,16 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
519533
if (subResult.matchAllDocs) {
520534
numMatchAllClauses++;
521535
}
536+
int resultMsm = subResult.minimumShouldMatch;
537+
for (QueryExtraction extraction : subResult.extractions) {
538+
if (terms.contains(extraction)) {
539+
resultMsm = Math.max(1, resultMsm - 1);
540+
}
541+
}
542+
msmPerClause[i] = resultMsm;
522543
terms.addAll(subResult.extractions);
523544

524545
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]);
525-
msmPerClause[i] = subResult.minimumShouldMatch;
526546
if (subResult.extractions.size() == 1 && t[0].range != null) {
527547
rangeFieldNames[i] = t[0].range.fieldName;
528548
}

modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
import org.elasticsearch.Version;
7373
import org.elasticsearch.common.CheckedFunction;
7474
import org.elasticsearch.common.bytes.BytesArray;
75+
import org.elasticsearch.common.bytes.BytesReference;
7576
import org.elasticsearch.common.compress.CompressedXContent;
7677
import org.elasticsearch.common.settings.Settings;
7778
import org.elasticsearch.common.xcontent.XContentFactory;
@@ -622,6 +623,55 @@ public void testPercolateSmallAndLargeDocument() throws Exception {
622623
}
623624
}
624625

626+
public void testDuplicatedClauses() throws Exception {
627+
List<ParseContext.Document> docs = new ArrayList<>();
628+
629+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
630+
BooleanQuery.Builder builder1 = new BooleanQuery.Builder();
631+
builder1.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
632+
builder1.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
633+
builder.add(builder1.build(), BooleanClause.Occur.MUST);
634+
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
635+
builder2.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
636+
builder2.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
637+
builder.add(builder2.build(), BooleanClause.Occur.MUST);
638+
addQuery(builder.build(), docs);
639+
640+
builder = new BooleanQuery.Builder()
641+
.setMinimumNumberShouldMatch(2);
642+
builder1 = new BooleanQuery.Builder();
643+
builder1.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
644+
builder1.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
645+
builder.add(builder1.build(), BooleanClause.Occur.SHOULD);
646+
builder2 = new BooleanQuery.Builder();
647+
builder2.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
648+
builder2.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
649+
builder.add(builder2.build(), BooleanClause.Occur.SHOULD);
650+
BooleanQuery.Builder builder3 = new BooleanQuery.Builder();
651+
builder3.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST);
652+
builder3.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST);
653+
builder.add(builder3.build(), BooleanClause.Occur.SHOULD);
654+
addQuery(builder.build(), docs);
655+
656+
indexWriter.addDocuments(docs);
657+
indexWriter.close();
658+
directoryReader = DirectoryReader.open(directory);
659+
IndexSearcher shardSearcher = newSearcher(directoryReader);
660+
shardSearcher.setQueryCache(null);
661+
662+
Version v = Version.CURRENT;
663+
List<BytesReference> sources = Collections.singletonList(new BytesArray("{}"));
664+
665+
MemoryIndex memoryIndex = new MemoryIndex();
666+
memoryIndex.addField("field", "value1 value2 value3", new WhitespaceAnalyzer());
667+
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
668+
PercolateQuery query = (PercolateQuery) fieldType.percolateQuery("_name", queryStore, sources, percolateSearcher, v);
669+
TopDocs topDocs = shardSearcher.search(query, 10, new Sort(SortField.FIELD_DOC), true, true);
670+
assertEquals(2L, topDocs.totalHits);
671+
assertEquals(0, topDocs.scoreDocs[0].doc);
672+
assertEquals(1, topDocs.scoreDocs[1].doc);
673+
}
674+
625675
private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
626676
boolean requireScore = randomBoolean();
627677
IndexSearcher percolateSearcher = memoryIndex.createSearcher();

modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,10 @@
100100
import java.util.List;
101101
import java.util.Map;
102102
import java.util.function.Function;
103+
import java.util.stream.Collectors;
103104

104105
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
106+
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
105107
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
106108
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
107109
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
@@ -850,6 +852,79 @@ public void testEncodeRange() {
850852
}
851853
}
852854

855+
public void testDuplicatedClauses() throws Exception {
856+
addQueryFieldMappings();
857+
858+
QueryBuilder qb = boolQuery()
859+
.must(boolQuery().must(termQuery("field", "value1")).must(termQuery("field", "value2")))
860+
.must(boolQuery().must(termQuery("field", "value2")).must(termQuery("field", "value3")));
861+
ParsedDocument doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
862+
XContentFactory.jsonBuilder().startObject()
863+
.field(fieldName, qb)
864+
.endObject().bytes(),
865+
XContentType.JSON));
866+
867+
List<String> values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
868+
.map(f -> f.binaryValue().utf8ToString())
869+
.sorted()
870+
.collect(Collectors.toList());
871+
assertThat(values.size(), equalTo(3));
872+
assertThat(values.get(0), equalTo("field\0value1"));
873+
assertThat(values.get(1), equalTo("field\0value2"));
874+
assertThat(values.get(2), equalTo("field\0value3"));
875+
int msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
876+
assertThat(msm, equalTo(3));
877+
878+
qb = boolQuery()
879+
.must(boolQuery().must(termQuery("field", "value1")).must(termQuery("field", "value2")))
880+
.must(boolQuery().must(termQuery("field", "value2")).must(termQuery("field", "value3")))
881+
.must(boolQuery().must(termQuery("field", "value3")).must(termQuery("field", "value4")))
882+
.must(boolQuery().should(termQuery("field", "value4")).should(termQuery("field", "value5")));
883+
doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
884+
XContentFactory.jsonBuilder().startObject()
885+
.field(fieldName, qb)
886+
.endObject().bytes(),
887+
XContentType.JSON));
888+
889+
values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
890+
.map(f -> f.binaryValue().utf8ToString())
891+
.sorted()
892+
.collect(Collectors.toList());
893+
assertThat(values.size(), equalTo(5));
894+
assertThat(values.get(0), equalTo("field\0value1"));
895+
assertThat(values.get(1), equalTo("field\0value2"));
896+
assertThat(values.get(2), equalTo("field\0value3"));
897+
assertThat(values.get(3), equalTo("field\0value4"));
898+
assertThat(values.get(4), equalTo("field\0value5"));
899+
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
900+
assertThat(msm, equalTo(4));
901+
902+
qb = boolQuery()
903+
.minimumShouldMatch(3)
904+
.should(boolQuery().should(termQuery("field", "value1")).should(termQuery("field", "value2")))
905+
.should(boolQuery().should(termQuery("field", "value2")).should(termQuery("field", "value3")))
906+
.should(boolQuery().should(termQuery("field", "value3")).should(termQuery("field", "value4")))
907+
.should(boolQuery().should(termQuery("field", "value4")).should(termQuery("field", "value5")));
908+
doc = mapperService.documentMapper("doc").parse(SourceToParse.source("test", "doc", "1",
909+
XContentFactory.jsonBuilder().startObject()
910+
.field(fieldName, qb)
911+
.endObject().bytes(),
912+
XContentType.JSON));
913+
914+
values = Arrays.stream(doc.rootDoc().getFields(fieldType.queryTermsField.name()))
915+
.map(f -> f.binaryValue().utf8ToString())
916+
.sorted()
917+
.collect(Collectors.toList());
918+
assertThat(values.size(), equalTo(5));
919+
assertThat(values.get(0), equalTo("field\0value1"));
920+
assertThat(values.get(1), equalTo("field\0value2"));
921+
assertThat(values.get(2), equalTo("field\0value3"));
922+
assertThat(values.get(3), equalTo("field\0value4"));
923+
assertThat(values.get(4), equalTo("field\0value5"));
924+
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
925+
assertThat(msm, equalTo(3));
926+
}
927+
853928
private static byte[] subByteArray(byte[] source, int offset, int length) {
854929
return Arrays.copyOfRange(source, offset, offset + length);
855930
}

modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,66 @@ public void testPointRangeQuerySelectRanges() {
11081108
assertEquals("_field1", new ArrayList<>(result.extractions).get(1).range.fieldName);
11091109
}
11101110

1111+
public void testExtractQueryMetadata_duplicatedClauses() {
1112+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
1113+
builder.add(
1114+
new BooleanQuery.Builder()
1115+
.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST)
1116+
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
1117+
.build(),
1118+
BooleanClause.Occur.MUST
1119+
);
1120+
builder.add(
1121+
new BooleanQuery.Builder()
1122+
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
1123+
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
1124+
.build(),
1125+
BooleanClause.Occur.MUST
1126+
);
1127+
builder.add(
1128+
new BooleanQuery.Builder()
1129+
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
1130+
.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST)
1131+
.build(),
1132+
BooleanClause.Occur.MUST
1133+
);
1134+
Result result = analyze(builder.build(), Version.CURRENT);
1135+
assertThat(result.verified, is(true));
1136+
assertThat(result.matchAllDocs, is(false));
1137+
assertThat(result.minimumShouldMatch, equalTo(4));
1138+
assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),
1139+
new Term("field", "value3"), new Term("field", "value4"));
1140+
1141+
builder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(2);
1142+
builder.add(
1143+
new BooleanQuery.Builder()
1144+
.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST)
1145+
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
1146+
.build(),
1147+
BooleanClause.Occur.SHOULD
1148+
);
1149+
builder.add(
1150+
new BooleanQuery.Builder()
1151+
.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST)
1152+
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
1153+
.build(),
1154+
BooleanClause.Occur.SHOULD
1155+
);
1156+
builder.add(
1157+
new BooleanQuery.Builder()
1158+
.add(new TermQuery(new Term("field", "value3")), BooleanClause.Occur.MUST)
1159+
.add(new TermQuery(new Term("field", "value4")), BooleanClause.Occur.MUST)
1160+
.build(),
1161+
BooleanClause.Occur.SHOULD
1162+
);
1163+
result = analyze(builder.build(), Version.CURRENT);
1164+
assertThat(result.verified, is(true));
1165+
assertThat(result.matchAllDocs, is(false));
1166+
assertThat(result.minimumShouldMatch, equalTo(2));
1167+
assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),
1168+
new Term("field", "value3"), new Term("field", "value4"));
1169+
}
1170+
11111171
private static void assertDimension(byte[] expected, Consumer<byte[]> consumer) {
11121172
byte[] dest = new byte[expected.length];
11131173
consumer.accept(dest);

0 commit comments

Comments
 (0)