Skip to content

Commit 1fd448b

Browse files
committed
Fix some query extraction bugs. (#29283)
While playing with the percolator I found two bugs: - Sometimes we set a min_should_match that is greater than the number of extractions. While this doesn't cause direct trouble, it does when the query is nested into a boolean query and the boolean query tries to compute the min_should_match for the entire query based on its own min_should_match and those of the sub queries. So I changed the code to throw an exception when min_should_match is greater than the number of extractions. - Boolean queries claim matches are verified when in fact they shouldn't. This is due to the fact that boolean queries assume that they are verified if all sub clauses are verified but things are more complex than that, eg. conjunctions that are nested in a disjunction or disjunctions that are nested in a conjunction can generally not be verified without running the query.
1 parent fff4e10 commit 1fd448b

File tree

3 files changed

+174
-34
lines changed

3 files changed

+174
-34
lines changed

modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java

+37-12
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ static Result analyze(Query query, Version indexVersion) {
143143
}
144144

145145
private static BiFunction<Query, Version, Result> matchNoDocsQuery() {
146-
return (query, version) -> new Result(true, Collections.emptySet(), 1);
146+
return (query, version) -> new Result(true, Collections.emptySet(), 0);
147147
}
148148

149149
private static BiFunction<Query, Version, Result> matchAllDocsQuery() {
@@ -179,36 +179,36 @@ private static BiFunction<Query, Version, Result> termInSetQuery() {
179179
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
180180
terms.add(new QueryExtraction(new Term(iterator.field(), term)));
181181
}
182-
return new Result(true, terms, 1);
182+
return new Result(true, terms, Math.min(1, terms.size()));
183183
};
184184
}
185185

186186
private static BiFunction<Query, Version, Result> synonymQuery() {
187187
return (query, version) -> {
188188
Set<QueryExtraction> terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
189-
return new Result(true, terms, 1);
189+
return new Result(true, terms, Math.min(1, terms.size()));
190190
};
191191
}
192192

193193
private static BiFunction<Query, Version, Result> commonTermsQuery() {
194194
return (query, version) -> {
195195
Set<QueryExtraction> terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
196-
return new Result(false, terms, 1);
196+
return new Result(false, terms, Math.min(1, terms.size()));
197197
};
198198
}
199199

200200
private static BiFunction<Query, Version, Result> blendedTermQuery() {
201201
return (query, version) -> {
202202
Set<QueryExtraction> terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
203-
return new Result(true, terms, 1);
203+
return new Result(true, terms, Math.min(1, terms.size()));
204204
};
205205
}
206206

207207
private static BiFunction<Query, Version, Result> phraseQuery() {
208208
return (query, version) -> {
209209
Term[] terms = ((PhraseQuery) query).getTerms();
210210
if (terms.length == 0) {
211-
return new Result(true, Collections.emptySet(), 1);
211+
return new Result(true, Collections.emptySet(), 0);
212212
}
213213

214214
if (version.onOrAfter(Version.V_6_1_0)) {
@@ -232,7 +232,7 @@ private static BiFunction<Query, Version, Result> multiPhraseQuery() {
232232
return (query, version) -> {
233233
Term[][] terms = ((MultiPhraseQuery) query).getTermArrays();
234234
if (terms.length == 0) {
235-
return new Result(true, Collections.emptySet(), 1);
235+
return new Result(true, Collections.emptySet(), 0);
236236
}
237237

238238
if (version.onOrAfter(Version.V_6_1_0)) {
@@ -297,7 +297,7 @@ private static BiFunction<Query, Version, Result> spanOrQuery() {
297297
for (SpanQuery clause : spanOrQuery.getClauses()) {
298298
terms.addAll(analyze(clause, version).extractions);
299299
}
300-
return new Result(false, terms, 1);
300+
return new Result(false, terms, Math.min(1, terms.size()));
301301
};
302302
}
303303

@@ -334,6 +334,9 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
334334
numOptionalClauses++;
335335
}
336336
}
337+
if (minimumShouldMatch > numOptionalClauses) {
338+
return new Result(false, Collections.emptySet(), 0);
339+
}
337340
if (numRequiredClauses > 0) {
338341
if (version.onOrAfter(Version.V_6_1_0)) {
339342
UnsupportedQueryException uqe = null;
@@ -345,7 +348,12 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
345348
// since they are completely optional.
346349

347350
try {
348-
results.add(analyze(clause.getQuery(), version));
351+
Result subResult = analyze(clause.getQuery(), version);
352+
if (subResult.matchAllDocs == false && subResult.extractions.isEmpty()) {
353+
// doesn't match anything
354+
return subResult;
355+
}
356+
results.add(subResult);
349357
} catch (UnsupportedQueryException e) {
350358
uqe = e;
351359
}
@@ -399,7 +407,12 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
399407
}
400408
}
401409
msm += resultMsm;
402-
verified &= result.verified;
410+
411+
if (result.verified == false
412+
// If some inner extractions are optional, the result can't be verified
413+
|| result.minimumShouldMatch < result.extractions.size()) {
414+
verified = false;
415+
}
403416
matchAllDocs &= result.matchAllDocs;
404417
extractions.addAll(result.extractions);
405418
}
@@ -491,7 +504,7 @@ private static BiFunction<Query, Version, Result> pointRangeQuery() {
491504
// Need to check whether upper is not smaller than lower, otherwise NumericUtils.subtract(...) fails IAE
492505
// If upper is really smaller than lower then we deal with like MatchNoDocsQuery. (verified and no extractions)
493506
if (new BytesRef(lowerPoint).compareTo(new BytesRef(upperPoint)) > 0) {
494-
return new Result(true, Collections.emptySet(), 1);
507+
return new Result(true, Collections.emptySet(), 0);
495508
}
496509

497510
byte[] interval = new byte[16];
@@ -536,7 +549,15 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
536549
for (int i = 0; i < disjunctions.size(); i++) {
537550
Query disjunct = disjunctions.get(i);
538551
Result subResult = analyze(disjunct, version);
539-
verified &= subResult.verified;
552+
if (subResult.verified == false
553+
// one of the sub queries requires more than one term to match, we can't
554+
// verify it with a single top-level min_should_match
555+
|| subResult.minimumShouldMatch > 1
556+
// One of the inner clauses has multiple extractions, we won't be able to
557+
// verify it with a single top-level min_should_match
558+
|| (subResult.extractions.size() > 1 && requiredShouldClauses > 1)) {
559+
verified = false;
560+
}
540561
if (subResult.matchAllDocs) {
541562
numMatchAllClauses++;
542563
}
@@ -682,6 +703,10 @@ static class Result {
682703
final boolean matchAllDocs;
683704

684705
Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) {
706+
if (minimumShouldMatch > extractions.size()) {
707+
throw new IllegalArgumentException("minimumShouldMatch can't be greater than the number of extractions: "
708+
+ minimumShouldMatch + " > " + extractions.size());
709+
}
685710
this.extractions = extractions;
686711
this.verified = verified;
687712
this.minimumShouldMatch = minimumShouldMatch;

modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -210,12 +210,13 @@ public void testDuel() throws Exception {
210210
new BytesRef(randomFrom(stringContent.get(field1)))));
211211
queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))),
212212
new BytesRef(randomFrom(stringContent.get(field1)))));
213-
int numRandomBoolQueries = randomIntBetween(16, 32);
213+
// many iterations with boolean queries, which are the most complex queries to deal with when nested
214+
int numRandomBoolQueries = 1000;
214215
for (int i = 0; i < numRandomBoolQueries; i++) {
215216
queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues));
216217
}
217218
queryFunctions.add(() -> {
218-
int numClauses = randomIntBetween(1, 16);
219+
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4));
219220
List<Query> clauses = new ArrayList<>();
220221
for (int i = 0; i < numClauses; i++) {
221222
String field = randomFrom(stringFields);
@@ -266,7 +267,7 @@ public void testDuel() throws Exception {
266267
private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Map<String, List<String>> content,
267268
MappedFieldType intFieldType, List<Integer> intValues) {
268269
BooleanQuery.Builder builder = new BooleanQuery.Builder();
269-
int numClauses = randomIntBetween(1, 16);
270+
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); // use low numbers of clauses more often
270271
int numShouldClauses = 0;
271272
boolean onlyShouldClauses = rarely();
272273
for (int i = 0; i < numClauses; i++) {
@@ -313,7 +314,7 @@ private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Ma
313314
numShouldClauses++;
314315
}
315316
}
316-
builder.setMinimumNumberShouldMatch(numShouldClauses);
317+
builder.setMinimumNumberShouldMatch(randomIntBetween(0, numShouldClauses));
317318
return builder.build();
318319
}
319320

0 commit comments

Comments
 (0)