-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Fix some query extraction bugs. #29283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -143,7 +143,7 @@ static Result analyze(Query query, Version indexVersion) { | |
} | ||
|
||
private static BiFunction<Query, Version, Result> matchNoDocsQuery() { | ||
return (query, version) -> new Result(true, Collections.emptySet(), 1); | ||
return (query, version) -> new Result(true, Collections.emptySet(), 0); | ||
} | ||
|
||
private static BiFunction<Query, Version, Result> matchAllDocsQuery() { | ||
|
@@ -179,36 +179,36 @@ private static BiFunction<Query, Version, Result> termInSetQuery() { | |
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { | ||
terms.add(new QueryExtraction(new Term(iterator.field(), term))); | ||
} | ||
return new Result(true, terms, 1); | ||
return new Result(true, terms, Math.min(1, terms.size())); | ||
}; | ||
} | ||
|
||
private static BiFunction<Query, Version, Result> synonymQuery() { | ||
return (query, version) -> { | ||
Set<QueryExtraction> terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); | ||
return new Result(true, terms, 1); | ||
return new Result(true, terms, Math.min(1, terms.size())); | ||
}; | ||
} | ||
|
||
private static BiFunction<Query, Version, Result> commonTermsQuery() { | ||
return (query, version) -> { | ||
Set<QueryExtraction> terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); | ||
return new Result(false, terms, 1); | ||
return new Result(false, terms, Math.min(1, terms.size())); | ||
}; | ||
} | ||
|
||
private static BiFunction<Query, Version, Result> blendedTermQuery() { | ||
return (query, version) -> { | ||
Set<QueryExtraction> terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); | ||
return new Result(true, terms, 1); | ||
return new Result(true, terms, Math.min(1, terms.size())); | ||
}; | ||
} | ||
|
||
private static BiFunction<Query, Version, Result> phraseQuery() { | ||
return (query, version) -> { | ||
Term[] terms = ((PhraseQuery) query).getTerms(); | ||
if (terms.length == 0) { | ||
return new Result(true, Collections.emptySet(), 1); | ||
return new Result(true, Collections.emptySet(), 0); | ||
} | ||
|
||
if (version.onOrAfter(Version.V_6_1_0)) { | ||
|
@@ -232,7 +232,7 @@ private static BiFunction<Query, Version, Result> multiPhraseQuery() { | |
return (query, version) -> { | ||
Term[][] terms = ((MultiPhraseQuery) query).getTermArrays(); | ||
if (terms.length == 0) { | ||
return new Result(true, Collections.emptySet(), 1); | ||
return new Result(true, Collections.emptySet(), 0); | ||
} | ||
|
||
if (version.onOrAfter(Version.V_6_1_0)) { | ||
|
@@ -297,7 +297,7 @@ private static BiFunction<Query, Version, Result> spanOrQuery() { | |
for (SpanQuery clause : spanOrQuery.getClauses()) { | ||
terms.addAll(analyze(clause, version).extractions); | ||
} | ||
return new Result(false, terms, 1); | ||
return new Result(false, terms, Math.min(1, terms.size())); | ||
}; | ||
} | ||
|
||
|
@@ -334,6 +334,9 @@ private static BiFunction<Query, Version, Result> booleanQuery() { | |
numOptionalClauses++; | ||
} | ||
} | ||
if (minimumShouldMatch > numOptionalClauses) { | ||
return new Result(false, Collections.emptySet(), 0); | ||
} | ||
if (numRequiredClauses > 0) { | ||
if (version.onOrAfter(Version.V_6_1_0)) { | ||
UnsupportedQueryException uqe = null; | ||
|
@@ -345,7 +348,12 @@ private static BiFunction<Query, Version, Result> booleanQuery() { | |
// since they are completely optional. | ||
|
||
try { | ||
results.add(analyze(clause.getQuery(), version)); | ||
Result subResult = analyze(clause.getQuery(), version); | ||
if (subResult.matchAllDocs == false && subResult.extractions.isEmpty()) { | ||
// doesn't match anything | ||
return subResult; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
} | ||
results.add(subResult); | ||
} catch (UnsupportedQueryException e) { | ||
uqe = e; | ||
} | ||
|
@@ -400,7 +408,11 @@ private static BiFunction<Query, Version, Result> booleanQuery() { | |
} | ||
msm += resultMsm; | ||
|
||
verified &= result.verified; | ||
if (result.verified == false | ||
// If some inner extractions are optional, the result can't be verified | ||
|| result.minimumShouldMatch < result.extractions.size()) { | ||
verified = false; | ||
} | ||
matchAllDocs &= result.matchAllDocs; | ||
extractions.addAll(result.extractions); | ||
} | ||
|
@@ -492,7 +504,7 @@ private static BiFunction<Query, Version, Result> pointRangeQuery() { | |
// Need to check whether upper is not smaller than lower, otherwise NumericUtils.subtract(...) fails IAE | ||
// If upper is really smaller than lower then we deal with like MatchNoDocsQuery. (verified and no extractions) | ||
if (new BytesRef(lowerPoint).compareTo(new BytesRef(upperPoint)) > 0) { | ||
return new Result(true, Collections.emptySet(), 1); | ||
return new Result(true, Collections.emptySet(), 0); | ||
} | ||
|
||
byte[] interval = new byte[16]; | ||
|
@@ -537,7 +549,15 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh | |
for (int i = 0; i < disjunctions.size(); i++) { | ||
Query disjunct = disjunctions.get(i); | ||
Result subResult = analyze(disjunct, version); | ||
verified &= subResult.verified; | ||
if (subResult.verified == false | ||
// one of the sub queries requires more than one term to match, we can't | ||
// verify it with a single top-level min_should_match | ||
|| subResult.minimumShouldMatch > 1 | ||
// One of the inner clauses has multiple extractions, we won't be able to | ||
// verify it with a single top-level min_should_match | ||
|| (subResult.extractions.size() > 1 && requiredShouldClauses > 1)) { | ||
verified = false; | ||
} | ||
if (subResult.matchAllDocs) { | ||
numMatchAllClauses++; | ||
} | ||
|
@@ -683,6 +703,10 @@ static class Result { | |
final boolean matchAllDocs; | ||
|
||
Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) { | ||
if (minimumShouldMatch > extractions.size()) { | ||
throw new IllegalArgumentException("minimumShouldMatch can't be greater than the number of extractions: " | ||
+ minimumShouldMatch + " > " + extractions.size()); | ||
} | ||
this.extractions = extractions; | ||
this.verified = verified; | ||
this.minimumShouldMatch = minimumShouldMatch; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -210,12 +210,13 @@ public void testDuel() throws Exception { | |
new BytesRef(randomFrom(stringContent.get(field1))))); | ||
queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))), | ||
new BytesRef(randomFrom(stringContent.get(field1))))); | ||
int numRandomBoolQueries = randomIntBetween(16, 32); | ||
// many iterations with boolean queries, which are the most complex queries to deal with when nested | ||
int numRandomBoolQueries = 1000; | ||
for (int i = 0; i < numRandomBoolQueries; i++) { | ||
queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues)); | ||
} | ||
queryFunctions.add(() -> { | ||
int numClauses = randomIntBetween(1, 16); | ||
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); | ||
List<Query> clauses = new ArrayList<>(); | ||
for (int i = 0; i < numClauses; i++) { | ||
String field = randomFrom(stringFields); | ||
|
@@ -266,7 +267,7 @@ public void testDuel() throws Exception { | |
private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Map<String, List<String>> content, | ||
MappedFieldType intFieldType, List<Integer> intValues) { | ||
BooleanQuery.Builder builder = new BooleanQuery.Builder(); | ||
int numClauses = randomIntBetween(1, 16); | ||
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); // use low numbers of clauses more often | ||
int numShouldClauses = 0; | ||
boolean onlyShouldClauses = rarely(); | ||
for (int i = 0; i < numClauses; i++) { | ||
|
@@ -313,7 +314,7 @@ private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Ma | |
numShouldClauses++; | ||
} | ||
} | ||
builder.setMinimumNumberShouldMatch(numShouldClauses); | ||
builder.setMinimumNumberShouldMatch(randomIntBetween(0, numShouldClauses)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did these changes to the randomized test catch the two bugs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried to make them more likely to catch bugs but unfortunately they did not. |
||
return builder.build(); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 because we know query will never match