Skip to content

Commit 8249317

Browse files
committed
Merge branch '6.x' into silent-batch-mode-migration
* 6.x: Improve similarity integration. (elastic#29187) Fix some query extraction bugs. (elastic#29283) Fixed quote_field_suffix in query_string (elastic#29332) TEST: Update negative byte size setting error msg Fix bwc in GeoDistanceQuery serialization (elastic#29325)
2 parents cd3608a + 9e61f6d commit 8249317

34 files changed

+750
-855
lines changed

docs/reference/index-modules/similarity.asciidoc

+5-3
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ Type name: `BM25`
8686
[[classic-similarity]]
8787
==== Classic similarity
8888

89+
deprecated[6.3.0, The quality of the produced scores used to rely on coordination factors, which have been removed. It is advised to use BM25 instead.]
90+
8991
The classic similarity that is based on the TF/IDF model. This
9092
similarity has the following option:
9193

@@ -97,7 +99,7 @@ similarity has the following option:
9799
Type name: `classic`
98100

99101
[float]
100-
[[drf]]
102+
[[dfr]]
101103
==== DFR similarity
102104

103105
Similarity that implements the
@@ -510,7 +512,7 @@ PUT /index
510512
"index": {
511513
"similarity": {
512514
"default": {
513-
"type": "classic"
515+
"type": "boolean"
514516
}
515517
}
516518
}
@@ -532,7 +534,7 @@ PUT /index/_settings
532534
"index": {
533535
"similarity": {
534536
"default": {
535-
"type": "classic"
537+
"type": "boolean"
536538
}
537539
}
538540
}

docs/reference/mapping/params/similarity.asciidoc

+2-7
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,9 @@ PUT my_index
4444
"default_field": { <1>
4545
"type": "text"
4646
},
47-
"classic_field": {
48-
"type": "text",
49-
"similarity": "classic" <2>
50-
},
5147
"boolean_sim_field": {
5248
"type": "text",
53-
"similarity": "boolean" <3>
49+
"similarity": "boolean" <2>
5450
}
5551
}
5652
}
@@ -59,5 +55,4 @@ PUT my_index
5955
--------------------------------------------------
6056
// CONSOLE
6157
<1> The `default_field` uses the `BM25` similarity.
62-
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
63-
<3> The `boolean_sim_field` uses the `boolean` similarity.
58+
<2> The `boolean_sim_field` uses the `boolean` similarity.

modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java

+2-4
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ protected Settings indexSettings() {
9797

9898
@Override
9999
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
100-
similarity = randomFrom("classic", "BM25");
100+
similarity = randomFrom("boolean", "BM25");
101101
XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
102102
.startObject("join_field")
103103
.field("type", "join")
@@ -336,9 +336,7 @@ public void testNonDefaultSimilarity() throws Exception {
336336
hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
337337
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
338338
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
339-
.create(similarity, Settings.EMPTY,
340-
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
341-
.get();
339+
.apply(Settings.EMPTY, Version.CURRENT, null);
342340
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
343341
}
344342

modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java

+2-4
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {
8787

8888
@Override
8989
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
90-
similarity = randomFrom("classic", "BM25");
90+
similarity = randomFrom("boolean", "BM25");
9191
// TODO: use a single type when inner hits have been changed to work with join field,
9292
// this test randomly generates queries with inner hits
9393
mapperService.merge(PARENT_TYPE, new CompressedXContent(Strings.toString(PutMappingRequest.buildFromSimplifiedDef(PARENT_TYPE,
@@ -323,9 +323,7 @@ public void testNonDefaultSimilarity() throws Exception {
323323
hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
324324
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
325325
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
326-
.create(similarity, Settings.EMPTY,
327-
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
328-
.get();
326+
.apply(Settings.EMPTY, Version.CURRENT, null);
329327
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
330328
}
331329

modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java

+37-12
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ static Result analyze(Query query, Version indexVersion) {
143143
}
144144

145145
private static BiFunction<Query, Version, Result> matchNoDocsQuery() {
146-
return (query, version) -> new Result(true, Collections.emptySet(), 1);
146+
return (query, version) -> new Result(true, Collections.emptySet(), 0);
147147
}
148148

149149
private static BiFunction<Query, Version, Result> matchAllDocsQuery() {
@@ -179,36 +179,36 @@ private static BiFunction<Query, Version, Result> termInSetQuery() {
179179
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
180180
terms.add(new QueryExtraction(new Term(iterator.field(), term)));
181181
}
182-
return new Result(true, terms, 1);
182+
return new Result(true, terms, Math.min(1, terms.size()));
183183
};
184184
}
185185

186186
private static BiFunction<Query, Version, Result> synonymQuery() {
187187
return (query, version) -> {
188188
Set<QueryExtraction> terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
189-
return new Result(true, terms, 1);
189+
return new Result(true, terms, Math.min(1, terms.size()));
190190
};
191191
}
192192

193193
private static BiFunction<Query, Version, Result> commonTermsQuery() {
194194
return (query, version) -> {
195195
Set<QueryExtraction> terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
196-
return new Result(false, terms, 1);
196+
return new Result(false, terms, Math.min(1, terms.size()));
197197
};
198198
}
199199

200200
private static BiFunction<Query, Version, Result> blendedTermQuery() {
201201
return (query, version) -> {
202202
Set<QueryExtraction> terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
203-
return new Result(true, terms, 1);
203+
return new Result(true, terms, Math.min(1, terms.size()));
204204
};
205205
}
206206

207207
private static BiFunction<Query, Version, Result> phraseQuery() {
208208
return (query, version) -> {
209209
Term[] terms = ((PhraseQuery) query).getTerms();
210210
if (terms.length == 0) {
211-
return new Result(true, Collections.emptySet(), 1);
211+
return new Result(true, Collections.emptySet(), 0);
212212
}
213213

214214
if (version.onOrAfter(Version.V_6_1_0)) {
@@ -232,7 +232,7 @@ private static BiFunction<Query, Version, Result> multiPhraseQuery() {
232232
return (query, version) -> {
233233
Term[][] terms = ((MultiPhraseQuery) query).getTermArrays();
234234
if (terms.length == 0) {
235-
return new Result(true, Collections.emptySet(), 1);
235+
return new Result(true, Collections.emptySet(), 0);
236236
}
237237

238238
if (version.onOrAfter(Version.V_6_1_0)) {
@@ -297,7 +297,7 @@ private static BiFunction<Query, Version, Result> spanOrQuery() {
297297
for (SpanQuery clause : spanOrQuery.getClauses()) {
298298
terms.addAll(analyze(clause, version).extractions);
299299
}
300-
return new Result(false, terms, 1);
300+
return new Result(false, terms, Math.min(1, terms.size()));
301301
};
302302
}
303303

@@ -334,6 +334,9 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
334334
numOptionalClauses++;
335335
}
336336
}
337+
if (minimumShouldMatch > numOptionalClauses) {
338+
return new Result(false, Collections.emptySet(), 0);
339+
}
337340
if (numRequiredClauses > 0) {
338341
if (version.onOrAfter(Version.V_6_1_0)) {
339342
UnsupportedQueryException uqe = null;
@@ -345,7 +348,12 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
345348
// since they are completely optional.
346349

347350
try {
348-
results.add(analyze(clause.getQuery(), version));
351+
Result subResult = analyze(clause.getQuery(), version);
352+
if (subResult.matchAllDocs == false && subResult.extractions.isEmpty()) {
353+
// doesn't match anything
354+
return subResult;
355+
}
356+
results.add(subResult);
349357
} catch (UnsupportedQueryException e) {
350358
uqe = e;
351359
}
@@ -399,7 +407,12 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
399407
}
400408
}
401409
msm += resultMsm;
402-
verified &= result.verified;
410+
411+
if (result.verified == false
412+
// If some inner extractions are optional, the result can't be verified
413+
|| result.minimumShouldMatch < result.extractions.size()) {
414+
verified = false;
415+
}
403416
matchAllDocs &= result.matchAllDocs;
404417
extractions.addAll(result.extractions);
405418
}
@@ -491,7 +504,7 @@ private static BiFunction<Query, Version, Result> pointRangeQuery() {
491504
// Need to check whether upper is not smaller than lower, otherwise NumericUtils.subtract(...) fails IAE
492505
// If upper is really smaller than lower then we deal with like MatchNoDocsQuery. (verified and no extractions)
493506
if (new BytesRef(lowerPoint).compareTo(new BytesRef(upperPoint)) > 0) {
494-
return new Result(true, Collections.emptySet(), 1);
507+
return new Result(true, Collections.emptySet(), 0);
495508
}
496509

497510
byte[] interval = new byte[16];
@@ -536,7 +549,15 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
536549
for (int i = 0; i < disjunctions.size(); i++) {
537550
Query disjunct = disjunctions.get(i);
538551
Result subResult = analyze(disjunct, version);
539-
verified &= subResult.verified;
552+
if (subResult.verified == false
553+
// one of the sub queries requires more than one term to match, we can't
554+
// verify it with a single top-level min_should_match
555+
|| subResult.minimumShouldMatch > 1
556+
// One of the inner clauses has multiple extractions, we won't be able to
557+
// verify it with a single top-level min_should_match
558+
|| (subResult.extractions.size() > 1 && requiredShouldClauses > 1)) {
559+
verified = false;
560+
}
540561
if (subResult.matchAllDocs) {
541562
numMatchAllClauses++;
542563
}
@@ -682,6 +703,10 @@ static class Result {
682703
final boolean matchAllDocs;
683704

684705
Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) {
706+
if (minimumShouldMatch > extractions.size()) {
707+
throw new IllegalArgumentException("minimumShouldMatch can't be greater than the number of extractions: "
708+
+ minimumShouldMatch + " > " + extractions.size());
709+
}
685710
this.extractions = extractions;
686711
this.verified = verified;
687712
this.minimumShouldMatch = minimumShouldMatch;

modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -210,12 +210,13 @@ public void testDuel() throws Exception {
210210
new BytesRef(randomFrom(stringContent.get(field1)))));
211211
queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))),
212212
new BytesRef(randomFrom(stringContent.get(field1)))));
213-
int numRandomBoolQueries = randomIntBetween(16, 32);
213+
// many iterations with boolean queries, which are the most complex queries to deal with when nested
214+
int numRandomBoolQueries = 1000;
214215
for (int i = 0; i < numRandomBoolQueries; i++) {
215216
queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues));
216217
}
217218
queryFunctions.add(() -> {
218-
int numClauses = randomIntBetween(1, 16);
219+
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4));
219220
List<Query> clauses = new ArrayList<>();
220221
for (int i = 0; i < numClauses; i++) {
221222
String field = randomFrom(stringFields);
@@ -266,7 +267,7 @@ public void testDuel() throws Exception {
266267
private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Map<String, List<String>> content,
267268
MappedFieldType intFieldType, List<Integer> intValues) {
268269
BooleanQuery.Builder builder = new BooleanQuery.Builder();
269-
int numClauses = randomIntBetween(1, 16);
270+
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); // use low numbers of clauses more often
270271
int numShouldClauses = 0;
271272
boolean onlyShouldClauses = rarely();
272273
for (int i = 0; i < numClauses; i++) {
@@ -313,7 +314,7 @@ private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Ma
313314
numShouldClauses++;
314315
}
315316
}
316-
builder.setMinimumNumberShouldMatch(numShouldClauses);
317+
builder.setMinimumNumberShouldMatch(randomIntBetween(0, numShouldClauses));
317318
return builder.build();
318319
}
319320

0 commit comments

Comments
 (0)