Skip to content

Commit 2666342

Browse files
committed
Fix (simple)_query_string to ignore removed terms (#28871)
This change ensures that we ignore terms removed by the analysis rather than returning a match_no_docs query for the part of the query that contains the stop word. For instance, a query like "the AND fox" should ignore "the" if it is considered a stop word instead of adding a match_no_docs query. This change also fixes the analysis of prefix terms that start with a stop word (e.g. `the*`). Previously, if `analyze_wildcard` was true and `the` was considered a stop word, this part of the query was rewritten into a match_no_docs query. Since it is a prefix query, this change forces the prefix query on `the` even if the term is removed by the analysis. Fixes #28855 Fixes #28856
1 parent c32ce0c commit 2666342

File tree

7 files changed

+74
-10
lines changed

7 files changed

+74
-10
lines changed

server/src/main/java/org/elasticsearch/index/search/MatchQuery.java

+13-4
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,10 @@ public void writeTo(StreamOutput out) throws IOException {
103103

104104
public enum ZeroTermsQuery implements Writeable {
105105
NONE(0),
106-
ALL(1);
106+
ALL(1),
107+
// this is used internally to make sure that query_string and simple_query_string
108+
// ignore query parts for which the analysis removes all tokens.
109+
NULL(2);
107110

108111
private final int ordinal;
109112

@@ -313,10 +316,16 @@ protected final Query termQuery(MappedFieldType fieldType, BytesRef value, boole
313316
}
314317

315318
protected Query zeroTermsQuery() {
316-
if (zeroTermsQuery == DEFAULT_ZERO_TERMS_QUERY) {
317-
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present.");
319+
switch (zeroTermsQuery) {
320+
case NULL:
321+
return null;
322+
case NONE:
323+
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present");
324+
case ALL:
325+
return Queries.newMatchAllQuery();
326+
default:
327+
throw new IllegalStateException("unknown zeroTermsQuery " + zeroTermsQuery);
318328
}
319-
return Queries.newMatchAllQuery();
320329
}
321330

322331
private class MatchQueryBuilder extends QueryBuilder {

server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ private QueryStringQueryParser(QueryShardContext context, String defaultField,
150150
this.context = context;
151151
this.fieldsAndWeights = Collections.unmodifiableMap(fieldsAndWeights);
152152
this.queryBuilder = new MultiMatchQuery(context);
153+
queryBuilder.setZeroTermsQuery(MatchQuery.ZeroTermsQuery.NULL);
153154
queryBuilder.setLenient(lenient);
154155
this.lenient = lenient;
155156
}
@@ -346,7 +347,6 @@ protected Query getFieldQuery(String field, String queryText, int slop) throws P
346347
if (fields.isEmpty()) {
347348
return newUnmappedFieldQuery(field);
348349
}
349-
final Query query;
350350
Analyzer oldAnalyzer = queryBuilder.analyzer;
351351
int oldSlop = queryBuilder.phraseSlop;
352352
try {
@@ -356,7 +356,7 @@ protected Query getFieldQuery(String field, String queryText, int slop) throws P
356356
queryBuilder.setAnalyzer(forceAnalyzer);
357357
}
358358
queryBuilder.setPhraseSlop(slop);
359-
query = queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, fields, queryText, null);
359+
Query query = queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, fields, queryText, null);
360360
return applySlop(query, slop);
361361
} catch (IOException e) {
362362
throw new ParseException(e.getMessage());
@@ -558,7 +558,7 @@ private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throw
558558
}
559559

560560
if (tlist.size() == 0) {
561-
return new MatchNoDocsQuery("analysis was empty for " + field + ":" + termStr);
561+
return super.getPrefixQuery(field, termStr);
562562
}
563563

564564
if (tlist.size() == 1 && tlist.get(0).size() == 1) {
@@ -772,7 +772,7 @@ private PhraseQuery addSlopToPhrase(PhraseQuery query, int slop) {
772772
@Override
773773
public Query parse(String query) throws ParseException {
774774
if (query.trim().isEmpty()) {
775-
return queryBuilder.zeroTermsQuery();
775+
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present");
776776
}
777777
return super.parse(query);
778778
}

server/src/main/java/org/elasticsearch/index/search/SimpleQueryStringQueryParser.java

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ public SimpleQueryStringQueryParser(Analyzer analyzer, Map<String, Float> weight
7474
this.queryBuilder = new MultiMatchQuery(context);
7575
this.queryBuilder.setAutoGenerateSynonymsPhraseQuery(settings.autoGenerateSynonymsPhraseQuery());
7676
this.queryBuilder.setLenient(settings.lenient());
77+
this.queryBuilder.setZeroTermsQuery(MatchQuery.ZeroTermsQuery.NULL);
7778
if (analyzer != null) {
7879
this.queryBuilder.setAnalyzer(analyzer);
7980
}

server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ protected MatchQueryBuilder doCreateTestQueryBuilder() {
110110
}
111111

112112
if (randomBoolean()) {
113-
matchQuery.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.values()));
113+
matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQuery.ALL, ZeroTermsQuery.NONE));
114114
}
115115

116116
if (randomBoolean()) {

server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ protected MultiMatchQueryBuilder doCreateTestQueryBuilder() {
132132
query.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
133133
}
134134
if (randomBoolean()) {
135-
query.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.values()));
135+
query.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.NONE, MatchQuery.ZeroTermsQuery.ALL));
136136
}
137137
if (randomBoolean()) {
138138
query.autoGenerateSynonymsPhraseQuery(randomBoolean());

server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java

+27
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,33 @@ public void testToFuzzyQuery() throws Exception {
10581058
assertEquals(expected, query);
10591059
}
10601060

1061+
public void testWithStopWords() throws Exception {
1062+
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
1063+
Query query = new QueryStringQueryBuilder("the quick fox")
1064+
.field(STRING_FIELD_NAME)
1065+
.analyzer("english")
1066+
.toQuery(createShardContext());
1067+
BooleanQuery expected = new BooleanQuery.Builder()
1068+
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)
1069+
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), Occur.SHOULD)
1070+
.build();
1071+
assertEquals(expected, query);
1072+
}
1073+
1074+
public void testWithPrefixStopWords() throws Exception {
1075+
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
1076+
Query query = new QueryStringQueryBuilder("the* quick fox")
1077+
.field(STRING_FIELD_NAME)
1078+
.analyzer("english")
1079+
.toQuery(createShardContext());
1080+
BooleanQuery expected = new BooleanQuery.Builder()
1081+
.add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), Occur.SHOULD)
1082+
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)
1083+
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), Occur.SHOULD)
1084+
.build();
1085+
assertEquals(expected, query);
1086+
}
1087+
10611088
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
10621089
Settings build = Settings.builder().put(oldIndexSettings)
10631090
.put(indexSettings)

server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java

+27
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,33 @@ public void testLenientToPrefixQuery() throws Exception {
626626
assertEquals(expected, query);
627627
}
628628

629+
public void testWithStopWords() throws Exception {
630+
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
631+
Query query = new SimpleQueryStringBuilder("the quick fox")
632+
.field(STRING_FIELD_NAME)
633+
.analyzer("english")
634+
.toQuery(createShardContext());
635+
BooleanQuery expected = new BooleanQuery.Builder()
636+
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)
637+
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), BooleanClause.Occur.SHOULD)
638+
.build();
639+
assertEquals(expected, query);
640+
}
641+
642+
public void testWithPrefixStopWords() throws Exception {
643+
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
644+
Query query = new SimpleQueryStringBuilder("the* quick fox")
645+
.field(STRING_FIELD_NAME)
646+
.analyzer("english")
647+
.toQuery(createShardContext());
648+
BooleanQuery expected = new BooleanQuery.Builder()
649+
.add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), BooleanClause.Occur.SHOULD)
650+
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)
651+
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), BooleanClause.Occur.SHOULD)
652+
.build();
653+
assertEquals(expected, query);
654+
}
655+
629656
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
630657
Settings build = Settings.builder().put(oldIndexSettings)
631658
.put(indexSettings)

0 commit comments

Comments
 (0)