Skip to content

Commit f94a757

Browse files
authored
Fix index prefixes to work with span_multi (elastic#31066)
* Fix index prefixes to work with span_multi Text fields that use `index_prefixes` can rewrite `prefix` queries into `term` queries internally. This commit fixes the handling of this rewriting in the `span_multi` query. This change also copies the index options of the text field into the prefix field in order to be able to run positional queries. This is mandatory for `span_multi` to work but this could also be useful to optimize `match_phrase_prefix` queries in a follow-up. Note that this change can only be done on indices created after 6.3 since we set the index options to doc only in this version. Fixes elastic#31056
1 parent 024400b commit f94a757

File tree

6 files changed

+277
-73
lines changed

6 files changed

+277
-73
lines changed

docs/reference/query-dsl/span-multi-term-query.asciidoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,11 @@ GET /_search
3636
}
3737
--------------------------------------------------
3838
// CONSOLE
39+
40+
WARNING: By default `span_multi` queries are rewritten to a `span_or` query
41+
containing **all** the expanded terms. This can be expensive if the number of expanded
42+
terms is large. To avoid an unbounded expansion you can set the
43+
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
44+
rewrite. Or, if you use `span_multi` on `prefix` queries only, you can
45+
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
46+
rewrite any prefix query on the field to a single term query that matches the indexed prefix.

rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
---
2-
"search with index prefixes":
1+
setup:
32
- skip:
4-
version: " - 6.99.99"
3+
version: " - 6.2.99"
54
reason: index_prefixes is only available as of 6.3.0
5+
66
- do:
77
indices.create:
88
index: test
@@ -27,6 +27,11 @@
2727
indices.refresh:
2828
index: [test]
2929

30+
---
31+
"search with index prefixes":
32+
- skip:
33+
version: " - 6.2.99"
34+
reason: index_prefixes is only available as of 6.3.0
3035
- do:
3136
search:
3237
index: test
@@ -57,3 +62,23 @@
5762

5863
- match: {hits.total: 1}
5964
- match: {hits.hits.0._score: 1}
65+
66+
---
67+
"search index prefixes with span_multi":
68+
- skip:
69+
version: " - 6.99.99"
70+
reason: span_multi throws an exception with prefix fields on < versions
71+
72+
- do:
73+
search:
74+
index: test
75+
body:
76+
query:
77+
span_near:
78+
clauses: [
79+
{ "span_term": { "text": "short" } },
80+
{ "span_multi": { "match": { "prefix": { "text": "word" } } } }
81+
]
82+
83+
- match: {hits.total: 1}
84+

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.apache.lucene.search.PhraseQuery;
4141
import org.apache.lucene.search.Query;
4242
import org.apache.lucene.search.TermQuery;
43+
import org.elasticsearch.Version;
4344
import org.elasticsearch.common.collect.Iterators;
4445
import org.elasticsearch.common.logging.ESLoggerFactory;
4546
import org.elasticsearch.common.settings.Settings;
@@ -175,7 +176,16 @@ public TextFieldMapper build(BuilderContext context) {
175176
if (fieldType().isSearchable() == false) {
176177
throw new IllegalArgumentException("Cannot set index_prefixes on unindexed field [" + name() + "]");
177178
}
178-
if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
179+
// Copy the index options of the main field to allow phrase queries on
180+
// the prefix field.
181+
if (context.indexCreatedVersion().onOrAfter(Version.V_6_4_0)) {
182+
if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS) {
183+
// frequencies are not needed because prefix queries always use a constant score
184+
prefixFieldType.setIndexOptions(IndexOptions.DOCS);
185+
} else {
186+
prefixFieldType.setIndexOptions(fieldType.indexOptions());
187+
}
188+
} else if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
179189
prefixFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
180190
}
181191
if (fieldType.storeTermVectorOffsets()) {

server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,28 @@
1818
*/
1919
package org.elasticsearch.index.query;
2020

21+
import org.apache.lucene.index.Term;
2122
import org.apache.lucene.search.BoostQuery;
23+
import org.apache.lucene.search.ConstantScoreQuery;
2224
import org.apache.lucene.search.MultiTermQuery;
25+
import org.apache.lucene.search.PrefixQuery;
2326
import org.apache.lucene.search.Query;
27+
import org.apache.lucene.search.TermQuery;
28+
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
2429
import org.apache.lucene.search.spans.SpanBoostQuery;
2530
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
2631
import org.apache.lucene.search.spans.SpanQuery;
32+
import org.apache.lucene.search.spans.SpanTermQuery;
33+
import org.elasticsearch.Version;
2734
import org.elasticsearch.common.ParseField;
2835
import org.elasticsearch.common.ParsingException;
2936
import org.elasticsearch.common.io.stream.StreamInput;
3037
import org.elasticsearch.common.io.stream.StreamOutput;
38+
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
3139
import org.elasticsearch.common.xcontent.XContentBuilder;
3240
import org.elasticsearch.common.xcontent.XContentParser;
41+
import org.elasticsearch.index.mapper.TextFieldMapper;
42+
import org.elasticsearch.index.query.support.QueryParsers;
3343

3444
import java.io.IOException;
3545
import java.util.Objects;
@@ -124,22 +134,67 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro
124134
protected Query doToQuery(QueryShardContext context) throws IOException {
125135
Query subQuery = multiTermQueryBuilder.toQuery(context);
126136
float boost = AbstractQueryBuilder.DEFAULT_BOOST;
127-
if (subQuery instanceof BoostQuery) {
128-
BoostQuery boostQuery = (BoostQuery) subQuery;
129-
subQuery = boostQuery.getQuery();
130-
boost = boostQuery.getBoost();
137+
while (true) {
138+
if (subQuery instanceof ConstantScoreQuery) {
139+
subQuery = ((ConstantScoreQuery) subQuery).getQuery();
140+
boost = 1;
141+
} else if (subQuery instanceof BoostQuery) {
142+
BoostQuery boostQuery = (BoostQuery) subQuery;
143+
subQuery = boostQuery.getQuery();
144+
boost *= boostQuery.getBoost();
145+
} else {
146+
break;
147+
}
131148
}
132-
//no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here
149+
final SpanQuery spanQuery;
150+
// no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here
133151
assert subQuery instanceof SpanBoostQuery == false;
134-
if (subQuery instanceof MultiTermQuery == false) {
135-
throw new UnsupportedOperationException("unsupported inner query, should be " + MultiTermQuery.class.getName() +" but was "
136-
+ subQuery.getClass().getName());
152+
if (subQuery instanceof TermQuery) {
153+
/**
154+
* Text fields that index prefixes can rewrite prefix queries
155+
* into term queries. See {@link TextFieldMapper.TextFieldType#prefixQuery}.
156+
*/
157+
if (multiTermQueryBuilder.getClass() != PrefixQueryBuilder.class) {
158+
throw new UnsupportedOperationException("unsupported inner query generated by " +
159+
multiTermQueryBuilder.getClass().getName() + ", should be " + MultiTermQuery.class.getName()
160+
+ " but was " + subQuery.getClass().getName());
161+
}
162+
if (context.getIndexSettings().getIndexVersionCreated().before(Version.V_6_4_0)) {
163+
/**
164+
* Indices created before 6.4.0 do not index positions on the prefix field
165+
* so we cannot use it to match positional queries. Instead, we explicitly create the prefix
166+
* query on the main field to avoid the rewrite.
167+
*/
168+
PrefixQueryBuilder prefixBuilder = (PrefixQueryBuilder) multiTermQueryBuilder;
169+
PrefixQuery prefixQuery = new PrefixQuery(new Term(prefixBuilder.fieldName(), prefixBuilder.value()));
170+
if (prefixBuilder.rewrite() != null) {
171+
MultiTermQuery.RewriteMethod rewriteMethod =
172+
QueryParsers.parseRewriteMethod(prefixBuilder.rewrite(), null, LoggingDeprecationHandler.INSTANCE);
173+
prefixQuery.setRewriteMethod(rewriteMethod);
174+
}
175+
spanQuery = new SpanMultiTermQueryWrapper<>(prefixQuery);
176+
} else {
177+
String origFieldName = ((PrefixQueryBuilder) multiTermQueryBuilder).fieldName();
178+
SpanTermQuery spanTermQuery = new SpanTermQuery(((TermQuery) subQuery).getTerm());
179+
/**
180+
* Prefixes are indexed in a different field so we mask the term query with the original field
181+
* name. This is required because span_near and span_or queries don't work across different fields.
182+
* The masking is safe because the prefix field is indexed using the same content as the original field
183+
* and the prefix analyzer preserves positions.
184+
*/
185+
spanQuery = new FieldMaskingSpanQuery(spanTermQuery, origFieldName);
186+
}
187+
} else {
188+
if (subQuery instanceof MultiTermQuery == false) {
189+
throw new UnsupportedOperationException("unsupported inner query, should be "
190+
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
191+
}
192+
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
137193
}
138-
SpanQuery wrapper = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
139194
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
140-
wrapper = new SpanBoostQuery(wrapper, boost);
195+
return new SpanBoostQuery(spanQuery, boost);
141196
}
142-
return wrapper;
197+
return spanQuery;
143198
}
144199

145200
@Override

server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.lucene.search.Query;
3838
import org.apache.lucene.search.TermQuery;
3939
import org.apache.lucene.util.BytesRef;
40+
import org.elasticsearch.Version;
4041
import org.elasticsearch.action.index.IndexRequest;
4142
import org.elasticsearch.common.Strings;
4243
import org.elasticsearch.common.bytes.BytesReference;
@@ -638,7 +639,7 @@ public void testIndexPrefixIndexTypes() throws IOException {
638639
.field("type", "text")
639640
.field("analyzer", "english")
640641
.startObject("index_prefixes").endObject()
641-
.field("index_options", "positions")
642+
.field("index_options", "freqs")
642643
.endObject().endObject().endObject().endObject());
643644

644645
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
@@ -649,6 +650,27 @@ public void testIndexPrefixIndexTypes() throws IOException {
649650
assertFalse(ft.storeTermVectors());
650651
}
651652

653+
{
654+
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
655+
.startObject("properties").startObject("field")
656+
.field("type", "text")
657+
.field("analyzer", "english")
658+
.startObject("index_prefixes").endObject()
659+
.field("index_options", "positions")
660+
.endObject().endObject().endObject().endObject());
661+
662+
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
663+
664+
FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix");
665+
FieldType ft = prefix.fieldType;
666+
if (indexService.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) {
667+
assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, ft.indexOptions());
668+
} else {
669+
assertEquals(IndexOptions.DOCS, ft.indexOptions());
670+
}
671+
assertFalse(ft.storeTermVectors());
672+
}
673+
652674
{
653675
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
654676
.startObject("properties").startObject("field")
@@ -662,7 +684,11 @@ public void testIndexPrefixIndexTypes() throws IOException {
662684

663685
FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix");
664686
FieldType ft = prefix.fieldType;
665-
assertEquals(IndexOptions.DOCS, ft.indexOptions());
687+
if (indexService.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) {
688+
assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, ft.indexOptions());
689+
} else {
690+
assertEquals(IndexOptions.DOCS, ft.indexOptions());
691+
}
666692
assertTrue(ft.storeTermVectorOffsets());
667693
}
668694

@@ -679,7 +705,11 @@ public void testIndexPrefixIndexTypes() throws IOException {
679705

680706
FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix");
681707
FieldType ft = prefix.fieldType;
682-
assertEquals(IndexOptions.DOCS, ft.indexOptions());
708+
if (indexService.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) {
709+
assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, ft.indexOptions());
710+
} else {
711+
assertEquals(IndexOptions.DOCS, ft.indexOptions());
712+
}
683713
assertFalse(ft.storeTermVectorOffsets());
684714
}
685715
}

0 commit comments

Comments
 (0)