Skip to content

Commit 76d0edd

Browse files
committed
Add prefix intervals source (#43635)
This commit adds a prefix intervals source, allowing you to search for intervals that contain terms starting with a given prefix. The source can make use of the index_prefixes mapping option. Relates to #43198
1 parent c121b00 commit 76d0edd

File tree

7 files changed

+217
-6
lines changed

7 files changed

+217
-6
lines changed

docs/reference/query-dsl/intervals-query.asciidoc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,25 @@ to search across multiple fields as if they were all the same field; for example
8282
you could index the same text into stemmed and unstemmed fields, and search for
8383
stemmed tokens near unstemmed ones.
8484

85+
[[intervals-prefix]]
86+
==== `prefix`
87+
88+
The `prefix` rule finds terms that start with a specified prefix. The prefix will
89+
expand to match at most 128 terms; if there are more matching terms in the index,
90+
then an error will be returned. To avoid this limit, enable the
91+
<<index-prefixes,`index_prefixes`>> option on the field being searched.
92+
93+
[horizontal]
94+
`term`::
95+
Match terms starting with this prefix
96+
`analyzer`::
97+
Which analyzer should be used to normalize the `term`. By default, the
98+
search analyzer of the top-level field will be used.
99+
`use_field`::
100+
If specified, then match intervals from this field rather than the top-level field.
101+
The `term` will be normalized using the search analyzer from this field, unless
102+
`analyzer` is specified separately.
103+
85104
[[intervals-all_of]]
86105
==== `all_of`
87106

rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,3 +387,23 @@ setup:
387387
- match: { hits.total.value: 1 }
388388
- match: { hits.hits.0._id: "4" }
389389

390+
---
"Test prefix":
  # The prefix rule's parser (IntervalsSourceProvider.Prefix) declares its
  # required parameter as `term`, so the rule body must use that key.
  - skip:
      version: " - 8.0.0"
      reason: "TODO: change to 7.3 in backport"
  - do:
      search:
        index: test
        body:
          query:
            intervals:
              text:
                all_of:
                  intervals:
                    - match:
                        query: cold
                    - prefix:
                        term: out
  - match: { hits.total.value: 3 }
409+

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,8 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew
390390
/**
391391
* Create an {@link IntervalsSource} to be used for proximity queries
392392
*/
393-
public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
393+
public IntervalsSource intervals(String query, int max_gaps, boolean ordered,
394+
NamedAnalyzer analyzer, boolean prefix) throws IOException {
394395
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
395396
+ "] which is of type [" + typeName() + "]");
396397
}

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,15 @@
4444
import org.apache.lucene.search.Query;
4545
import org.apache.lucene.search.SynonymQuery;
4646
import org.apache.lucene.search.TermQuery;
47+
import org.apache.lucene.search.intervals.Intervals;
4748
import org.apache.lucene.search.intervals.IntervalsSource;
4849
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
4950
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
5051
import org.apache.lucene.search.spans.SpanNearQuery;
5152
import org.apache.lucene.search.spans.SpanOrQuery;
5253
import org.apache.lucene.search.spans.SpanQuery;
5354
import org.apache.lucene.search.spans.SpanTermQuery;
55+
import org.apache.lucene.util.BytesRef;
5456
import org.apache.lucene.util.automaton.Automata;
5557
import org.apache.lucene.util.automaton.Automaton;
5658
import org.apache.lucene.util.automaton.Operations;
@@ -408,6 +410,21 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer
408410
.build();
409411
}
410412

413+
public IntervalsSource intervals(BytesRef term) {
414+
if (term.length > maxChars) {
415+
return Intervals.prefix(term.utf8ToString());
416+
}
417+
if (term.length >= minChars) {
418+
return Intervals.fixField(name(), Intervals.term(term));
419+
}
420+
StringBuilder sb = new StringBuilder(term.utf8ToString());
421+
for (int i = term.length; i < minChars; i++) {
422+
sb.append("?");
423+
}
424+
String wildcardTerm = sb.toString();
425+
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(wildcardTerm)), Intervals.term(term));
426+
}
427+
411428
@Override
412429
public PrefixFieldType clone() {
413430
return new PrefixFieldType(parentField, name(), minChars, maxChars);
@@ -636,10 +653,21 @@ public Query existsQuery(QueryShardContext context) {
636653
}
637654

638655
@Override
639-
public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
656+
public IntervalsSource intervals(String text, int maxGaps, boolean ordered,
657+
NamedAnalyzer analyzer, boolean prefix) throws IOException {
640658
if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
641659
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
642660
}
661+
if (analyzer == null) {
662+
analyzer = searchAnalyzer();
663+
}
664+
if (prefix) {
665+
BytesRef normalizedTerm = analyzer.normalize(name(), text);
666+
if (prefixFieldType != null) {
667+
return prefixFieldType.intervals(normalizedTerm);
668+
}
669+
return Intervals.prefix(normalizedTerm.utf8ToString()); // TODO make Intervals.prefix() take a BytesRef
670+
}
643671
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
644672
return builder.analyzeText(text, maxGaps, ordered);
645673
}

server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java

Lines changed: 108 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,11 @@ public static IntervalsSourceProvider fromXContent(XContentParser parser) throws
7878
return Disjunction.fromXContent(parser);
7979
case "all_of":
8080
return Combine.fromXContent(parser);
81+
case "prefix":
82+
return Prefix.fromXContent(parser);
8183
}
8284
throw new ParsingException(parser.getTokenLocation(),
83-
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of]");
85+
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of, prefix]");
8486
}
8587

8688
private static IntervalsSourceProvider parseInnerIntervals(XContentParser parser) throws IOException {
@@ -138,10 +140,10 @@ public IntervalsSource getSource(QueryShardContext context, MappedFieldType fiel
138140
if (useField != null) {
139141
fieldType = context.fieldMapper(useField);
140142
assert fieldType != null;
141-
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
143+
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false));
142144
}
143145
else {
144-
source = fieldType.intervals(query, maxGaps, ordered, analyzer);
146+
source = fieldType.intervals(query, maxGaps, ordered, analyzer, false);
145147
}
146148
if (filter != null) {
147149
return filter.filter(source, context, fieldType);
@@ -440,6 +442,109 @@ public static Combine fromXContent(XContentParser parser) {
440442
}
441443
}
442444

445+
public static class Prefix extends IntervalsSourceProvider {
446+
447+
public static final String NAME = "prefix";
448+
449+
private final String term;
450+
private final String analyzer;
451+
private final String useField;
452+
453+
public Prefix(String term, String analyzer, String useField) {
454+
this.term = term;
455+
this.analyzer = analyzer;
456+
this.useField = useField;
457+
}
458+
459+
public Prefix(StreamInput in) throws IOException {
460+
this.term = in.readString();
461+
this.analyzer = in.readOptionalString();
462+
this.useField = in.readOptionalString();
463+
}
464+
465+
@Override
466+
public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException {
467+
NamedAnalyzer analyzer = null;
468+
if (this.analyzer != null) {
469+
analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
470+
}
471+
IntervalsSource source;
472+
if (useField != null) {
473+
fieldType = context.fieldMapper(useField);
474+
assert fieldType != null;
475+
source = Intervals.fixField(useField, fieldType.intervals(term, 0, false, analyzer, true));
476+
}
477+
else {
478+
source = fieldType.intervals(term, 0, false, analyzer, true);
479+
}
480+
return source;
481+
}
482+
483+
@Override
484+
public void extractFields(Set<String> fields) {
485+
if (useField != null) {
486+
fields.add(useField);
487+
}
488+
}
489+
490+
@Override
491+
public boolean equals(Object o) {
492+
if (this == o) return true;
493+
if (o == null || getClass() != o.getClass()) return false;
494+
Prefix prefix = (Prefix) o;
495+
return Objects.equals(term, prefix.term) &&
496+
Objects.equals(analyzer, prefix.analyzer) &&
497+
Objects.equals(useField, prefix.useField);
498+
}
499+
500+
@Override
501+
public int hashCode() {
502+
return Objects.hash(term, analyzer, useField);
503+
}
504+
505+
@Override
506+
public String getWriteableName() {
507+
return NAME;
508+
}
509+
510+
@Override
511+
public void writeTo(StreamOutput out) throws IOException {
512+
out.writeString(term);
513+
out.writeOptionalString(analyzer);
514+
out.writeOptionalString(useField);
515+
}
516+
517+
@Override
518+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
519+
builder.startObject(NAME);
520+
builder.field("term", term);
521+
if (analyzer != null) {
522+
builder.field("analyzer", analyzer);
523+
}
524+
if (useField != null) {
525+
builder.field("use_field", useField);
526+
}
527+
builder.endObject();
528+
return builder;
529+
}
530+
531+
private static final ConstructingObjectParser<Prefix, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
532+
String term = (String) args[0];
533+
String analyzer = (String) args[1];
534+
String useField = (String) args[2];
535+
return new Prefix(term, analyzer, useField);
536+
});
537+
static {
538+
PARSER.declareString(constructorArg(), new ParseField("term"));
539+
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
540+
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
541+
}
542+
543+
public static Prefix fromXContent(XContentParser parser) throws IOException {
544+
return PARSER.parse(parser, null);
545+
}
546+
}
547+
443548
static class ScriptFilterSource extends FilteredIntervalsSource {
444549

445550
final IntervalFilterScript script;

server/src/main/java/org/elasticsearch/search/SearchModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,8 @@ private void registerIntervalsSourceProviders() {
847847
IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new));
848848
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
849849
IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new));
850+
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
851+
IntervalsSourceProvider.Prefix.NAME, IntervalsSourceProvider.Prefix::new));
850852
}
851853

852854
private void registerQuery(QuerySpec<?> spec) {

server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ protected IntervalQueryBuilder doCreateTestQueryBuilder() {
5959

6060
private static final String MASKED_FIELD = "masked_field";
6161
private static final String NO_POSITIONS_FIELD = "no_positions_field";
62+
private static final String PREFIXED_FIELD = "prefixed_field";
6263

6364
@Override
6465
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
@@ -70,6 +71,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws
7071
.field("type", "text")
7172
.field("index_options", "freqs")
7273
.endObject()
74+
.startObject(PREFIXED_FIELD)
75+
.field("type", "text")
76+
.startObject("index_prefixes").endObject()
77+
.endObject()
7378
.endObject().endObject().endObject();
7479

7580
mapperService.merge("_doc",
@@ -384,5 +389,36 @@ public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryTyp
384389
assertEquals(expected, q);
385390

386391
}
387-
392+
393+
public void testPrefixes() throws IOException {
394+
395+
String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
396+
"\"prefix\" : { \"term\" : \"term\" } } } }";
397+
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
398+
Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.prefix("term"));
399+
assertEquals(expected, builder.toQuery(createShardContext()));
400+
401+
String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
402+
"\"prefix\" : { \"term\" : \"term\" } } } }";
403+
expectThrows(IllegalArgumentException.class, () -> {
404+
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_json);
405+
builder1.toQuery(createShardContext());
406+
});
407+
408+
String prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
409+
"\"prefix\" : { \"term\" : \"term\" } } } }";
410+
builder = (IntervalQueryBuilder) parseQuery(prefix_json);
411+
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term")));
412+
assertEquals(expected, builder.toQuery(createShardContext()));
413+
414+
String short_prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
415+
"\"prefix\" : { \"term\" : \"t\" } } } }";
416+
builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
417+
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.or(
418+
Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard("t?")),
419+
Intervals.term("t")));
420+
assertEquals(expected, builder.toQuery(createShardContext()));
421+
422+
}
423+
388424
}

0 commit comments

Comments
 (0)