diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 3049cb363173a..951147a21ac50 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -82,6 +82,25 @@ to search across multiple fields as if they were all the same field; for example you could index the same text into stemmed and unstemmed fields, and search for stemmed tokens near unstemmed ones. +[[intervals-prefix]] +==== `prefix` + +The `prefix` rule finds terms that start with a specified prefix. The prefix will +expand to match at most 128 terms; if there are more matching terms in the index, +then an error will be returned. To avoid this limit, enable the +<<index-prefixes,`index-prefixes`>> option on the field being searched. + +[horizontal] +`prefix`:: +Match terms starting with this prefix +`analyzer`:: +Which analyzer should be used to normalize the `prefix`. By default, the +search analyzer of the top-level field will be used. +`use_field`:: +If specified, then match intervals from this field rather than the top-level field. +The `prefix` will be normalized using the search analyzer from this field, unless +`analyzer` is specified separately. 
+ [[intervals-all_of]] ==== `all_of` diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index 46bf2cada8e4d..c5238e237e580 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -384,3 +384,23 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "4" } +--- +"Test prefix": + - skip: + version: " - 8.0.0" + reason: "TODO: change to 7.3 in backport" + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cold + - prefix: + prefix: out + - match: { hits.total.value: 3 } + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 5ef689709400d..411045abaf796 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -390,7 +390,8 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew /** * Create an {@link IntervalsSource} to be used for proximity queries */ - public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer) throws IOException { + public IntervalsSource intervals(String query, int max_gaps, boolean ordered, + NamedAnalyzer analyzer, boolean prefix) throws IOException { throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 6906ceb113b9c..05ca08a796593 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -44,6 +44,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.intervals.Intervals; import org.apache.lucene.search.intervals.IntervalsSource; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; @@ -51,6 +52,7 @@ import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; @@ -403,6 +405,17 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer .build(); } + public IntervalsSource intervals(BytesRef term) { + if (term.length > maxChars) { + return Intervals.prefix(term.utf8ToString()); + } + if (term.length >= minChars) { + return Intervals.fixField(name(), Intervals.term(term)); + } + String wildcardTerm = term.utf8ToString() + "?".repeat(Math.max(0, minChars - term.length)); + return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(wildcardTerm)), Intervals.term(term)); + } + @Override public PrefixFieldType clone() { return new PrefixFieldType(parentField, name(), minChars, maxChars); @@ -631,10 +644,21 @@ public Query existsQuery(QueryShardContext context) { } @Override - public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException { + public IntervalsSource intervals(String text, int maxGaps, boolean ordered, + NamedAnalyzer analyzer, boolean prefix) throws IOException { if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) 
{ throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed"); } + if (analyzer == null) { + analyzer = searchAnalyzer(); + } + if (prefix) { + BytesRef normalizedTerm = analyzer.normalize(name(), text); + if (prefixFieldType != null) { + return prefixFieldType.intervals(normalizedTerm); + } + return Intervals.prefix(normalizedTerm.utf8ToString()); // TODO make Intervals.prefix() take a BytesRef + } IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer); return builder.analyzeText(text, maxGaps, ordered); } diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index e551654af9a76..234018971ed59 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -78,9 +78,11 @@ public static IntervalsSourceProvider fromXContent(XContentParser parser) throws return Disjunction.fromXContent(parser); case "all_of": return Combine.fromXContent(parser); + case "prefix": + return Prefix.fromXContent(parser); } throw new ParsingException(parser.getTokenLocation(), - "Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of]"); + "Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of, prefix]"); } private static IntervalsSourceProvider parseInnerIntervals(XContentParser parser) throws IOException { @@ -138,10 +140,10 @@ public IntervalsSource getSource(QueryShardContext context, MappedFieldType fiel if (useField != null) { fieldType = context.fieldMapper(useField); assert fieldType != null; - source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer)); + source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, 
ordered, analyzer, false)); } else { - source = fieldType.intervals(query, maxGaps, ordered, analyzer); + source = fieldType.intervals(query, maxGaps, ordered, analyzer, false); } if (filter != null) { return filter.filter(source, context, fieldType); @@ -440,6 +442,113 @@ public static Combine fromXContent(XContentParser parser) { } } + /** + * Interval source that matches terms starting with a given prefix. + * The prefix is parsed from and rendered to the {@code prefix} field. + */ + public static class Prefix extends IntervalsSourceProvider { + + public static final String NAME = "prefix"; + + private final String term; + private final String analyzer; + private final String useField; + + public Prefix(String term, String analyzer, String useField) { + this.term = term; + this.analyzer = analyzer; + this.useField = useField; + } + + public Prefix(StreamInput in) throws IOException { + this.term = in.readString(); + this.analyzer = in.readOptionalString(); + this.useField = in.readOptionalString(); + } + + @Override + public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException { + NamedAnalyzer analyzer = null; + if (this.analyzer != null) { + analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer); + } + IntervalsSource source; + if (useField != null) { + fieldType = context.fieldMapper(useField); + assert fieldType != null; + source = Intervals.fixField(useField, fieldType.intervals(term, 0, false, analyzer, true)); + } + else { + source = fieldType.intervals(term, 0, false, analyzer, true); + } + return source; + } + + @Override + public void extractFields(Set<String> fields) { + if (useField != null) { + fields.add(useField); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Prefix prefix = (Prefix) o; + return Objects.equals(term, prefix.term) && + Objects.equals(analyzer, prefix.analyzer) && + Objects.equals(useField, prefix.useField); + } + + @Override + public int hashCode() { + return Objects.hash(term, analyzer, useField); + } + + @Override +
public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(term); + out.writeOptionalString(analyzer); + out.writeOptionalString(useField); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.field("prefix", term); + if (analyzer != null) { + builder.field("analyzer", analyzer); + } + if (useField != null) { + builder.field("use_field", useField); + } + builder.endObject(); + return builder; + } + + private static final ConstructingObjectParser<Prefix, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> { + String term = (String) args[0]; + String analyzer = (String) args[1]; + String useField = (String) args[2]; + return new Prefix(term, analyzer, useField); + }); + static { + PARSER.declareString(constructorArg(), new ParseField("prefix")); + PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer")); + PARSER.declareString(optionalConstructorArg(), new ParseField("use_field")); + } + + public static Prefix fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + } + static class ScriptFilterSource extends FilteredIntervalsSource { final IntervalFilterScript script; diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index ade835fb33a84..5eb82854a2097 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -792,6 +792,8 @@ private void registerIntervalsSourceProviders() { IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new)); namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new)); + namedWriteables.add(new
NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, + IntervalsSourceProvider.Prefix.NAME, IntervalsSourceProvider.Prefix::new)); } private void registerQuery(QuerySpec spec) { diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index e858d04e54333..7838f77cc1697 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -59,6 +59,7 @@ protected IntervalQueryBuilder doCreateTestQueryBuilder() { private static final String MASKED_FIELD = "masked_field"; private static final String NO_POSITIONS_FIELD = "no_positions_field"; + private static final String PREFIXED_FIELD = "prefixed_field"; @Override protected void initializeAdditionalMappings(MapperService mapperService) throws IOException { @@ -70,6 +71,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws .field("type", "text") .field("index_options", "freqs") .endObject() + .startObject(PREFIXED_FIELD) + .field("type", "text") + .startObject("index_prefixes").endObject() + .endObject() .endObject().endObject().endObject(); mapperService.merge("_doc", @@ -385,4 +390,35 @@ public FactoryType compile(Script script, ScriptContext { + IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_json); + builder1.toQuery(createShardContext()); + }); + + String prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " + + "\"prefix\" : { \"prefix\" : \"term\" } } } }"; + builder = (IntervalQueryBuilder) parseQuery(prefix_json); + expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term"))); + assertEquals(expected, builder.toQuery(createShardContext())); + + String short_prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " + +
"\"prefix\" : { \"prefix\" : \"t\" } } } }"; + builder = (IntervalQueryBuilder) parseQuery(short_prefix_json); + expected = new IntervalQuery(PREFIXED_FIELD, Intervals.or( + Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard("t?")), + Intervals.term("t"))); + assertEquals(expected, builder.toQuery(createShardContext())); + + } + }