Add prefix intervals source #43635

Merged · 4 commits · Jun 26, 2019
19 changes: 19 additions & 0 deletions docs/reference/query-dsl/intervals-query.asciidoc
@@ -82,6 +82,25 @@ to search across multiple fields as if they were all the same field; for example
you could index the same text into stemmed and unstemmed fields, and search for
stemmed tokens near unstemmed ones.

[[intervals-prefix]]
==== `prefix`

The `prefix` rule finds terms that start with a specified prefix. The prefix will
expand to match at most 128 terms; if there are more matching terms in the index,
an error will be returned. To avoid this limit, enable the
<<index-prefixes,`index_prefixes`>> option on the field being searched.

[horizontal]
`prefix`::
Match terms starting with this prefix.
`analyzer`::
Which analyzer should be used to normalize the `prefix`. By default, the
search analyzer of the top-level field will be used.
`use_field`::
If specified, then match intervals from this field rather than the top-level field.
The `prefix` will be normalized using the search analyzer from this field, unless
`analyzer` is specified separately.
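
For orientation, a minimal sketch of how the same rule maps onto the Java classes this change adds. The field name `my_text_field` is illustrative, the two-argument `IntervalQueryBuilder(field, source)` constructor is assumed, and `Prefix(term, analyzer, useField)` is the constructor defined in IntervalsSourceProvider below:

import org.elasticsearch.index.query.IntervalQueryBuilder;
import org.elasticsearch.index.query.IntervalsSourceProvider;

class PrefixIntervalsExample {
    // Roughly the JSON accepted by the parser in this PR:
    // { "intervals" : { "my_text_field" : { "prefix" : { "term" : "out" } } } }
    static IntervalQueryBuilder prefixOutQuery() {
        // Prefix(term, analyzer, useField) -- analyzer and use_field are optional, so null here.
        IntervalsSourceProvider.Prefix source = new IntervalsSourceProvider.Prefix("out", null, null);
        return new IntervalQueryBuilder("my_text_field", source);
    }
}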
Reviewer comment (Contributor): Maybe add a note regarding the index_prefixes option of the text field, since some queries will fail if they expand to more than 128 terms?


[[intervals-all_of]]
==== `all_of`

@@ -384,3 +384,23 @@ setup:
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "4" }

---
"Test prefix":
  - skip:
      version: " - 8.0.0"
      reason: "TODO: change to 7.3 in backport"
  - do:
      search:
        index: test
        body:
          query:
            intervals:
              text:
                all_of:
                  intervals:
                    - match:
                        query: cold
                    - prefix:
                        prefix: out
  - match: { hits.total.value: 3 }

@@ -390,7 +390,8 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew
/**
* Create an {@link IntervalsSource} to be used for proximity queries
*/
public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
public IntervalsSource intervals(String query, int max_gaps, boolean ordered,
NamedAnalyzer analyzer, boolean prefix) throws IOException {
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
+ "] which is of type [" + typeName() + "]");
}
@@ -44,13 +44,15 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.intervals.Intervals;
import org.apache.lucene.search.intervals.IntervalsSource;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
Expand Down Expand Up @@ -403,6 +405,17 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer
.build();
}

public IntervalsSource intervals(BytesRef term) {
if (term.length > maxChars) {
return Intervals.prefix(term.utf8ToString());
}
if (term.length >= minChars) {
return Intervals.fixField(name(), Intervals.term(term));
}
String wildcardTerm = term.utf8ToString() + "?".repeat(Math.max(0, minChars - term.length));
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(wildcardTerm)), Intervals.term(term));
}
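// For illustration, assuming the index_prefixes defaults of min_chars = 2 and max_chars = 5 on a
// parent field named "field", the three branches above resolve to:
//   intervals("outside") -> Intervals.prefix("outside")                                   (longer than max_chars)
//   intervals("out")     -> Intervals.fixField("field._index_prefix", Intervals.term("out"))
//   intervals("o")       -> Intervals.or(Intervals.fixField("field._index_prefix",
//                               Intervals.wildcard("o?")), Intervals.term("o"))           (padded up to min_chars)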

@Override
public PrefixFieldType clone() {
return new PrefixFieldType(parentField, name(), minChars, maxChars);
@@ -631,10 +644,21 @@ public Query existsQuery(QueryShardContext context) {
}

@Override
public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
public IntervalsSource intervals(String text, int maxGaps, boolean ordered,
NamedAnalyzer analyzer, boolean prefix) throws IOException {
if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
if (analyzer == null) {
analyzer = searchAnalyzer();
}
if (prefix) {
BytesRef normalizedTerm = analyzer.normalize(name(), text);
if (prefixFieldType != null) {
return prefixFieldType.intervals(normalizedTerm);
}
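// With no index_prefixes sub-field configured, fall back to expanding the prefix against the
// main field; this expansion is capped (the 128-term limit described in the docs change above).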
return Intervals.prefix(normalizedTerm.utf8ToString()); // TODO make Intervals.prefix() take a BytesRef
}
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
return builder.analyzeText(text, maxGaps, ordered);
}
@@ -78,9 +78,11 @@ public static IntervalsSourceProvider fromXContent(XContentParser parser) throws
return Disjunction.fromXContent(parser);
case "all_of":
return Combine.fromXContent(parser);
case "prefix":
return Prefix.fromXContent(parser);
}
throw new ParsingException(parser.getTokenLocation(),
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of]");
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of, prefix]");
}

private static IntervalsSourceProvider parseInnerIntervals(XContentParser parser) throws IOException {
@@ -138,10 +140,10 @@ public IntervalsSource getSource(QueryShardContext context, MappedFieldType fiel
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false));
}
else {
source = fieldType.intervals(query, maxGaps, ordered, analyzer);
source = fieldType.intervals(query, maxGaps, ordered, analyzer, false);
}
if (filter != null) {
return filter.filter(source, context, fieldType);
@@ -440,6 +442,109 @@ public static Combine fromXContent(XContentParser parser) {
}
}

public static class Prefix extends IntervalsSourceProvider {

public static final String NAME = "prefix";

private final String term;
private final String analyzer;
private final String useField;

public Prefix(String term, String analyzer, String useField) {
this.term = term;
this.analyzer = analyzer;
this.useField = useField;
}

public Prefix(StreamInput in) throws IOException {
this.term = in.readString();
this.analyzer = in.readOptionalString();
this.useField = in.readOptionalString();
}

@Override
public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException {
NamedAnalyzer analyzer = null;
if (this.analyzer != null) {
analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
}
IntervalsSource source;
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(term, 0, false, analyzer, true));
}
else {
source = fieldType.intervals(term, 0, false, analyzer, true);
}
return source;
}

@Override
public void extractFields(Set<String> fields) {
if (useField != null) {
fields.add(useField);
}
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Prefix prefix = (Prefix) o;
return Objects.equals(term, prefix.term) &&
Objects.equals(analyzer, prefix.analyzer) &&
Objects.equals(useField, prefix.useField);
}

@Override
public int hashCode() {
return Objects.hash(term, analyzer, useField);
}

@Override
public String getWriteableName() {
return NAME;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(term);
out.writeOptionalString(analyzer);
out.writeOptionalString(useField);
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
builder.field("term", term);
if (analyzer != null) {
builder.field("analyzer", analyzer);
}
if (useField != null) {
builder.field("use_field", useField);
}
builder.endObject();
return builder;
}

private static final ConstructingObjectParser<Prefix, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
String term = (String) args[0];
String analyzer = (String) args[1];
String useField = (String) args[2];
return new Prefix(term, analyzer, useField);
});
static {
PARSER.declareString(constructorArg(), new ParseField("term"));
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
}

public static Prefix fromXContent(XContentParser parser) throws IOException {
return PARSER.parse(parser, null);
}
}

static class ScriptFilterSource extends FilteredIntervalsSource {

final IntervalFilterScript script;
@@ -792,6 +792,8 @@ private void registerIntervalsSourceProviders() {
IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new));
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new));
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
IntervalsSourceProvider.Prefix.NAME, IntervalsSourceProvider.Prefix::new));
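// Registering Prefix as a NamedWriteable (like Combine and Disjunction above) is what lets the
// new source be deserialized on remote nodes via its StreamInput constructor and writeTo method.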
}

private void registerQuery(QuerySpec<?> spec) {
@@ -59,6 +59,7 @@ protected IntervalQueryBuilder doCreateTestQueryBuilder() {

private static final String MASKED_FIELD = "masked_field";
private static final String NO_POSITIONS_FIELD = "no_positions_field";
private static final String PREFIXED_FIELD = "prefixed_field";

@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
@@ -70,6 +71,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws
.field("type", "text")
.field("index_options", "freqs")
.endObject()
.startObject(PREFIXED_FIELD)
.field("type", "text")
.startObject("index_prefixes").endObject()
.endObject()
.endObject().endObject().endObject();

mapperService.merge("_doc",
@@ -385,4 +390,35 @@ public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryTyp

}

public void testPrefixes() throws IOException {

String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.prefix("term"));
assertEquals(expected, builder.toQuery(createShardContext()));

String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_json);
builder1.toQuery(createShardContext());
});

String prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(prefix_json);
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term")));
assertEquals(expected, builder.toQuery(createShardContext()));

String short_prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"t\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.or(
Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard("t?")),
Intervals.term("t")));
assertEquals(expected, builder.toQuery(createShardContext()));

}

}