Skip to content

Commit 25750a3

Browse files
authored
Make intervals queries fully pluggable through field mappers. (#71429)
Today, `MappedFieldType` only lets a field type customize how the intervals sources for `match` and `prefix` rules are created. This change makes it possible to configure how the intervals sources for `wildcard` and `fuzzy` rules are created as well, which will allow the upcoming `match_only_text` field to fully support intervals queries.
1 parent 4f36d38 commit 25750a3

File tree

7 files changed

+126
-79
lines changed

7 files changed

+126
-79
lines changed

plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,9 @@
88

99
package org.elasticsearch.index.mapper.annotatedtext;
1010

11-
import org.apache.lucene.analysis.standard.StandardAnalyzer;
1211
import org.apache.lucene.queries.intervals.Intervals;
1312
import org.apache.lucene.queries.intervals.IntervalsSource;
14-
import org.elasticsearch.index.analysis.AnalyzerScope;
15-
import org.elasticsearch.index.analysis.NamedAnalyzer;
13+
import org.apache.lucene.util.BytesRef;
1614
import org.elasticsearch.index.mapper.ContentPath;
1715
import org.elasticsearch.index.mapper.FieldTypeTestCase;
1816
import org.elasticsearch.index.mapper.MappedFieldType;
@@ -25,9 +23,8 @@ public class AnnotatedTextFieldTypeTests extends FieldTypeTestCase {
2523

2624
public void testIntervals() throws IOException {
2725
MappedFieldType ft = new AnnotatedTextFieldMapper.AnnotatedTextFieldType("field", Collections.emptyMap());
28-
NamedAnalyzer a = new NamedAnalyzer("name", AnalyzerScope.INDEX, new StandardAnalyzer());
29-
IntervalsSource source = ft.intervals("Donald Trump", 0, true, a, false);
30-
assertEquals(Intervals.phrase(Intervals.term("donald"), Intervals.term("trump")), source);
26+
IntervalsSource source = ft.termIntervals(new BytesRef("donald"), null);
27+
assertEquals(Intervals.term("donald"), source);
3128
}
3229

3330
public void testFetchSourceValue() throws IOException {

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import org.elasticsearch.common.geo.ShapeRelation;
3333
import org.elasticsearch.common.time.DateMathParser;
3434
import org.elasticsearch.common.unit.Fuzziness;
35-
import org.elasticsearch.index.analysis.NamedAnalyzer;
3635
import org.elasticsearch.index.fielddata.IndexFieldData;
3736
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
3837
import org.elasticsearch.index.query.QueryRewriteContext;
@@ -279,10 +278,34 @@ public Query distanceFeatureQuery(Object origin, String pivot, SearchExecutionCo
279278
}
280279

281280
/**
282-
* Create an {@link IntervalsSource} to be used for proximity queries
281+
* Create an {@link IntervalsSource} for the given term.
283282
*/
284-
public IntervalsSource intervals(String query, int max_gaps, boolean ordered,
285-
NamedAnalyzer analyzer, boolean prefix) throws IOException {
283+
public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) {
284+
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
285+
+ "] which is of type [" + typeName() + "]");
286+
}
287+
288+
/**
289+
* Create an {@link IntervalsSource} for the given prefix.
290+
*/
291+
public IntervalsSource prefixIntervals(BytesRef prefix, SearchExecutionContext context) {
292+
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
293+
+ "] which is of type [" + typeName() + "]");
294+
}
295+
296+
/**
297+
* Create a fuzzy {@link IntervalsSource} for the given term.
298+
*/
299+
public IntervalsSource fuzzyIntervals(String term, int maxDistance, int prefixLength,
300+
boolean transpositions, SearchExecutionContext context) {
301+
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
302+
+ "] which is of type [" + typeName() + "]");
303+
}
304+
305+
/**
306+
* Create a wildcard {@link IntervalsSource} for the given pattern.
307+
*/
308+
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
286309
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
287310
+ "] which is of type [" + typeName() + "]");
288311
}

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.lucene.search.BooleanClause;
2929
import org.apache.lucene.search.BooleanQuery;
3030
import org.apache.lucene.search.ConstantScoreQuery;
31+
import org.apache.lucene.search.FuzzyQuery;
3132
import org.apache.lucene.search.MultiPhraseQuery;
3233
import org.apache.lucene.search.MultiTermQuery;
3334
import org.apache.lucene.search.PhraseQuery;
@@ -58,7 +59,6 @@
5859
import org.elasticsearch.index.fielddata.IndexFieldData;
5960
import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
6061
import org.elasticsearch.index.mapper.Mapper.TypeParser.ParserContext;
61-
import org.elasticsearch.index.query.IntervalBuilder;
6262
import org.elasticsearch.index.query.SearchExecutionContext;
6363
import org.elasticsearch.index.similarity.SimilarityProvider;
6464
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
@@ -676,23 +676,44 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew
676676
}
677677

678678
@Override
679-
public IntervalsSource intervals(String text, int maxGaps, boolean ordered,
680-
NamedAnalyzer analyzer, boolean prefix) throws IOException {
679+
public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) {
681680
if (getTextSearchInfo().hasPositions() == false) {
682681
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
683682
}
684-
if (analyzer == null) {
685-
analyzer = getTextSearchInfo().getSearchAnalyzer();
683+
return Intervals.term(term);
684+
}
685+
686+
@Override
687+
public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) {
688+
if (getTextSearchInfo().hasPositions() == false) {
689+
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
686690
}
687-
if (prefix) {
688-
BytesRef normalizedTerm = analyzer.normalize(name(), text);
689-
if (prefixFieldType != null) {
690-
return prefixFieldType.intervals(normalizedTerm);
691-
}
692-
return Intervals.prefix(normalizedTerm);
691+
if (prefixFieldType != null) {
692+
return prefixFieldType.intervals(term);
693+
}
694+
return Intervals.prefix(term);
695+
}
696+
697+
@Override
698+
public IntervalsSource fuzzyIntervals(String term, int maxDistance, int prefixLength,
699+
boolean transpositions, SearchExecutionContext context) {
700+
if (getTextSearchInfo().hasPositions() == false) {
701+
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
702+
}
703+
FuzzyQuery fq = new FuzzyQuery(new Term(name(), term),
704+
maxDistance, prefixLength, 128, transpositions);
705+
return Intervals.multiterm(fq.getAutomata(), term);
706+
}
707+
708+
@Override
709+
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
710+
if (getTextSearchInfo().hasPositions() == false) {
711+
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
712+
}
713+
if (prefixFieldType != null) {
714+
return prefixFieldType.intervals(pattern);
693715
}
694-
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? getTextSearchInfo().getSearchAnalyzer() : analyzer);
695-
return builder.analyzeText(text, maxGaps, ordered);
716+
return Intervals.wildcard(pattern);
696717
}
697718

698719
@Override

server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
/**
3535
* Constructs an IntervalsSource based on analyzed text
3636
*/
37-
public class IntervalBuilder {
37+
public abstract class IntervalBuilder {
3838

3939
private final String field;
4040
private final Analyzer analyzer;
@@ -44,6 +44,9 @@ public IntervalBuilder(String field, Analyzer analyzer) {
4444
this.analyzer = analyzer;
4545
}
4646

47+
/** Create term intervals for the provided term. */
48+
protected abstract IntervalsSource termIntervals(BytesRef term);
49+
4750
public IntervalsSource analyzeText(String query, int maxGaps, boolean ordered) throws IOException {
4851
try (TokenStream ts = analyzer.tokenStream(field, query);
4952
CachingTokenFilter stream = new CachingTokenFilter(ts)) {
@@ -109,7 +112,7 @@ protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException {
109112
TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
110113
ts.reset();
111114
ts.incrementToken();
112-
return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
115+
return termIntervals(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
113116
}
114117

115118
protected static IntervalsSource combineSources(List<IntervalsSource> sources, int maxGaps, boolean ordered) {
@@ -138,7 +141,7 @@ protected List<IntervalsSource> analyzeTerms(TokenStream ts) throws IOException
138141
while (ts.incrementToken()) {
139142
BytesRef term = bytesAtt.getBytesRef();
140143
int precedingSpaces = posAtt.getPositionIncrement() - 1;
141-
terms.add(extend(Intervals.term(BytesRef.deepCopyOf(term)), precedingSpaces));
144+
terms.add(extend(termIntervals(BytesRef.deepCopyOf(term)), precedingSpaces));
142145
}
143146
ts.end();
144147
return terms;
@@ -170,7 +173,7 @@ else if (synonyms.size() > 1) {
170173
synonyms.clear();
171174
spaces = posInc - 1;
172175
}
173-
synonyms.add(Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())));
176+
synonyms.add(termIntervals(BytesRef.deepCopyOf(bytesAtt.getBytesRef())));
174177
}
175178
if (synonyms.size() == 1) {
176179
terms.add(extend(synonyms.get(0), spaces));

server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java

Lines changed: 41 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,10 @@
88

99
package org.elasticsearch.index.query;
1010

11-
import org.apache.lucene.index.Term;
1211
import org.apache.lucene.queries.intervals.FilteredIntervalsSource;
1312
import org.apache.lucene.queries.intervals.IntervalIterator;
1413
import org.apache.lucene.queries.intervals.Intervals;
1514
import org.apache.lucene.queries.intervals.IntervalsSource;
16-
import org.apache.lucene.search.FuzzyQuery;
1715
import org.apache.lucene.util.BytesRef;
1816
import org.elasticsearch.Version;
1917
import org.elasticsearch.common.ParseField;
@@ -128,23 +126,36 @@ public Match(StreamInput in) throws IOException {
128126
}
129127
}
130128

129+
private IntervalsSource intervals(MappedFieldType fieldType, String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer,
130+
SearchExecutionContext context) throws IOException {
131+
IntervalBuilder builder = new IntervalBuilder(fieldType.name(), analyzer) {
132+
@Override
133+
protected IntervalsSource termIntervals(BytesRef term) {
134+
return fieldType.termIntervals(term, context);
135+
}
136+
};
137+
return builder.analyzeText(text, maxGaps, ordered);
138+
}
139+
131140
@Override
132141
public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) throws IOException {
133142
NamedAnalyzer analyzer = null;
134143
if (this.analyzer != null) {
135144
analyzer = context.getIndexAnalyzers().get(this.analyzer);
136145
}
137-
IntervalsSource source;
138146
if (useField != null) {
139147
fieldType = context.getFieldType(useField);
140148
assert fieldType != null;
141-
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false));
142149
}
143-
else {
144-
source = fieldType.intervals(query, maxGaps, ordered, analyzer, false);
150+
if (analyzer == null) {
151+
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
152+
}
153+
IntervalsSource source = intervals(fieldType, query, maxGaps, ordered, analyzer, context);
154+
if (useField != null) {
155+
source = Intervals.fixField(useField, source);
145156
}
146157
if (filter != null) {
147-
return filter.filter(source, context, fieldType);
158+
source = filter.filter(source, context, fieldType);
148159
}
149160
return source;
150161
}
@@ -517,14 +528,17 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType
517528
if (this.analyzer != null) {
518529
analyzer = context.getIndexAnalyzers().get(this.analyzer);
519530
}
520-
IntervalsSource source;
521531
if (useField != null) {
522532
fieldType = context.getFieldType(useField);
523533
assert fieldType != null;
524-
source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, false, analyzer, true));
525534
}
526-
else {
527-
source = fieldType.intervals(prefix, 0, false, analyzer, true);
535+
if (analyzer == null) {
536+
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
537+
}
538+
final BytesRef prefixTerm = analyzer.normalize(fieldType.name(), prefix);
539+
IntervalsSource source = fieldType.prefixIntervals(prefixTerm, context);
540+
if (useField != null) {
541+
source = Intervals.fixField(useField, source);
528542
}
529543
return source;
530544
}
@@ -628,33 +642,23 @@ public Wildcard(StreamInput in) throws IOException {
628642

629643
@Override
630644
public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) {
631-
NamedAnalyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
645+
NamedAnalyzer analyzer = null;
632646
if (this.analyzer != null) {
633647
analyzer = context.getIndexAnalyzers().get(this.analyzer);
634648
}
635-
IntervalsSource source;
636649
if (useField != null) {
637650
fieldType = context.getFieldType(useField);
638651
assert fieldType != null;
639-
checkPositions(fieldType);
640-
if (this.analyzer == null) {
641-
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
642-
}
643-
BytesRef normalizedTerm = analyzer.normalize(useField, pattern);
644-
source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm));
645652
}
646-
else {
647-
checkPositions(fieldType);
648-
BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern);
649-
source = Intervals.wildcard(normalizedTerm);
653+
if (analyzer == null) {
654+
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
650655
}
651-
return source;
652-
}
653-
654-
private void checkPositions(MappedFieldType type) {
655-
if (type.getTextSearchInfo().hasPositions() == false) {
656-
throw new IllegalArgumentException("Cannot create intervals over field [" + type.name() + "] with no positions indexed");
656+
BytesRef normalizedPattern = analyzer.normalize(fieldType.name(), pattern);
657+
IntervalsSource source = fieldType.wildcardIntervals(normalizedPattern, context);
658+
if (useField != null) {
659+
source = Intervals.fixField(useField, source);
657660
}
661+
return source;
658662
}
659663

660664
@Override
@@ -765,36 +769,27 @@ public Fuzzy(StreamInput in) throws IOException {
765769

766770
@Override
767771
public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) {
768-
NamedAnalyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
772+
NamedAnalyzer analyzer = null;
769773
if (this.analyzer != null) {
770774
analyzer = context.getIndexAnalyzers().get(this.analyzer);
771775
}
772-
IntervalsSource source;
773776
if (useField != null) {
774777
fieldType = context.getFieldType(useField);
775778
assert fieldType != null;
776-
checkPositions(fieldType);
777-
if (this.analyzer == null) {
778-
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
779-
}
780779
}
781-
checkPositions(fieldType);
782-
BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), term);
783-
FuzzyQuery fq = new FuzzyQuery(new Term(fieldType.name(), normalizedTerm),
784-
fuzziness.asDistance(term), prefixLength, 128, transpositions);
785-
source = Intervals.multiterm(fq.getAutomata(), term);
780+
if (analyzer == null) {
781+
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
782+
}
783+
// Fuzzy queries only work with unicode content so it's legal to call utf8ToString here.
784+
String normalizedTerm = analyzer.normalize(fieldType.name(), term).utf8ToString();
785+
IntervalsSource source = fieldType.fuzzyIntervals(normalizedTerm, fuzziness.asDistance(term),
786+
prefixLength, transpositions, context);
786787
if (useField != null) {
787788
source = Intervals.fixField(useField, source);
788789
}
789790
return source;
790791
}
791792

792-
private void checkPositions(MappedFieldType type) {
793-
if (type.getTextSearchInfo().hasPositions() == false) {
794-
throw new IllegalArgumentException("Cannot create intervals over field [" + type.name() + "] with no positions indexed");
795-
}
796-
}
797-
798793
@Override
799794
public void extractFields(Set<String> fields) {
800795
if (useField != null) {

server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,21 @@
1414
import org.apache.lucene.analysis.standard.StandardAnalyzer;
1515
import org.apache.lucene.queries.intervals.Intervals;
1616
import org.apache.lucene.queries.intervals.IntervalsSource;
17+
import org.apache.lucene.util.BytesRef;
1718
import org.elasticsearch.test.ESTestCase;
1819

1920
import java.io.IOException;
2021

2122
public class IntervalBuilderTests extends ESTestCase {
2223

23-
private static final IntervalBuilder BUILDER = new IntervalBuilder("field1", new StandardAnalyzer());
24+
private static final IntervalBuilder BUILDER = new IntervalBuilder("field1", new StandardAnalyzer()) {
25+
26+
@Override
27+
protected IntervalsSource termIntervals(BytesRef term) {
28+
return Intervals.term(term);
29+
}
30+
31+
};
2432

2533
public void testSimpleTerm() throws IOException {
2634

0 commit comments

Comments
 (0)