Skip to content

Commit cea93d1

Browse files
authored
Don't expose TextFieldMapper subfields (#64597)
TextFieldMapper can optionally index data into subfields for accelerated prefix and phrase queries. Currently, these subfields are implemented as FieldMappers in their own right, made available via TextFieldMapper's iterator() method, each with its own standalone MappedFieldType object. This has the disadvantage that the subfields are directly available for searching and appear in APIs such as field caps. In addition, because exists queries are not implemented on them, an exists query against an object that contains a text field with one of the subfields enabled can throw an error (see #63585).

This commit reworks the subfields so that they are no longer implemented as FieldMappers and are no longer exposed to classes outside TextFieldMapper, either as MappedFieldTypes or as FieldMappers. The parent TextFieldMapper handles indexing and analyzer registration; PhraseFieldType is removed entirely; and PrefixFieldType is retained as a private implementation for fast prefix queries but is unavailable for querying directly.

Fixes #63585
Closes #63446
1 parent 33a38d4 commit cea93d1

File tree

4 files changed

+107
-175
lines changed

4 files changed

+107
-175
lines changed

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

+51-143
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@
5757
import org.apache.lucene.util.automaton.Automaton;
5858
import org.apache.lucene.util.automaton.Operations;
5959
import org.elasticsearch.Version;
60-
import org.elasticsearch.common.collect.Iterators;
6160
import org.elasticsearch.common.lucene.Lucene;
6261
import org.elasticsearch.common.lucene.search.AutomatonQueries;
6362
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
@@ -80,7 +79,6 @@
8079
import java.util.Arrays;
8180
import java.util.Collections;
8281
import java.util.HashMap;
83-
import java.util.Iterator;
8482
import java.util.List;
8583
import java.util.Map;
8684
import java.util.Objects;
@@ -92,6 +90,7 @@ public class TextFieldMapper extends FieldMapper {
9290

9391
public static final String CONTENT_TYPE = "text";
9492
private static final String FAST_PHRASE_SUFFIX = "._index_phrase";
93+
private static final String FAST_PREFIX_SUFFIX = "._index_prefix";
9594

9695
public static class Defaults {
9796
public static final double FIELDDATA_MIN_FREQUENCY = 0;
@@ -330,7 +329,7 @@ private TextFieldType buildFieldType(FieldType fieldType, ContentPath contentPat
330329
return ft;
331330
}
332331

333-
private PrefixFieldMapper buildPrefixMapper(ContentPath contentPath, FieldType fieldType, TextFieldType tft) {
332+
private SubFieldInfo buildPrefixInfo(ContentPath contentPath, FieldType fieldType, TextFieldType tft) {
334333
if (indexPrefixes.get() == null) {
335334
return null;
336335
}
@@ -358,16 +357,15 @@ private PrefixFieldMapper buildPrefixMapper(ContentPath contentPath, FieldType f
358357
if (fieldType.storeTermVectorOffsets()) {
359358
pft.setStoreTermVectorOffsets(true);
360359
}
361-
PrefixFieldType prefixFieldType = new PrefixFieldType(tft, fullName + "._index_prefix", indexPrefixes.get());
362-
tft.setPrefixFieldType(prefixFieldType);
363-
return new PrefixFieldMapper(pft, prefixFieldType, new PrefixWrappedAnalyzer(
360+
tft.setIndexPrefixes(indexPrefixes.get().minChars, indexPrefixes.get().maxChars);
361+
return new SubFieldInfo(fullName + "._index_prefix", pft, new PrefixWrappedAnalyzer(
364362
analyzers.getIndexAnalyzer().analyzer(),
365363
analyzers.positionIncrementGap.get(),
366-
prefixFieldType.minChars,
367-
prefixFieldType.maxChars));
364+
indexPrefixes.get().minChars,
365+
indexPrefixes.get().maxChars));
368366
}
369367

370-
private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) {
368+
private SubFieldInfo buildPhraseInfo(FieldType fieldType, TextFieldType parent) {
371369
if (indexPhrases.get() == false) {
372370
return null;
373371
}
@@ -381,24 +379,24 @@ private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType p
381379
parent.setIndexPhrases();
382380
PhraseWrappedAnalyzer a
383381
= new PhraseWrappedAnalyzer(analyzers.getIndexAnalyzer().analyzer(), analyzers.positionIncrementGap.get());
384-
return new PhraseFieldMapper(phraseFieldType, new PhraseFieldType(parent), a);
382+
return new SubFieldInfo(parent.name() + FAST_PHRASE_SUFFIX, phraseFieldType, a);
385383
}
386384

387385
public Map<String, NamedAnalyzer> indexAnalyzers(String name,
388-
PhraseFieldMapper phraseFieldMapper,
389-
PrefixFieldMapper prefixFieldMapper) {
386+
SubFieldInfo phraseFieldInfo,
387+
SubFieldInfo prefixFieldInfo) {
390388
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
391389
NamedAnalyzer main = this.analyzers.getIndexAnalyzer();
392390
analyzers.put(name, main);
393-
if (phraseFieldMapper != null) {
391+
if (phraseFieldInfo != null) {
394392
analyzers.put(
395-
phraseFieldMapper.name(),
396-
new NamedAnalyzer(main.name() + "_phrase", AnalyzerScope.INDEX, phraseFieldMapper.analyzer));
393+
phraseFieldInfo.field,
394+
new NamedAnalyzer(main.name() + "_phrase", AnalyzerScope.INDEX, phraseFieldInfo.analyzer));
397395
}
398-
if (prefixFieldMapper != null) {
396+
if (prefixFieldInfo != null) {
399397
analyzers.put(
400-
prefixFieldMapper.name(),
401-
new NamedAnalyzer(main.name() + "_prefix", AnalyzerScope.INDEX, prefixFieldMapper.analyzer));
398+
prefixFieldInfo.field,
399+
new NamedAnalyzer(main.name() + "_prefix", AnalyzerScope.INDEX, prefixFieldInfo.analyzer));
402400
}
403401
return analyzers;
404402
}
@@ -407,12 +405,18 @@ public Map<String, NamedAnalyzer> indexAnalyzers(String name,
407405
public TextFieldMapper build(ContentPath contentPath) {
408406
FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors);
409407
TextFieldType tft = buildFieldType(fieldType, contentPath);
410-
PhraseFieldMapper phraseFieldMapper = buildPhraseMapper(fieldType, tft);
411-
PrefixFieldMapper prefixFieldMapper = buildPrefixMapper(contentPath, fieldType, tft);
408+
SubFieldInfo phraseFieldInfo = buildPhraseInfo(fieldType, tft);
409+
SubFieldInfo prefixFieldInfo = buildPrefixInfo(contentPath, fieldType, tft);
410+
MultiFields multiFields = multiFieldsBuilder.build(this, contentPath);
411+
for (Mapper mapper : multiFields) {
412+
if (mapper.name().endsWith(FAST_PHRASE_SUFFIX) || mapper.name().endsWith(FAST_PREFIX_SUFFIX)) {
413+
throw new MapperParsingException("Cannot use reserved field name [" + mapper.name() + "]");
414+
}
415+
}
412416
return new TextFieldMapper(name, fieldType, tft,
413-
indexAnalyzers(tft.name(), phraseFieldMapper, prefixFieldMapper),
414-
prefixFieldMapper, phraseFieldMapper,
415-
multiFieldsBuilder.build(this, contentPath), copyTo.build(), this);
417+
indexAnalyzers(tft.name(), phraseFieldInfo, prefixFieldInfo),
418+
prefixFieldInfo, phraseFieldInfo,
419+
multiFields, copyTo.build(), this);
416420
}
417421
}
418422

@@ -478,55 +482,22 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp
478482
}
479483
}
480484

481-
static final class PhraseFieldType extends StringFieldType {
482-
483-
final TextFieldType parent;
484-
485-
PhraseFieldType(TextFieldType parent) {
486-
super(parent.name() + FAST_PHRASE_SUFFIX, true, false, false, parent.getTextSearchInfo(), Collections.emptyMap());
487-
this.parent = parent;
488-
}
489-
490-
@Override
491-
public String typeName() {
492-
return "phrase";
493-
}
494-
495-
@Override
496-
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
497-
// Because this internal field is modelled as a multi-field, SourceValueFetcher will look up its
498-
// parent field in _source. So we don't need to use the parent field name here.
499-
return SourceValueFetcher.toString(name(), context, format);
500-
}
501-
502-
@Override
503-
public Query existsQuery(QueryShardContext context) {
504-
throw new UnsupportedOperationException();
505-
}
506-
}
507-
508-
static final class PrefixFieldType extends StringFieldType {
485+
private static final class PrefixFieldType extends StringFieldType {
509486

510487
final int minChars;
511488
final int maxChars;
512489
final TextFieldType parentField;
513490

514-
PrefixFieldType(TextFieldType parentField, String name, PrefixConfig config) {
515-
this(parentField, name, config.minChars, config.maxChars);
516-
}
517-
518-
PrefixFieldType(TextFieldType parentField, String name, int minChars, int maxChars) {
519-
super(name, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
491+
PrefixFieldType(TextFieldType parentField, int minChars, int maxChars) {
492+
super(parentField.name() + FAST_PREFIX_SUFFIX, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
520493
this.minChars = minChars;
521494
this.maxChars = maxChars;
522495
this.parentField = parentField;
523496
}
524497

525498
@Override
526499
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
527-
// Because this internal field is modelled as a multi-field, SourceValueFetcher will look up its
528-
// parent field in _source. So we don't need to use the parent field name here.
529-
return SourceValueFetcher.toString(name(), context, format);
500+
throw new UnsupportedOperationException();
530501
}
531502

532503
boolean accept(int length) {
@@ -590,67 +561,18 @@ public Query existsQuery(QueryShardContext context) {
590561
}
591562
}
592563

593-
private static final class PhraseFieldMapper extends FieldMapper {
564+
private static final class SubFieldInfo {
594565

595566
private final Analyzer analyzer;
596567
private final FieldType fieldType;
568+
private final String field;
597569

598-
PhraseFieldMapper(FieldType fieldType, PhraseFieldType mappedFieldType, PhraseWrappedAnalyzer analyzer) {
599-
super(mappedFieldType.name(), mappedFieldType, MultiFields.empty(), CopyTo.empty());
570+
SubFieldInfo(String field, FieldType fieldType, Analyzer analyzer) {
600571
this.fieldType = fieldType;
601572
this.analyzer = analyzer;
573+
this.field = field;
602574
}
603575

604-
@Override
605-
protected void parseCreateField(ParseContext context) {
606-
throw new UnsupportedOperationException();
607-
}
608-
609-
@Override
610-
public Builder getMergeBuilder() {
611-
return null;
612-
}
613-
614-
@Override
615-
protected String contentType() {
616-
return "phrase";
617-
}
618-
}
619-
620-
private static final class PrefixFieldMapper extends FieldMapper {
621-
622-
private final Analyzer analyzer;
623-
private final FieldType fieldType;
624-
625-
protected PrefixFieldMapper(FieldType fieldType, PrefixFieldType mappedFieldType, Analyzer analyzer) {
626-
super(mappedFieldType.name(), mappedFieldType, MultiFields.empty(), CopyTo.empty());
627-
this.analyzer = analyzer;
628-
this.fieldType = fieldType;
629-
}
630-
631-
void addField(ParseContext context, String value) {
632-
context.doc().add(new Field(fieldType().name(), value, fieldType));
633-
}
634-
635-
@Override
636-
protected void parseCreateField(ParseContext context) {
637-
throw new UnsupportedOperationException();
638-
}
639-
640-
@Override
641-
public Builder getMergeBuilder() {
642-
return null;
643-
}
644-
645-
@Override
646-
protected String contentType() {
647-
return "prefix";
648-
}
649-
650-
@Override
651-
public String toString() {
652-
return fieldType().toString();
653-
}
654576
}
655577

656578
public static class TextFieldType extends StringFieldType {
@@ -702,8 +624,8 @@ int fielddataMinSegmentSize() {
702624
return filter.minSegmentSize;
703625
}
704626

705-
void setPrefixFieldType(PrefixFieldType prefixFieldType) {
706-
this.prefixFieldType = prefixFieldType;
627+
void setIndexPrefixes(int minChars, int maxChars) {
628+
this.prefixFieldType = new PrefixFieldType(this, minChars, maxChars);
707629
}
708630

709631
void setIndexPhrases() {
@@ -862,14 +784,14 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S
862784

863785
private final Builder builder;
864786
private final FieldType fieldType;
865-
private final PrefixFieldMapper prefixFieldMapper;
866-
private final PhraseFieldMapper phraseFieldMapper;
787+
private final SubFieldInfo prefixFieldInfo;
788+
private final SubFieldInfo phraseFieldInfo;
867789

868790
protected TextFieldMapper(String simpleName, FieldType fieldType,
869791
TextFieldType mappedFieldType,
870792
Map<String, NamedAnalyzer> indexAnalyzers,
871-
PrefixFieldMapper prefixFieldMapper,
872-
PhraseFieldMapper phraseFieldMapper,
793+
SubFieldInfo prefixFieldInfo,
794+
SubFieldInfo phraseFieldInfo,
873795
MultiFields multiFields, CopyTo copyTo, Builder builder) {
874796
super(simpleName, mappedFieldType, indexAnalyzers, multiFields, copyTo);
875797
assert mappedFieldType.getTextSearchInfo().isTokenized();
@@ -878,8 +800,8 @@ protected TextFieldMapper(String simpleName, FieldType fieldType,
878800
throw new IllegalArgumentException("Cannot enable fielddata on a [text] field that is not indexed: [" + name() + "]");
879801
}
880802
this.fieldType = fieldType;
881-
this.prefixFieldMapper = prefixFieldMapper;
882-
this.phraseFieldMapper = phraseFieldMapper;
803+
this.prefixFieldInfo = prefixFieldInfo;
804+
this.phraseFieldInfo = phraseFieldInfo;
883805
this.builder = builder;
884806
}
885807

@@ -907,30 +829,15 @@ protected void parseCreateField(ParseContext context) throws IOException {
907829
if (fieldType.omitNorms()) {
908830
createFieldNamesField(context);
909831
}
910-
if (prefixFieldMapper != null) {
911-
prefixFieldMapper.addField(context, value);
832+
if (prefixFieldInfo != null) {
833+
context.doc().add(new Field(prefixFieldInfo.field, value, prefixFieldInfo.fieldType));
912834
}
913-
if (phraseFieldMapper != null) {
914-
context.doc().add(new Field(phraseFieldMapper.fieldType().name(), value, phraseFieldMapper.fieldType));
835+
if (phraseFieldInfo != null) {
836+
context.doc().add(new Field(phraseFieldInfo.field, value, phraseFieldInfo.fieldType));
915837
}
916838
}
917839
}
918840

919-
@Override
920-
public Iterator<Mapper> iterator() {
921-
List<Mapper> subIterators = new ArrayList<>();
922-
if (prefixFieldMapper != null) {
923-
subIterators.add(prefixFieldMapper);
924-
}
925-
if (phraseFieldMapper != null) {
926-
subIterators.add(phraseFieldMapper);
927-
}
928-
if (subIterators.size() == 0) {
929-
return super.iterator();
930-
}
931-
return Iterators.concat(super.iterator(), subIterators.iterator());
932-
}
933-
934841
@Override
935842
protected String contentType() {
936843
return CONTENT_TYPE;
@@ -1014,10 +921,11 @@ public static Query createPhrasePrefixQuery(TokenStream stream, String field, in
1014921
}
1015922

1016923
if (terms.length == 1) {
1017-
Term[] newTerms = Arrays.stream(terms[0])
924+
SynonymQuery.Builder sb = new SynonymQuery.Builder(prefixField);
925+
Arrays.stream(terms[0])
1018926
.map(term -> new Term(prefixField, term.bytes()))
1019-
.toArray(Term[]::new);
1020-
return new SynonymQuery(newTerms);
927+
.forEach(sb::addTerm);
928+
return sb.build();
1021929
}
1022930

1023931
SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(field, true);

0 commit comments

Comments
 (0)