Skip to content

Commit 2d3c0db

Browse files
committed
Call setReferences() on custom referring tokenfilters in _analyze (#32157)
When building custom tokenfilters without an index in the _analyze endpoint, we need to ensure that referring filters are correctly built by calling their #setReferences() method. Fixes #32154
1 parent 54322a4 commit 2d3c0db

File tree

2 files changed

+40
-1
lines changed

2 files changed

+40
-1
lines changed

modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml

+15
Original file line numberDiff line numberDiff line change
@@ -1573,3 +1573,18 @@
15731573
filter: [my_bengali_stem]
15741574
- length: { tokens: 1 }
15751575
- match: { tokens.0.token: কর }
1576+
1577+
---
1578+
"multiplexer":
1579+
- do:
1580+
indices.analyze:
1581+
body:
1582+
text: "The quick fox"
1583+
tokenizer: "standard"
1584+
filter:
1585+
- type: multiplexer
1586+
filters: [ lowercase, uppercase ]
1587+
preserve_original: false
1588+
- length: { tokens: 6 }
1589+
- match: { tokens.0.token: the }
1590+
- match: { tokens.1.token: THE }

server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java

+25-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.elasticsearch.index.analysis.IndexAnalyzers;
5353
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
5454
import org.elasticsearch.index.analysis.NamedAnalyzer;
55+
import org.elasticsearch.index.analysis.ReferringFilterFactory;
5556
import org.elasticsearch.index.analysis.TokenFilterFactory;
5657
import org.elasticsearch.index.analysis.TokenizerFactory;
5758
import org.elasticsearch.index.mapper.AllFieldMapper;
@@ -553,6 +554,7 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
553554
Environment environment, Tuple<String, TokenizerFactory> tokenizerFactory,
554555
List<CharFilterFactory> charFilterFactoryList, boolean normalizer) throws IOException {
555556
List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>();
557+
List<ReferringFilterFactory> referringFilters = new ArrayList<>();
556558
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
557559
List<AnalyzeRequest.NameOrDefinition> tokenFilters = request.tokenFilters();
558560
for (AnalyzeRequest.NameOrDefinition tokenFilter : tokenFilters) {
@@ -573,7 +575,9 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
573575
tokenFilterFactory = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter", settings);
574576
tokenFilterFactory = CustomAnalyzerProvider.checkAndApplySynonymFilter(tokenFilterFactory, tokenizerFactory.v1(), tokenizerFactory.v2(), tokenFilterFactoryList,
575577
charFilterFactoryList, environment);
576-
578+
if (tokenFilterFactory instanceof ReferringFilterFactory) {
579+
referringFilters.add((ReferringFilterFactory)tokenFilterFactory);
580+
}
577581

578582
} else {
579583
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
@@ -608,6 +612,26 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
608612
tokenFilterFactoryList.add(tokenFilterFactory);
609613
}
610614
}
615+
if (referringFilters.isEmpty() == false) {
616+
// The request included at least one custom referring tokenfilter that has not already been built by the
617+
// analysis registry, so we need to set its references. Note that this will only apply pre-built
618+
// tokenfilters
619+
if (indexSettings == null) {
620+
Settings settings = Settings.builder()
621+
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
622+
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
623+
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
624+
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
625+
.build();
626+
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
627+
indexSettings = new IndexSettings(metaData, Settings.EMPTY);
628+
}
629+
Map<String, TokenFilterFactory> prebuiltFilters = analysisRegistry.buildTokenFilterFactories(indexSettings);
630+
for (ReferringFilterFactory rff : referringFilters) {
631+
rff.setReferences(prebuiltFilters);
632+
}
633+
634+
}
611635
return tokenFilterFactoryList;
612636
}
613637

0 commit comments

Comments
 (0)