Skip to content

Commit cfb3014

Browse files
authored
Call setReferences() on custom referring tokenfilters in _analyze (#32157)
When building custom tokenfilters without an index in the _analyze endpoint, we need to ensure that referring filters are correctly built by calling their #setReferences() method. Fixes #32154
1 parent 6de1f96 commit cfb3014

File tree

2 files changed

+40
-1
lines changed

2 files changed

+40
-1
lines changed

modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml

+15
Original file line numberDiff line numberDiff line change
@@ -1557,3 +1557,18 @@
15571557
filter: [my_bengali_stem]
15581558
- length: { tokens: 1 }
15591559
- match: { tokens.0.token: কর }
1560+
1561+
---
1562+
"multiplexer":
1563+
- do:
1564+
indices.analyze:
1565+
body:
1566+
text: "The quick fox"
1567+
tokenizer: "standard"
1568+
filter:
1569+
- type: multiplexer
1570+
filters: [ lowercase, uppercase ]
1571+
preserve_original: false
1572+
- length: { tokens: 6 }
1573+
- match: { tokens.0.token: the }
1574+
- match: { tokens.1.token: THE }

server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java

+25-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.elasticsearch.index.analysis.IndexAnalyzers;
5353
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
5454
import org.elasticsearch.index.analysis.NamedAnalyzer;
55+
import org.elasticsearch.index.analysis.ReferringFilterFactory;
5556
import org.elasticsearch.index.analysis.TokenFilterFactory;
5657
import org.elasticsearch.index.analysis.TokenizerFactory;
5758
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -574,6 +575,7 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
574575
Environment environment, Tuple<String, TokenizerFactory> tokenizerFactory,
575576
List<CharFilterFactory> charFilterFactoryList, boolean normalizer) throws IOException {
576577
List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>();
578+
List<ReferringFilterFactory> referringFilters = new ArrayList<>();
577579
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
578580
List<AnalyzeRequest.NameOrDefinition> tokenFilters = request.tokenFilters();
579581
for (AnalyzeRequest.NameOrDefinition tokenFilter : tokenFilters) {
@@ -594,7 +596,9 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
594596
tokenFilterFactory = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter", settings);
595597
tokenFilterFactory = CustomAnalyzerProvider.checkAndApplySynonymFilter(tokenFilterFactory, tokenizerFactory.v1(), tokenizerFactory.v2(), tokenFilterFactoryList,
596598
charFilterFactoryList, environment);
597-
599+
if (tokenFilterFactory instanceof ReferringFilterFactory) {
600+
referringFilters.add((ReferringFilterFactory)tokenFilterFactory);
601+
}
598602

599603
} else {
600604
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
@@ -629,6 +633,26 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
629633
tokenFilterFactoryList.add(tokenFilterFactory);
630634
}
631635
}
636+
if (referringFilters.isEmpty() == false) {
637+
// The request included at least one custom referring tokenfilter that has not already been built by the
638+
// analysis registry, so we need to set its references. Note that this will only apply pre-built
639+
// tokenfilters
640+
if (indexSettings == null) {
641+
Settings settings = Settings.builder()
642+
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
643+
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
644+
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
645+
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
646+
.build();
647+
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
648+
indexSettings = new IndexSettings(metaData, Settings.EMPTY);
649+
}
650+
Map<String, TokenFilterFactory> prebuiltFilters = analysisRegistry.buildTokenFilterFactories(indexSettings);
651+
for (ReferringFilterFactory rff : referringFilters) {
652+
rff.setReferences(prebuiltFilters);
653+
}
654+
655+
}
632656
return tokenFilterFactoryList;
633657
}
634658

0 commit comments

Comments
 (0)