Skip to content

Commit 375cf49

Browse files
committed
Fix caching for PreConfiguredTokenFilter (#50912)
The PreConfiguredTokenFilter#singletonWithVersion uses the version internally for the token filter factories, but it registers only one instance in the cache and not one instance per version. This can lead to exceptions like the one described in #50734, since the singleton is created and cached using the created-version of the first index that is processed. Remove the singletonWithVersion() methods and use the elasticsearchVersion() methods instead. Fixes: #50734 (cherry picked from commit 24e1858)
1 parent 53909e6 commit 375cf49

File tree

4 files changed

+146
-25
lines changed

4 files changed

+146
-25
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactorie
378378
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
379379
List<PreConfiguredCharFilter> filters = new ArrayList<>();
380380
filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
381-
filters.add(PreConfiguredCharFilter.singletonWithVersion("htmlStrip", false, (reader, version) -> {
381+
filters.add(PreConfiguredCharFilter.elasticsearchVersion("htmlStrip", false, (reader, version) -> {
382382
if (version.onOrAfter(org.elasticsearch.Version.V_6_3_0)) {
383383
deprecationLogger.deprecatedAndMaybeLog("htmlStrip_deprecation",
384384
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
@@ -405,7 +405,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
405405
input -> new CommonGramsFilter(input, CharArraySet.EMPTY_SET)));
406406
filters.add(PreConfiguredTokenFilter.singleton("czech_stem", false, CzechStemFilter::new));
407407
filters.add(PreConfiguredTokenFilter.singleton("decimal_digit", true, DecimalDigitFilter::new));
408-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("delimited_payload_filter", false, (input, version) -> {
408+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("delimited_payload_filter", false, (input, version) -> {
409409
if (version.onOrAfter(Version.V_7_0_0)) {
410410
throw new IllegalArgumentException(
411411
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead");
@@ -424,7 +424,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
424424
filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer())));
425425
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input ->
426426
new EdgeNGramTokenFilter(input, 1)));
427-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, false, (reader, version) -> {
427+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("edgeNGram", false, false, (reader, version) -> {
428428
if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
429429
throw new IllegalArgumentException(
430430
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
@@ -452,7 +452,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
452452
LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT,
453453
LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
454454
filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false)));
455-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, false, (reader, version) -> {
455+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("nGram", false, false, (reader, version) -> {
456456
if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
457457
throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
458458
+ "Please change the filter name to [ngram] instead.");
@@ -498,7 +498,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
498498
| WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
499499
| WordDelimiterFilter.SPLIT_ON_NUMERICS
500500
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null)));
501-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("word_delimiter_graph", false, false, (input, version) -> {
501+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("word_delimiter_graph", false, false, (input, version) -> {
502502
boolean adjustOffsets = version.onOrAfter(Version.V_7_3_0);
503503
return new WordDelimiterGraphFilter(input, adjustOffsets, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
504504
WordDelimiterGraphFilter.GENERATE_WORD_PARTS

server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -57,25 +57,6 @@ public static PreConfiguredTokenFilter singleton(String name, boolean useFilterF
5757
(tokenStream, version) -> create.apply(tokenStream));
5858
}
5959

60-
/**
61-
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
62-
*/
63-
public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
64-
BiFunction<TokenStream, Version, TokenStream> create) {
65-
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, true, CachingStrategy.ONE,
66-
(tokenStream, version) -> create.apply(tokenStream, version));
67-
}
68-
69-
/**
70-
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
71-
*/
72-
public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
73-
boolean useFilterForParsingSynonyms,
74-
BiFunction<TokenStream, Version, TokenStream> create) {
75-
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, useFilterForParsingSynonyms, CachingStrategy.ONE,
76-
(tokenStream, version) -> create.apply(tokenStream, version));
77-
}
78-
7960
/**
8061
* Create a pre-configured token filter that may vary based on the Lucene version.
8162
*/
@@ -93,6 +74,16 @@ public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean
9374
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, true, CachingStrategy.ELASTICSEARCH, create);
9475
}
9576

77+
/**
78+
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
79+
*/
80+
public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean useFilterForMultitermQueries,
81+
boolean useFilterForParsingSynonyms,
82+
BiFunction<TokenStream, Version, TokenStream> create) {
83+
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, useFilterForParsingSynonyms,
84+
CachingStrategy.ELASTICSEARCH, create);
85+
}
86+
9687
private final boolean useFilterForMultitermQueries;
9788
private final boolean allowForSynonymParsing;
9889
private final BiFunction<TokenStream, Version, TokenStream> create;

server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List
181181
preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new));
182182
// Add "standard" for old indices (bwc)
183183
preConfiguredTokenFilters.register( "standard",
184-
PreConfiguredTokenFilter.singletonWithVersion("standard", true, (reader, version) -> {
184+
PreConfiguredTokenFilter.elasticsearchVersion("standard", true, (reader, version) -> {
185185
if (version.before(Version.V_7_0_0)) {
186186
deprecationLogger.deprecatedAndMaybeLog("standard_deprecation",
187187
"The [standard] token filter is deprecated and will be removed in a future version.");
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.index.analysis;
20+
21+
import org.apache.lucene.analysis.TokenFilter;
22+
import org.elasticsearch.Version;
23+
import org.elasticsearch.cluster.metadata.IndexMetaData;
24+
import org.elasticsearch.common.settings.Settings;
25+
import org.elasticsearch.env.Environment;
26+
import org.elasticsearch.env.TestEnvironment;
27+
import org.elasticsearch.index.IndexSettings;
28+
import org.elasticsearch.test.ESTestCase;
29+
import org.elasticsearch.test.IndexSettingsModule;
30+
import org.elasticsearch.test.VersionUtils;
31+
32+
import java.io.IOException;
33+
34+
/**
 * Tests that {@link PreConfiguredTokenFilter} caches token filter factories with the
 * granularity implied by its {@code CachingStrategy}: one shared instance for
 * {@code singleton}, one instance per Elasticsearch version for
 * {@code elasticsearchVersion}, and one instance per Lucene version for
 * {@code luceneVersion}. Regression test for the caching bug in issue #50734.
 */
public class PreConfiguredTokenFilterTests extends ESTestCase {

    // Minimal node settings; PATH_HOME is required to build an Environment.
    private final Settings emptyNodeSettings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();

    /**
     * A {@code singleton} pre-configured filter must return the SAME factory instance
     * for every request, even when the requesting indices were created on different
     * Elasticsearch versions.
     */
    public void testCachingWithSingleton() throws IOException {
        PreConfiguredTokenFilter pctf =
                PreConfiguredTokenFilter.singleton("singleton", randomBoolean(),
                        // Trivial no-op filter body; only the factory identity matters here.
                        (tokenStream) -> new TokenFilter(tokenStream) {
                            @Override
                            public boolean incrementToken() {
                                return false;
                            }
                        });

        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);

        Version version1 = VersionUtils.randomVersion(random());
        Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
                .build();
        // Two lookups with identical settings must hit the cache.
        TokenFilterFactory tff_v1_1 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings1);
        TokenFilterFactory tff_v1_2 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings1);
        assertSame(tff_v1_1, tff_v1_2);

        Version version2 = randomValueOtherThan(version1, () -> randomFrom(VersionUtils.allVersions()));
        Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
                .build();

        // A different index-created version still yields the same singleton instance.
        TokenFilterFactory tff_v2 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings2);
        assertSame(tff_v1_1, tff_v2);
    }

    /**
     * An {@code elasticsearchVersion} pre-configured filter must cache one factory
     * per Elasticsearch index-created version: same version → same instance,
     * different version → different instance. This is the behavior broken by #50734.
     */
    public void testCachingWithElasticsearchVersion() throws IOException {
        PreConfiguredTokenFilter pctf =
                PreConfiguredTokenFilter.elasticsearchVersion("elasticsearch_version", randomBoolean(),
                        (tokenStream, esVersion) -> new TokenFilter(tokenStream) {
                            @Override
                            public boolean incrementToken() {
                                return false;
                            }
                        });

        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);

        Version version1 = VersionUtils.randomVersion(random());
        Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
                .build();
        // Same version on both lookups: the cached factory is reused.
        TokenFilterFactory tff_v1_1 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings1);
        TokenFilterFactory tff_v1_2 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings1);
        assertSame(tff_v1_1, tff_v1_2);

        Version version2 = randomValueOtherThan(version1, () -> randomFrom(VersionUtils.allVersions()));
        Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
                .build();

        // A different Elasticsearch version must produce a distinct factory.
        TokenFilterFactory tff_v2 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings2);
        assertNotSame(tff_v1_1, tff_v2);
    }

    /**
     * A {@code luceneVersion} pre-configured filter must cache one factory per Lucene
     * version. Two Elasticsearch versions that map to different Lucene versions are
     * chosen deliberately (current vs. one major before the first known version).
     */
    public void testCachingWithLuceneVersion() throws IOException {
        PreConfiguredTokenFilter pctf =
                PreConfiguredTokenFilter.luceneVersion("lucene_version", randomBoolean(),
                        (tokenStream, luceneVersion) -> new TokenFilter(tokenStream) {
                            @Override
                            public boolean incrementToken() {
                                return false;
                            }
                        });

        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);

        Version version1 = Version.CURRENT;
        Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
                .build();
        // Same Lucene version on both lookups: the cached factory is reused.
        TokenFilterFactory tff_v1_1 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings1);
        TokenFilterFactory tff_v1_2 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings1);
        assertSame(tff_v1_1, tff_v1_2);

        // Build a version one major below the oldest known version so its Lucene
        // version is guaranteed to differ from Version.CURRENT's.
        byte major = VersionUtils.getFirstVersion().major;
        Version version2 = Version.fromString(major - 1 + ".0.0");
        Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
                .build();

        // A different Lucene version must produce a distinct factory.
        TokenFilterFactory tff_v2 =
                pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings2);
        assertNotSame(tff_v1_1, tff_v2);
    }
}

0 commit comments

Comments
 (0)