Skip to content

Commit 24e1858

Browse files
authored
Fix caching for PreConfiguredTokenFilter (elastic#50912)
The PreConfiguredTokenFilter#singletonWithVersion uses the version internally for the token filter factories, but it registers only one instance in the cache rather than one instance per version. This can lead to exceptions like the one described in elastic#50734, since the singleton is created and cached using the created version of the first index that is processed. Remove the singletonWithVersion() methods and use the elasticsearchVersion() methods instead. Fixes: elastic#50734
1 parent e16d1e5 commit 24e1858

File tree

4 files changed

+142
-21
lines changed

4 files changed

+142
-21
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
507507
| WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
508508
| WordDelimiterFilter.SPLIT_ON_NUMERICS
509509
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null)));
510-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("word_delimiter_graph", false, false, (input, version) -> {
510+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("word_delimiter_graph", false, false, (input, version) -> {
511511
boolean adjustOffsets = version.onOrAfter(Version.V_7_3_0);
512512
return new WordDelimiterGraphFilter(input, adjustOffsets, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
513513
WordDelimiterGraphFilter.GENERATE_WORD_PARTS

server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -52,25 +52,6 @@ public static PreConfiguredTokenFilter singleton(String name, boolean useFilterF
5252
(tokenStream, version) -> create.apply(tokenStream));
5353
}
5454

55-
/**
56-
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
57-
*/
58-
public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
59-
BiFunction<TokenStream, Version, TokenStream> create) {
60-
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, true, CachingStrategy.ONE,
61-
(tokenStream, version) -> create.apply(tokenStream, version));
62-
}
63-
64-
/**
65-
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
66-
*/
67-
public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
68-
boolean useFilterForParsingSynonyms,
69-
BiFunction<TokenStream, Version, TokenStream> create) {
70-
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, useFilterForParsingSynonyms, CachingStrategy.ONE,
71-
(tokenStream, version) -> create.apply(tokenStream, version));
72-
}
73-
7455
/**
7556
* Create a pre-configured token filter that may vary based on the Lucene version.
7657
*/
@@ -88,6 +69,16 @@ public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean
8869
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, true, CachingStrategy.ELASTICSEARCH, create);
8970
}
9071

72+
/**
73+
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
* The filter is constructed with {@code CachingStrategy.ELASTICSEARCH}.
*
* @param name name handed to the {@code PreConfiguredTokenFilter} constructor
* @param useFilterForMultitermQueries forwarded unchanged to the constructor
* @param useFilterForParsingSynonyms forwarded unchanged to the constructor; the three-argument
*        {@code elasticsearchVersion} overload passes {@code true} in this position
* @param create builds the filter from the input token stream and a {@code Version}
*        (presumably the Elasticsearch version the index was created with; confirm against the caller)
* @return the new pre-configured token filter
74+
*/
75+
public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean useFilterForMultitermQueries,
76+
boolean useFilterForParsingSynonyms,
77+
BiFunction<TokenStream, Version, TokenStream> create) {
78+
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, useFilterForParsingSynonyms,
79+
CachingStrategy.ELASTICSEARCH, create);
80+
}
81+
9182
private final boolean useFilterForMultitermQueries;
9283
private final boolean allowForSynonymParsing;
9384
private final BiFunction<TokenStream, Version, TokenStream> create;

server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List
181181
preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new));
182182
// Add "standard" for old indices (bwc)
183183
preConfiguredTokenFilters.register( "standard",
184-
PreConfiguredTokenFilter.singletonWithVersion("standard", true, (reader, version) -> {
184+
PreConfiguredTokenFilter.elasticsearchVersion("standard", true, (reader, version) -> {
185185
if (version.before(Version.V_7_0_0)) {
186186
deprecationLogger.deprecatedAndMaybeLog("standard_deprecation",
187187
"The [standard] token filter is deprecated and will be removed in a future version.");
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.index.analysis;
20+
21+
import org.apache.lucene.analysis.TokenFilter;
22+
import org.elasticsearch.Version;
23+
import org.elasticsearch.cluster.metadata.IndexMetaData;
24+
import org.elasticsearch.common.settings.Settings;
25+
import org.elasticsearch.env.Environment;
26+
import org.elasticsearch.env.TestEnvironment;
27+
import org.elasticsearch.index.IndexSettings;
28+
import org.elasticsearch.test.ESTestCase;
29+
import org.elasticsearch.test.IndexSettingsModule;
30+
import org.elasticsearch.test.VersionUtils;
31+
32+
import java.io.IOException;
33+
34+
/**
* Checks which {@code TokenFilterFactory} instances {@code PreConfiguredTokenFilter#get} hands back
* when it is called repeatedly with the same or with different {@code SETTING_VERSION_CREATED}
* values, for filters built via {@code singleton}, {@code elasticsearchVersion} and
* {@code luceneVersion}.
*/
public class PreConfiguredTokenFilterTests extends ESTestCase {
35+
36+
// Node settings that only set the home path (to a temp dir); used to build the Environment passed to get().
private final Settings emptyNodeSettings = Settings.builder()
37+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
38+
.build();
39+
40+
// singleton(): the same factory instance is expected for every created version.
public void testCachingWithSingleton() throws IOException {
41+
PreConfiguredTokenFilter pctf =
42+
PreConfiguredTokenFilter.singleton("singleton", randomBoolean(),
43+
(tokenStream) -> new TokenFilter(tokenStream) {
44+
@Override
45+
public boolean incrementToken() {
46+
return false;
47+
}
48+
});
49+
50+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);
51+
52+
Version version1 = VersionUtils.randomVersion(random());
53+
Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
54+
.build();
55+
TokenFilterFactory tff_v1_1 =
56+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings1);
57+
TokenFilterFactory tff_v1_2 =
58+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings1);
59+
// Two lookups with identical settings must return the identical instance.
assertSame(tff_v1_1, tff_v1_2);
60+
61+
// Pick any version other than version1.
Version version2 = randomValueOtherThan(version1, () -> randomFrom(VersionUtils.allVersions()));
62+
Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
63+
.build();
64+
65+
TokenFilterFactory tff_v2 =
66+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings2);
67+
// A different created version is still expected to yield the first instance.
assertSame(tff_v1_1, tff_v2);
68+
}
69+
70+
// elasticsearchVersion(): same instance for the same created version, a distinct one for another version.
public void testCachingWithElasticsearchVersion() throws IOException {
71+
PreConfiguredTokenFilter pctf =
72+
PreConfiguredTokenFilter.elasticsearchVersion("elasticsearch_version", randomBoolean(),
73+
(tokenStream, esVersion) -> new TokenFilter(tokenStream) {
74+
@Override
75+
public boolean incrementToken() {
76+
return false;
77+
}
78+
});
79+
80+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);
81+
82+
Version version1 = VersionUtils.randomVersion(random());
83+
Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
84+
.build();
85+
TokenFilterFactory tff_v1_1 =
86+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings1);
87+
TokenFilterFactory tff_v1_2 =
88+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings1);
89+
// Two lookups with identical settings must return the identical instance.
assertSame(tff_v1_1, tff_v1_2);
90+
91+
// Pick any version other than version1.
Version version2 = randomValueOtherThan(version1, () -> randomFrom(VersionUtils.allVersions()));
92+
Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
93+
.build();
94+
95+
TokenFilterFactory tff_v2 =
96+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings2);
97+
// A different created version is expected to produce a different instance.
assertNotSame(tff_v1_1, tff_v2);
98+
}
99+
100+
// luceneVersion(): same instance for Version.CURRENT twice, a distinct one for an older created version.
public void testCachingWithLuceneVersion() throws IOException {
101+
PreConfiguredTokenFilter pctf =
102+
PreConfiguredTokenFilter.luceneVersion("lucene_version", randomBoolean(),
103+
(tokenStream, luceneVersion) -> new TokenFilter(tokenStream) {
104+
@Override
105+
public boolean incrementToken() {
106+
return false;
107+
}
108+
});
109+
110+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);
111+
112+
Version version1 = Version.CURRENT;
113+
Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
114+
.build();
115+
TokenFilterFactory tff_v1_1 =
116+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings1);
117+
TokenFilterFactory tff_v1_2 =
118+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings1);
119+
// Two lookups with identical settings must return the identical instance.
assertSame(tff_v1_1, tff_v1_2);
120+
121+
// version2 is "<major of the first known version, minus one>.0.0".
// NOTE(review): presumably chosen so it maps to a different Lucene version than Version.CURRENT - confirm.
byte major = VersionUtils.getFirstVersion().major;
122+
Version version2 = Version.fromString(major - 1 + ".0.0");
123+
Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
124+
.build();
125+
126+
TokenFilterFactory tff_v2 =
127+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings2);
128+
// The older created version is expected to produce a different instance.
assertNotSame(tff_v1_1, tff_v2);
129+
}
130+
}

0 commit comments

Comments
 (0)