Skip to content

Commit daf0e9f

Browse files
Christoph Büscher, SivagurunathanV
Christoph Büscher
authored and committed
Throw Error on deprecated nGram and edgeNGram custom filters (elastic#50376)
The camel-case `nGram` and `edgeNGram` filter names were deprecated in 6.4. We currently throw errors on new indices when they are used. However, these errors are currently only thrown for pre-configured filters; adding them as custom filters doesn't trigger the warning or the error. This change adds the appropriate deprecation warnings and exceptions for `nGram` and `edgeNGram` when they are used as custom filters. Closes elastic#50360
1 parent ce97429 commit daf0e9f

File tree

2 files changed

+167
-2
lines changed

2 files changed

+167
-2
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,11 @@
118118
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
119119
import org.elasticsearch.common.logging.DeprecationLogger;
120120
import org.elasticsearch.common.regex.Regex;
121+
import org.elasticsearch.common.settings.Settings;
121122
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
122123
import org.elasticsearch.env.Environment;
123124
import org.elasticsearch.env.NodeEnvironment;
125+
import org.elasticsearch.index.IndexSettings;
124126
import org.elasticsearch.index.analysis.AnalyzerProvider;
125127
import org.elasticsearch.index.analysis.CharFilterFactory;
126128
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
@@ -238,7 +240,24 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
238240
filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
239241
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
240242
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
241-
filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
243+
filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
244+
return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings) {
245+
@Override
246+
public TokenStream create(TokenStream tokenStream) {
247+
if (indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_8_0_0)) {
248+
throw new IllegalArgumentException(
249+
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
250+
+ "Please change the filter name to [edge_ngram] instead.");
251+
} else {
252+
deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation",
253+
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
254+
+ "Please change the filter name to [edge_ngram] instead.");
255+
}
256+
return super.create(tokenStream);
257+
}
258+
259+
};
260+
});
242261
filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
243262
filters.put("fingerprint", FingerprintTokenFilterFactory::new);
244263
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
@@ -258,7 +277,24 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
258277
filters.put("min_hash", MinHashTokenFilterFactory::new);
259278
filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
260279
filters.put("ngram", NGramTokenFilterFactory::new);
261-
filters.put("nGram", NGramTokenFilterFactory::new);
280+
filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
281+
return new NGramTokenFilterFactory(indexSettings, environment, name, settings) {
282+
@Override
283+
public TokenStream create(TokenStream tokenStream) {
284+
if (indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_8_0_0)) {
285+
throw new IllegalArgumentException(
286+
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
287+
+ "Please change the filter name to [ngram] instead.");
288+
} else {
289+
deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation",
290+
"The [nGram] token filter name is deprecated and will be removed in a future version. "
291+
+ "Please change the filter name to [ngram] instead.");
292+
}
293+
return super.create(tokenStream);
294+
}
295+
296+
};
297+
});
262298
filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
263299
filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
264300
filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.analysis.common;
21+
22+
import org.apache.lucene.analysis.MockTokenizer;
23+
import org.apache.lucene.analysis.Tokenizer;
24+
import org.elasticsearch.Version;
25+
import org.elasticsearch.cluster.metadata.IndexMetaData;
26+
import org.elasticsearch.common.settings.Settings;
27+
import org.elasticsearch.env.Environment;
28+
import org.elasticsearch.index.analysis.TokenFilterFactory;
29+
import org.elasticsearch.test.ESTestCase;
30+
import org.elasticsearch.test.IndexSettingsModule;
31+
import org.elasticsearch.test.VersionUtils;
32+
33+
import java.io.IOException;
34+
import java.io.StringReader;
35+
import java.util.Map;
36+
37+
public class CommonAnalysisPluginTests extends ESTestCase {
38+
39+
/**
40+
* Check that the deprecated "nGram" filter throws an exception for indices created since 8.0.0 and
41+
* logs a warning for earlier indices when the filter is used as a custom filter
42+
*/
43+
public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
44+
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
45+
.put(IndexMetaData.SETTING_VERSION_CREATED,
46+
VersionUtils.randomVersionBetween(random(), Version.V_8_0_0, Version.CURRENT))
47+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
48+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
49+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
50+
.put("index.analysis.filter.my_ngram.type", "nGram")
51+
.build();
52+
53+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
54+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings),
55+
settings, commonAnalysisPlugin).tokenFilter;
56+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
57+
Tokenizer tokenizer = new MockTokenizer();
58+
tokenizer.setReader(new StringReader("foo bar"));
59+
60+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
61+
assertEquals("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
62+
+ "Please change the filter name to [ngram] instead.", ex.getMessage());
63+
}
64+
65+
final Settings settingsPre7 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
66+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.V_7_6_0))
67+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
68+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
69+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram").put("index.analysis.filter.my_ngram.type", "nGram")
70+
.build();
71+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
72+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7),
73+
settingsPre7, commonAnalysisPlugin).tokenFilter;
74+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
75+
Tokenizer tokenizer = new MockTokenizer();
76+
tokenizer.setReader(new StringReader("foo bar"));
77+
assertNotNull(tokenFilterFactory.create(tokenizer));
78+
assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. "
79+
+ "Please change the filter name to [ngram] instead.");
80+
}
81+
}
82+
83+
/**
84+
* Check that the deprecated "edgeNGram" filter throws an exception for indices created since 8.0.0 and
85+
* logs a warning for earlier indices when the filter is used as a custom filter
86+
*/
87+
public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
88+
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
89+
.put(IndexMetaData.SETTING_VERSION_CREATED,
90+
VersionUtils.randomVersionBetween(random(), Version.V_8_0_0, Version.CURRENT))
91+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
92+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
93+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
94+
.put("index.analysis.filter.my_ngram.type", "edgeNGram")
95+
.build();
96+
97+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
98+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings),
99+
settings, commonAnalysisPlugin).tokenFilter;
100+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
101+
Tokenizer tokenizer = new MockTokenizer();
102+
tokenizer.setReader(new StringReader("foo bar"));
103+
104+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
105+
assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
106+
+ "Please change the filter name to [edge_ngram] instead.", ex.getMessage());
107+
}
108+
109+
final Settings settingsPre7 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
110+
.put(IndexMetaData.SETTING_VERSION_CREATED,
111+
VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.V_7_6_0))
112+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
113+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
114+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
115+
.put("index.analysis.filter.my_ngram.type", "edgeNGram")
116+
.build();
117+
118+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
119+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7),
120+
settingsPre7, commonAnalysisPlugin).tokenFilter;
121+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
122+
Tokenizer tokenizer = new MockTokenizer();
123+
tokenizer.setReader(new StringReader("foo bar"));
124+
assertNotNull(tokenFilterFactory.create(tokenizer));
125+
assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
126+
+ "Please change the filter name to [edge_ngram] instead.");
127+
}
128+
}
129+
}

0 commit comments

Comments
 (0)