Skip to content

Commit 65c87d3

Browse files
author
Christoph Büscher
committed
Error on deprecated nGram and edgeNGram custom filters
The camel-case `nGram` and `edgeNGram` filter names were deprecated in 6.4. We currently throw errors on new indices when they are used. However, these errors are currently only thrown for pre-configured filters; adding them as custom filters doesn't trigger the warning or the error. This change adds the appropriate deprecation warnings and exceptions for `nGram` and `edgeNGram`, respectively, when they are configured as custom filters. Closes elastic#50360
1 parent 9e6e4bb commit 65c87d3

File tree

2 files changed

+264
-2
lines changed

2 files changed

+264
-2
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

+24-2
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,11 @@
118118
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
119119
import org.elasticsearch.common.logging.DeprecationLogger;
120120
import org.elasticsearch.common.regex.Regex;
121+
import org.elasticsearch.common.settings.Settings;
121122
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
122123
import org.elasticsearch.env.Environment;
123124
import org.elasticsearch.env.NodeEnvironment;
125+
import org.elasticsearch.index.IndexSettings;
124126
import org.elasticsearch.index.analysis.AnalyzerProvider;
125127
import org.elasticsearch.index.analysis.CharFilterFactory;
126128
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
@@ -238,7 +240,17 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
238240
filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
239241
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
240242
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
241-
filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
243+
filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
244+
if (indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_7_6_0)) {
245+
throw new IllegalArgumentException("The [edgeNGram] token filter name was deprecated in 6.4 and "
246+
+ "cannot be used in new indices. Please change the filter name to [edge_ngram] instead.");
247+
} else {
248+
deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation",
249+
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
250+
+ "Please change the filter name to [edge_ngram] instead.");
251+
}
252+
return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings);
253+
});
242254
filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
243255
filters.put("fingerprint", FingerprintTokenFilterFactory::new);
244256
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
@@ -258,7 +270,17 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
258270
filters.put("min_hash", MinHashTokenFilterFactory::new);
259271
filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
260272
filters.put("ngram", NGramTokenFilterFactory::new);
261-
filters.put("nGram", NGramTokenFilterFactory::new);
273+
filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
274+
if (indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_7_6_0)) {
275+
throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
276+
+ "Please change the filter name to [ngram] instead.");
277+
} else {
278+
deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation",
279+
"The [nGram] token filter name is deprecated and will be removed in a future version. "
280+
+ "Please change the filter name to [ngram] instead.");
281+
}
282+
return new NGramTokenFilterFactory(indexSettings, environment, name, settings);
283+
});
262284
filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
263285
filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
264286
filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.analysis.common;
21+
22+
import org.apache.lucene.analysis.Analyzer;
23+
import org.apache.lucene.analysis.MockTokenizer;
24+
import org.apache.lucene.analysis.Tokenizer;
25+
import org.elasticsearch.Version;
26+
import org.elasticsearch.cluster.metadata.IndexMetaData;
27+
import org.elasticsearch.common.settings.Settings;
28+
import org.elasticsearch.env.Environment;
29+
import org.elasticsearch.index.IndexSettings;
30+
import org.elasticsearch.index.analysis.IndexAnalyzers;
31+
import org.elasticsearch.index.analysis.NamedAnalyzer;
32+
import org.elasticsearch.index.analysis.TokenFilterFactory;
33+
import org.elasticsearch.test.ESTestCase;
34+
import org.elasticsearch.test.IndexSettingsModule;
35+
import org.elasticsearch.test.VersionUtils;
36+
37+
import java.io.IOException;
38+
import java.io.StringReader;
39+
import java.util.Map;
40+
41+
public class CommonAnalysisPluginTests extends ESTestCase {
42+
43+
/**
44+
* Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.0.0
45+
*/
46+
public void testNGramDeprecationWarning() throws IOException {
47+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
48+
.put(IndexMetaData.SETTING_VERSION_CREATED,
49+
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)))
50+
.build();
51+
52+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
53+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
54+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
55+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
56+
Tokenizer tokenizer = new MockTokenizer();
57+
tokenizer.setReader(new StringReader("foo bar"));
58+
assertNotNull(tokenFilterFactory.create(tokenizer));
59+
assertWarnings(
60+
"The [nGram] token filter name is deprecated and will be removed in a future version. "
61+
+ "Please change the filter name to [ngram] instead.");
62+
}
63+
}
64+
65+
/**
66+
* Check that the deprecated name "nGram" throws an error since 7.0.0
67+
*/
68+
public void testNGramDeprecationError() throws IOException {
69+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
70+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
71+
.build();
72+
73+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
74+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
75+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
76+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
77+
Tokenizer tokenizer = new MockTokenizer();
78+
tokenizer.setReader(new StringReader("foo bar"));
79+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
80+
assertEquals(
81+
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
82+
+ " name to [ngram] instead.",
83+
ex.getMessage());
84+
}
85+
}
86+
87+
/**
88+
* Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.0.0
89+
*/
90+
public void testEdgeNGramDeprecationWarning() throws IOException {
91+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
92+
.put(IndexMetaData.SETTING_VERSION_CREATED,
93+
VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)))
94+
.build();
95+
96+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
97+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
98+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
99+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
100+
Tokenizer tokenizer = new MockTokenizer();
101+
tokenizer.setReader(new StringReader("foo bar"));
102+
assertNotNull(tokenFilterFactory.create(tokenizer));
103+
assertWarnings(
104+
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
105+
+ "Please change the filter name to [edge_ngram] instead.");
106+
}
107+
}
108+
109+
/**
110+
* Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0
111+
*/
112+
public void testEdgeNGramDeprecationError() throws IOException {
113+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
114+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
115+
.build();
116+
117+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
118+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
119+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
120+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
121+
Tokenizer tokenizer = new MockTokenizer();
122+
tokenizer.setReader(new StringReader("foo bar"));
123+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
124+
assertEquals(
125+
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
126+
+ " name to [edge_ngram] instead.",
127+
ex.getMessage());
128+
}
129+
}
130+
131+
/**
132+
* Check that the deprecated "nGram" filter throws exception for indices created since 7.0.0 and
133+
* logs a warning for earlier indices when the filter is used as a custom filter
134+
*/
135+
public void testnGramFilterInCustomAnalyzerDeprecationError() throws IOException {
136+
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
137+
.put(IndexMetaData.SETTING_VERSION_CREATED,
138+
VersionUtils.randomVersionBetween(random(), Version.V_7_6_0, Version.CURRENT))
139+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
140+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
141+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
142+
.put("index.analysis.filter.my_ngram.type", "nGram")
143+
.build();
144+
145+
final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
146+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
147+
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
148+
assertEquals("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
149+
+ "Please change the filter name to [ngram] instead.", ex.getMessage());
150+
151+
final Settings settingsPre7 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
152+
.put(IndexMetaData.SETTING_VERSION_CREATED,
153+
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_7_5_2))
154+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
155+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
156+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
157+
.put("index.analysis.filter.my_ngram.type", "nGram")
158+
.build();
159+
160+
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin);
161+
assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. "
162+
+ "Please change the filter name to [ngram] instead.");
163+
}
164+
165+
/**
166+
* Check that the deprecated "edgeNGram" filter throws exception for indices created since 7.0.0 and
167+
* logs a warning for earlier indices when the filter is used as a custom filter
168+
*/
169+
public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
170+
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
171+
.put(IndexMetaData.SETTING_VERSION_CREATED,
172+
VersionUtils.randomVersionBetween(random(), Version.V_7_6_0, Version.CURRENT))
173+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
174+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
175+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
176+
.put("index.analysis.filter.my_ngram.type", "edgeNGram")
177+
.build();
178+
179+
final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
180+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
181+
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
182+
assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
183+
+ "Please change the filter name to [edge_ngram] instead.", ex.getMessage());
184+
185+
final Settings settingsPre7 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
186+
.put(IndexMetaData.SETTING_VERSION_CREATED,
187+
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_7_5_2))
188+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
189+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
190+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
191+
.put("index.analysis.filter.my_ngram.type", "edgeNGram")
192+
.build();
193+
194+
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin);
195+
assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
196+
+ "Please change the filter name to [edge_ngram] instead.");
197+
}
198+
199+
/**
200+
* Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
201+
*/
202+
public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
203+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
204+
.put(IndexMetaData.SETTING_VERSION_CREATED,
205+
VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT))
206+
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
207+
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
208+
.build();
209+
210+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
211+
CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
212+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
213+
() -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin));
214+
assertEquals("[standard_html_strip] analyzer is not supported for new indices, " +
215+
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter", ex.getMessage());
216+
}
217+
218+
/**
219+
* Check that the deprecated analyzer name "standard_html_strip" issues a deprecation warning for indices created since 6.5.0 until 7
220+
*/
221+
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
222+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
223+
.put(IndexMetaData.SETTING_VERSION_CREATED,
224+
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0,
225+
VersionUtils.getPreviousVersion(Version.V_7_0_0)))
226+
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
227+
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
228+
.build();
229+
230+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
231+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
232+
IndexAnalyzers analyzers = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).indexAnalyzers;
233+
Analyzer analyzer = analyzers.get("custom_analyzer");
234+
assertNotNull(((NamedAnalyzer) analyzer).analyzer());
235+
assertWarnings(
236+
"Deprecated analyzer [standard_html_strip] used, " +
237+
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
238+
}
239+
}
240+
}

0 commit comments

Comments
 (0)