Skip to content

Commit 3fce042

Browse files
committed
Revert "Remove deprecations and 7.x related code from analysis common (elastic#113009)"
This reverts commit ef37511.
1 parent 1141ede commit 3fce042

File tree

3 files changed

+314
-7
lines changed

3 files changed

+314
-7
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

+126-6
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,12 @@
101101
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
102102
import org.apache.lucene.analysis.util.ElisionFilter;
103103
import org.apache.lucene.util.SetOnce;
104+
import org.elasticsearch.common.logging.DeprecationCategory;
105+
import org.elasticsearch.common.logging.DeprecationLogger;
104106
import org.elasticsearch.common.regex.Regex;
107+
import org.elasticsearch.common.settings.Settings;
108+
import org.elasticsearch.env.Environment;
109+
import org.elasticsearch.index.IndexSettings;
105110
import org.elasticsearch.index.IndexVersions;
106111
import org.elasticsearch.index.analysis.AnalyzerProvider;
107112
import org.elasticsearch.index.analysis.CharFilterFactory;
@@ -134,6 +139,8 @@
134139

135140
public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, ScriptPlugin {
136141

142+
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonAnalysisPlugin.class);
143+
137144
private final SetOnce<ScriptService> scriptServiceHolder = new SetOnce<>();
138145
private final SetOnce<SynonymsManagementAPIService> synonymsManagementServiceHolder = new SetOnce<>();
139146

@@ -224,6 +231,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
224231
filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
225232
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
226233
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
234+
filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
235+
return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings) {
236+
@Override
237+
public TokenStream create(TokenStream tokenStream) {
238+
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
239+
throw new IllegalArgumentException(
240+
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
241+
+ "Please change the filter name to [edge_ngram] instead."
242+
);
243+
} else {
244+
deprecationLogger.warn(
245+
DeprecationCategory.ANALYSIS,
246+
"edgeNGram_deprecation",
247+
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
248+
+ "Please change the filter name to [edge_ngram] instead."
249+
);
250+
}
251+
return super.create(tokenStream);
252+
}
253+
254+
};
255+
});
227256
filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
228257
filters.put("fingerprint", FingerprintTokenFilterFactory::new);
229258
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
@@ -243,6 +272,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
243272
filters.put("min_hash", MinHashTokenFilterFactory::new);
244273
filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
245274
filters.put("ngram", NGramTokenFilterFactory::new);
275+
filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
276+
return new NGramTokenFilterFactory(indexSettings, environment, name, settings) {
277+
@Override
278+
public TokenStream create(TokenStream tokenStream) {
279+
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
280+
throw new IllegalArgumentException(
281+
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
282+
+ "Please change the filter name to [ngram] instead."
283+
);
284+
} else {
285+
deprecationLogger.warn(
286+
DeprecationCategory.ANALYSIS,
287+
"nGram_deprecation",
288+
"The [nGram] token filter name is deprecated and will be removed in a future version. "
289+
+ "Please change the filter name to [ngram] instead."
290+
);
291+
}
292+
return super.create(tokenStream);
293+
}
294+
295+
};
296+
});
246297
filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
247298
filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
248299
filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
@@ -294,7 +345,39 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
294345
tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new);
295346
tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
296347
tokenizers.put("thai", ThaiTokenizerFactory::new);
348+
tokenizers.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
349+
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
350+
throw new IllegalArgumentException(
351+
"The [nGram] tokenizer name was deprecated in 7.6. "
352+
+ "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
353+
);
354+
} else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
355+
deprecationLogger.warn(
356+
DeprecationCategory.ANALYSIS,
357+
"nGram_tokenizer_deprecation",
358+
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
359+
+ "Please change the tokenizer name to [ngram] instead."
360+
);
361+
}
362+
return new NGramTokenizerFactory(indexSettings, environment, name, settings);
363+
});
297364
tokenizers.put("ngram", NGramTokenizerFactory::new);
365+
// Deprecated camel-case alias of the "edge_ngram" tokenizer.
// - Indices created on or after 8.0.0: the alias is rejected with an IllegalArgumentException.
// - Indices created on or after 7.6.0 (and before 8.0.0): the alias works but logs a deprecation warning.
// - Older indices: the alias works without a warning.
tokenizers.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
    if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
        // The suggested replacement must be spelled exactly like the supported tokenizer name, "edge_ngram";
        // the previous text suggested "edge_nGram", which is not a registered name.
        throw new IllegalArgumentException(
            "The [edgeNGram] tokenizer name was deprecated in 7.6. "
                + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
        );
    } else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
        deprecationLogger.warn(
            DeprecationCategory.ANALYSIS,
            "edgeNGram_tokenizer_deprecation",
            "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
                + "Please change the tokenizer name to [edge_ngram] instead."
        );
    }
    // Same factory as the supported "edge_ngram" registration; only the name check differs.
    return new EdgeNGramTokenizerFactory(indexSettings, environment, name, settings);
});
298381
tokenizers.put("edge_ngram", EdgeNGramTokenizerFactory::new);
299382
tokenizers.put("char_group", CharGroupTokenizerFactory::new);
300383
tokenizers.put("classic", ClassicTokenizerFactory::new);
@@ -505,17 +588,54 @@ public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
505588
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new));
506589
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new));
507590
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new));
508-
tokenizers.add(
509-
PreConfiguredTokenizer.indexVersion(
510-
"edge_ngram",
511-
(version) -> new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE)
512-
)
513-
);
591+
tokenizers.add(PreConfiguredTokenizer.indexVersion("edge_ngram", (version) -> {
592+
if (version.onOrAfter(IndexVersions.V_7_3_0)) {
593+
return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
594+
}
595+
return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
596+
}));
514597
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1)));
515598
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new));
516599
// TODO deprecate and remove in API
517600
// This is already broken with normalization, so backwards compat isn't necessary?
518601
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new));
602+
603+
// Temporary shim for aliases. TODO deprecate after they are moved
604+
tokenizers.add(PreConfiguredTokenizer.indexVersion("nGram", (version) -> {
605+
if (version.onOrAfter(IndexVersions.V_8_0_0)) {
606+
throw new IllegalArgumentException(
607+
"The [nGram] tokenizer name was deprecated in 7.6. "
608+
+ "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
609+
);
610+
} else if (version.onOrAfter(IndexVersions.V_7_6_0)) {
611+
deprecationLogger.warn(
612+
DeprecationCategory.ANALYSIS,
613+
"nGram_tokenizer_deprecation",
614+
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
615+
+ "Please change the tokenizer name to [ngram] instead."
616+
);
617+
}
618+
return new NGramTokenizer();
619+
}));
620+
tokenizers.add(PreConfiguredTokenizer.indexVersion("edgeNGram", (version) -> {
621+
if (version.onOrAfter(IndexVersions.V_8_0_0)) {
622+
throw new IllegalArgumentException(
623+
"The [edgeNGram] tokenizer name was deprecated in 7.6. "
624+
+ "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
625+
);
626+
} else if (version.onOrAfter(IndexVersions.V_7_6_0)) {
627+
deprecationLogger.warn(
628+
DeprecationCategory.ANALYSIS,
629+
"edgeNGram_tokenizer_deprecation",
630+
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
631+
+ "Please change the tokenizer name to [edge_ngram] instead."
632+
);
633+
}
634+
if (version.onOrAfter(IndexVersions.V_7_3_0)) {
635+
return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
636+
}
637+
return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
638+
}));
519639
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new));
520640

521641
return tokenizers;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.analysis.common;
11+
12+
import org.apache.lucene.analysis.Tokenizer;
13+
import org.elasticsearch.cluster.metadata.IndexMetadata;
14+
import org.elasticsearch.common.settings.Settings;
15+
import org.elasticsearch.env.Environment;
16+
import org.elasticsearch.index.IndexVersion;
17+
import org.elasticsearch.index.IndexVersions;
18+
import org.elasticsearch.index.analysis.TokenizerFactory;
19+
import org.elasticsearch.test.ESTestCase;
20+
import org.elasticsearch.test.IndexSettingsModule;
21+
import org.elasticsearch.test.index.IndexVersionUtils;
22+
23+
import java.io.IOException;
24+
import java.util.Map;
25+
26+
public class CommonAnalysisPluginTests extends ESTestCase {
27+
28+
/**
29+
* Check that the deprecated "nGram" filter throws exception for indices created since 8.0.0 and
30+
* logs a warning for earlier indices when the filter is used as a custom filter
31+
*/
32+
public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
33+
final Settings settings = Settings.builder()
34+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
35+
.put(
36+
IndexMetadata.SETTING_VERSION_CREATED,
37+
IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current())
38+
)
39+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
40+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
41+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
42+
.put("index.analysis.filter.my_ngram.type", "nGram")
43+
.build();
44+
45+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
46+
IllegalArgumentException ex = expectThrows(
47+
IllegalArgumentException.class,
48+
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
49+
);
50+
assertEquals(
51+
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
52+
+ "Please change the filter name to [ngram] instead.",
53+
ex.getMessage()
54+
);
55+
}
56+
}
57+
58+
/**
59+
* Check that the deprecated "edgeNGram" filter throws exception for indices created since 8.0.0 and
60+
* logs a warning for earlier indices when the filter is used as a custom filter
61+
*/
62+
public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
63+
final Settings settings = Settings.builder()
64+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
65+
.put(
66+
IndexMetadata.SETTING_VERSION_CREATED,
67+
IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current())
68+
)
69+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
70+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
71+
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
72+
.put("index.analysis.filter.my_ngram.type", "edgeNGram")
73+
.build();
74+
75+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
76+
IllegalArgumentException ex = expectThrows(
77+
IllegalArgumentException.class,
78+
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
79+
);
80+
assertEquals(
81+
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
82+
+ "Please change the filter name to [edge_ngram] instead.",
83+
ex.getMessage()
84+
);
85+
}
86+
}
87+
88+
/**
89+
* Check that we log a deprecation warning for "nGram" and "edgeNGram" tokenizer names for indices created on or after 7.6.0 and
90+
* disallow usages for indices created on or after 8.0.0
91+
*/
92+
public void testNGramTokenizerDeprecation() throws IOException {
93+
expectThrows(
94+
IllegalArgumentException.class,
95+
() -> doTestPrebuiltTokenizerDeprecation(
96+
"nGram",
97+
"ngram",
98+
IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()),
99+
true
100+
)
101+
);
102+
expectThrows(
103+
IllegalArgumentException.class,
104+
() -> doTestPrebuiltTokenizerDeprecation(
105+
"edgeNGram",
106+
"edge_ngram",
107+
IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()),
108+
true
109+
)
110+
);
111+
expectThrows(
112+
IllegalArgumentException.class,
113+
() -> doTestCustomTokenizerDeprecation(
114+
"nGram",
115+
"ngram",
116+
IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()),
117+
true
118+
)
119+
);
120+
expectThrows(
121+
IllegalArgumentException.class,
122+
() -> doTestCustomTokenizerDeprecation(
123+
"edgeNGram",
124+
"edge_ngram",
125+
IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()),
126+
true
127+
)
128+
);
129+
}
130+
131+
public void doTestPrebuiltTokenizerDeprecation(String deprecatedName, String replacement, IndexVersion version, boolean expectWarning)
132+
throws IOException {
133+
final Settings settings = Settings.builder()
134+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
135+
.put(IndexMetadata.SETTING_VERSION_CREATED, version)
136+
.build();
137+
138+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
139+
Map<String, TokenizerFactory> tokenizers = createTestAnalysis(
140+
IndexSettingsModule.newIndexSettings("index", settings),
141+
settings,
142+
commonAnalysisPlugin
143+
).tokenizer;
144+
TokenizerFactory tokenizerFactory = tokenizers.get(deprecatedName);
145+
146+
Tokenizer tokenizer = tokenizerFactory.create();
147+
assertNotNull(tokenizer);
148+
if (expectWarning) {
149+
assertWarnings(
150+
"The ["
151+
+ deprecatedName
152+
+ "] tokenizer name is deprecated and will be removed in a future version. "
153+
+ "Please change the tokenizer name to ["
154+
+ replacement
155+
+ "] instead."
156+
);
157+
}
158+
}
159+
}
160+
161+
public void doTestCustomTokenizerDeprecation(String deprecatedName, String replacement, IndexVersion version, boolean expectWarning)
162+
throws IOException {
163+
final Settings settings = Settings.builder()
164+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
165+
.put(IndexMetadata.SETTING_VERSION_CREATED, version)
166+
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
167+
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "my_tokenizer")
168+
.put("index.analysis.tokenizer.my_tokenizer.type", deprecatedName)
169+
.build();
170+
171+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
172+
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
173+
174+
if (expectWarning) {
175+
assertWarnings(
176+
"The ["
177+
+ deprecatedName
178+
+ "] tokenizer name is deprecated and will be removed in a future version. "
179+
+ "Please change the tokenizer name to ["
180+
+ replacement
181+
+ "] instead."
182+
);
183+
}
184+
}
185+
}
186+
}

0 commit comments

Comments
 (0)