Skip to content

Commit ef37511

Browse files
authored
Remove deprecations and 7.x related code from analysis common (#113009)
The edgeNGram and nGram tokenizer and token filter names were deprecated. They have not been supported in indices created on or after 8.0, so support for them can be removed entirely from main. The version-related logic around the min grams can also be removed, as it only applies to 7.x, which we no longer need to support. Relates to #50376, #50862, #43568
1 parent 25a73cd commit ef37511

File tree

3 files changed

+7
-314
lines changed

3 files changed

+7
-314
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

+6-126
Original file line number | Diff line number | Diff line change
@@ -101,12 +101,7 @@
101101
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
102102
import org.apache.lucene.analysis.util.ElisionFilter;
103103
import org.apache.lucene.util.SetOnce;
104-
import org.elasticsearch.common.logging.DeprecationCategory;
105-
import org.elasticsearch.common.logging.DeprecationLogger;
106104
import org.elasticsearch.common.regex.Regex;
107-
import org.elasticsearch.common.settings.Settings;
108-
import org.elasticsearch.env.Environment;
109-
import org.elasticsearch.index.IndexSettings;
110105
import org.elasticsearch.index.IndexVersions;
111106
import org.elasticsearch.index.analysis.AnalyzerProvider;
112107
import org.elasticsearch.index.analysis.CharFilterFactory;
@@ -139,8 +134,6 @@
139134

140135
public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, ScriptPlugin {
141136

142-
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonAnalysisPlugin.class);
143-
144137
private final SetOnce<ScriptService> scriptServiceHolder = new SetOnce<>();
145138
private final SetOnce<SynonymsManagementAPIService> synonymsManagementServiceHolder = new SetOnce<>();
146139

@@ -231,28 +224,6 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
231224
filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
232225
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
233226
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
234-
filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
235-
return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings) {
236-
@Override
237-
public TokenStream create(TokenStream tokenStream) {
238-
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
239-
throw new IllegalArgumentException(
240-
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
241-
+ "Please change the filter name to [edge_ngram] instead."
242-
);
243-
} else {
244-
deprecationLogger.warn(
245-
DeprecationCategory.ANALYSIS,
246-
"edgeNGram_deprecation",
247-
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
248-
+ "Please change the filter name to [edge_ngram] instead."
249-
);
250-
}
251-
return super.create(tokenStream);
252-
}
253-
254-
};
255-
});
256227
filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
257228
filters.put("fingerprint", FingerprintTokenFilterFactory::new);
258229
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
@@ -272,28 +243,6 @@ public TokenStream create(TokenStream tokenStream) {
272243
filters.put("min_hash", MinHashTokenFilterFactory::new);
273244
filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
274245
filters.put("ngram", NGramTokenFilterFactory::new);
275-
filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
276-
return new NGramTokenFilterFactory(indexSettings, environment, name, settings) {
277-
@Override
278-
public TokenStream create(TokenStream tokenStream) {
279-
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
280-
throw new IllegalArgumentException(
281-
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
282-
+ "Please change the filter name to [ngram] instead."
283-
);
284-
} else {
285-
deprecationLogger.warn(
286-
DeprecationCategory.ANALYSIS,
287-
"nGram_deprecation",
288-
"The [nGram] token filter name is deprecated and will be removed in a future version. "
289-
+ "Please change the filter name to [ngram] instead."
290-
);
291-
}
292-
return super.create(tokenStream);
293-
}
294-
295-
};
296-
});
297246
filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
298247
filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
299248
filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
@@ -345,39 +294,7 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
345294
tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new);
346295
tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
347296
tokenizers.put("thai", ThaiTokenizerFactory::new);
348-
tokenizers.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
349-
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
350-
throw new IllegalArgumentException(
351-
"The [nGram] tokenizer name was deprecated in 7.6. "
352-
+ "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
353-
);
354-
} else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
355-
deprecationLogger.warn(
356-
DeprecationCategory.ANALYSIS,
357-
"nGram_tokenizer_deprecation",
358-
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
359-
+ "Please change the tokenizer name to [ngram] instead."
360-
);
361-
}
362-
return new NGramTokenizerFactory(indexSettings, environment, name, settings);
363-
});
364297
tokenizers.put("ngram", NGramTokenizerFactory::new);
365-
tokenizers.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
366-
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
367-
throw new IllegalArgumentException(
368-
"The [edgeNGram] tokenizer name was deprecated in 7.6. "
369-
+ "Please use the tokenizer name to [edge_nGram] for indices created in versions 8 or higher instead."
370-
);
371-
} else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
372-
deprecationLogger.warn(
373-
DeprecationCategory.ANALYSIS,
374-
"edgeNGram_tokenizer_deprecation",
375-
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
376-
+ "Please change the tokenizer name to [edge_ngram] instead."
377-
);
378-
}
379-
return new EdgeNGramTokenizerFactory(indexSettings, environment, name, settings);
380-
});
381298
tokenizers.put("edge_ngram", EdgeNGramTokenizerFactory::new);
382299
tokenizers.put("char_group", CharGroupTokenizerFactory::new);
383300
tokenizers.put("classic", ClassicTokenizerFactory::new);
@@ -588,54 +505,17 @@ public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
588505
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new));
589506
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new));
590507
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new));
591-
tokenizers.add(PreConfiguredTokenizer.indexVersion("edge_ngram", (version) -> {
592-
if (version.onOrAfter(IndexVersions.V_7_3_0)) {
593-
return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
594-
}
595-
return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
596-
}));
508+
tokenizers.add(
509+
PreConfiguredTokenizer.indexVersion(
510+
"edge_ngram",
511+
(version) -> new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE)
512+
)
513+
);
597514
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1)));
598515
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new));
599516
// TODO deprecate and remove in API
600517
// This is already broken with normalization, so backwards compat isn't necessary?
601518
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new));
602-
603-
// Temporary shim for aliases. TODO deprecate after they are moved
604-
tokenizers.add(PreConfiguredTokenizer.indexVersion("nGram", (version) -> {
605-
if (version.onOrAfter(IndexVersions.V_8_0_0)) {
606-
throw new IllegalArgumentException(
607-
"The [nGram] tokenizer name was deprecated in 7.6. "
608-
+ "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
609-
);
610-
} else if (version.onOrAfter(IndexVersions.V_7_6_0)) {
611-
deprecationLogger.warn(
612-
DeprecationCategory.ANALYSIS,
613-
"nGram_tokenizer_deprecation",
614-
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
615-
+ "Please change the tokenizer name to [ngram] instead."
616-
);
617-
}
618-
return new NGramTokenizer();
619-
}));
620-
tokenizers.add(PreConfiguredTokenizer.indexVersion("edgeNGram", (version) -> {
621-
if (version.onOrAfter(IndexVersions.V_8_0_0)) {
622-
throw new IllegalArgumentException(
623-
"The [edgeNGram] tokenizer name was deprecated in 7.6. "
624-
+ "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
625-
);
626-
} else if (version.onOrAfter(IndexVersions.V_7_6_0)) {
627-
deprecationLogger.warn(
628-
DeprecationCategory.ANALYSIS,
629-
"edgeNGram_tokenizer_deprecation",
630-
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
631-
+ "Please change the tokenizer name to [edge_ngram] instead."
632-
);
633-
}
634-
if (version.onOrAfter(IndexVersions.V_7_3_0)) {
635-
return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
636-
}
637-
return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
638-
}));
639519
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new));
640520

641521
return tokenizers;

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java

-186
This file was deleted.

0 commit comments

Comments
 (0)