Skip to content

Commit c98ca63

Browse files
authored
Revert 7.x related code from analysis common (#118972)
This reverts #113009 and re-introduces v7 compatibility logic and previous v7 tests since we now support v7 indices as read-only on v9.
1 parent c3a59bb commit c98ca63

File tree

3 files changed

+419
-7
lines changed

3 files changed

+419
-7
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 125 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,12 @@
101101
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
102102
import org.apache.lucene.analysis.util.ElisionFilter;
103103
import org.apache.lucene.util.SetOnce;
104+
import org.elasticsearch.common.logging.DeprecationCategory;
105+
import org.elasticsearch.common.logging.DeprecationLogger;
104106
import org.elasticsearch.common.regex.Regex;
107+
import org.elasticsearch.common.settings.Settings;
108+
import org.elasticsearch.env.Environment;
109+
import org.elasticsearch.index.IndexSettings;
105110
import org.elasticsearch.index.IndexVersions;
106111
import org.elasticsearch.index.analysis.AnalyzerProvider;
107112
import org.elasticsearch.index.analysis.CharFilterFactory;
@@ -134,6 +139,8 @@
134139

135140
public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, ScriptPlugin {
136141

142+
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonAnalysisPlugin.class);
143+
137144
private final SetOnce<ScriptService> scriptServiceHolder = new SetOnce<>();
138145
private final SetOnce<SynonymsManagementAPIService> synonymsManagementServiceHolder = new SetOnce<>();
139146

@@ -224,6 +231,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
224231
filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
225232
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
226233
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
234+
filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
235+
return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings) {
236+
@Override
237+
public TokenStream create(TokenStream tokenStream) {
238+
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
239+
throw new IllegalArgumentException(
240+
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
241+
+ "Please change the filter name to [edge_ngram] instead."
242+
);
243+
} else {
244+
deprecationLogger.warn(
245+
DeprecationCategory.ANALYSIS,
246+
"edgeNGram_deprecation",
247+
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
248+
+ "Please change the filter name to [edge_ngram] instead."
249+
);
250+
}
251+
return super.create(tokenStream);
252+
}
253+
254+
};
255+
});
227256
filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
228257
filters.put("fingerprint", FingerprintTokenFilterFactory::new);
229258
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
@@ -243,6 +272,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
243272
filters.put("min_hash", MinHashTokenFilterFactory::new);
244273
filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
245274
filters.put("ngram", NGramTokenFilterFactory::new);
275+
filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
276+
return new NGramTokenFilterFactory(indexSettings, environment, name, settings) {
277+
@Override
278+
public TokenStream create(TokenStream tokenStream) {
279+
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
280+
throw new IllegalArgumentException(
281+
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
282+
+ "Please change the filter name to [ngram] instead."
283+
);
284+
} else {
285+
deprecationLogger.warn(
286+
DeprecationCategory.ANALYSIS,
287+
"nGram_deprecation",
288+
"The [nGram] token filter name is deprecated and will be removed in a future version. "
289+
+ "Please change the filter name to [ngram] instead."
290+
);
291+
}
292+
return super.create(tokenStream);
293+
}
294+
295+
};
296+
});
246297
filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
247298
filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
248299
filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
@@ -294,7 +345,39 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
294345
tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new);
295346
tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
296347
tokenizers.put("thai", ThaiTokenizerFactory::new);
348+
tokenizers.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
349+
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
350+
throw new IllegalArgumentException(
351+
"The [nGram] tokenizer name was deprecated in 7.6. "
352+
+ "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
353+
);
354+
} else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
355+
deprecationLogger.warn(
356+
DeprecationCategory.ANALYSIS,
357+
"nGram_tokenizer_deprecation",
358+
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
359+
+ "Please change the tokenizer name to [ngram] instead."
360+
);
361+
}
362+
return new NGramTokenizerFactory(indexSettings, environment, name, settings);
363+
});
297364
tokenizers.put("ngram", NGramTokenizerFactory::new);
365+
tokenizers.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
366+
if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
367+
throw new IllegalArgumentException(
368+
"The [edgeNGram] tokenizer name was deprecated in 7.6. "
369+
+ "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
370+
);
371+
} else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
372+
deprecationLogger.warn(
373+
DeprecationCategory.ANALYSIS,
374+
"edgeNGram_tokenizer_deprecation",
375+
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
376+
+ "Please change the tokenizer name to [edge_ngram] instead."
377+
);
378+
}
379+
return new EdgeNGramTokenizerFactory(indexSettings, environment, name, settings);
380+
});
298381
tokenizers.put("edge_ngram", EdgeNGramTokenizerFactory::new);
299382
tokenizers.put("char_group", CharGroupTokenizerFactory::new);
300383
tokenizers.put("classic", ClassicTokenizerFactory::new);
@@ -505,17 +588,53 @@ public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
505588
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new));
506589
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new));
507590
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new));
508-
tokenizers.add(
509-
PreConfiguredTokenizer.indexVersion(
510-
"edge_ngram",
511-
(version) -> new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE)
512-
)
513-
);
591+
tokenizers.add(PreConfiguredTokenizer.indexVersion("edge_ngram", (version) -> {
592+
if (version.onOrAfter(IndexVersions.V_7_3_0)) {
593+
return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
594+
}
595+
return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
596+
}));
514597
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1)));
515598
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new));
516599
// TODO deprecate and remove in API
517600
// This is already broken with normalization, so backwards compat isn't necessary?
518601
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new));
602+
603+
tokenizers.add(PreConfiguredTokenizer.indexVersion("nGram", (version) -> {
604+
if (version.onOrAfter(IndexVersions.V_8_0_0)) {
605+
throw new IllegalArgumentException(
606+
"The [nGram] tokenizer name was deprecated in 7.6. "
607+
+ "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
608+
);
609+
} else if (version.onOrAfter(IndexVersions.V_7_6_0)) {
610+
deprecationLogger.warn(
611+
DeprecationCategory.ANALYSIS,
612+
"nGram_tokenizer_deprecation",
613+
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
614+
+ "Please change the tokenizer name to [ngram] instead."
615+
);
616+
}
617+
return new NGramTokenizer();
618+
}));
619+
tokenizers.add(PreConfiguredTokenizer.indexVersion("edgeNGram", (version) -> {
620+
if (version.onOrAfter(IndexVersions.V_8_0_0)) {
621+
throw new IllegalArgumentException(
622+
"The [edgeNGram] tokenizer name was deprecated in 7.6. "
623+
+ "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
624+
);
625+
} else if (version.onOrAfter(IndexVersions.V_7_6_0)) {
626+
deprecationLogger.warn(
627+
DeprecationCategory.ANALYSIS,
628+
"edgeNGram_tokenizer_deprecation",
629+
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
630+
+ "Please change the tokenizer name to [edge_ngram] instead."
631+
);
632+
}
633+
if (version.onOrAfter(IndexVersions.V_7_3_0)) {
634+
return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
635+
}
636+
return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
637+
}));
519638
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new));
520639

521640
return tokenizers;

0 commit comments

Comments
 (0)