101
101
import org .apache .lucene .analysis .tr .TurkishAnalyzer ;
102
102
import org .apache .lucene .analysis .util .ElisionFilter ;
103
103
import org .apache .lucene .util .SetOnce ;
104
+ import org .elasticsearch .common .logging .DeprecationCategory ;
105
+ import org .elasticsearch .common .logging .DeprecationLogger ;
104
106
import org .elasticsearch .common .regex .Regex ;
107
+ import org .elasticsearch .common .settings .Settings ;
108
+ import org .elasticsearch .env .Environment ;
109
+ import org .elasticsearch .index .IndexSettings ;
105
110
import org .elasticsearch .index .IndexVersions ;
106
111
import org .elasticsearch .index .analysis .AnalyzerProvider ;
107
112
import org .elasticsearch .index .analysis .CharFilterFactory ;
134
139
135
140
public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin , ScriptPlugin {
136
141
142
+ private static final DeprecationLogger deprecationLogger = DeprecationLogger .getLogger (CommonAnalysisPlugin .class );
143
+
137
144
private final SetOnce <ScriptService > scriptServiceHolder = new SetOnce <>();
138
145
private final SetOnce <SynonymsManagementAPIService > synonymsManagementServiceHolder = new SetOnce <>();
139
146
@@ -224,6 +231,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
224
231
filters .put ("dictionary_decompounder" , requiresAnalysisSettings (DictionaryCompoundWordTokenFilterFactory ::new ));
225
232
filters .put ("dutch_stem" , DutchStemTokenFilterFactory ::new );
226
233
filters .put ("edge_ngram" , EdgeNGramTokenFilterFactory ::new );
234
+ filters .put ("edgeNGram" , (IndexSettings indexSettings , Environment environment , String name , Settings settings ) -> {
235
+ return new EdgeNGramTokenFilterFactory (indexSettings , environment , name , settings ) {
236
+ @ Override
237
+ public TokenStream create (TokenStream tokenStream ) {
238
+ if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_8_0_0 )) {
239
+ throw new IllegalArgumentException (
240
+ "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
241
+ + "Please change the filter name to [edge_ngram] instead."
242
+ );
243
+ } else {
244
+ deprecationLogger .warn (
245
+ DeprecationCategory .ANALYSIS ,
246
+ "edgeNGram_deprecation" ,
247
+ "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
248
+ + "Please change the filter name to [edge_ngram] instead."
249
+ );
250
+ }
251
+ return super .create (tokenStream );
252
+ }
253
+
254
+ };
255
+ });
227
256
filters .put ("elision" , requiresAnalysisSettings (ElisionTokenFilterFactory ::new ));
228
257
filters .put ("fingerprint" , FingerprintTokenFilterFactory ::new );
229
258
filters .put ("flatten_graph" , FlattenGraphTokenFilterFactory ::new );
@@ -243,6 +272,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
243
272
filters .put ("min_hash" , MinHashTokenFilterFactory ::new );
244
273
filters .put ("multiplexer" , MultiplexerTokenFilterFactory ::new );
245
274
filters .put ("ngram" , NGramTokenFilterFactory ::new );
275
+ filters .put ("nGram" , (IndexSettings indexSettings , Environment environment , String name , Settings settings ) -> {
276
+ return new NGramTokenFilterFactory (indexSettings , environment , name , settings ) {
277
+ @ Override
278
+ public TokenStream create (TokenStream tokenStream ) {
279
+ if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_8_0_0 )) {
280
+ throw new IllegalArgumentException (
281
+ "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
282
+ + "Please change the filter name to [ngram] instead."
283
+ );
284
+ } else {
285
+ deprecationLogger .warn (
286
+ DeprecationCategory .ANALYSIS ,
287
+ "nGram_deprecation" ,
288
+ "The [nGram] token filter name is deprecated and will be removed in a future version. "
289
+ + "Please change the filter name to [ngram] instead."
290
+ );
291
+ }
292
+ return super .create (tokenStream );
293
+ }
294
+
295
+ };
296
+ });
246
297
filters .put ("pattern_capture" , requiresAnalysisSettings (PatternCaptureGroupTokenFilterFactory ::new ));
247
298
filters .put ("pattern_replace" , requiresAnalysisSettings (PatternReplaceTokenFilterFactory ::new ));
248
299
filters .put ("persian_normalization" , PersianNormalizationFilterFactory ::new );
@@ -294,7 +345,39 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
294
345
tokenizers .put ("simple_pattern" , SimplePatternTokenizerFactory ::new );
295
346
tokenizers .put ("simple_pattern_split" , SimplePatternSplitTokenizerFactory ::new );
296
347
tokenizers .put ("thai" , ThaiTokenizerFactory ::new );
348
+ tokenizers .put ("nGram" , (IndexSettings indexSettings , Environment environment , String name , Settings settings ) -> {
349
+ if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_8_0_0 )) {
350
+ throw new IllegalArgumentException (
351
+ "The [nGram] tokenizer name was deprecated in 7.6. "
352
+ + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
353
+ );
354
+ } else if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_7_6_0 )) {
355
+ deprecationLogger .warn (
356
+ DeprecationCategory .ANALYSIS ,
357
+ "nGram_tokenizer_deprecation" ,
358
+ "The [nGram] tokenizer name is deprecated and will be removed in a future version. "
359
+ + "Please change the tokenizer name to [ngram] instead."
360
+ );
361
+ }
362
+ return new NGramTokenizerFactory (indexSettings , environment , name , settings );
363
+ });
297
364
tokenizers .put ("ngram" , NGramTokenizerFactory ::new );
365
// Legacy camelCase alias for the "edge_ngram" tokenizer. Behavior depends on the version the index was created in:
// - on or after 8.0.0: the alias is rejected with an IllegalArgumentException;
// - on or after 7.6.0 (and before 8.0.0): the alias is accepted, but a deprecation warning is logged;
// - before 7.6.0: the alias is accepted without a warning.
// Whenever the alias is accepted, the factory that is built is a plain EdgeNGramTokenizerFactory.
tokenizers.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
    if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
        // The message must point at the canonical registered name, "edge_ngram" (all lowercase),
        // matching the deprecation warning below; "edge_nGram" is not a name this plugin registers.
        throw new IllegalArgumentException(
            "The [edgeNGram] tokenizer name was deprecated in 7.6. "
                + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
        );
    } else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
        deprecationLogger.warn(
            DeprecationCategory.ANALYSIS,
            "edgeNGram_tokenizer_deprecation",
            "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
                + "Please change the tokenizer name to [edge_ngram] instead."
        );
    }
    return new EdgeNGramTokenizerFactory(indexSettings, environment, name, settings);
});
298
381
tokenizers .put ("edge_ngram" , EdgeNGramTokenizerFactory ::new );
299
382
tokenizers .put ("char_group" , CharGroupTokenizerFactory ::new );
300
383
tokenizers .put ("classic" , ClassicTokenizerFactory ::new );
@@ -505,17 +588,54 @@ public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
505
588
tokenizers .add (PreConfiguredTokenizer .singleton ("letter" , LetterTokenizer ::new ));
506
589
tokenizers .add (PreConfiguredTokenizer .singleton ("whitespace" , WhitespaceTokenizer ::new ));
507
590
tokenizers .add (PreConfiguredTokenizer .singleton ("ngram" , NGramTokenizer ::new ));
508
- tokenizers .add (
509
- PreConfiguredTokenizer . indexVersion (
510
- "edge_ngram" ,
511
- ( version ) -> new EdgeNGramTokenizer ( NGramTokenizer . DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer . DEFAULT_MAX_NGRAM_SIZE )
512
- )
513
- );
591
+ tokenizers .add (PreConfiguredTokenizer . indexVersion ( "edge_ngram" , ( version ) -> {
592
+ if ( version . onOrAfter ( IndexVersions . V_7_3_0 )) {
593
+ return new EdgeNGramTokenizer ( NGramTokenizer . DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer . DEFAULT_MAX_NGRAM_SIZE );
594
+ }
595
+ return new EdgeNGramTokenizer ( EdgeNGramTokenizer . DEFAULT_MIN_GRAM_SIZE , EdgeNGramTokenizer . DEFAULT_MAX_GRAM_SIZE );
596
+ }) );
514
597
tokenizers .add (PreConfiguredTokenizer .singleton ("pattern" , () -> new PatternTokenizer (Regex .compile ("\\ W+" , null ), -1 )));
515
598
tokenizers .add (PreConfiguredTokenizer .singleton ("thai" , ThaiTokenizer ::new ));
516
599
// TODO deprecate and remove in API
517
600
// This is already broken with normalization, so backwards compat isn't necessary?
518
601
tokenizers .add (PreConfiguredTokenizer .singleton ("lowercase" , XLowerCaseTokenizer ::new ));
602
+
603
+ // Temporary shim for aliases. TODO deprecate after they are moved
604
+ tokenizers .add (PreConfiguredTokenizer .indexVersion ("nGram" , (version ) -> {
605
+ if (version .onOrAfter (IndexVersions .V_8_0_0 )) {
606
+ throw new IllegalArgumentException (
607
+ "The [nGram] tokenizer name was deprecated in 7.6. "
608
+ + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
609
+ );
610
+ } else if (version .onOrAfter (IndexVersions .V_7_6_0 )) {
611
+ deprecationLogger .warn (
612
+ DeprecationCategory .ANALYSIS ,
613
+ "nGram_tokenizer_deprecation" ,
614
+ "The [nGram] tokenizer name is deprecated and will be removed in a future version. "
615
+ + "Please change the tokenizer name to [ngram] instead."
616
+ );
617
+ }
618
+ return new NGramTokenizer ();
619
+ }));
620
+ tokenizers .add (PreConfiguredTokenizer .indexVersion ("edgeNGram" , (version ) -> {
621
+ if (version .onOrAfter (IndexVersions .V_8_0_0 )) {
622
+ throw new IllegalArgumentException (
623
+ "The [edgeNGram] tokenizer name was deprecated in 7.6. "
624
+ + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
625
+ );
626
+ } else if (version .onOrAfter (IndexVersions .V_7_6_0 )) {
627
+ deprecationLogger .warn (
628
+ DeprecationCategory .ANALYSIS ,
629
+ "edgeNGram_tokenizer_deprecation" ,
630
+ "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
631
+ + "Please change the tokenizer name to [edge_ngram] instead."
632
+ );
633
+ }
634
+ if (version .onOrAfter (IndexVersions .V_7_3_0 )) {
635
+ return new EdgeNGramTokenizer (NGramTokenizer .DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer .DEFAULT_MAX_NGRAM_SIZE );
636
+ }
637
+ return new EdgeNGramTokenizer (EdgeNGramTokenizer .DEFAULT_MIN_GRAM_SIZE , EdgeNGramTokenizer .DEFAULT_MAX_GRAM_SIZE );
638
+ }));
519
639
tokenizers .add (PreConfiguredTokenizer .singleton ("PathHierarchy" , PathHierarchyTokenizer ::new ));
520
640
521
641
return tokenizers ;
0 commit comments