22
22
import org .apache .lucene .analysis .Analyzer ;
23
23
import org .apache .lucene .analysis .TokenStream ;
24
24
import org .apache .lucene .analysis .Tokenizer ;
25
- import org .apache .lucene .analysis .ar .ArabicAnalyzer ;
26
25
import org .apache .lucene .analysis .ar .ArabicNormalizationFilter ;
27
26
import org .apache .lucene .analysis .ar .ArabicStemFilter ;
28
- import org .apache .lucene .analysis .bg .BulgarianAnalyzer ;
29
- import org .apache .lucene .analysis .br .BrazilianAnalyzer ;
30
27
import org .apache .lucene .analysis .br .BrazilianStemFilter ;
31
- import org .apache .lucene .analysis .ca .CatalanAnalyzer ;
32
28
import org .apache .lucene .analysis .charfilter .HTMLStripCharFilter ;
33
- import org .apache .lucene .analysis .cjk .CJKAnalyzer ;
34
- import org .apache .lucene .analysis .cn .ChineseAnalyzer ;
35
- import org .apache .lucene .analysis .commongrams .*;
29
+ import org .apache .lucene .analysis .commongrams .CommonGramsFilter ;
36
30
import org .apache .lucene .analysis .core .*;
37
- import org .apache .lucene .analysis .cz .CzechAnalyzer ;
38
31
import org .apache .lucene .analysis .cz .CzechStemFilter ;
39
- import org .apache .lucene .analysis .da .DanishAnalyzer ;
40
- import org .apache .lucene .analysis .de .GermanAnalyzer ;
41
32
import org .apache .lucene .analysis .de .GermanStemFilter ;
42
- import org .apache .lucene .analysis .el .GreekAnalyzer ;
43
- import org .apache .lucene .analysis .en .EnglishAnalyzer ;
44
33
import org .apache .lucene .analysis .en .KStemFilter ;
45
34
import org .apache .lucene .analysis .en .PorterStemFilter ;
46
- import org .apache .lucene .analysis .es .SpanishAnalyzer ;
47
- import org .apache .lucene .analysis .eu .BasqueAnalyzer ;
48
- import org .apache .lucene .analysis .fa .PersianAnalyzer ;
49
35
import org .apache .lucene .analysis .fa .PersianNormalizationFilter ;
50
- import org .apache .lucene .analysis .fi .FinnishAnalyzer ;
51
36
import org .apache .lucene .analysis .fr .FrenchAnalyzer ;
52
37
import org .apache .lucene .analysis .fr .FrenchStemFilter ;
53
- import org .apache .lucene .analysis .ga .IrishAnalyzer ;
54
- import org .apache .lucene .analysis .gl .GalicianAnalyzer ;
55
- import org .apache .lucene .analysis .hi .HindiAnalyzer ;
56
- import org .apache .lucene .analysis .hu .HungarianAnalyzer ;
57
- import org .apache .lucene .analysis .hy .ArmenianAnalyzer ;
58
- import org .apache .lucene .analysis .id .IndonesianAnalyzer ;
59
- import org .apache .lucene .analysis .it .ItalianAnalyzer ;
60
- import org .apache .lucene .analysis .lv .LatvianAnalyzer ;
61
38
import org .apache .lucene .analysis .miscellaneous .*;
62
39
import org .apache .lucene .analysis .ngram .EdgeNGramTokenFilter ;
63
40
import org .apache .lucene .analysis .ngram .EdgeNGramTokenizer ;
64
41
import org .apache .lucene .analysis .ngram .NGramTokenFilter ;
65
42
import org .apache .lucene .analysis .ngram .NGramTokenizer ;
66
- import org .apache .lucene .analysis .nl .DutchAnalyzer ;
67
43
import org .apache .lucene .analysis .nl .DutchStemFilter ;
68
- import org .apache .lucene .analysis .no .NorwegianAnalyzer ;
69
44
import org .apache .lucene .analysis .path .PathHierarchyTokenizer ;
70
45
import org .apache .lucene .analysis .pattern .PatternTokenizer ;
71
46
import org .apache .lucene .analysis .payloads .TypeAsPayloadTokenFilter ;
72
- import org .apache .lucene .analysis .pt .PortugueseAnalyzer ;
73
47
import org .apache .lucene .analysis .reverse .ReverseStringFilter ;
74
- import org .apache .lucene .analysis .ro .RomanianAnalyzer ;
75
- import org .apache .lucene .analysis .ru .RussianAnalyzer ;
76
- import org .apache .lucene .analysis .shingle .ShingleFilter ;
77
- import org .apache .lucene .analysis .snowball .SnowballAnalyzer ;
78
48
import org .apache .lucene .analysis .snowball .SnowballFilter ;
79
49
import org .apache .lucene .analysis .standard .*;
80
- import org .apache .lucene .analysis .sv .SwedishAnalyzer ;
81
- import org .apache .lucene .analysis .th .ThaiAnalyzer ;
82
- import org .apache .lucene .analysis .tr .TurkishAnalyzer ;
83
- import org .apache .lucene .analysis .util .ElisionFilter ;
84
50
import org .apache .lucene .analysis .util .CharArraySet ;
51
+ import org .apache .lucene .analysis .util .ElisionFilter ;
52
+ import org .elasticsearch .Version ;
85
53
import org .elasticsearch .common .component .AbstractComponent ;
86
54
import org .elasticsearch .common .inject .Inject ;
87
55
import org .elasticsearch .common .lucene .Lucene ;
91
59
import org .elasticsearch .index .analysis .*;
92
60
93
61
import java .io .Reader ;
62
+ import java .util .Locale ;
94
63
import java .util .Map ;
95
64
96
65
import static org .elasticsearch .common .settings .ImmutableSettings .Builder .EMPTY_SETTINGS ;
101
70
public class IndicesAnalysisService extends AbstractComponent {
102
71
103
72
private final Map <String , PreBuiltAnalyzerProviderFactory > analyzerProviderFactories = ConcurrentCollections .newConcurrentMap ();
104
-
105
73
private final Map <String , PreBuiltTokenizerFactoryFactory > tokenizerFactories = ConcurrentCollections .newConcurrentMap ();
106
74
private final Map <String , PreBuiltTokenFilterFactoryFactory > tokenFilterFactories = ConcurrentCollections .newConcurrentMap ();
107
75
private final Map <String , PreBuiltCharFilterFactoryFactory > charFilterFactories = ConcurrentCollections .newConcurrentMap ();
@@ -114,52 +82,10 @@ public IndicesAnalysisService() {
114
82
public IndicesAnalysisService (Settings settings ) {
115
83
super (settings );
116
84
117
- StandardAnalyzer standardAnalyzer = new StandardAnalyzer (Lucene .ANALYZER_VERSION );
118
- analyzerProviderFactories .put ("default" , new PreBuiltAnalyzerProviderFactory ("default" , AnalyzerScope .INDICES , standardAnalyzer ));
119
- analyzerProviderFactories .put ("standard" , new PreBuiltAnalyzerProviderFactory ("standard" , AnalyzerScope .INDICES , standardAnalyzer ));
120
- analyzerProviderFactories .put ("keyword" , new PreBuiltAnalyzerProviderFactory ("keyword" , AnalyzerScope .INDICES , new KeywordAnalyzer ()));
121
- analyzerProviderFactories .put ("stop" , new PreBuiltAnalyzerProviderFactory ("stop" , AnalyzerScope .INDICES , new StopAnalyzer (Lucene .ANALYZER_VERSION )));
122
- analyzerProviderFactories .put ("whitespace" , new PreBuiltAnalyzerProviderFactory ("whitespace" , AnalyzerScope .INDICES , new WhitespaceAnalyzer (Lucene .ANALYZER_VERSION )));
123
- analyzerProviderFactories .put ("simple" , new PreBuiltAnalyzerProviderFactory ("simple" , AnalyzerScope .INDICES , new SimpleAnalyzer (Lucene .ANALYZER_VERSION )));
124
- analyzerProviderFactories .put ("classic" , new PreBuiltAnalyzerProviderFactory ("classic" , AnalyzerScope .INDICES , new ClassicAnalyzer (Lucene .ANALYZER_VERSION )));
125
-
126
- // extended ones
127
- analyzerProviderFactories .put ("pattern" , new PreBuiltAnalyzerProviderFactory ("pattern" , AnalyzerScope .INDICES , new PatternAnalyzer (Lucene .ANALYZER_VERSION , Regex .compile ("\\ W+" /*PatternAnalyzer.NON_WORD_PATTERN*/ , null ), true , StopAnalyzer .ENGLISH_STOP_WORDS_SET )));
128
- analyzerProviderFactories .put ("snowball" , new PreBuiltAnalyzerProviderFactory ("snowball" , AnalyzerScope .INDICES , new SnowballAnalyzer (Lucene .ANALYZER_VERSION , "English" , StopAnalyzer .ENGLISH_STOP_WORDS_SET )));
129
- analyzerProviderFactories .put ("standard_html_strip" , new PreBuiltAnalyzerProviderFactory ("standard_html_strip" , AnalyzerScope .INDICES , new StandardHtmlStripAnalyzer (Lucene .ANALYZER_VERSION )));
130
-
131
- analyzerProviderFactories .put ("arabic" , new PreBuiltAnalyzerProviderFactory ("arabic" , AnalyzerScope .INDICES , new ArabicAnalyzer (Lucene .ANALYZER_VERSION )));
132
- analyzerProviderFactories .put ("armenian" , new PreBuiltAnalyzerProviderFactory ("armenian" , AnalyzerScope .INDICES , new ArmenianAnalyzer (Lucene .ANALYZER_VERSION )));
133
- analyzerProviderFactories .put ("basque" , new PreBuiltAnalyzerProviderFactory ("basque" , AnalyzerScope .INDICES , new BasqueAnalyzer (Lucene .ANALYZER_VERSION )));
134
- analyzerProviderFactories .put ("brazilian" , new PreBuiltAnalyzerProviderFactory ("brazilian" , AnalyzerScope .INDICES , new BrazilianAnalyzer (Lucene .ANALYZER_VERSION )));
135
- analyzerProviderFactories .put ("bulgarian" , new PreBuiltAnalyzerProviderFactory ("bulgarian" , AnalyzerScope .INDICES , new BulgarianAnalyzer (Lucene .ANALYZER_VERSION )));
136
- analyzerProviderFactories .put ("catalan" , new PreBuiltAnalyzerProviderFactory ("catalan" , AnalyzerScope .INDICES , new CatalanAnalyzer (Lucene .ANALYZER_VERSION )));
137
- analyzerProviderFactories .put ("chinese" , new PreBuiltAnalyzerProviderFactory ("chinese" , AnalyzerScope .INDICES , new ChineseAnalyzer ()));
138
- analyzerProviderFactories .put ("cjk" , new PreBuiltAnalyzerProviderFactory ("cjk" , AnalyzerScope .INDICES , new CJKAnalyzer (Lucene .ANALYZER_VERSION )));
139
- analyzerProviderFactories .put ("czech" , new PreBuiltAnalyzerProviderFactory ("czech" , AnalyzerScope .INDICES , new CzechAnalyzer (Lucene .ANALYZER_VERSION )));
140
- analyzerProviderFactories .put ("dutch" , new PreBuiltAnalyzerProviderFactory ("dutch" , AnalyzerScope .INDICES , new DutchAnalyzer (Lucene .ANALYZER_VERSION )));
141
- analyzerProviderFactories .put ("danish" , new PreBuiltAnalyzerProviderFactory ("danish" , AnalyzerScope .INDICES , new DanishAnalyzer (Lucene .ANALYZER_VERSION )));
142
- analyzerProviderFactories .put ("english" , new PreBuiltAnalyzerProviderFactory ("english" , AnalyzerScope .INDICES , new EnglishAnalyzer (Lucene .ANALYZER_VERSION )));
143
- analyzerProviderFactories .put ("finnish" , new PreBuiltAnalyzerProviderFactory ("finnish" , AnalyzerScope .INDICES , new FinnishAnalyzer (Lucene .ANALYZER_VERSION )));
144
- analyzerProviderFactories .put ("french" , new PreBuiltAnalyzerProviderFactory ("french" , AnalyzerScope .INDICES , new FrenchAnalyzer (Lucene .ANALYZER_VERSION )));
145
- analyzerProviderFactories .put ("galician" , new PreBuiltAnalyzerProviderFactory ("galician" , AnalyzerScope .INDICES , new GalicianAnalyzer (Lucene .ANALYZER_VERSION )));
146
- analyzerProviderFactories .put ("german" , new PreBuiltAnalyzerProviderFactory ("german" , AnalyzerScope .INDICES , new GermanAnalyzer (Lucene .ANALYZER_VERSION )));
147
- analyzerProviderFactories .put ("greek" , new PreBuiltAnalyzerProviderFactory ("greek" , AnalyzerScope .INDICES , new GreekAnalyzer (Lucene .ANALYZER_VERSION )));
148
- analyzerProviderFactories .put ("hindi" , new PreBuiltAnalyzerProviderFactory ("hindi" , AnalyzerScope .INDICES , new HindiAnalyzer (Lucene .ANALYZER_VERSION )));
149
- analyzerProviderFactories .put ("hungarian" , new PreBuiltAnalyzerProviderFactory ("hungarian" , AnalyzerScope .INDICES , new HungarianAnalyzer (Lucene .ANALYZER_VERSION )));
150
- analyzerProviderFactories .put ("indonesian" , new PreBuiltAnalyzerProviderFactory ("indonesian" , AnalyzerScope .INDICES , new IndonesianAnalyzer (Lucene .ANALYZER_VERSION )));
151
- analyzerProviderFactories .put ("irish" , new PreBuiltAnalyzerProviderFactory ("irish" , AnalyzerScope .INDICES , new IrishAnalyzer (Lucene .ANALYZER_VERSION )));
152
- analyzerProviderFactories .put ("italian" , new PreBuiltAnalyzerProviderFactory ("italian" , AnalyzerScope .INDICES , new ItalianAnalyzer (Lucene .ANALYZER_VERSION )));
153
- analyzerProviderFactories .put ("latvian" , new PreBuiltAnalyzerProviderFactory ("latvian" , AnalyzerScope .INDICES , new LatvianAnalyzer (Lucene .ANALYZER_VERSION )));
154
- analyzerProviderFactories .put ("norwegian" , new PreBuiltAnalyzerProviderFactory ("norwegian" , AnalyzerScope .INDICES , new NorwegianAnalyzer (Lucene .ANALYZER_VERSION )));
155
- analyzerProviderFactories .put ("persian" , new PreBuiltAnalyzerProviderFactory ("persian" , AnalyzerScope .INDICES , new PersianAnalyzer (Lucene .ANALYZER_VERSION )));
156
- analyzerProviderFactories .put ("portuguese" , new PreBuiltAnalyzerProviderFactory ("portuguese" , AnalyzerScope .INDICES , new PortugueseAnalyzer (Lucene .ANALYZER_VERSION )));
157
- analyzerProviderFactories .put ("romanian" , new PreBuiltAnalyzerProviderFactory ("romanian" , AnalyzerScope .INDICES , new RomanianAnalyzer (Lucene .ANALYZER_VERSION )));
158
- analyzerProviderFactories .put ("russian" , new PreBuiltAnalyzerProviderFactory ("russian" , AnalyzerScope .INDICES , new RussianAnalyzer (Lucene .ANALYZER_VERSION )));
159
- analyzerProviderFactories .put ("spanish" , new PreBuiltAnalyzerProviderFactory ("spanish" , AnalyzerScope .INDICES , new SpanishAnalyzer (Lucene .ANALYZER_VERSION )));
160
- analyzerProviderFactories .put ("swedish" , new PreBuiltAnalyzerProviderFactory ("swedish" , AnalyzerScope .INDICES , new SwedishAnalyzer (Lucene .ANALYZER_VERSION )));
161
- analyzerProviderFactories .put ("turkish" , new PreBuiltAnalyzerProviderFactory ("turkish" , AnalyzerScope .INDICES , new TurkishAnalyzer (Lucene .ANALYZER_VERSION )));
162
- analyzerProviderFactories .put ("thai" , new PreBuiltAnalyzerProviderFactory ("thai" , AnalyzerScope .INDICES , new ThaiAnalyzer (Lucene .ANALYZER_VERSION )));
85
+ for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers .values ()) {
86
+ String name = preBuiltAnalyzerEnum .name ().toLowerCase (Locale .ROOT );
87
+ analyzerProviderFactories .put (name , new PreBuiltAnalyzerProviderFactory (name , AnalyzerScope .INDICES , preBuiltAnalyzerEnum .getAnalyzer (Version .CURRENT )));
88
+ }
163
89
164
90
// Base Tokenizers
165
91
tokenizerFactories .put ("standard" , new PreBuiltTokenizerFactoryFactory (new TokenizerFactory () {
@@ -771,7 +697,7 @@ public PreBuiltAnalyzerProviderFactory analyzerProviderFactory(String name) {
771
697
}
772
698
773
699
public boolean hasAnalyzer (String name ) {
774
- return analyzer (name ) != null ;
700
+ return analyzerProviderFactories . containsKey (name );
775
701
}
776
702
777
703
public Analyzer analyzer (String name ) {
0 commit comments