22
22
import org .apache .lucene .analysis .Analyzer ;
23
23
import org .apache .lucene .analysis .TokenStream ;
24
24
import org .apache .lucene .analysis .Tokenizer ;
25
- import org .apache .lucene .analysis .ar .ArabicAnalyzer ;
26
25
import org .apache .lucene .analysis .ar .ArabicNormalizationFilter ;
27
26
import org .apache .lucene .analysis .ar .ArabicStemFilter ;
28
- import org .apache .lucene .analysis .bg .BulgarianAnalyzer ;
29
- import org .apache .lucene .analysis .br .BrazilianAnalyzer ;
30
27
import org .apache .lucene .analysis .br .BrazilianStemFilter ;
31
- import org .apache .lucene .analysis .ca .CatalanAnalyzer ;
32
28
import org .apache .lucene .analysis .charfilter .HTMLStripCharFilter ;
33
- import org .apache .lucene .analysis .cjk .CJKAnalyzer ;
34
- import org .apache .lucene .analysis .cn .ChineseAnalyzer ;
35
- import org .apache .lucene .analysis .commongrams .*;
29
+ import org .apache .lucene .analysis .commongrams .CommonGramsFilter ;
36
30
import org .apache .lucene .analysis .core .*;
37
- import org .apache .lucene .analysis .cz .CzechAnalyzer ;
38
31
import org .apache .lucene .analysis .cz .CzechStemFilter ;
39
- import org .apache .lucene .analysis .da .DanishAnalyzer ;
40
- import org .apache .lucene .analysis .de .GermanAnalyzer ;
41
32
import org .apache .lucene .analysis .de .GermanStemFilter ;
42
- import org .apache .lucene .analysis .el .GreekAnalyzer ;
43
- import org .apache .lucene .analysis .en .EnglishAnalyzer ;
44
33
import org .apache .lucene .analysis .en .KStemFilter ;
45
34
import org .apache .lucene .analysis .en .PorterStemFilter ;
46
- import org .apache .lucene .analysis .es .SpanishAnalyzer ;
47
- import org .apache .lucene .analysis .eu .BasqueAnalyzer ;
48
- import org .apache .lucene .analysis .fa .PersianAnalyzer ;
49
35
import org .apache .lucene .analysis .fa .PersianNormalizationFilter ;
50
- import org .apache .lucene .analysis .fi .FinnishAnalyzer ;
51
36
import org .apache .lucene .analysis .fr .FrenchAnalyzer ;
52
37
import org .apache .lucene .analysis .fr .FrenchStemFilter ;
53
- import org .apache .lucene .analysis .ga .IrishAnalyzer ;
54
- import org .apache .lucene .analysis .gl .GalicianAnalyzer ;
55
- import org .apache .lucene .analysis .hi .HindiAnalyzer ;
56
- import org .apache .lucene .analysis .hu .HungarianAnalyzer ;
57
- import org .apache .lucene .analysis .hy .ArmenianAnalyzer ;
58
- import org .apache .lucene .analysis .id .IndonesianAnalyzer ;
59
- import org .apache .lucene .analysis .it .ItalianAnalyzer ;
60
- import org .apache .lucene .analysis .lv .LatvianAnalyzer ;
61
38
import org .apache .lucene .analysis .miscellaneous .*;
62
39
import org .apache .lucene .analysis .ngram .EdgeNGramTokenFilter ;
63
40
import org .apache .lucene .analysis .ngram .EdgeNGramTokenizer ;
64
41
import org .apache .lucene .analysis .ngram .NGramTokenFilter ;
65
42
import org .apache .lucene .analysis .ngram .NGramTokenizer ;
66
- import org .apache .lucene .analysis .nl .DutchAnalyzer ;
67
43
import org .apache .lucene .analysis .nl .DutchStemFilter ;
68
- import org .apache .lucene .analysis .no .NorwegianAnalyzer ;
69
44
import org .apache .lucene .analysis .path .PathHierarchyTokenizer ;
70
45
import org .apache .lucene .analysis .pattern .PatternTokenizer ;
71
- import org .apache .lucene .analysis .pt .PortugueseAnalyzer ;
72
46
import org .apache .lucene .analysis .reverse .ReverseStringFilter ;
73
- import org .apache .lucene .analysis .ro .RomanianAnalyzer ;
74
- import org .apache .lucene .analysis .ru .RussianAnalyzer ;
75
- import org .apache .lucene .analysis .shingle .ShingleFilter ;
76
- import org .apache .lucene .analysis .snowball .SnowballAnalyzer ;
77
47
import org .apache .lucene .analysis .snowball .SnowballFilter ;
78
48
import org .apache .lucene .analysis .standard .*;
79
- import org .apache .lucene .analysis .sv .SwedishAnalyzer ;
80
- import org .apache .lucene .analysis .th .ThaiAnalyzer ;
81
- import org .apache .lucene .analysis .tr .TurkishAnalyzer ;
82
- import org .apache .lucene .analysis .util .ElisionFilter ;
83
49
import org .apache .lucene .analysis .util .CharArraySet ;
50
+ import org .apache .lucene .analysis .util .ElisionFilter ;
51
+ import org .elasticsearch .Version ;
84
52
import org .elasticsearch .common .component .AbstractComponent ;
85
53
import org .elasticsearch .common .inject .Inject ;
86
54
import org .elasticsearch .common .lucene .Lucene ;
90
58
import org .elasticsearch .index .analysis .*;
91
59
92
60
import java .io .Reader ;
61
+ import java .util .Locale ;
93
62
import java .util .Map ;
94
63
95
64
import static org .elasticsearch .common .settings .ImmutableSettings .Builder .EMPTY_SETTINGS ;
100
69
public class IndicesAnalysisService extends AbstractComponent {
101
70
102
71
// Registries of the pre-built analysis components, one String-keyed map per
// component kind (analyzers, tokenizers, token filters, char filters). Each
// map is created through ConcurrentCollections.newConcurrentMap() and is
// filled by put(...) calls in the constructor.
// NOTE(review): this span comes from a rendered diff; the bare numbers and
// the lone '-' below are diff line markers, not Java source.
private final Map <String , PreBuiltAnalyzerProviderFactory > analyzerProviderFactories = ConcurrentCollections .newConcurrentMap ();
103
-
104
72
private final Map <String , PreBuiltTokenizerFactoryFactory > tokenizerFactories = ConcurrentCollections .newConcurrentMap ();
105
73
private final Map <String , PreBuiltTokenFilterFactoryFactory > tokenFilterFactories = ConcurrentCollections .newConcurrentMap ();
106
74
private final Map <String , PreBuiltCharFilterFactoryFactory > charFilterFactories = ConcurrentCollections .newConcurrentMap ();
@@ -113,52 +81,10 @@ public IndicesAnalysisService() {
113
81
public IndicesAnalysisService (Settings settings ) {
114
82
super (settings );
115
83
116
- StandardAnalyzer standardAnalyzer = new StandardAnalyzer (Lucene .ANALYZER_VERSION );
117
- analyzerProviderFactories .put ("default" , new PreBuiltAnalyzerProviderFactory ("default" , AnalyzerScope .INDICES , standardAnalyzer ));
118
- analyzerProviderFactories .put ("standard" , new PreBuiltAnalyzerProviderFactory ("standard" , AnalyzerScope .INDICES , standardAnalyzer ));
119
- analyzerProviderFactories .put ("keyword" , new PreBuiltAnalyzerProviderFactory ("keyword" , AnalyzerScope .INDICES , new KeywordAnalyzer ()));
120
- analyzerProviderFactories .put ("stop" , new PreBuiltAnalyzerProviderFactory ("stop" , AnalyzerScope .INDICES , new StopAnalyzer (Lucene .ANALYZER_VERSION )));
121
- analyzerProviderFactories .put ("whitespace" , new PreBuiltAnalyzerProviderFactory ("whitespace" , AnalyzerScope .INDICES , new WhitespaceAnalyzer (Lucene .ANALYZER_VERSION )));
122
- analyzerProviderFactories .put ("simple" , new PreBuiltAnalyzerProviderFactory ("simple" , AnalyzerScope .INDICES , new SimpleAnalyzer (Lucene .ANALYZER_VERSION )));
123
- analyzerProviderFactories .put ("classic" , new PreBuiltAnalyzerProviderFactory ("classic" , AnalyzerScope .INDICES , new ClassicAnalyzer (Lucene .ANALYZER_VERSION )));
124
-
125
- // extended ones
126
- analyzerProviderFactories .put ("pattern" , new PreBuiltAnalyzerProviderFactory ("pattern" , AnalyzerScope .INDICES , new PatternAnalyzer (Lucene .ANALYZER_VERSION , Regex .compile ("\\ W+" /*PatternAnalyzer.NON_WORD_PATTERN*/ , null ), true , StopAnalyzer .ENGLISH_STOP_WORDS_SET )));
127
- analyzerProviderFactories .put ("snowball" , new PreBuiltAnalyzerProviderFactory ("snowball" , AnalyzerScope .INDICES , new SnowballAnalyzer (Lucene .ANALYZER_VERSION , "English" , StopAnalyzer .ENGLISH_STOP_WORDS_SET )));
128
- analyzerProviderFactories .put ("standard_html_strip" , new PreBuiltAnalyzerProviderFactory ("standard_html_strip" , AnalyzerScope .INDICES , new StandardHtmlStripAnalyzer (Lucene .ANALYZER_VERSION )));
129
-
130
- analyzerProviderFactories .put ("arabic" , new PreBuiltAnalyzerProviderFactory ("arabic" , AnalyzerScope .INDICES , new ArabicAnalyzer (Lucene .ANALYZER_VERSION )));
131
- analyzerProviderFactories .put ("armenian" , new PreBuiltAnalyzerProviderFactory ("armenian" , AnalyzerScope .INDICES , new ArmenianAnalyzer (Lucene .ANALYZER_VERSION )));
132
- analyzerProviderFactories .put ("basque" , new PreBuiltAnalyzerProviderFactory ("basque" , AnalyzerScope .INDICES , new BasqueAnalyzer (Lucene .ANALYZER_VERSION )));
133
- analyzerProviderFactories .put ("brazilian" , new PreBuiltAnalyzerProviderFactory ("brazilian" , AnalyzerScope .INDICES , new BrazilianAnalyzer (Lucene .ANALYZER_VERSION )));
134
- analyzerProviderFactories .put ("bulgarian" , new PreBuiltAnalyzerProviderFactory ("bulgarian" , AnalyzerScope .INDICES , new BulgarianAnalyzer (Lucene .ANALYZER_VERSION )));
135
- analyzerProviderFactories .put ("catalan" , new PreBuiltAnalyzerProviderFactory ("catalan" , AnalyzerScope .INDICES , new CatalanAnalyzer (Lucene .ANALYZER_VERSION )));
136
- analyzerProviderFactories .put ("chinese" , new PreBuiltAnalyzerProviderFactory ("chinese" , AnalyzerScope .INDICES , new ChineseAnalyzer ()));
137
- analyzerProviderFactories .put ("cjk" , new PreBuiltAnalyzerProviderFactory ("cjk" , AnalyzerScope .INDICES , new CJKAnalyzer (Lucene .ANALYZER_VERSION )));
138
- analyzerProviderFactories .put ("czech" , new PreBuiltAnalyzerProviderFactory ("czech" , AnalyzerScope .INDICES , new CzechAnalyzer (Lucene .ANALYZER_VERSION )));
139
- analyzerProviderFactories .put ("dutch" , new PreBuiltAnalyzerProviderFactory ("dutch" , AnalyzerScope .INDICES , new DutchAnalyzer (Lucene .ANALYZER_VERSION )));
140
- analyzerProviderFactories .put ("danish" , new PreBuiltAnalyzerProviderFactory ("danish" , AnalyzerScope .INDICES , new DanishAnalyzer (Lucene .ANALYZER_VERSION )));
141
- analyzerProviderFactories .put ("english" , new PreBuiltAnalyzerProviderFactory ("english" , AnalyzerScope .INDICES , new EnglishAnalyzer (Lucene .ANALYZER_VERSION )));
142
- analyzerProviderFactories .put ("finnish" , new PreBuiltAnalyzerProviderFactory ("finnish" , AnalyzerScope .INDICES , new FinnishAnalyzer (Lucene .ANALYZER_VERSION )));
143
- analyzerProviderFactories .put ("french" , new PreBuiltAnalyzerProviderFactory ("french" , AnalyzerScope .INDICES , new FrenchAnalyzer (Lucene .ANALYZER_VERSION )));
144
- analyzerProviderFactories .put ("galician" , new PreBuiltAnalyzerProviderFactory ("galician" , AnalyzerScope .INDICES , new GalicianAnalyzer (Lucene .ANALYZER_VERSION )));
145
- analyzerProviderFactories .put ("german" , new PreBuiltAnalyzerProviderFactory ("german" , AnalyzerScope .INDICES , new GermanAnalyzer (Lucene .ANALYZER_VERSION )));
146
- analyzerProviderFactories .put ("greek" , new PreBuiltAnalyzerProviderFactory ("greek" , AnalyzerScope .INDICES , new GreekAnalyzer (Lucene .ANALYZER_VERSION )));
147
- analyzerProviderFactories .put ("hindi" , new PreBuiltAnalyzerProviderFactory ("hindi" , AnalyzerScope .INDICES , new HindiAnalyzer (Lucene .ANALYZER_VERSION )));
148
- analyzerProviderFactories .put ("hungarian" , new PreBuiltAnalyzerProviderFactory ("hungarian" , AnalyzerScope .INDICES , new HungarianAnalyzer (Lucene .ANALYZER_VERSION )));
149
- analyzerProviderFactories .put ("indonesian" , new PreBuiltAnalyzerProviderFactory ("indonesian" , AnalyzerScope .INDICES , new IndonesianAnalyzer (Lucene .ANALYZER_VERSION )));
150
- analyzerProviderFactories .put ("irish" , new PreBuiltAnalyzerProviderFactory ("irish" , AnalyzerScope .INDICES , new IrishAnalyzer (Lucene .ANALYZER_VERSION )));
151
- analyzerProviderFactories .put ("italian" , new PreBuiltAnalyzerProviderFactory ("italian" , AnalyzerScope .INDICES , new ItalianAnalyzer (Lucene .ANALYZER_VERSION )));
152
- analyzerProviderFactories .put ("latvian" , new PreBuiltAnalyzerProviderFactory ("latvian" , AnalyzerScope .INDICES , new LatvianAnalyzer (Lucene .ANALYZER_VERSION )));
153
- analyzerProviderFactories .put ("norwegian" , new PreBuiltAnalyzerProviderFactory ("norwegian" , AnalyzerScope .INDICES , new NorwegianAnalyzer (Lucene .ANALYZER_VERSION )));
154
- analyzerProviderFactories .put ("persian" , new PreBuiltAnalyzerProviderFactory ("persian" , AnalyzerScope .INDICES , new PersianAnalyzer (Lucene .ANALYZER_VERSION )));
155
- analyzerProviderFactories .put ("portuguese" , new PreBuiltAnalyzerProviderFactory ("portuguese" , AnalyzerScope .INDICES , new PortugueseAnalyzer (Lucene .ANALYZER_VERSION )));
156
- analyzerProviderFactories .put ("romanian" , new PreBuiltAnalyzerProviderFactory ("romanian" , AnalyzerScope .INDICES , new RomanianAnalyzer (Lucene .ANALYZER_VERSION )));
157
- analyzerProviderFactories .put ("russian" , new PreBuiltAnalyzerProviderFactory ("russian" , AnalyzerScope .INDICES , new RussianAnalyzer (Lucene .ANALYZER_VERSION )));
158
- analyzerProviderFactories .put ("spanish" , new PreBuiltAnalyzerProviderFactory ("spanish" , AnalyzerScope .INDICES , new SpanishAnalyzer (Lucene .ANALYZER_VERSION )));
159
- analyzerProviderFactories .put ("swedish" , new PreBuiltAnalyzerProviderFactory ("swedish" , AnalyzerScope .INDICES , new SwedishAnalyzer (Lucene .ANALYZER_VERSION )));
160
- analyzerProviderFactories .put ("turkish" , new PreBuiltAnalyzerProviderFactory ("turkish" , AnalyzerScope .INDICES , new TurkishAnalyzer (Lucene .ANALYZER_VERSION )));
161
- analyzerProviderFactories .put ("thai" , new PreBuiltAnalyzerProviderFactory ("thai" , AnalyzerScope .INDICES , new ThaiAnalyzer (Lucene .ANALYZER_VERSION )));
84
+ for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers .values ()) {
85
+ String name = preBuiltAnalyzerEnum .name ().toLowerCase (Locale .ROOT );
86
+ analyzerProviderFactories .put (name , new PreBuiltAnalyzerProviderFactory (name , AnalyzerScope .INDICES , preBuiltAnalyzerEnum .getAnalyzer (Version .CURRENT )));
87
+ }
162
88
163
89
// Base Tokenizers
164
90
tokenizerFactories .put ("standard" , new PreBuiltTokenizerFactoryFactory (new TokenizerFactory () {
@@ -757,7 +683,7 @@ public PreBuiltAnalyzerProviderFactory analyzerProviderFactory(String name) {
757
683
}
758
684
759
685
/**
 * Reports whether a pre-built analyzer is registered under the given name.
 *
 * NOTE(review): this span is a rendered diff, so both sides of the change
 * appear in the body. The removed line ('-') resolved the analyzer through
 * analyzer(name) and compared the result to null; the added line ('+') only
 * checks whether analyzerProviderFactories contains the key.
 *
 * @param name registry key to look up
 * @return per the added line, true when analyzerProviderFactories contains
 *         the key
 */
public boolean hasAnalyzer (String name ) {
760
- return analyzer (name ) != null ;
686
+ return analyzerProviderFactories . containsKey (name );
761
687
}
762
688
763
689
public Analyzer analyzer (String name ) {
0 commit comments