Skip to content

Commit ec0880d

Browse files
committed
Add version to prebuilt analyzers
This patch takes the version of the created index into account when a prebuilt analyzer is created. So, if an index was created with 0.90.4, then the prebuilt analyzers will be the same as in the 0.90.4 release. One reason for this feature is the possibility to change prebuilt analyzers like the standard one. The patch tries to reuse analyzers as much as possible. So if versions X.Y.Z and X.Y.A use the same Lucene analyzers, the same instance is reused in order to avoid creating redundant Lucene analyzer instances. Closes #3790
1 parent c9dab69 commit ec0880d

12 files changed

+841
-114
lines changed

src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,9 @@ public ClusterState execute(ClusterState currentState) throws Exception {
252252
indexSettingsBuilder.put(SETTING_AUTO_EXPAND_REPLICAS, settings.get(SETTING_AUTO_EXPAND_REPLICAS));
253253
}
254254

255-
indexSettingsBuilder.put(SETTING_VERSION_CREATED, version);
255+
if (indexSettingsBuilder.get(SETTING_VERSION_CREATED) == null) {
256+
indexSettingsBuilder.put(SETTING_VERSION_CREATED, version);
257+
}
256258
indexSettingsBuilder.put(SETTING_UUID, Strings.randomBase64UUID());
257259

258260
Settings actualIndexSettings = indexSettingsBuilder.build();

src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ protected void configure() {
387387
}
388388

389389

390-
// go over the tokenizers in the bindings and register the ones that are not configured
390+
// go over the analyzers in the bindings and register the ones that are not configured
391391
for (Map.Entry<String, Class<? extends AnalyzerProvider>> entry : analyzersBindings.analyzers.entrySet()) {
392392
String analyzerName = entry.getKey();
393393
Class<? extends AnalyzerProvider> clazz = entry.getValue();
@@ -408,7 +408,6 @@ protected void configure() {
408408
}
409409
}
410410

411-
412411
bind(AnalysisService.class).in(Scopes.SINGLETON);
413412
}
414413

src/main/java/org/elasticsearch/index/analysis/AnalysisService.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import com.google.common.collect.ImmutableMap;
2323
import org.apache.lucene.analysis.Analyzer;
2424
import org.elasticsearch.ElasticSearchIllegalArgumentException;
25+
import org.elasticsearch.Version;
26+
import org.elasticsearch.cluster.metadata.IndexMetaData;
2527
import org.elasticsearch.common.Nullable;
2628
import org.elasticsearch.common.Strings;
2729
import org.elasticsearch.common.component.CloseableComponent;
@@ -188,14 +190,13 @@ public AnalysisService(Index index, @IndexSettings Settings indexSettings, @Null
188190
if (indicesAnalysisService != null) {
189191
for (Map.Entry<String, PreBuiltAnalyzerProviderFactory> entry : indicesAnalysisService.analyzerProviderFactories().entrySet()) {
190192
String name = entry.getKey();
193+
Version indexVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
191194
if (!analyzerProviders.containsKey(name)) {
192-
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.Builder.EMPTY_SETTINGS));
195+
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion).build()));
193196
}
194-
name = Strings.toCamelCase(entry.getKey());
195-
if (!name.equals(entry.getKey())) {
196-
if (!analyzerProviders.containsKey(name)) {
197-
analyzerProviders.put(name, entry.getValue().create(name, ImmutableSettings.Builder.EMPTY_SETTINGS));
198-
}
197+
String camelCaseName = Strings.toCamelCase(name);
198+
if (!camelCaseName.equals(entry.getKey()) && !analyzerProviders.containsKey(camelCaseName)) {
199+
analyzerProviders.put(camelCaseName, entry.getValue().create(name, ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion).build()));
199200
}
200201
}
201202
}

src/main/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerProviderFactory.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@
2020
package org.elasticsearch.index.analysis;
2121

2222
import org.apache.lucene.analysis.Analyzer;
23+
import org.elasticsearch.Version;
24+
import org.elasticsearch.cluster.metadata.IndexMetaData;
2325
import org.elasticsearch.common.settings.Settings;
26+
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
27+
28+
import java.util.Locale;
2429

2530
/**
2631
*
@@ -30,15 +35,17 @@ public class PreBuiltAnalyzerProviderFactory implements AnalyzerProviderFactory
3035
private final PreBuiltAnalyzerProvider analyzerProvider;
3136

3237
public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer) {
33-
this(new PreBuiltAnalyzerProvider(name, scope, analyzer));
34-
}
35-
36-
public PreBuiltAnalyzerProviderFactory(PreBuiltAnalyzerProvider analyzerProvider) {
37-
this.analyzerProvider = analyzerProvider;
38+
analyzerProvider = new PreBuiltAnalyzerProvider(name, scope, analyzer);
3839
}
3940

4041
@Override
4142
public AnalyzerProvider create(String name, Settings settings) {
43+
Version indexVersion = settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
44+
if (!Version.CURRENT.equals(indexVersion)) {
45+
Analyzer analyzer = PreBuiltAnalyzers.valueOf(name.toUpperCase(Locale.ROOT)).getAnalyzer(indexVersion);
46+
return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDICES, analyzer);
47+
}
48+
4249
return analyzerProvider;
4350
}
4451

src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java

Lines changed: 9 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -22,66 +22,34 @@
2222
import org.apache.lucene.analysis.Analyzer;
2323
import org.apache.lucene.analysis.TokenStream;
2424
import org.apache.lucene.analysis.Tokenizer;
25-
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
2625
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
2726
import org.apache.lucene.analysis.ar.ArabicStemFilter;
28-
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
29-
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
3027
import org.apache.lucene.analysis.br.BrazilianStemFilter;
31-
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
3228
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
33-
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
34-
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
35-
import org.apache.lucene.analysis.commongrams.*;
29+
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
3630
import org.apache.lucene.analysis.core.*;
37-
import org.apache.lucene.analysis.cz.CzechAnalyzer;
3831
import org.apache.lucene.analysis.cz.CzechStemFilter;
39-
import org.apache.lucene.analysis.da.DanishAnalyzer;
40-
import org.apache.lucene.analysis.de.GermanAnalyzer;
4132
import org.apache.lucene.analysis.de.GermanStemFilter;
42-
import org.apache.lucene.analysis.el.GreekAnalyzer;
43-
import org.apache.lucene.analysis.en.EnglishAnalyzer;
4433
import org.apache.lucene.analysis.en.KStemFilter;
4534
import org.apache.lucene.analysis.en.PorterStemFilter;
46-
import org.apache.lucene.analysis.es.SpanishAnalyzer;
47-
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
48-
import org.apache.lucene.analysis.fa.PersianAnalyzer;
4935
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
50-
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
5136
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
5237
import org.apache.lucene.analysis.fr.FrenchStemFilter;
53-
import org.apache.lucene.analysis.ga.IrishAnalyzer;
54-
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
55-
import org.apache.lucene.analysis.hi.HindiAnalyzer;
56-
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
57-
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
58-
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
59-
import org.apache.lucene.analysis.it.ItalianAnalyzer;
60-
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
6138
import org.apache.lucene.analysis.miscellaneous.*;
6239
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
6340
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
6441
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
6542
import org.apache.lucene.analysis.ngram.NGramTokenizer;
66-
import org.apache.lucene.analysis.nl.DutchAnalyzer;
6743
import org.apache.lucene.analysis.nl.DutchStemFilter;
68-
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
6944
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
7045
import org.apache.lucene.analysis.pattern.PatternTokenizer;
7146
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
72-
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
7347
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
74-
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
75-
import org.apache.lucene.analysis.ru.RussianAnalyzer;
76-
import org.apache.lucene.analysis.shingle.ShingleFilter;
77-
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
7848
import org.apache.lucene.analysis.snowball.SnowballFilter;
7949
import org.apache.lucene.analysis.standard.*;
80-
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
81-
import org.apache.lucene.analysis.th.ThaiAnalyzer;
82-
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
83-
import org.apache.lucene.analysis.util.ElisionFilter;
8450
import org.apache.lucene.analysis.util.CharArraySet;
51+
import org.apache.lucene.analysis.util.ElisionFilter;
52+
import org.elasticsearch.Version;
8553
import org.elasticsearch.common.component.AbstractComponent;
8654
import org.elasticsearch.common.inject.Inject;
8755
import org.elasticsearch.common.lucene.Lucene;
@@ -91,6 +59,7 @@
9159
import org.elasticsearch.index.analysis.*;
9260

9361
import java.io.Reader;
62+
import java.util.Locale;
9463
import java.util.Map;
9564

9665
import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS;
@@ -101,7 +70,6 @@
10170
public class IndicesAnalysisService extends AbstractComponent {
10271

10372
private final Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = ConcurrentCollections.newConcurrentMap();
104-
10573
private final Map<String, PreBuiltTokenizerFactoryFactory> tokenizerFactories = ConcurrentCollections.newConcurrentMap();
10674
private final Map<String, PreBuiltTokenFilterFactoryFactory> tokenFilterFactories = ConcurrentCollections.newConcurrentMap();
10775
private final Map<String, PreBuiltCharFilterFactoryFactory> charFilterFactories = ConcurrentCollections.newConcurrentMap();
@@ -114,52 +82,10 @@ public IndicesAnalysisService() {
11482
public IndicesAnalysisService(Settings settings) {
11583
super(settings);
11684

117-
StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Lucene.ANALYZER_VERSION);
118-
analyzerProviderFactories.put("default", new PreBuiltAnalyzerProviderFactory("default", AnalyzerScope.INDICES, standardAnalyzer));
119-
analyzerProviderFactories.put("standard", new PreBuiltAnalyzerProviderFactory("standard", AnalyzerScope.INDICES, standardAnalyzer));
120-
analyzerProviderFactories.put("keyword", new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDICES, new KeywordAnalyzer()));
121-
analyzerProviderFactories.put("stop", new PreBuiltAnalyzerProviderFactory("stop", AnalyzerScope.INDICES, new StopAnalyzer(Lucene.ANALYZER_VERSION)));
122-
analyzerProviderFactories.put("whitespace", new PreBuiltAnalyzerProviderFactory("whitespace", AnalyzerScope.INDICES, new WhitespaceAnalyzer(Lucene.ANALYZER_VERSION)));
123-
analyzerProviderFactories.put("simple", new PreBuiltAnalyzerProviderFactory("simple", AnalyzerScope.INDICES, new SimpleAnalyzer(Lucene.ANALYZER_VERSION)));
124-
analyzerProviderFactories.put("classic", new PreBuiltAnalyzerProviderFactory("classic", AnalyzerScope.INDICES, new ClassicAnalyzer(Lucene.ANALYZER_VERSION)));
125-
126-
// extended ones
127-
analyzerProviderFactories.put("pattern", new PreBuiltAnalyzerProviderFactory("pattern", AnalyzerScope.INDICES, new PatternAnalyzer(Lucene.ANALYZER_VERSION, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
128-
analyzerProviderFactories.put("snowball", new PreBuiltAnalyzerProviderFactory("snowball", AnalyzerScope.INDICES, new SnowballAnalyzer(Lucene.ANALYZER_VERSION, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
129-
analyzerProviderFactories.put("standard_html_strip", new PreBuiltAnalyzerProviderFactory("standard_html_strip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION)));
130-
131-
analyzerProviderFactories.put("arabic", new PreBuiltAnalyzerProviderFactory("arabic", AnalyzerScope.INDICES, new ArabicAnalyzer(Lucene.ANALYZER_VERSION)));
132-
analyzerProviderFactories.put("armenian", new PreBuiltAnalyzerProviderFactory("armenian", AnalyzerScope.INDICES, new ArmenianAnalyzer(Lucene.ANALYZER_VERSION)));
133-
analyzerProviderFactories.put("basque", new PreBuiltAnalyzerProviderFactory("basque", AnalyzerScope.INDICES, new BasqueAnalyzer(Lucene.ANALYZER_VERSION)));
134-
analyzerProviderFactories.put("brazilian", new PreBuiltAnalyzerProviderFactory("brazilian", AnalyzerScope.INDICES, new BrazilianAnalyzer(Lucene.ANALYZER_VERSION)));
135-
analyzerProviderFactories.put("bulgarian", new PreBuiltAnalyzerProviderFactory("bulgarian", AnalyzerScope.INDICES, new BulgarianAnalyzer(Lucene.ANALYZER_VERSION)));
136-
analyzerProviderFactories.put("catalan", new PreBuiltAnalyzerProviderFactory("catalan", AnalyzerScope.INDICES, new CatalanAnalyzer(Lucene.ANALYZER_VERSION)));
137-
analyzerProviderFactories.put("chinese", new PreBuiltAnalyzerProviderFactory("chinese", AnalyzerScope.INDICES, new ChineseAnalyzer()));
138-
analyzerProviderFactories.put("cjk", new PreBuiltAnalyzerProviderFactory("cjk", AnalyzerScope.INDICES, new CJKAnalyzer(Lucene.ANALYZER_VERSION)));
139-
analyzerProviderFactories.put("czech", new PreBuiltAnalyzerProviderFactory("czech", AnalyzerScope.INDICES, new CzechAnalyzer(Lucene.ANALYZER_VERSION)));
140-
analyzerProviderFactories.put("dutch", new PreBuiltAnalyzerProviderFactory("dutch", AnalyzerScope.INDICES, new DutchAnalyzer(Lucene.ANALYZER_VERSION)));
141-
analyzerProviderFactories.put("danish", new PreBuiltAnalyzerProviderFactory("danish", AnalyzerScope.INDICES, new DanishAnalyzer(Lucene.ANALYZER_VERSION)));
142-
analyzerProviderFactories.put("english", new PreBuiltAnalyzerProviderFactory("english", AnalyzerScope.INDICES, new EnglishAnalyzer(Lucene.ANALYZER_VERSION)));
143-
analyzerProviderFactories.put("finnish", new PreBuiltAnalyzerProviderFactory("finnish", AnalyzerScope.INDICES, new FinnishAnalyzer(Lucene.ANALYZER_VERSION)));
144-
analyzerProviderFactories.put("french", new PreBuiltAnalyzerProviderFactory("french", AnalyzerScope.INDICES, new FrenchAnalyzer(Lucene.ANALYZER_VERSION)));
145-
analyzerProviderFactories.put("galician", new PreBuiltAnalyzerProviderFactory("galician", AnalyzerScope.INDICES, new GalicianAnalyzer(Lucene.ANALYZER_VERSION)));
146-
analyzerProviderFactories.put("german", new PreBuiltAnalyzerProviderFactory("german", AnalyzerScope.INDICES, new GermanAnalyzer(Lucene.ANALYZER_VERSION)));
147-
analyzerProviderFactories.put("greek", new PreBuiltAnalyzerProviderFactory("greek", AnalyzerScope.INDICES, new GreekAnalyzer(Lucene.ANALYZER_VERSION)));
148-
analyzerProviderFactories.put("hindi", new PreBuiltAnalyzerProviderFactory("hindi", AnalyzerScope.INDICES, new HindiAnalyzer(Lucene.ANALYZER_VERSION)));
149-
analyzerProviderFactories.put("hungarian", new PreBuiltAnalyzerProviderFactory("hungarian", AnalyzerScope.INDICES, new HungarianAnalyzer(Lucene.ANALYZER_VERSION)));
150-
analyzerProviderFactories.put("indonesian", new PreBuiltAnalyzerProviderFactory("indonesian", AnalyzerScope.INDICES, new IndonesianAnalyzer(Lucene.ANALYZER_VERSION)));
151-
analyzerProviderFactories.put("irish", new PreBuiltAnalyzerProviderFactory("irish", AnalyzerScope.INDICES, new IrishAnalyzer(Lucene.ANALYZER_VERSION)));
152-
analyzerProviderFactories.put("italian", new PreBuiltAnalyzerProviderFactory("italian", AnalyzerScope.INDICES, new ItalianAnalyzer(Lucene.ANALYZER_VERSION)));
153-
analyzerProviderFactories.put("latvian", new PreBuiltAnalyzerProviderFactory("latvian", AnalyzerScope.INDICES, new LatvianAnalyzer(Lucene.ANALYZER_VERSION)));
154-
analyzerProviderFactories.put("norwegian", new PreBuiltAnalyzerProviderFactory("norwegian", AnalyzerScope.INDICES, new NorwegianAnalyzer(Lucene.ANALYZER_VERSION)));
155-
analyzerProviderFactories.put("persian", new PreBuiltAnalyzerProviderFactory("persian", AnalyzerScope.INDICES, new PersianAnalyzer(Lucene.ANALYZER_VERSION)));
156-
analyzerProviderFactories.put("portuguese", new PreBuiltAnalyzerProviderFactory("portuguese", AnalyzerScope.INDICES, new PortugueseAnalyzer(Lucene.ANALYZER_VERSION)));
157-
analyzerProviderFactories.put("romanian", new PreBuiltAnalyzerProviderFactory("romanian", AnalyzerScope.INDICES, new RomanianAnalyzer(Lucene.ANALYZER_VERSION)));
158-
analyzerProviderFactories.put("russian", new PreBuiltAnalyzerProviderFactory("russian", AnalyzerScope.INDICES, new RussianAnalyzer(Lucene.ANALYZER_VERSION)));
159-
analyzerProviderFactories.put("spanish", new PreBuiltAnalyzerProviderFactory("spanish", AnalyzerScope.INDICES, new SpanishAnalyzer(Lucene.ANALYZER_VERSION)));
160-
analyzerProviderFactories.put("swedish", new PreBuiltAnalyzerProviderFactory("swedish", AnalyzerScope.INDICES, new SwedishAnalyzer(Lucene.ANALYZER_VERSION)));
161-
analyzerProviderFactories.put("turkish", new PreBuiltAnalyzerProviderFactory("turkish", AnalyzerScope.INDICES, new TurkishAnalyzer(Lucene.ANALYZER_VERSION)));
162-
analyzerProviderFactories.put("thai", new PreBuiltAnalyzerProviderFactory("thai", AnalyzerScope.INDICES, new ThaiAnalyzer(Lucene.ANALYZER_VERSION)));
85+
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
86+
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
87+
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
88+
}
16389

16490
// Base Tokenizers
16591
tokenizerFactories.put("standard", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@@ -771,7 +697,7 @@ public PreBuiltAnalyzerProviderFactory analyzerProviderFactory(String name) {
771697
}
772698

773699
public boolean hasAnalyzer(String name) {
774-
return analyzer(name) != null;
700+
return analyzerProviderFactories.containsKey(name);
775701
}
776702

777703
public Analyzer analyzer(String name) {

0 commit comments

Comments
 (0)