Skip to content

Commit 1146a35

Browse files
committed
Move more token filters to analysis-common module
The following token filters were moved: arabic_stem, brazilian_stem, czech_stem, dutch_stem, french_stem, german_stem and russian_stem. Relates to #23658
1 parent 7e3cd6a commit 1146a35

File tree

15 files changed: 256 additions and 51 deletions.

core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -30,31 +30,25 @@
3030
import org.elasticsearch.index.analysis.AnalysisRegistry;
3131
import org.elasticsearch.index.analysis.AnalyzerProvider;
3232
import org.elasticsearch.index.analysis.ArabicAnalyzerProvider;
33-
import org.elasticsearch.index.analysis.ArabicStemTokenFilterFactory;
3433
import org.elasticsearch.index.analysis.ArmenianAnalyzerProvider;
3534
import org.elasticsearch.index.analysis.BasqueAnalyzerProvider;
3635
import org.elasticsearch.index.analysis.BrazilianAnalyzerProvider;
37-
import org.elasticsearch.index.analysis.BrazilianStemTokenFilterFactory;
3836
import org.elasticsearch.index.analysis.BulgarianAnalyzerProvider;
3937
import org.elasticsearch.index.analysis.CatalanAnalyzerProvider;
4038
import org.elasticsearch.index.analysis.CharFilterFactory;
4139
import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
4240
import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
4341
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
4442
import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
45-
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
4643
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
4744
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
48-
import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
4945
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
5046
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
5147
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
5248
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
5349
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
54-
import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
5550
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
5651
import org.elasticsearch.index.analysis.GermanAnalyzerProvider;
57-
import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
5852
import org.elasticsearch.index.analysis.GreekAnalyzerProvider;
5953
import org.elasticsearch.index.analysis.HindiAnalyzerProvider;
6054
import org.elasticsearch.index.analysis.HungarianAnalyzerProvider;
@@ -80,7 +74,6 @@
8074
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
8175
import org.elasticsearch.index.analysis.RomanianAnalyzerProvider;
8276
import org.elasticsearch.index.analysis.RussianAnalyzerProvider;
83-
import org.elasticsearch.index.analysis.RussianStemTokenFilterFactory;
8477
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
8578
import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
8679
import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
@@ -172,14 +165,6 @@ private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(Li
172165
tokenFilters.register("stop", StopTokenFilterFactory::new);
173166
tokenFilters.register("standard", StandardTokenFilterFactory::new);
174167
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
175-
tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new);
176-
tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new);
177-
tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new);
178-
tokenFilters.register("dutch_stem", DutchStemTokenFilterFactory::new);
179-
tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new);
180-
tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new);
181-
tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new);
182-
183168
tokenFilters.register("hunspell", requriesAnalysisSettings((indexSettings, env, name, settings) -> new HunspellTokenFilterFactory
184169
(indexSettings, name, settings, hunspellService)));
185170

core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -188,14 +188,6 @@ private void testSimpleConfiguration(Settings settings) throws IOException {
188188
assertThat(analyzer, instanceOf(CustomAnalyzer.class));
189189
CustomAnalyzer custom4 = (CustomAnalyzer) analyzer;
190190
assertThat(custom4.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));
191-
192-
// // verify Czech stemmer
193-
// analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer();
194-
// assertThat(analyzer, instanceOf(CustomAnalyzer.class));
195-
// CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer;
196-
// assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
197-
// assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
198-
// assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
199191
}
200192

201193
public void testWordListPath() throws Exception {

core/src/test/resources/org/elasticsearch/index/analysis/test1.json

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,6 @@
3535
"custom6":{
3636
"tokenizer":"standard",
3737
"position_increment_gap": 256
38-
},
39-
"czechAnalyzerWithStemmer":{
40-
"tokenizer":"standard",
41-
"filter":["standard", "lowercase", "stop", "czech_stem"]
4238
}
4339
}
4440
}

core/src/test/resources/org/elasticsearch/index/analysis/test1.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,3 @@ index :
2828
custom7 :
2929
type : standard
3030
version: 3.6
31-
czechAnalyzerWithStemmer :
32-
tokenizer : standard
33-
filter : [standard, lowercase, stop, czech_stem]

core/src/main/java/org/elasticsearch/index/analysis/ArabicStemTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicStemTokenFilterFactory.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,18 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.TokenStream;
2323
import org.apache.lucene.analysis.ar.ArabicStemFilter;
2424
import org.elasticsearch.common.settings.Settings;
2525
import org.elasticsearch.env.Environment;
2626
import org.elasticsearch.index.IndexSettings;
27+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
2728

2829
public class ArabicStemTokenFilterFactory extends AbstractTokenFilterFactory {
2930

30-
public ArabicStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
31+
ArabicStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3132
super(indexSettings, name, settings);
3233
}
3334

core/src/main/java/org/elasticsearch/index/analysis/BrazilianStemTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianStemTokenFilterFactory.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.TokenStream;
2323
import org.apache.lucene.analysis.br.BrazilianStemFilter;
@@ -26,12 +26,14 @@
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.env.Environment;
2828
import org.elasticsearch.index.IndexSettings;
29+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
30+
import org.elasticsearch.index.analysis.Analysis;
2931

3032
public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {
3133

3234
private final CharArraySet exclusions;
3335

34-
public BrazilianStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
36+
BrazilianStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3537
super(indexSettings, name, settings);
3638
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
3739
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,20 +92,26 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
9292
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
9393
filters.put("apostrophe", ApostropheFilterFactory::new);
9494
filters.put("arabic_normalization", ArabicNormalizationFilterFactory::new);
95+
filters.put("arabic_stem", ArabicStemTokenFilterFactory::new);
9596
filters.put("asciifolding", ASCIIFoldingTokenFilterFactory::new);
97+
filters.put("brazilian_stem", BrazilianStemTokenFilterFactory::new);
9698
filters.put("cjk_bigram", CJKBigramFilterFactory::new);
9799
filters.put("cjk_width", CJKWidthFilterFactory::new);
98100
filters.put("classic", ClassicFilterFactory::new);
101+
filters.put("czech_stem", CzechStemTokenFilterFactory::new);
99102
filters.put("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
100103
filters.put("decimal_digit", DecimalDigitFilterFactory::new);
101104
filters.put("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
102105
filters.put("dictionary_decompounder", requriesAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
106+
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
103107
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
104108
filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
105109
filters.put("elision", ElisionTokenFilterFactory::new);
106110
filters.put("fingerprint", FingerprintTokenFilterFactory::new);
107111
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
112+
filters.put("french_stem", FrenchStemTokenFilterFactory::new);
108113
filters.put("german_normalization", GermanNormalizationFilterFactory::new);
114+
filters.put("german_stem", GermanStemTokenFilterFactory::new);
109115
filters.put("hindi_normalization", HindiNormalizationFilterFactory::new);
110116
filters.put("hyphenation_decompounder", requriesAnalysisSettings(HyphenationCompoundWordTokenFilterFactory::new));
111117
filters.put("indic_normalization", IndicNormalizationFilterFactory::new);
@@ -124,6 +130,7 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
124130
filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
125131
filters.put("porter_stem", PorterStemTokenFilterFactory::new);
126132
filters.put("reverse", ReverseTokenFilterFactory::new);
133+
filters.put("russian_stem", RussianStemTokenFilterFactory::new);
127134
filters.put("scandinavian_folding", ScandinavianFoldingFilterFactory::new);
128135
filters.put("scandinavian_normalization", ScandinavianNormalizationFilterFactory::new);
129136
filters.put("serbian_normalization", SerbianNormalizationFilterFactory::new);

core/src/main/java/org/elasticsearch/index/analysis/CzechStemTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechStemTokenFilterFactory.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,18 @@
1616
* specific language governing permissions and limitations
1717
* under the License.
1818
*/
19-
package org.elasticsearch.index.analysis;
19+
package org.elasticsearch.analysis.common;
2020

2121
import org.apache.lucene.analysis.TokenStream;
2222
import org.apache.lucene.analysis.cz.CzechStemFilter;
2323
import org.elasticsearch.common.settings.Settings;
2424
import org.elasticsearch.env.Environment;
2525
import org.elasticsearch.index.IndexSettings;
26+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
2627

2728
public class CzechStemTokenFilterFactory extends AbstractTokenFilterFactory {
2829

29-
public CzechStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
30+
CzechStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3031
super(indexSettings, name, settings);
3132
}
3233

core/src/main/java/org/elasticsearch/index/analysis/DutchStemTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchStemTokenFilterFactory.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.CharArraySet;
2323
import org.apache.lucene.analysis.TokenStream;
@@ -26,13 +26,15 @@
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.env.Environment;
2828
import org.elasticsearch.index.IndexSettings;
29+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
30+
import org.elasticsearch.index.analysis.Analysis;
2931
import org.tartarus.snowball.ext.DutchStemmer;
3032

3133
public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {
3234

3335
private final CharArraySet exclusions;
3436

35-
public DutchStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
37+
DutchStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3638
super(indexSettings, name, settings);
3739
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
3840
}

core/src/main/java/org/elasticsearch/index/analysis/FrenchStemTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchStemTokenFilterFactory.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.CharArraySet;
2323
import org.apache.lucene.analysis.TokenStream;
@@ -26,13 +26,15 @@
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.env.Environment;
2828
import org.elasticsearch.index.IndexSettings;
29+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
30+
import org.elasticsearch.index.analysis.Analysis;
2931
import org.tartarus.snowball.ext.FrenchStemmer;
3032

3133
public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {
3234

3335
private final CharArraySet exclusions;
3436

35-
public FrenchStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
37+
FrenchStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3638
super(indexSettings, name, settings);
3739
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
3840
}

core/src/main/java/org/elasticsearch/index/analysis/GermanStemTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanStemTokenFilterFactory.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.CharArraySet;
2323
import org.apache.lucene.analysis.TokenStream;
@@ -26,12 +26,14 @@
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.env.Environment;
2828
import org.elasticsearch.index.IndexSettings;
29+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
30+
import org.elasticsearch.index.analysis.Analysis;
2931

3032
public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {
3133

3234
private final CharArraySet exclusions;
3335

34-
public GermanStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
36+
GermanStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3537
super(indexSettings, name, settings);
3638
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
3739
}

core/src/main/java/org/elasticsearch/index/analysis/RussianStemTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/RussianStemTokenFilterFactory.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.TokenStream;
2323
import org.apache.lucene.analysis.snowball.SnowballFilter;
2424
import org.elasticsearch.common.settings.Settings;
2525
import org.elasticsearch.env.Environment;
2626
import org.elasticsearch.index.IndexSettings;
27+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
2728

2829
public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {
2930

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ protected Map<String, Class<?>> getTokenFilters() {
124124
filters.put("fingerprint", FingerprintTokenFilterFactory.class);
125125
filters.put("minhash", MinHashTokenFilterFactory.class);
126126
filters.put("scandinavianfolding", ScandinavianFoldingFilterFactory.class);
127+
filters.put("arabicstem", ArabicStemTokenFilterFactory.class);
128+
filters.put("brazilianstem", BrazilianStemTokenFilterFactory.class);
129+
filters.put("czechstem", CzechStemTokenFilterFactory.class);
130+
filters.put("germanstem", GermanStemTokenFilterFactory.class);
127131
return filters;
128132
}
129133

0 commit comments

Comments
 (0)