Skip to content

Commit e37a0ef

Browse files
authored
Upgrade to lucene-8.0.0-snapshot-67cdd21996 (#35816)
1 parent 96a741f commit e37a0ef

File tree

98 files changed

+196
-473
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

98 files changed

+196
-473
lines changed

buildSrc/version.properties

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
elasticsearch = 7.0.0
2-
lucene = 8.0.0-snapshot-6d9c714052
2+
lucene = 8.0.0-snapshot-67cdd21996
33

44
# optional dependencies
55
spatial4j = 0.7

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactory.java

Lines changed: 6 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -26,14 +26,14 @@
2626
import org.elasticsearch.env.Environment;
2727
import org.elasticsearch.index.IndexSettings;
2828
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
29-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
30-
import org.elasticsearch.index.analysis.TokenFilterFactory;
29+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
3130

3231
/**
3332
* Factory for ASCIIFoldingFilter.
3433
*/
3534
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory
36-
implements MultiTermAwareComponent {
35+
implements NormalizingTokenFilterFactory {
36+
3737
public static final ParseField PRESERVE_ORIGINAL = new ParseField("preserve_original");
3838
public static final boolean DEFAULT_PRESERVE_ORIGINAL = false;
3939

@@ -51,21 +51,8 @@ public TokenStream create(TokenStream tokenStream) {
5151
}
5252

5353
@Override
54-
public Object getMultiTermComponent() {
55-
if (preserveOriginal == false) {
56-
return this;
57-
} else {
58-
// See https://issues.apache.org/jira/browse/LUCENE-7536 for the reasoning
59-
return new TokenFilterFactory() {
60-
@Override
61-
public String name() {
62-
return ASCIIFoldingTokenFilterFactory.this.name();
63-
}
64-
@Override
65-
public TokenStream create(TokenStream tokenStream) {
66-
return new ASCIIFoldingFilter(tokenStream, false);
67-
}
68-
};
69-
}
54+
public TokenStream normalize(TokenStream tokenStream) {
55+
// Normalization should only emit a single token, so always turn off preserveOriginal
56+
return new ASCIIFoldingFilter(tokenStream, false);
7057
}
7158
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicNormalizationFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@
2424
import org.elasticsearch.env.Environment;
2525
import org.elasticsearch.index.IndexSettings;
2626
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
27-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
27+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
2828

29-
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
29+
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
3030

3131
ArabicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3232
super(indexSettings, name, settings);
@@ -37,8 +37,4 @@ public TokenStream create(TokenStream tokenStream) {
3737
return new ArabicNormalizationFilter(tokenStream);
3838
}
3939

40-
@Override
41-
public Object getMultiTermComponent() {
42-
return this;
43-
}
4440
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliNormalizationFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,12 @@
2424
import org.elasticsearch.env.Environment;
2525
import org.elasticsearch.index.IndexSettings;
2626
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
27-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
27+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
2828

2929
/**
3030
* Factory for {@link BengaliNormalizationFilter}
3131
*/
32-
public class BengaliNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
32+
public class BengaliNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
3333

3434
BengaliNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3535
super(indexSettings, name, settings);
@@ -40,8 +40,4 @@ public TokenStream create(TokenStream tokenStream) {
4040
return new BengaliNormalizationFilter(tokenStream);
4141
}
4242

43-
@Override
44-
public Object getMultiTermComponent() {
45-
return this;
46-
}
4743
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CJKWidthFilterFactory.java

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,9 @@
2525
import org.elasticsearch.env.Environment;
2626
import org.elasticsearch.index.IndexSettings;
2727
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
28-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
28+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
2929

30-
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
30+
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
3131

3232
CJKWidthFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
3333
super(indexSettings, name, settings);
@@ -38,9 +38,4 @@ public TokenStream create(TokenStream tokenStream) {
3838
return new CJKWidthFilter(tokenStream);
3939
}
4040

41-
@Override
42-
public Object getMultiTermComponent() {
43-
return this;
44-
}
45-
4641
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 15 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
import org.apache.logging.log4j.LogManager;
2323
import org.apache.lucene.analysis.Analyzer;
2424
import org.apache.lucene.analysis.CharArraySet;
25-
import org.apache.lucene.analysis.LowerCaseFilter;
2625
import org.apache.lucene.analysis.StopFilter;
2726
import org.apache.lucene.analysis.TokenStream;
2827
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
@@ -492,35 +491,26 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
492491
@Override
493492
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
494493
List<PreConfiguredTokenizer> tokenizers = new ArrayList<>();
495-
tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new, null));
496-
tokenizers.add(PreConfiguredTokenizer.singleton("classic", ClassicTokenizer::new, null));
497-
tokenizers.add(PreConfiguredTokenizer.singleton("uax_url_email", UAX29URLEmailTokenizer::new, null));
498-
tokenizers.add(PreConfiguredTokenizer.singleton("path_hierarchy", PathHierarchyTokenizer::new, null));
499-
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new, null));
500-
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new, null));
501-
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new, null));
494+
tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new));
495+
tokenizers.add(PreConfiguredTokenizer.singleton("classic", ClassicTokenizer::new));
496+
tokenizers.add(PreConfiguredTokenizer.singleton("uax_url_email", UAX29URLEmailTokenizer::new));
497+
tokenizers.add(PreConfiguredTokenizer.singleton("path_hierarchy", PathHierarchyTokenizer::new));
498+
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new));
499+
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new));
500+
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new));
502501
tokenizers.add(PreConfiguredTokenizer.singleton("edge_ngram",
503-
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE), null));
504-
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1), null));
505-
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new, null));
502+
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE)));
503+
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1)));
504+
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new));
506505
// TODO deprecate and remove in API
507-
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new, () -> new TokenFilterFactory() {
508-
@Override
509-
public String name() {
510-
return "lowercase";
511-
}
512-
513-
@Override
514-
public TokenStream create(TokenStream tokenStream) {
515-
return new LowerCaseFilter(tokenStream);
516-
}
517-
}));
506+
// This is already broken with normalization, so backwards compat isn't necessary?
507+
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new));
518508

519509
// Temporary shim for aliases. TODO deprecate after they are moved
520-
tokenizers.add(PreConfiguredTokenizer.singleton("nGram", NGramTokenizer::new, null));
510+
tokenizers.add(PreConfiguredTokenizer.singleton("nGram", NGramTokenizer::new));
521511
tokenizers.add(PreConfiguredTokenizer.singleton("edgeNGram",
522-
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE), null));
523-
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new, null));
512+
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE)));
513+
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new));
524514

525515
return tokenizers;
526516
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DecimalDigitFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,12 @@
2525
import org.elasticsearch.env.Environment;
2626
import org.elasticsearch.index.IndexSettings;
2727
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
28-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
28+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
2929

3030
/**
3131
* Factory for {@link DecimalDigitFilter}
3232
*/
33-
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
33+
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
3434

3535
DecimalDigitFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
3636
super(indexSettings, name, settings);
@@ -41,8 +41,4 @@ public TokenStream create(TokenStream tokenStream) {
4141
return new DecimalDigitFilter(tokenStream);
4242
}
4343

44-
@Override
45-
public Object getMultiTermComponent() {
46-
return this;
47-
}
4844
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,9 @@
2727
import org.elasticsearch.index.IndexSettings;
2828
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
2929
import org.elasticsearch.index.analysis.Analysis;
30-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
30+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
3131

32-
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
32+
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
3333

3434
private final CharArraySet articles;
3535

@@ -43,8 +43,4 @@ public TokenStream create(TokenStream tokenStream) {
4343
return new ElisionFilter(tokenStream, articles);
4444
}
4545

46-
@Override
47-
public Object getMultiTermComponent() {
48-
return this;
49-
}
5046
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanNormalizationFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,12 @@
2424
import org.elasticsearch.env.Environment;
2525
import org.elasticsearch.index.IndexSettings;
2626
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
27-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
27+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
2828

2929
/**
3030
* Factory for {@link GermanNormalizationFilter}
3131
*/
32-
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
32+
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
3333

3434
GermanNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3535
super(indexSettings, name, settings);
@@ -40,8 +40,4 @@ public TokenStream create(TokenStream tokenStream) {
4040
return new GermanNormalizationFilter(tokenStream);
4141
}
4242

43-
@Override
44-
public Object getMultiTermComponent() {
45-
return this;
46-
}
4743
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/HindiNormalizationFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,12 @@
2424
import org.elasticsearch.env.Environment;
2525
import org.elasticsearch.index.IndexSettings;
2626
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
27-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
27+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
2828

2929
/**
3030
* Factory for {@link HindiNormalizationFilter}
3131
*/
32-
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
32+
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
3333

3434
HindiNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3535
super(indexSettings, name, settings);
@@ -40,8 +40,4 @@ public TokenStream create(TokenStream tokenStream) {
4040
return new HindiNormalizationFilter(tokenStream);
4141
}
4242

43-
@Override
44-
public Object getMultiTermComponent() {
45-
return this;
46-
}
4743
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/IndicNormalizationFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,12 @@
2424
import org.elasticsearch.env.Environment;
2525
import org.elasticsearch.index.IndexSettings;
2626
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
27-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
27+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
2828

2929
/**
3030
* Factory for {@link IndicNormalizationFilter}
3131
*/
32-
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
32+
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
3333

3434
IndicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3535
super(indexSettings, name, settings);
@@ -40,8 +40,4 @@ public TokenStream create(TokenStream tokenStream) {
4040
return new IndicNormalizationFilter(tokenStream);
4141
}
4242

43-
@Override
44-
public Object getMultiTermComponent() {
45-
return this;
46-
}
4743
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
import org.elasticsearch.env.Environment;
2929
import org.elasticsearch.index.IndexSettings;
3030
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
31-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
31+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
3232

3333
/**
3434
* Factory for {@link LowerCaseFilter} and some language-specific variants
@@ -39,7 +39,7 @@
3939
* <li>turkish: {@link TurkishLowerCaseFilter}
4040
* </ul>
4141
*/
42-
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
42+
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
4343

4444
private final String lang;
4545

@@ -63,10 +63,6 @@ public TokenStream create(TokenStream tokenStream) {
6363
}
6464
}
6565

66-
@Override
67-
public Object getMultiTermComponent() {
68-
return this;
69-
}
7066
}
7167

7268

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MappingCharFilterFactory.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -26,14 +26,14 @@
2626
import org.elasticsearch.index.IndexSettings;
2727
import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
2828
import org.elasticsearch.index.analysis.Analysis;
29-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
29+
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
3030

3131
import java.io.Reader;
3232
import java.util.List;
3333
import java.util.regex.Matcher;
3434
import java.util.regex.Pattern;
3535

36-
public class MappingCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
36+
public class MappingCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
3737

3838
private final NormalizeCharMap normMap;
3939

@@ -118,8 +118,4 @@ private String parseString(String s) {
118118
return new String(out, 0, writePos);
119119
}
120120

121-
@Override
122-
public Object getMultiTermComponent() {
123-
return this;
124-
}
125121
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,19 +18,19 @@
1818
*/
1919
package org.elasticsearch.analysis.common;
2020

21-
import java.io.Reader;
22-
import java.util.regex.Pattern;
23-
2421
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
2522
import org.elasticsearch.common.Strings;
2623
import org.elasticsearch.common.regex.Regex;
2724
import org.elasticsearch.common.settings.Settings;
2825
import org.elasticsearch.env.Environment;
2926
import org.elasticsearch.index.IndexSettings;
3027
import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
31-
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
28+
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
29+
30+
import java.io.Reader;
31+
import java.util.regex.Pattern;
3232

33-
public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
33+
public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
3434

3535
private final Pattern pattern;
3636
private final String replacement;
@@ -59,8 +59,4 @@ public Reader create(Reader tokenStream) {
5959
return new PatternReplaceCharFilter(pattern, replacement, tokenStream);
6060
}
6161

62-
@Override
63-
public Object getMultiTermComponent() {
64-
return this;
65-
}
6662
}

0 commit comments

Comments
 (0)