Skip to content

Commit 8c01ed3

Browse files
committed
1 parent fd4086b commit 8c01ed3

File tree

2 files changed

+63
-3
lines changed

2 files changed

+63
-3
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
8484
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
8585
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
86+
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
8687
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
8788
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
8889
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
@@ -456,13 +457,15 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
456457
| WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
457458
| WordDelimiterFilter.SPLIT_ON_NUMERICS
458459
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null)));
459-
filters.add(PreConfiguredTokenFilter.singleton("word_delimiter_graph", false, false, input ->
460-
new WordDelimiterGraphFilter(input,
460+
filters.add(PreConfiguredTokenFilter.singletonWithVersion("word_delimiter_graph", false, false, (input, version) -> {
461+
boolean adjustOffsets = version.onOrAfter(Version.V_7_3_0);
462+
return new WordDelimiterGraphFilter(input, adjustOffsets, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
461463
WordDelimiterGraphFilter.GENERATE_WORD_PARTS
462464
| WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
463465
| WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
464466
| WordDelimiterGraphFilter.SPLIT_ON_NUMERICS
465-
| WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null)));
467+
| WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null);
468+
}));
466469
return filters;
467470
}
468471

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,24 @@
2020

2121
import org.apache.lucene.analysis.Tokenizer;
2222
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
23+
import org.elasticsearch.Version;
24+
import org.elasticsearch.cluster.metadata.IndexMetaData;
2325
import org.elasticsearch.common.settings.Settings;
2426
import org.elasticsearch.env.Environment;
27+
import org.elasticsearch.env.TestEnvironment;
28+
import org.elasticsearch.index.IndexSettings;
2529
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
30+
import org.elasticsearch.index.analysis.IndexAnalyzers;
31+
import org.elasticsearch.index.analysis.NamedAnalyzer;
2632
import org.elasticsearch.index.analysis.TokenFilterFactory;
33+
import org.elasticsearch.indices.analysis.AnalysisModule;
2734
import org.elasticsearch.test.ESTestCase;
35+
import org.elasticsearch.test.IndexSettingsModule;
36+
import org.elasticsearch.test.VersionUtils;
2837

2938
import java.io.IOException;
3039
import java.io.StringReader;
40+
import java.util.Collections;
3141

3242
public class WordDelimiterGraphTokenFilterFactoryTests
3343
extends BaseWordDelimiterTokenFilterFactoryTestCase {
@@ -107,4 +117,51 @@ public void testAdjustingOffsets() throws IOException {
107117
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, expectedStartOffsets, expectedEndOffsets, null,
108118
expectedIncr, expectedPosLen, null);
109119
}
120+
121+
public void testPreconfiguredFilter() throws IOException {
122+
// Before 7.3 we don't adjust offsets
123+
{
124+
Settings settings = Settings.builder()
125+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
126+
.build();
127+
Settings indexSettings = Settings.builder()
128+
.put(IndexMetaData.SETTING_VERSION_CREATED,
129+
VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, VersionUtils.getPreviousVersion(Version.V_7_3_0)))
130+
.put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
131+
.putList("index.analysis.analyzer.my_analyzer.filter", "word_delimiter_graph")
132+
.build();
133+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
134+
135+
try (IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings),
136+
Collections.singletonList(new CommonAnalysisPlugin())).getAnalysisRegistry().build(idxSettings)) {
137+
138+
NamedAnalyzer analyzer = indexAnalyzers.get("my_analyzer");
139+
assertNotNull(analyzer);
140+
assertAnalyzesTo(analyzer, "h100", new String[]{"h", "100"}, new int[]{ 0, 0 }, new int[]{ 4, 4 });
141+
142+
}
143+
}
144+
145+
// After 7.3 we do adjust offsets
146+
{
147+
Settings settings = Settings.builder()
148+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
149+
.build();
150+
Settings indexSettings = Settings.builder()
151+
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
152+
.put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
153+
.putList("index.analysis.analyzer.my_analyzer.filter", "word_delimiter_graph")
154+
.build();
155+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
156+
157+
try (IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings),
158+
Collections.singletonList(new CommonAnalysisPlugin())).getAnalysisRegistry().build(idxSettings)) {
159+
160+
NamedAnalyzer analyzer = indexAnalyzers.get("my_analyzer");
161+
assertNotNull(analyzer);
162+
assertAnalyzesTo(analyzer, "h100", new String[]{"h", "100"}, new int[]{ 0, 1 }, new int[]{ 1, 4 });
163+
164+
}
165+
}
166+
}
110167
}

0 commit comments

Comments
 (0)