Skip to content

Commit 736ed47

Browse files
authored
Check for deprecations when analyzers are built (#50908)
Generally speaking, deprecated analysis components in elasticsearch will issue deprecation warnings when they are first used. However, this means that no warnings are emitted when indexes are created with deprecated components, and users have to actually index a document to see warnings. This makes it much harder to see these warnings and act on them at appropriate times. This is worse in the case where components throw exceptions on upgrade. In this case, users will not be aware of a problem until a document is indexed, instead of at index creation time. This commit adds a new check that pushes an empty string through all user-defined analyzers and normalizers when an IndexAnalyzers object is built for each index; deprecation warnings and exceptions are now emitted when indexes are created or opened. Fixes #42349
1 parent b146740 commit 736ed47

File tree

6 files changed

+208
-41
lines changed

6 files changed

+208
-41
lines changed

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java

+7-31
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,15 @@
1919

2020
package org.elasticsearch.analysis.common;
2121

22-
import org.apache.lucene.analysis.MockTokenizer;
23-
import org.apache.lucene.analysis.Tokenizer;
2422
import org.elasticsearch.Version;
2523
import org.elasticsearch.cluster.metadata.IndexMetaData;
2624
import org.elasticsearch.common.settings.Settings;
2725
import org.elasticsearch.env.Environment;
28-
import org.elasticsearch.index.analysis.TokenFilterFactory;
2926
import org.elasticsearch.test.ESTestCase;
3027
import org.elasticsearch.test.IndexSettingsModule;
3128
import org.elasticsearch.test.VersionUtils;
3229

3330
import java.io.IOException;
34-
import java.io.StringReader;
35-
import java.util.Map;
3631

3732
public class CommonAnalysisPluginTests extends ESTestCase {
3833

@@ -51,13 +46,8 @@ public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException
5146
.build();
5247

5348
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
54-
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings),
55-
settings, commonAnalysisPlugin).tokenFilter;
56-
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
57-
Tokenizer tokenizer = new MockTokenizer();
58-
tokenizer.setReader(new StringReader("foo bar"));
59-
60-
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
49+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
50+
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
6151
assertEquals("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
6252
+ "Please change the filter name to [ngram] instead.", ex.getMessage());
6353
}
@@ -69,12 +59,7 @@ public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException
6959
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram").put("index.analysis.filter.my_ngram.type", "nGram")
7060
.build();
7161
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
72-
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7),
73-
settingsPre7, commonAnalysisPlugin).tokenFilter;
74-
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
75-
Tokenizer tokenizer = new MockTokenizer();
76-
tokenizer.setReader(new StringReader("foo bar"));
77-
assertNotNull(tokenFilterFactory.create(tokenizer));
62+
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin);
7863
assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. "
7964
+ "Please change the filter name to [ngram] instead.");
8065
}
@@ -95,13 +80,8 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep
9580
.build();
9681

9782
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
98-
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings),
99-
settings, commonAnalysisPlugin).tokenFilter;
100-
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
101-
Tokenizer tokenizer = new MockTokenizer();
102-
tokenizer.setReader(new StringReader("foo bar"));
103-
104-
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
83+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
84+
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
10585
assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
10686
+ "Please change the filter name to [edge_ngram] instead.", ex.getMessage());
10787
}
@@ -116,12 +96,8 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep
11696
.build();
11797

11898
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
119-
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7),
120-
settingsPre7, commonAnalysisPlugin).tokenFilter;
121-
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
122-
Tokenizer tokenizer = new MockTokenizer();
123-
tokenizer.setReader(new StringReader("foo bar"));
124-
assertNotNull(tokenFilterFactory.create(tokenizer));
99+
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7),
100+
settingsPre7, commonAnalysisPlugin);
125101
assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
126102
+ "Please change the filter name to [edge_ngram] instead.");
127103
}

server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java

+22
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.elasticsearch.index.analysis;
2020

2121
import org.apache.lucene.analysis.Analyzer;
22+
import org.apache.lucene.analysis.TokenStream;
2223
import org.apache.lucene.analysis.core.KeywordTokenizer;
2324
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
2425
import org.elasticsearch.ElasticsearchException;
@@ -536,6 +537,10 @@ public IndexAnalyzers build(IndexSettings indexSettings,
536537
tokenFilterFactoryFactories, charFilterFactoryFactories);
537538
}
538539

540+
for (Analyzer analyzer : normalizers.values()) {
541+
analyzer.normalize("", ""); // check for deprecations
542+
}
543+
539544
if (!analyzers.containsKey(DEFAULT_ANALYZER_NAME)) {
540545
analyzers.put(DEFAULT_ANALYZER_NAME,
541546
produceAnalyzer(DEFAULT_ANALYZER_NAME,
@@ -599,6 +604,7 @@ private static NamedAnalyzer produceAnalyzer(String name,
599604
} else {
600605
analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
601606
}
607+
checkVersions(analyzer);
602608
return analyzer;
603609
}
604610

@@ -626,4 +632,20 @@ private void processNormalizerFactory(
626632
NamedAnalyzer normalizer = new NamedAnalyzer(name, normalizerFactory.scope(), normalizerF);
627633
normalizers.put(name, normalizer);
628634
}
635+
636+
// Some analysis components emit deprecation warnings or throw exceptions when used
637+
// with the wrong version of elasticsearch. These exceptions and warnings are
638+
// normally thrown when tokenstreams are constructed, which unless we build a
639+
// tokenstream up-front does not happen until a document is indexed. In order to
640+
// surface these warnings or exceptions as early as possible, we build an empty
641+
// tokenstream and pull it through an Analyzer at construction time.
642+
private static void checkVersions(Analyzer analyzer) {
643+
try (TokenStream ts = analyzer.tokenStream("", "")) {
644+
ts.reset();
645+
while (ts.incrementToken()) {}
646+
ts.end();
647+
} catch (IOException e) {
648+
throw new UncheckedIOException(e);
649+
}
650+
}
629651
}

server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java

+45-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.elasticsearch.index.analysis.AnalysisRegistry;
3939
import org.elasticsearch.index.analysis.CharFilterFactory;
4040
import org.elasticsearch.index.analysis.IndexAnalyzers;
41+
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
4142
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
4243
import org.elasticsearch.index.analysis.TokenFilterFactory;
4344
import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -108,6 +109,25 @@ public TokenStream create(TokenStream tokenStream) {
108109
}
109110
}
110111

112+
class DeprecatedTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
113+
114+
DeprecatedTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
115+
super(indexSettings, name, settings);
116+
}
117+
118+
@Override
119+
public TokenStream create(TokenStream tokenStream) {
120+
deprecationLogger.deprecated("Using deprecated token filter [deprecated]");
121+
return tokenStream;
122+
}
123+
124+
@Override
125+
public TokenStream normalize(TokenStream tokenStream) {
126+
deprecationLogger.deprecated("Using deprecated token filter [deprecated]");
127+
return tokenStream;
128+
}
129+
}
130+
111131
class AppendCharFilterFactory extends AbstractCharFilterFactory {
112132

113133
final String suffix;
@@ -136,7 +156,7 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
136156

137157
@Override
138158
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
139-
return singletonMap("mock", MockFactory::new);
159+
return Map.of("mock", MockFactory::new, "deprecated", DeprecatedTokenFilterFactory::new);
140160
}
141161

142162
@Override
@@ -492,4 +512,28 @@ public void testExceedSetMaxTokenLimit() {
492512
assertEquals(e.getMessage(), "The number of tokens produced by calling _analyze has exceeded the allowed maximum of ["
493513
+ idxMaxTokenCount + "]." + " This limit can be set by changing the [index.analyze.max_token_count] index level setting.");
494514
}
515+
516+
public void testDeprecationWarnings() throws IOException {
517+
AnalyzeAction.Request req = new AnalyzeAction.Request();
518+
req.tokenizer("standard");
519+
req.addTokenFilter("lowercase");
520+
req.addTokenFilter("deprecated");
521+
req.text("test text");
522+
523+
AnalyzeAction.Response analyze =
524+
TransportAnalyzeAction.analyze(req, registry, mockIndexService(), maxTokenCount);
525+
assertEquals(2, analyze.getTokens().size());
526+
assertWarnings("Using deprecated token filter [deprecated]");
527+
528+
// normalizer
529+
req = new AnalyzeAction.Request();
530+
req.addTokenFilter("lowercase");
531+
req.addTokenFilter("deprecated");
532+
req.text("text");
533+
534+
analyze =
535+
TransportAnalyzeAction.analyze(req, registry, mockIndexService(), maxTokenCount);
536+
assertEquals(1, analyze.getTokens().size());
537+
assertWarnings("Using deprecated token filter [deprecated]");
538+
}
495539
}

server/src/test/java/org/elasticsearch/index/IndexModuleTests.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.elasticsearch.index;
2020

2121
import org.apache.lucene.analysis.Analyzer;
22+
import org.apache.lucene.analysis.standard.StandardTokenizer;
2223
import org.apache.lucene.index.AssertingDirectoryReader;
2324
import org.apache.lucene.index.DirectoryReader;
2425
import org.apache.lucene.index.FieldInvertState;
@@ -442,7 +443,7 @@ public Analyzer get() {
442443
final Analyzer analyzer = new Analyzer() {
443444
@Override
444445
protected TokenStreamComponents createComponents(String fieldName) {
445-
throw new AssertionError("should not be here");
446+
return new TokenStreamComponents(new StandardTokenizer());
446447
}
447448

448449
@Override

server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java

+130-5
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@
2020
package org.elasticsearch.index.analysis;
2121

2222
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
23-
2423
import org.apache.lucene.analysis.Analyzer;
2524
import org.apache.lucene.analysis.MockTokenFilter;
2625
import org.apache.lucene.analysis.TokenStream;
26+
import org.apache.lucene.analysis.Tokenizer;
2727
import org.apache.lucene.analysis.en.EnglishAnalyzer;
2828
import org.apache.lucene.analysis.standard.StandardAnalyzer;
29+
import org.apache.lucene.analysis.standard.StandardTokenizer;
2930
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
3031
import org.elasticsearch.Version;
3132
import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -108,19 +109,25 @@ public void testOverrideDefaultAnalyzer() throws IOException {
108109
public void testOverrideDefaultAnalyzerWithoutAnalysisModeAll() throws IOException {
109110
Version version = VersionUtils.randomVersion(random());
110111
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
111-
TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(IndexSettingsModule.newIndexSettings("index", settings),
112-
"my_filter", Settings.EMPTY) {
112+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", settings);
113+
TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(indexSettings, "my_filter", Settings.EMPTY) {
113114
@Override
114115
public AnalysisMode getAnalysisMode() {
115116
return randomFrom(AnalysisMode.SEARCH_TIME, AnalysisMode.INDEX_TIME);
116117
}
117118

118119
@Override
119120
public TokenStream create(TokenStream tokenStream) {
120-
return null;
121+
return tokenStream;
121122
}
122123
};
123-
Analyzer analyzer = new CustomAnalyzer(null, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter });
124+
TokenizerFactory tokenizer = new AbstractTokenizerFactory(indexSettings, Settings.EMPTY, "my_tokenizer") {
125+
@Override
126+
public Tokenizer create() {
127+
return new StandardTokenizer();
128+
}
129+
};
130+
Analyzer analyzer = new CustomAnalyzer(tokenizer, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter });
124131
MapperException ex = expectThrows(MapperException.class,
125132
() -> emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings),
126133
singletonMap("default", new PreBuiltAnalyzerProvider("default", AnalyzerScope.INDEX, analyzer)), emptyMap(),
@@ -264,4 +271,122 @@ public void testEnsureCloseInvocationProperlyDelegated() throws IOException {
264271
registry.close();
265272
verify(mock).close();
266273
}
274+
275+
public void testDeprecationsAndExceptions() throws IOException {
276+
277+
AnalysisPlugin plugin = new AnalysisPlugin() {
278+
279+
class MockFactory extends AbstractTokenFilterFactory {
280+
MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
281+
super(indexSettings, name, settings);
282+
}
283+
284+
@Override
285+
public TokenStream create(TokenStream tokenStream) {
286+
if (indexSettings.getIndexVersionCreated().equals(Version.CURRENT)) {
287+
deprecationLogger.deprecated("Using deprecated token filter [deprecated]");
288+
}
289+
return tokenStream;
290+
}
291+
}
292+
293+
class ExceptionFactory extends AbstractTokenFilterFactory {
294+
295+
ExceptionFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
296+
super(indexSettings, name, settings);
297+
}
298+
299+
@Override
300+
public TokenStream create(TokenStream tokenStream) {
301+
if (indexSettings.getIndexVersionCreated().equals(Version.CURRENT)) {
302+
throw new IllegalArgumentException("Cannot use token filter [exception]");
303+
}
304+
return tokenStream;
305+
}
306+
}
307+
308+
class UnusedMockFactory extends AbstractTokenFilterFactory {
309+
UnusedMockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
310+
super(indexSettings, name, settings);
311+
}
312+
313+
@Override
314+
public TokenStream create(TokenStream tokenStream) {
315+
deprecationLogger.deprecated("Using deprecated token filter [unused]");
316+
return tokenStream;
317+
}
318+
}
319+
320+
class NormalizerFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
321+
322+
NormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
323+
super(indexSettings, name, settings);
324+
}
325+
326+
@Override
327+
public TokenStream create(TokenStream tokenStream) {
328+
deprecationLogger.deprecated("Using deprecated token filter [deprecated_normalizer]");
329+
return tokenStream;
330+
}
331+
332+
}
333+
334+
@Override
335+
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
336+
return Map.of("deprecated", MockFactory::new, "unused", UnusedMockFactory::new,
337+
"deprecated_normalizer", NormalizerFactory::new, "exception", ExceptionFactory::new);
338+
}
339+
};
340+
341+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
342+
Settings indexSettings = Settings.builder()
343+
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
344+
.put("index.analysis.filter.deprecated.type", "deprecated")
345+
.put("index.analysis.analyzer.custom.tokenizer", "standard")
346+
.putList("index.analysis.analyzer.custom.filter", "lowercase", "deprecated")
347+
.build();
348+
349+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
350+
351+
new AnalysisModule(TestEnvironment.newEnvironment(settings),
352+
singletonList(plugin)).getAnalysisRegistry().build(idxSettings);
353+
354+
// We should only get a warning from the token filter that is referenced in settings
355+
assertWarnings("Using deprecated token filter [deprecated]");
356+
357+
indexSettings = Settings.builder()
358+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.getPreviousVersion())
359+
.put("index.analysis.filter.deprecated.type", "deprecated_normalizer")
360+
.putList("index.analysis.normalizer.custom.filter", "lowercase", "deprecated_normalizer")
361+
.put("index.analysis.filter.deprecated.type", "deprecated")
362+
.put("index.analysis.filter.exception.type", "exception")
363+
.put("index.analysis.analyzer.custom.tokenizer", "standard")
364+
// exception will not throw because we're not on Version.CURRENT
365+
.putList("index.analysis.analyzer.custom.filter", "lowercase", "deprecated", "exception")
366+
.build();
367+
idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
368+
369+
new AnalysisModule(TestEnvironment.newEnvironment(settings),
370+
singletonList(plugin)).getAnalysisRegistry().build(idxSettings);
371+
372+
// We should only get a warning from the normalizer, because we're on a version where 'deprecated'
373+
// works fine
374+
assertWarnings("Using deprecated token filter [deprecated_normalizer]");
375+
376+
indexSettings = Settings.builder()
377+
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
378+
.put("index.analysis.filter.exception.type", "exception")
379+
.put("index.analysis.analyzer.custom.tokenizer", "standard")
380+
// exception will throw because the index was created on Version.CURRENT
381+
.putList("index.analysis.analyzer.custom.filter", "lowercase", "exception")
382+
.build();
383+
IndexSettings exceptionSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
384+
385+
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
386+
new AnalysisModule(TestEnvironment.newEnvironment(settings),
387+
singletonList(plugin)).getAnalysisRegistry().build(exceptionSettings);
388+
});
389+
assertEquals("Cannot use token filter [exception]", e.getMessage());
390+
391+
}
267392
}

0 commit comments

Comments
 (0)