diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java index 90190e42f2f85..607450b9322d6 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -19,20 +19,15 @@ package org.elasticsearch.analysis.common; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; -import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; import org.elasticsearch.test.VersionUtils; import java.io.IOException; -import java.io.StringReader; -import java.util.Map; public class CommonAnalysisPluginTests extends ESTestCase { @@ -51,13 +46,8 @@ public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException .build(); try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { - Map tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), - settings, commonAnalysisPlugin).tokenFilter; - TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - - IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, + () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)); assertEquals("The 
[nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + "Please change the filter name to [ngram] instead.", ex.getMessage()); } @@ -69,12 +59,7 @@ public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram").put("index.analysis.filter.my_ngram.type", "nGram") .build(); try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { - Map tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), - settingsPre7, commonAnalysisPlugin).tokenFilter; - TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - assertNotNull(tokenFilterFactory.create(tokenizer)); + createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin); assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. 
" + "Please change the filter name to [ngram] instead."); } @@ -95,13 +80,8 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep .build(); try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { - Map tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), - settings, commonAnalysisPlugin).tokenFilter; - TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - - IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, + () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)); assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + "Please change the filter name to [edge_ngram] instead.", ex.getMessage()); } @@ -116,12 +96,8 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep .build(); try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { - Map tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), - settingsPre7, commonAnalysisPlugin).tokenFilter; - TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - assertNotNull(tokenFilterFactory.create(tokenizer)); + createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), + settingsPre7, commonAnalysisPlugin); assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. 
" + "Please change the filter name to [edge_ngram] instead."); } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 882dcf6bb6cd9..31b343f300880 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.elasticsearch.ElasticsearchException; @@ -536,6 +537,10 @@ public IndexAnalyzers build(IndexSettings indexSettings, tokenFilterFactoryFactories, charFilterFactoryFactories); } + for (Analyzer analyzer : normalizers.values()) { + analyzer.normalize("", ""); // check for deprecations + } + if (!analyzers.containsKey(DEFAULT_ANALYZER_NAME)) { analyzers.put(DEFAULT_ANALYZER_NAME, produceAnalyzer(DEFAULT_ANALYZER_NAME, @@ -599,6 +604,7 @@ private static NamedAnalyzer produceAnalyzer(String name, } else { analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); } + checkVersions(analyzer); return analyzer; } @@ -626,4 +632,20 @@ private void processNormalizerFactory( NamedAnalyzer normalizer = new NamedAnalyzer(name, normalizerFactory.scope(), normalizerF); normalizers.put(name, normalizer); } + + // Some analysis components emit deprecation warnings or throw exceptions when used + // with the wrong version of elasticsearch. These exceptions and warnings are + // normally thrown when tokenstreams are constructed, which unless we build a + // tokenstream up-front does not happen until a document is indexed. 
In order to + // surface these warnings or exceptions as early as possible, we build an empty + // tokenstream and pull it through an Analyzer at construction time. + private static void checkVersions(Analyzer analyzer) { + try (TokenStream ts = analyzer.tokenStream("", "")) { + ts.reset(); + while (ts.incrementToken()) {} + ts.end(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index 23129ae546fe6..7b344bf335c88 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -38,6 +38,7 @@ import org.elasticsearch.index.analysis.AnalysisRegistry; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory; import org.elasticsearch.index.analysis.PreConfiguredCharFilter; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; @@ -108,6 +109,25 @@ public TokenStream create(TokenStream tokenStream) { } } + class DeprecatedTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory { + + DeprecatedTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + super(indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + deprecationLogger.deprecated("Using deprecated token filter [deprecated]"); + return tokenStream; + } + + @Override + public TokenStream normalize(TokenStream tokenStream) { + deprecationLogger.deprecated("Using deprecated token filter [deprecated]"); + return tokenStream; + } + 
} + class AppendCharFilterFactory extends AbstractCharFilterFactory { final String suffix; @@ -136,7 +156,7 @@ public Map> getTokenizers() { @Override public Map> getTokenFilters() { - return singletonMap("mock", MockFactory::new); + return Map.of("mock", MockFactory::new, "deprecated", DeprecatedTokenFilterFactory::new); } @Override @@ -492,4 +512,28 @@ public void testExceedSetMaxTokenLimit() { assertEquals(e.getMessage(), "The number of tokens produced by calling _analyze has exceeded the allowed maximum of [" + idxMaxTokenCount + "]." + " This limit can be set by changing the [index.analyze.max_token_count] index level setting."); } + + public void testDeprecationWarnings() throws IOException { + AnalyzeAction.Request req = new AnalyzeAction.Request(); + req.tokenizer("standard"); + req.addTokenFilter("lowercase"); + req.addTokenFilter("deprecated"); + req.text("test text"); + + AnalyzeAction.Response analyze = + TransportAnalyzeAction.analyze(req, registry, mockIndexService(), maxTokenCount); + assertEquals(2, analyze.getTokens().size()); + assertWarnings("Using deprecated token filter [deprecated]"); + + // normalizer + req = new AnalyzeAction.Request(); + req.addTokenFilter("lowercase"); + req.addTokenFilter("deprecated"); + req.text("text"); + + analyze = + TransportAnalyzeAction.analyze(req, registry, mockIndexService(), maxTokenCount); + assertEquals(1, analyze.getTokens().size()); + assertWarnings("Using deprecated token filter [deprecated]"); + } } diff --git a/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java b/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java index adeb49faa8941..2c11b26580f3d 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.index; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardTokenizer; import 
org.apache.lucene.index.AssertingDirectoryReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInvertState; @@ -442,7 +443,7 @@ public Analyzer get() { final Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - throw new AssertionError("should not be here"); + return new TokenStreamComponents(new StandardTokenizer()); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java index f7df2ee97f932..4a156db4f2101 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java @@ -20,12 +20,13 @@ package org.elasticsearch.index.analysis; import com.carrotsearch.randomizedtesting.generators.RandomPicks; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -108,8 +109,8 @@ public void testOverrideDefaultAnalyzer() throws IOException { public void testOverrideDefaultAnalyzerWithoutAnalysisModeAll() throws IOException { Version version = VersionUtils.randomVersion(random()); Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); - TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(IndexSettingsModule.newIndexSettings("index", settings), - "my_filter", Settings.EMPTY) { + IndexSettings indexSettings = 
IndexSettingsModule.newIndexSettings("index", settings); + TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(indexSettings, "my_filter", Settings.EMPTY) { @Override public AnalysisMode getAnalysisMode() { return randomFrom(AnalysisMode.SEARCH_TIME, AnalysisMode.INDEX_TIME); @@ -117,10 +118,16 @@ public AnalysisMode getAnalysisMode() { @Override public TokenStream create(TokenStream tokenStream) { - return null; + return tokenStream; } }; - Analyzer analyzer = new CustomAnalyzer(null, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter }); + TokenizerFactory tokenizer = new AbstractTokenizerFactory(indexSettings, Settings.EMPTY, "my_tokenizer") { + @Override + public Tokenizer create() { + return new StandardTokenizer(); + } + }; + Analyzer analyzer = new CustomAnalyzer(tokenizer, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter }); MapperException ex = expectThrows(MapperException.class, () -> emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings), singletonMap("default", new PreBuiltAnalyzerProvider("default", AnalyzerScope.INDEX, analyzer)), emptyMap(), @@ -264,4 +271,122 @@ public void testEnsureCloseInvocationProperlyDelegated() throws IOException { registry.close(); verify(mock).close(); } + + public void testDeprecationsAndExceptions() throws IOException { + + AnalysisPlugin plugin = new AnalysisPlugin() { + + class MockFactory extends AbstractTokenFilterFactory { + MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + super(indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + if (indexSettings.getIndexVersionCreated().equals(Version.CURRENT)) { + deprecationLogger.deprecated("Using deprecated token filter [deprecated]"); + } + return tokenStream; + } + } + + class ExceptionFactory extends AbstractTokenFilterFactory { + + ExceptionFactory(IndexSettings indexSettings, Environment env, String name, 
Settings settings) { + super(indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + if (indexSettings.getIndexVersionCreated().equals(Version.CURRENT)) { + throw new IllegalArgumentException("Cannot use token filter [exception]"); + } + return tokenStream; + } + } + + class UnusedMockFactory extends AbstractTokenFilterFactory { + UnusedMockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + super(indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + deprecationLogger.deprecated("Using deprecated token filter [unused]"); + return tokenStream; + } + } + + class NormalizerFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory { + + NormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + super(indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + deprecationLogger.deprecated("Using deprecated token filter [deprecated_normalizer]"); + return tokenStream; + } + + } + + @Override + public Map> getTokenFilters() { + return Map.of("deprecated", MockFactory::new, "unused", UnusedMockFactory::new, + "deprecated_normalizer", NormalizerFactory::new, "exception", ExceptionFactory::new); + } + }; + + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); + Settings indexSettings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put("index.analysis.filter.deprecated.type", "deprecated") + .put("index.analysis.analyzer.custom.tokenizer", "standard") + .putList("index.analysis.analyzer.custom.filter", "lowercase", "deprecated") + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); + + new AnalysisModule(TestEnvironment.newEnvironment(settings), + 
singletonList(plugin)).getAnalysisRegistry().build(idxSettings); + + // We should only get a warning from the token filter that is referenced in settings + assertWarnings("Using deprecated token filter [deprecated]"); + + indexSettings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.getPreviousVersion()) + .put("index.analysis.filter.deprecated.type", "deprecated_normalizer") + .putList("index.analysis.normalizer.custom.filter", "lowercase", "deprecated_normalizer") + .put("index.analysis.filter.deprecated.type", "deprecated") + .put("index.analysis.filter.exception.type", "exception") + .put("index.analysis.analyzer.custom.tokenizer", "standard") + // exception will not throw because we're not on Version.CURRENT + .putList("index.analysis.analyzer.custom.filter", "lowercase", "deprecated", "exception") + .build(); + idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); + + new AnalysisModule(TestEnvironment.newEnvironment(settings), + singletonList(plugin)).getAnalysisRegistry().build(idxSettings); + + // We should only get a warning from the normalizer, because we're on a version where 'deprecated' + // works fine + assertWarnings("Using deprecated token filter [deprecated_normalizer]"); + + indexSettings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put("index.analysis.filter.exception.type", "exception") + .put("index.analysis.analyzer.custom.tokenizer", "standard") + // exception will throw because the index was created on Version.CURRENT + .putList("index.analysis.analyzer.custom.filter", "lowercase", "exception") + .build(); + IndexSettings exceptionSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { + new AnalysisModule(TestEnvironment.newEnvironment(settings), + singletonList(plugin)).getAnalysisRegistry().build(exceptionSettings); + }); + assertEquals("Cannot use
token filter [exception]", e.getMessage()); + + } } diff --git a/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java index 675305a17cce6..3c8455224e646 100644 --- a/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java +++ b/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java @@ -228,14 +228,13 @@ public void testUnderscoreInAnalyzerName() throws IOException { } } - public void testStandardFilterBWC() throws IOException { + public void testStandardFilterBWC() { Version version = VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT); final Settings settings = Settings.builder().put("index.analysis.analyzer.my_standard.tokenizer", "standard") .put("index.analysis.analyzer.my_standard.filter", "standard") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).put(IndexMetaData.SETTING_VERSION_CREATED, version) .build(); - IndexAnalyzers analyzers = getIndexAnalyzers(settings); - IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> analyzers.get("my_standard").tokenStream("", "")); + IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> getIndexAnalyzers(settings)); assertThat(exc.getMessage(), equalTo("The [standard] token filter has been removed.")); }