Error on deprecated nGram and edgeNGram custom filters

Christoph Büscher · Christoph Büscher · commit 65c87d36a164 · 2019-12-20T11:53:13.000+01:00
The camel-case `nGram` and `edgeNGram` filter names were deprecated in 6.4. We currently throw errors on new indices when they are used. However, these errors are currently only thrown for pre-configured filters; when the names are used as the type of a custom filter, neither the deprecation warning nor the error is triggered. This change adds the appropriate deprecation warning and exception for custom filters of type `nGram` and `edgeNGram`. Closes elastic#50360
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -118,9 +118,11 @@
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.env.NodeEnvironment;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
@@ -238,7 +240,17 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
         filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
         filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
-        filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
+        filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
+            if (indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_7_6_0)) {
+                throw new IllegalArgumentException("The [edgeNGram] token filter name was deprecated in 6.4 and "
+                        + "cannot be used in new indices. Please change the filter name to [edge_ngram] instead.");
+            } else {
+                deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation",
+                        "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+                                + "Please change the filter name to [edge_ngram] instead.");
+            }
+            return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings);
+        });
         filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
         filters.put("fingerprint", FingerprintTokenFilterFactory::new);
         filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
@@ -258,7 +270,17 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         filters.put("min_hash", MinHashTokenFilterFactory::new);
         filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
         filters.put("ngram", NGramTokenFilterFactory::new);
-        filters.put("nGram", NGramTokenFilterFactory::new);
+        filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
+            if (indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_7_6_0)) {
+                throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                        + "Please change the filter name to [ngram] instead.");
+            } else {
+                deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation",
+                        "The [nGram] token filter name is deprecated and will be removed in a future version. "
+                                + "Please change the filter name to [ngram] instead.");
+            }
+            return new NGramTokenFilterFactory(indexSettings, environment, name, settings);
+        });
         filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
         filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
         filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
@@ -0,0 +1,240 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.analysis.common;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.IndexSettingsModule;
+import org.elasticsearch.test.VersionUtils;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Map;
+
+public class CommonAnalysisPluginTests extends ESTestCase {
+
+    /**
+     * Indices created from 6.0.0 up to (excluding) 7.0.0 may still use the deprecated "nGram" name but get a deprecation warning
+     */
+    public void testNGramDeprecationWarning() throws IOException {
+        final Settings testSettings = Settings.builder()
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+                .put(IndexMetaData.SETTING_VERSION_CREATED,
+                        VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)))
+                .build();
+        final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", testSettings);
+
+        try (CommonAnalysisPlugin plugin = new CommonAnalysisPlugin()) {
+            final Map<String, TokenFilterFactory> filtersByName = createTestAnalysis(indexSettings, testSettings, plugin).tokenFilter;
+            final Tokenizer source = new MockTokenizer();
+            source.setReader(new StringReader("foo bar"));
+            final TokenFilterFactory factory = filtersByName.get("nGram");
+            assertNotNull(factory.create(source));
+            assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. "
+                    + "Please change the filter name to [ngram] instead.");
+        }
+    }
+
+    /**
+     * Indices created on or after 7.0.0 must reject the deprecated "nGram" name with an {@link IllegalArgumentException}
+     */
+    public void testNGramDeprecationError() throws IOException {
+        final Settings testSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
+                .build();
+        final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", testSettings);
+
+        try (CommonAnalysisPlugin plugin = new CommonAnalysisPlugin()) {
+            final Map<String, TokenFilterFactory> filtersByName = createTestAnalysis(indexSettings, testSettings, plugin).tokenFilter;
+            final Tokenizer source = new MockTokenizer();
+            source.setReader(new StringReader("foo bar"));
+            final TokenFilterFactory factory = filtersByName.get("nGram");
+            final IllegalArgumentException rejection = expectThrows(IllegalArgumentException.class, () -> factory.create(source));
+            assertEquals(
+                    "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
+                    + " name to [ngram] instead.",
+                    rejection.getMessage());
+        }
+    }
+
+    /**
+     * Indices created from 6.4.0 up to (excluding) 7.0.0 may still use the deprecated "edgeNGram" name but get a deprecation warning
+     */
+    public void testEdgeNGramDeprecationWarning() throws IOException {
+        final Settings testSettings = Settings.builder()
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+                .put(IndexMetaData.SETTING_VERSION_CREATED,
+                        VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)))
+                .build();
+        final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", testSettings);
+
+        try (CommonAnalysisPlugin plugin = new CommonAnalysisPlugin()) {
+            final Map<String, TokenFilterFactory> filtersByName = createTestAnalysis(indexSettings, testSettings, plugin).tokenFilter;
+            final Tokenizer source = new MockTokenizer();
+            source.setReader(new StringReader("foo bar"));
+            final TokenFilterFactory factory = filtersByName.get("edgeNGram");
+            assertNotNull(factory.create(source));
+            assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+                    + "Please change the filter name to [edge_ngram] instead.");
+        }
+    }
+
+    /**
+     * Indices created on or after 7.0.0 must reject the deprecated "edgeNGram" name with an {@link IllegalArgumentException}
+     */
+    public void testEdgeNGramDeprecationError() throws IOException {
+        final Settings testSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
+                .build();
+        final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", testSettings);
+
+        try (CommonAnalysisPlugin plugin = new CommonAnalysisPlugin()) {
+            final Map<String, TokenFilterFactory> filtersByName = createTestAnalysis(indexSettings, testSettings, plugin).tokenFilter;
+            final Tokenizer source = new MockTokenizer();
+            source.setReader(new StringReader("foo bar"));
+            final TokenFilterFactory factory = filtersByName.get("edgeNGram");
+            final IllegalArgumentException rejection = expectThrows(IllegalArgumentException.class, () -> factory.create(source));
+            assertEquals(
+                    "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
+                    + " name to [edge_ngram] instead.",
+                    rejection.getMessage());
+        }
+    }
+    
+    /**
+     * Check that a custom filter of the deprecated type "nGram" throws an exception for indices created on or
+     * after 7.6.0 and only logs a deprecation warning for earlier indices
+     */
+    public void testnGramFilterInCustomAnalyzerDeprecationError() throws IOException {
+        final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_6_0, Version.CURRENT))
+            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
+            .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
+            .put("index.analysis.filter.my_ngram.type", "nGram")
+            .build();
+        // try-with-resources so the plugin is closed even when one of the assertions below fails
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
+                () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
+            assertEquals("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                    + "Please change the filter name to [ngram] instead.", ex.getMessage());
+
+            // upper bound is derived from the 7.6.0 cut-over rather than hard-coding the last known 7.5.x release
+            final Settings settingsPre76 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+                .put(IndexMetaData.SETTING_VERSION_CREATED,
+                    VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_6_0)))
+                .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+                .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
+                .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
+                .put("index.analysis.filter.my_ngram.type", "nGram")
+                .build();
+            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre76), settingsPre76, commonAnalysisPlugin);
+            assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. "
+                    + "Please change the filter name to [ngram] instead.");
+        }
+    }
+    
+    /**
+     * Check that a custom filter of the deprecated type "edgeNGram" throws an exception for indices created on or
+     * after 7.6.0 and only logs a deprecation warning for earlier indices
+     */
+    public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
+        final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_6_0, Version.CURRENT))
+            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
+            .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
+            .put("index.analysis.filter.my_ngram.type", "edgeNGram")
+            .build();
+        // try-with-resources so the plugin is closed even when one of the assertions below fails
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
+                () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
+            assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                    + "Please change the filter name to [edge_ngram] instead.", ex.getMessage());
+
+            // upper bound is derived from the 7.6.0 cut-over rather than hard-coding the last known 7.5.x release
+            final Settings settingsPre76 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+                .put(IndexMetaData.SETTING_VERSION_CREATED,
+                    VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_6_0)))
+                .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+                .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
+                .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
+                .put("index.analysis.filter.my_ngram.type", "edgeNGram")
+                .build();
+            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre76), settingsPre76, commonAnalysisPlugin);
+            assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+                    + "Please change the filter name to [edge_ngram] instead.");
+        }
+    }
+
+    /**
+     * Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
+     */
+    public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
+        Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT))
+            .put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
+            .putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
+            .build();
+        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+        // try-with-resources so the plugin is closed even when one of the assertions below fails
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
+                () -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin));
+            assertEquals("[standard_html_strip] analyzer is not supported for new indices, " +
+                "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter", ex.getMessage());
+        }
+    }
+
+    /**
+     * Using the deprecated "standard_html_strip" analyzer on an index created from 6.0.0 up to (excluding) 7.0.0 only logs a warning
+     */
+    public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
+        final Settings testSettings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(IndexMetaData.SETTING_VERSION_CREATED,
+                VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)))
+            .put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
+            .putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
+            .build();
+        final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", testSettings);
+
+        try (CommonAnalysisPlugin plugin = new CommonAnalysisPlugin()) {
+            final IndexAnalyzers indexAnalyzers = createTestAnalysis(indexSettings, testSettings, plugin).indexAnalyzers;
+            final Analyzer customAnalyzer = indexAnalyzers.get("custom_analyzer");
+            final NamedAnalyzer named = (NamedAnalyzer) customAnalyzer;
+            assertNotNull(named.analyzer());
+            assertWarnings("Deprecated analyzer [standard_html_strip] used, " +
+                "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
+        }
+    }
+}