Skip to content

Commit ef18d3f

Browse files
author
Christoph Büscher
authored
Add analysis modes to restrict token filter use contexts (elastic#36103)
Currently, token filter settings are treated as fixed once they are declared and used in an analyzer. This prevents changes to analyzers that are already actively used to index documents, since changes to the analysis chain could corrupt the index. However, it would be safe to allow updates to token filters at search time ("search_analyzer"). This change introduces a new property of token filters that allows marking them as usable only at search time or only at index time. Any analyzer that uses these token filters inherits that property and can be rejected if it is used in another context. This is a first step towards making specific token filters (e.g. the synonym filter) updateable. Relates to elastic#29051
1 parent 6c6c44e commit ef18d3f

File tree

9 files changed

+445
-4
lines changed

9 files changed

+445
-4
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.analysis;
21+
22+
/**
 * Enum representing the mode in which token filters and analyzers are allowed to operate.
 * While most token filters are allowed both in index and search time analyzers, some are
 * restricted to be used only at index time, others at search time.
 */
public enum AnalysisMode {

    /**
     * AnalysisMode representing analysis components that can be used only at index time
     */
    INDEX_TIME("index time") {
        @Override
        public AnalysisMode merge(AnalysisMode other) {
            if (other == AnalysisMode.SEARCH_TIME) {
                throw new IllegalStateException("Cannot merge SEARCH_TIME and INDEX_TIME analysis mode.");
            }
            return AnalysisMode.INDEX_TIME;
        }
    },

    /**
     * AnalysisMode representing analysis components that can be used only at search time
     */
    SEARCH_TIME("search time") {
        @Override
        public AnalysisMode merge(AnalysisMode other) {
            if (other == AnalysisMode.INDEX_TIME) {
                throw new IllegalStateException("Cannot merge SEARCH_TIME and INDEX_TIME analysis mode.");
            }
            return AnalysisMode.SEARCH_TIME;
        }
    },

    /**
     * AnalysisMode representing analysis components that can be used both at index and search time
     */
    ALL("all") {
        @Override
        public AnalysisMode merge(AnalysisMode other) {
            // ALL places no restriction of its own, so the other mode wins unchanged
            return other;
        }
    };

    /** Human readable name of the mode, used when building error messages. */
    private final String readableName;

    AnalysisMode(String name) {
        this.readableName = name;
    }

    /**
     * Returns the human readable name of this mode ("index time", "search time" or "all").
     */
    public String getReadableName() {
        return this.readableName;
    }

    /**
     * Returns a mode that is compatible with both this mode and the other mode, that is:
     * <ul>
     * <li>ALL.merge(INDEX_TIME) == INDEX_TIME</li>
     * <li>ALL.merge(SEARCH_TIME) == SEARCH_TIME</li>
     * <li>INDEX_TIME.merge(ALL) == INDEX_TIME and SEARCH_TIME.merge(ALL) == SEARCH_TIME</li>
     * <li>merging a mode with itself returns that mode</li>
     * <li>INDEX_TIME.merge(SEARCH_TIME) throws an {@link IllegalStateException}</li>
     * <li>SEARCH_TIME.merge(INDEX_TIME) throws an {@link IllegalStateException}</li>
     * </ul>
     *
     * @param other the mode to combine with this mode
     * @return the most restrictive mode that satisfies both this mode and {@code other}
     * @throws IllegalStateException if one mode is INDEX_TIME and the other is SEARCH_TIME
     */
    abstract AnalysisMode merge(AnalysisMode other);
}

server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020

2121
import org.apache.lucene.analysis.Analyzer;
2222
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
23-
import org.elasticsearch.Version;
24-
import org.elasticsearch.core.internal.io.IOUtils;
2523
import org.elasticsearch.ElasticsearchException;
24+
import org.elasticsearch.Version;
2625
import org.elasticsearch.cluster.metadata.IndexMetaData;
2726
import org.elasticsearch.common.settings.Settings;
27+
import org.elasticsearch.core.internal.io.IOUtils;
2828
import org.elasticsearch.env.Environment;
2929
import org.elasticsearch.index.Index;
3030
import org.elasticsearch.index.IndexSettings;
@@ -456,6 +456,7 @@ public IndexAnalyzers build(IndexSettings indexSettings,
456456
if (defaultAnalyzer == null) {
457457
throw new IllegalArgumentException("no default analyzer configured");
458458
}
459+
defaultAnalyzer.checkAllowedInMode(AnalysisMode.ALL);
459460
if (analyzers.containsKey("default_index")) {
460461
throw new IllegalArgumentException("setting [index.analysis.analyzer.default_index] is not supported anymore, use " +
461462
"[index.analysis.analyzer.default] instead for index [" + index.getName() + "]");

server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public final class CustomAnalyzer extends Analyzer {
3636

3737
private final int positionIncrementGap;
3838
private final int offsetGap;
39+
private final AnalysisMode analysisMode;
3940

4041
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
4142
TokenFilterFactory[] tokenFilters) {
@@ -50,6 +51,12 @@ public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, C
5051
this.tokenFilters = tokenFilters;
5152
this.positionIncrementGap = positionIncrementGap;
5253
this.offsetGap = offsetGap;
54+
// merge and transfer token filter analysis modes with analyzer
55+
AnalysisMode mode = AnalysisMode.ALL;
56+
for (TokenFilterFactory f : tokenFilters) {
57+
mode = mode.merge(f.getAnalysisMode());
58+
}
59+
this.analysisMode = mode;
5360
}
5461

5562
/**
@@ -84,6 +91,10 @@ public int getOffsetGap(String field) {
8491
return this.offsetGap;
8592
}
8693

94+
/**
 * Returns the {@link AnalysisMode} of this analyzer. The value is computed once in the
 * constructor by merging the analysis modes of all token filters of this analyzer.
 */
public AnalysisMode getAnalysisMode() {
95+
return this.analysisMode;
96+
}
97+
8798
@Override
8899
protected TokenStreamComponents createComponents(String fieldName) {
89100
Tokenizer tokenizer = tokenizerFactory.create();

server/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121

2222
import org.apache.lucene.analysis.Analyzer;
2323
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
24+
import org.elasticsearch.index.mapper.MapperException;
2425

26+
import java.util.ArrayList;
27+
import java.util.List;
2528
import java.util.Objects;
2629

2730
/**
@@ -34,6 +37,7 @@ public class NamedAnalyzer extends DelegatingAnalyzerWrapper {
3437
private final AnalyzerScope scope;
3538
private final Analyzer analyzer;
3639
private final int positionIncrementGap;
40+
private final AnalysisMode analysisMode;
3741

3842
public NamedAnalyzer(NamedAnalyzer analyzer, int positionIncrementGap) {
3943
this(analyzer.name(), analyzer.scope(), analyzer.analyzer(), positionIncrementGap);
@@ -43,12 +47,17 @@ public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer) {
4347
this(name, scope, analyzer, Integer.MIN_VALUE);
4448
}
4549

46-
public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer, int positionIncrementGap) {
50+
/**
 * Creates a named wrapper around the given analyzer.
 *
 * @param name                 the name under which the analyzer is registered
 * @param scope                the scope of the analyzer
 * @param analyzer             the analyzer to wrap
 * @param positionIncrementGap the position increment gap stored for this analyzer
 */
NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer, int positionIncrementGap) {
    super(ERROR_STRATEGY);
    this.name = name;
    this.scope = scope;
    this.analyzer = analyzer;
    this.positionIncrementGap = positionIncrementGap;
    // only a CustomAnalyzer carries a restriction (derived from its token filters);
    // every other analyzer is treated as usable in all modes
    this.analysisMode = analyzer instanceof CustomAnalyzer
        ? ((CustomAnalyzer) analyzer).getAnalysisMode()
        : AnalysisMode.ALL;
}
5362

5463
/**
@@ -65,6 +74,13 @@ public AnalyzerScope scope() {
6574
return this.scope;
6675
}
6776

77+
/**
78+
* Returns the {@link AnalysisMode} this analyzer is allowed to be used in. This is the mode of the
* wrapped analyzer if it is a {@link CustomAnalyzer}, otherwise {@link AnalysisMode#ALL}.
79+
*/
80+
public AnalysisMode getAnalysisMode() {
81+
return this.analysisMode;
82+
}
83+
6884
/**
6985
* The actual analyzer.
7086
*/
@@ -85,9 +101,37 @@ public int getPositionIncrementGap(String fieldName) {
85101
return super.getPositionIncrementGap(fieldName);
86102
}
87103

104+
/**
105+
* Checks the wrapped analyzer for the provided restricted {@link AnalysisMode} and throws
106+
* an error if the analyzer is not allowed to run in that mode. The error contains more detailed information about
107+
* the offending filters that caused the analyzer to not be allowed in this mode.
108+
*/
109+
public void checkAllowedInMode(AnalysisMode mode) {
110+
Objects.requireNonNull(mode);
111+
if (this.getAnalysisMode() == AnalysisMode.ALL) {
112+
return; // everything allowed if this analyzer is in ALL mode
113+
}
114+
if (this.getAnalysisMode() != mode) {
115+
if (analyzer instanceof CustomAnalyzer) {
116+
TokenFilterFactory[] tokenFilters = ((CustomAnalyzer) analyzer).tokenFilters();
117+
List<String> offendingFilters = new ArrayList<>();
118+
for (TokenFilterFactory tokenFilter : tokenFilters) {
119+
if (tokenFilter.getAnalysisMode() != mode) {
120+
offendingFilters.add(tokenFilter.name());
121+
}
122+
}
123+
throw new MapperException("analyzer [" + name + "] contains filters " + offendingFilters
124+
+ " that are not allowed to run in " + mode.getReadableName() + " mode.");
125+
} else {
126+
throw new MapperException(
127+
"analyzer [" + name + "] contains components that are not allowed to run in " + mode.getReadableName() + " mode.");
128+
}
129+
}
130+
}
131+
88132
@Override
89133
public String toString() {
90-
return "analyzer name[" + name + "], analyzer [" + analyzer + "]";
134+
return "analyzer name[" + name + "], analyzer [" + analyzer + "], analysisMode [" + analysisMode + "]";
91135
}
92136

93137
/** It is an error if this is ever used, it means we screwed up! */

server/src/main/java/org/elasticsearch/index/analysis/TokenFilterFactory.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ default TokenFilterFactory getSynonymFilter() {
7474
return this;
7575
}
7676

77+
/**
78+
* Get the {@link AnalysisMode} this filter is allowed to be used in. The default is
79+
* {@link AnalysisMode#ALL}. Implementations need to override this method to define their
80+
* own restrictions.
*
* @return the analysis mode of this filter, {@link AnalysisMode#ALL} unless overridden
81+
*/
82+
default AnalysisMode getAnalysisMode() {
83+
return AnalysisMode.ALL;
84+
}
85+
7786
/**
7887
* A TokenFilterFactory that does no filtering to its TokenStream
7988
*/

server/src/main/java/org/elasticsearch/index/mapper/TypeParsers.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.ElasticsearchParseException;
2424
import org.elasticsearch.common.time.DateFormatter;
2525
import org.elasticsearch.common.xcontent.support.XContentMapValues;
26+
import org.elasticsearch.index.analysis.AnalysisMode;
2627
import org.elasticsearch.index.analysis.NamedAnalyzer;
2728
import org.elasticsearch.index.similarity.SimilarityProvider;
2829

@@ -80,18 +81,37 @@ private static void parseAnalyzersAndTermVectors(FieldMapper.Builder builder, St
8081
if (analyzer == null) {
8182
throw new MapperParsingException("analyzer [" + propNode.toString() + "] not found for field [" + name + "]");
8283
}
84+
analyzer.checkAllowedInMode(AnalysisMode.SEARCH_TIME);
8385
searchAnalyzer = analyzer;
8486
iterator.remove();
8587
} else if (propName.equals("search_quote_analyzer")) {
8688
NamedAnalyzer analyzer = parserContext.getIndexAnalyzers().get(propNode.toString());
8789
if (analyzer == null) {
8890
throw new MapperParsingException("analyzer [" + propNode.toString() + "] not found for field [" + name + "]");
8991
}
92+
analyzer.checkAllowedInMode(AnalysisMode.SEARCH_TIME);
9093
searchQuoteAnalyzer = analyzer;
9194
iterator.remove();
9295
}
9396
}
9497

98+
// check analyzers are allowed to work in the respective AnalysisMode
99+
{
100+
if (indexAnalyzer != null) {
101+
if (searchAnalyzer == null) {
102+
indexAnalyzer.checkAllowedInMode(AnalysisMode.ALL);
103+
} else {
104+
indexAnalyzer.checkAllowedInMode(AnalysisMode.INDEX_TIME);
105+
}
106+
}
107+
if (searchAnalyzer != null) {
108+
searchAnalyzer.checkAllowedInMode(AnalysisMode.SEARCH_TIME);
109+
}
110+
if (searchQuoteAnalyzer != null) {
111+
searchQuoteAnalyzer.checkAllowedInMode(AnalysisMode.SEARCH_TIME);
112+
}
113+
}
114+
95115
if (indexAnalyzer == null && searchAnalyzer != null) {
96116
throw new MapperParsingException("analyzer on field [" + name + "] must be set when search_analyzer is set");
97117
}

server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
package org.elasticsearch.index.analysis;
2121

2222
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
23+
24+
import org.apache.lucene.analysis.Analyzer;
2325
import org.apache.lucene.analysis.MockTokenFilter;
2426
import org.apache.lucene.analysis.TokenStream;
2527
import org.apache.lucene.analysis.en.EnglishAnalyzer;
@@ -31,6 +33,7 @@
3133
import org.elasticsearch.env.Environment;
3234
import org.elasticsearch.env.TestEnvironment;
3335
import org.elasticsearch.index.IndexSettings;
36+
import org.elasticsearch.index.mapper.MapperException;
3437
import org.elasticsearch.indices.analysis.AnalysisModule;
3538
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
3639
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
@@ -102,6 +105,29 @@ public void testOverrideDefaultAnalyzer() throws IOException {
102105
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
103106
}
104107

108+
public void testOverrideDefaultAnalyzerWithoutAnalysisModeAll() throws IOException {
109+
Version version = VersionUtils.randomVersion(random());
110+
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
111+
TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(IndexSettingsModule.newIndexSettings("index", settings),
112+
"my_filter", Settings.EMPTY) {
113+
@Override
114+
public AnalysisMode getAnalysisMode() {
115+
return randomFrom(AnalysisMode.SEARCH_TIME, AnalysisMode.INDEX_TIME);
116+
}
117+
118+
@Override
119+
public TokenStream create(TokenStream tokenStream) {
120+
return null;
121+
}
122+
};
123+
Analyzer analyzer = new CustomAnalyzer("tokenizerName", null, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter });
124+
MapperException ex = expectThrows(MapperException.class,
125+
() -> emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings),
126+
singletonMap("default", new PreBuiltAnalyzerProvider("my_analyzer", AnalyzerScope.INDEX, analyzer)), emptyMap(),
127+
emptyMap(), emptyMap(), emptyMap()));
128+
assertEquals("analyzer [my_analyzer] contains filters [my_filter] that are not allowed to run in all mode.", ex.getMessage());
129+
}
130+
105131
public void testOverrideDefaultIndexAnalyzerIsUnsupported() {
106132
Version version = VersionUtils.randomVersionBetween(random(), Version.V_6_0_0_alpha1, Version.CURRENT);
107133
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();

0 commit comments

Comments
 (0)