Skip to content

Commit fde8089

Browse files
author
Christoph Büscher
committed
Allow reloading of search time analyzers
Currently changing resources (like dictionaries, synonym files etc...) of search time analyzers is only possible by closing an index, changing the underlying resource (e.g. synonym files) and then re-opening the index for the change to take effect. This PR adds a new API endpoint that allows triggering reloading of certain analysis resources (currently token filters) that will then pick up changes in underlying file resources. To achieve this we introduce a new type of custom analyzer (ReloadableCustomAnalyzer) that uses a ReuseStrategy that allows swapping out analysis components. Custom analyzers that contain filters that are markes as "updateable" will automatically choose this implementation. This PR also adds this capability to `synonym` token filters for use in search time analyzers. Relates to elastic#29051
1 parent 665b656 commit fde8089

27 files changed

+1274
-44
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java

+13
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.elasticsearch.index.IndexSettings;
3131
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
3232
import org.elasticsearch.index.analysis.Analysis;
33+
import org.elasticsearch.index.analysis.AnalysisMode;
3334
import org.elasticsearch.index.analysis.CharFilterFactory;
3435
import org.elasticsearch.index.analysis.CustomAnalyzer;
3536
import org.elasticsearch.index.analysis.TokenFilterFactory;
@@ -50,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
5051
private final boolean lenient;
5152
protected final Settings settings;
5253
protected final Environment environment;
54+
private final boolean updateable;
5355

5456
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
5557
String name, Settings settings) {
@@ -65,9 +67,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
6567
this.expand = settings.getAsBoolean("expand", true);
6668
this.lenient = settings.getAsBoolean("lenient", false);
6769
this.format = settings.get("format", "");
70+
this.updateable = settings.getAsBoolean("updateable", false);
6871
this.environment = env;
6972
}
7073

74+
@Override
75+
public AnalysisMode getAnalysisMode() {
76+
return this.updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
77+
}
78+
7179
@Override
7280
public TokenStream create(TokenStream tokenStream) {
7381
throw new IllegalStateException("Call createPerAnalyzerSynonymFactory to specialize this factory for an analysis chain first");
@@ -98,6 +106,11 @@ public TokenFilterFactory getSynonymFilter() {
98106
// which doesn't support stacked input tokens
99107
return IDENTITY_FILTER;
100108
}
109+
110+
@Override
111+
public AnalysisMode getAnalysisMode() {
112+
return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
113+
}
101114
};
102115
}
103116

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.analysis.common;
21+
22+
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
23+
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
24+
import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzersResponse;
25+
import org.elasticsearch.action.search.SearchResponse;
26+
import org.elasticsearch.common.settings.Settings;
27+
import org.elasticsearch.env.Environment;
28+
import org.elasticsearch.index.query.QueryBuilders;
29+
import org.elasticsearch.plugins.Plugin;
30+
import org.elasticsearch.test.ESIntegTestCase;
31+
import org.elasticsearch.test.InternalTestCluster;
32+
33+
import java.io.FileNotFoundException;
34+
import java.io.IOException;
35+
import java.io.OutputStreamWriter;
36+
import java.io.PrintWriter;
37+
import java.nio.charset.StandardCharsets;
38+
import java.nio.file.Files;
39+
import java.nio.file.Path;
40+
import java.nio.file.StandardOpenOption;
41+
import java.util.Arrays;
42+
import java.util.Collection;
43+
import java.util.HashSet;
44+
import java.util.Set;
45+
46+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
47+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
48+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
49+
50+
public class SynonymAnalyzerIT extends ESIntegTestCase {
51+
52+
@Override
53+
protected Collection<Class<? extends Plugin>> nodePlugins() {
54+
return Arrays.asList(CommonAnalysisPlugin.class);
55+
}
56+
57+
/**
58+
* This test needs to write to the config directory, this is difficult in an external cluster so we overwrite this to force running with
59+
* {@link InternalTestCluster}
60+
*/
61+
@Override
62+
protected boolean ignoreExternalCluster() {
63+
return true;
64+
}
65+
66+
public void testSynonymsUpdateable() throws FileNotFoundException, IOException, InterruptedException {
67+
Path config = internalCluster().getInstance(Environment.class).configFile();
68+
String synonymsFileName = "synonyms.txt";
69+
Path synonymsFile = config.resolve(synonymsFileName);
70+
Files.createFile(synonymsFile);
71+
assertTrue(Files.exists(synonymsFile));
72+
try (PrintWriter out = new PrintWriter(
73+
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.CREATE), StandardCharsets.UTF_8))) {
74+
out.println("foo, baz");
75+
}
76+
assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
77+
.put("index.number_of_shards", cluster().numDataNodes() * 2)
78+
.put("index.number_of_replicas", 1)
79+
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
80+
.put("analysis.analyzer.my_synonym_analyzer.filter", "my_synonym_filter")
81+
.put("analysis.filter.my_synonym_filter.type", "synonym")
82+
.put("analysis.filter.my_synonym_filter.updateable", "true")
83+
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
84+
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));
85+
86+
client().prepareIndex("test", "_doc", "1").setSource("field", "foo").get();
87+
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());
88+
89+
SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
90+
assertHitCount(response, 1L);
91+
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
92+
assertHitCount(response, 0L);
93+
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
94+
assertEquals(2, analyzeResponse.getTokens().size());
95+
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
96+
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());
97+
98+
// now update synonyms file several times and trigger reloading
99+
for (int i = 0; i < 10; i++) {
100+
String testTerm = randomAlphaOfLength(10);
101+
try (PrintWriter out = new PrintWriter(
102+
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
103+
out.println("foo, baz, " + testTerm);
104+
}
105+
ReloadAnalyzersResponse reloadResponse = client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet();
106+
assertNoFailures(reloadResponse);
107+
assertEquals(cluster().numDataNodes(), reloadResponse.getSuccessfulShards());
108+
109+
analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
110+
assertEquals(3, analyzeResponse.getTokens().size());
111+
Set<String> tokens = new HashSet<>();
112+
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
113+
assertTrue(tokens.contains("foo"));
114+
assertTrue(tokens.contains("baz"));
115+
assertTrue(tokens.contains(testTerm));
116+
117+
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
118+
assertHitCount(response, 1L);
119+
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", testTerm)).get();
120+
assertHitCount(response, 1L);
121+
}
122+
}
123+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.analysis.common;
21+
22+
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
23+
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
24+
import org.elasticsearch.action.search.SearchResponse;
25+
import org.elasticsearch.common.settings.Settings;
26+
import org.elasticsearch.index.query.QueryBuilders;
27+
import org.elasticsearch.plugins.Plugin;
28+
import org.elasticsearch.test.ESSingleNodeTestCase;
29+
30+
import java.io.FileNotFoundException;
31+
import java.io.IOException;
32+
import java.io.OutputStreamWriter;
33+
import java.io.PrintWriter;
34+
import java.nio.charset.StandardCharsets;
35+
import java.nio.file.Files;
36+
import java.nio.file.Path;
37+
import java.nio.file.StandardOpenOption;
38+
import java.util.Arrays;
39+
import java.util.Collection;
40+
import java.util.HashSet;
41+
import java.util.Set;
42+
43+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
44+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
45+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
46+
47+
public class SynonymAnalyzerTests extends ESSingleNodeTestCase {
48+
49+
@Override
50+
protected Collection<Class<? extends Plugin>> getPlugins() {
51+
return Arrays.asList(CommonAnalysisPlugin.class);
52+
}
53+
54+
public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
55+
String synonymsFileName = "synonyms.txt";
56+
Path configDir = node().getEnvironment().configFile();
57+
if (Files.exists(configDir) == false) {
58+
Files.createDirectory(configDir);
59+
}
60+
Path synonymsFile = configDir.resolve(synonymsFileName);
61+
if (Files.exists(synonymsFile) == false) {
62+
Files.createFile(synonymsFile);
63+
}
64+
try (PrintWriter out = new PrintWriter(
65+
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
66+
out.println("foo, baz");
67+
}
68+
69+
assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
70+
.put("index.number_of_shards", 5)
71+
.put("index.number_of_replicas", 0)
72+
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
73+
.putList("analysis.analyzer.my_synonym_analyzer.filter", "lowercase", "my_synonym_filter")
74+
.put("analysis.filter.my_synonym_filter.type", "synonym")
75+
.put("analysis.filter.my_synonym_filter.updateable", "true")
76+
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
77+
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));
78+
79+
client().prepareIndex("test", "_doc", "1").setSource("field", "Foo").get();
80+
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());
81+
82+
SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
83+
assertHitCount(response, 1L);
84+
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
85+
assertHitCount(response, 0L);
86+
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
87+
assertEquals(2, analyzeResponse.getTokens().size());
88+
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
89+
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());
90+
91+
// now update synonyms file and trigger reloading
92+
try (PrintWriter out = new PrintWriter(
93+
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
94+
out.println("foo, baz, buzz");
95+
}
96+
assertNoFailures(client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet());
97+
98+
analyzeResponse = client().admin().indices().prepareAnalyze("test", "Foo").setAnalyzer("my_synonym_analyzer").get();
99+
assertEquals(3, analyzeResponse.getTokens().size());
100+
Set<String> tokens = new HashSet<>();
101+
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
102+
assertTrue(tokens.contains("foo"));
103+
assertTrue(tokens.contains("baz"));
104+
assertTrue(tokens.contains("buzz"));
105+
106+
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
107+
assertHitCount(response, 1L);
108+
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
109+
assertHitCount(response, 1L);
110+
}
111+
}

server/src/main/java/org/elasticsearch/action/ActionModule.java

+3
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@
125125
import org.elasticsearch.action.admin.indices.recovery.TransportRecoveryAction;
126126
import org.elasticsearch.action.admin.indices.refresh.RefreshAction;
127127
import org.elasticsearch.action.admin.indices.refresh.TransportRefreshAction;
128+
import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzerAction;
129+
import org.elasticsearch.action.admin.indices.reloadanalyzer.TransportReloadAnalyzersAction;
128130
import org.elasticsearch.action.admin.indices.rollover.RolloverAction;
129131
import org.elasticsearch.action.admin.indices.rollover.TransportRolloverAction;
130132
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentsAction;
@@ -509,6 +511,7 @@ public <Request extends ActionRequest, Response extends ActionResponse> void reg
509511
actions.register(ClearScrollAction.INSTANCE, TransportClearScrollAction.class);
510512
actions.register(RecoveryAction.INSTANCE, TransportRecoveryAction.class);
511513
actions.register(NodesReloadSecureSettingsAction.INSTANCE, TransportNodesReloadSecureSettingsAction.class);
514+
actions.register(ReloadAnalyzerAction.INSTANCE, TransportReloadAnalyzersAction.class);
512515

513516
//Indexed scripts
514517
actions.register(PutStoredScriptAction.INSTANCE, TransportPutStoredScriptAction.class);

server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java

+30-10
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,12 @@
4949
import org.elasticsearch.index.analysis.AnalysisRegistry;
5050
import org.elasticsearch.index.analysis.CharFilterFactory;
5151
import org.elasticsearch.index.analysis.CustomAnalyzer;
52+
import org.elasticsearch.index.analysis.CustomAnalyzerProvider.AnalyzerComponents;
5253
import org.elasticsearch.index.analysis.IndexAnalyzers;
5354
import org.elasticsearch.index.analysis.NamedAnalyzer;
5455
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
5556
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
57+
import org.elasticsearch.index.analysis.ReloadableCustomAnalyzer;
5658
import org.elasticsearch.index.analysis.TokenFilterFactory;
5759
import org.elasticsearch.index.analysis.TokenizerFactory;
5860
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -299,18 +301,36 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
299301
}
300302
}
301303

302-
CustomAnalyzer customAnalyzer = null;
303-
if (analyzer instanceof CustomAnalyzer) {
304-
customAnalyzer = (CustomAnalyzer) analyzer;
305-
} else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
306-
customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
304+
Analyzer customAnalyzer = null;
305+
// maybe unwrap analyzer from NamedAnalyzer
306+
Analyzer potentialCustomAnalyzer = analyzer;
307+
if (analyzer instanceof NamedAnalyzer) {
308+
potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
309+
}
310+
if (potentialCustomAnalyzer instanceof CustomAnalyzer || potentialCustomAnalyzer instanceof ReloadableCustomAnalyzer) {
311+
customAnalyzer = potentialCustomAnalyzer;
307312
}
308313

309314
if (customAnalyzer != null) {
310-
// customAnalyzer = divide charfilter, tokenizer tokenfilters
311-
CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
312-
TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
313-
TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
315+
// divide charfilter, tokenizer tokenfilters
316+
CharFilterFactory[] charFilterFactories;
317+
TokenizerFactory tokenizerFactory;
318+
TokenFilterFactory[] tokenFilterFactories;
319+
String tokenizerName;
320+
if (customAnalyzer instanceof CustomAnalyzer) {
321+
CustomAnalyzer casted = (CustomAnalyzer) analyzer;
322+
charFilterFactories = casted.charFilters();
323+
tokenizerFactory = casted.tokenizerFactory();
324+
tokenFilterFactories = casted.tokenFilters();
325+
tokenizerName = casted.getTokenizerName();
326+
} else {
327+
// for ReloadableCustomAnalyzer we want to make sure we get the factories from the same components object
328+
AnalyzerComponents components = ((ReloadableCustomAnalyzer) customAnalyzer).getComponents();
329+
charFilterFactories = components.getCharFilters();
330+
tokenizerFactory = components.getTokenizerFactory();
331+
tokenFilterFactories = components.getTokenFilters();
332+
tokenizerName = components.getTokenizerName();
333+
}
314334

315335
String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
316336
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
@@ -370,7 +390,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
370390
}
371391
}
372392
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(
373-
customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
393+
tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
374394
} else {
375395
String name;
376396
if (analyzer instanceof NamedAnalyzer) {

0 commit comments

Comments
 (0)