Skip to content

Commit 2cc7f5a

Browse files
author
Christoph Büscher
authored
Allow reloading of search time analyzers (#43313)
Currently changing resources (like dictionaries, synonym files etc...) of search time analyzers is only possible by closing an index, changing the underlying resource (e.g. synonym files) and then re-opening the index for the change to take effect. This PR adds a new API endpoint that allows triggering reloading of certain analysis resources (currently token filters) that will then pick up changes in underlying file resources. To achieve this we introduce a new type of custom analyzer (ReloadableCustomAnalyzer) that uses a ReuseStrategy that allows swapping out analysis components. Custom analyzers that contain filters that are marked as "updateable" will automatically choose this implementation. This PR also adds this capability to `synonym` token filters for use in search time analyzers. Relates to #29051
1 parent 51b230f commit 2cc7f5a

File tree

38 files changed

+1454
-120
lines changed

38 files changed

+1454
-120
lines changed

client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -730,8 +730,8 @@ public void testApiNamingConventions() throws Exception {
730730
"indices.exists_type",
731731
"indices.get_upgrade",
732732
"indices.put_alias",
733-
"scripts_painless_execute",
734-
"render_search_template"
733+
"render_search_template",
734+
"scripts_painless_execute"
735735
};
736736
//These API are not required for high-level client feature completeness
737737
String[] notRequiredApi = new String[] {

docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ Additional settings are:
4343
* `expand` (defaults to `true`).
4444
* `lenient` (defaults to `false`). If `true` ignores exceptions while parsing the synonym configuration. It is important
4545
to note that only those synonym rules which cannot get parsed are ignored. For instance consider the following request:
46+
47+
4648

4749
[source,js]
4850
--------------------------------------------------
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
[role="xpack"]
2+
[testenv="basic"]
3+
[[indices-reload-analyzers]]
4+
== Reload Search Analyzers
5+
6+
experimental[]
7+
8+
Reloads search analyzers and their resources.
9+
10+
Synonym filters (both `synonym` and `synonym_graph`) can be declared as
11+
updateable if they are only used in <<search-analyzer,search analyzers>>
12+
with the `updateable` flag:
13+
14+
[source,js]
15+
--------------------------------------------------
16+
PUT /my_index
17+
{
18+
"settings": {
19+
"index" : {
20+
"analysis" : {
21+
"analyzer" : {
22+
"my_synonyms" : {
23+
"tokenizer" : "whitespace",
24+
"filter" : ["synonym"]
25+
}
26+
},
27+
"filter" : {
28+
"synonym" : {
29+
"type" : "synonym",
30+
"synonyms_path" : "analysis/synonym.txt",
31+
"updateable" : true <1>
32+
}
33+
}
34+
}
35+
}
36+
},
37+
"mappings": {
38+
"properties": {
39+
"text": {
40+
"type": "text",
41+
"analyzer" : "standard",
42+
"search_analyzer": "my_synonyms" <2>
43+
}
44+
}
45+
}
46+
}
47+
--------------------------------------------------
48+
// CONSOLE
49+
50+
<1> Mark the synonym filter as updateable.
51+
<2> Synonym analyzer is usable as a search_analyzer.
52+
53+
NOTE: Trying to use the above analyzer as an index analyzer will result in an error.
54+
55+
Using the <<indices-reload-analyzers,analyzer reload API>>, you can trigger reloading of the
56+
synonym definition. The contents of the configured synonyms file will be reloaded and the
57+
synonyms definition the filter uses will be updated.
58+
59+
The `_reload_search_analyzers` API can be run on one or more indices and will trigger
60+
reloading of the synonyms from the configured file.
61+
62+
NOTE: Reloading will happen on every node the index has shards on, so it's important
63+
to update the synonym file contents on every data node (even the ones that don't currently
64+
hold shard copies; shards might be relocated there in the future) before calling
65+
reload to ensure the new state of the file is reflected everywhere in the cluster.
66+
67+
[source,js]
68+
--------------------------------------------------
69+
POST /my_index/_reload_search_analyzers
70+
--------------------------------------------------
71+
// CONSOLE
72+
// TEST[s/^/PUT my_index\n/]

docs/reference/rest-api/index.asciidoc

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ not be included yet.
1515
* <<data-frame-apis,{dataframe-cap} APIs>>
1616
* <<graph-explore-api,Graph Explore API>>
1717
* <<freeze-index-api>>, <<unfreeze-index-api>>
18+
* <<indices-reload-analyzers,Reload Search Analyzers API>>
1819
* <<index-lifecycle-management-api,Index lifecycle management APIs>>
1920
* <<licensing-apis,Licensing APIs>>
2021
* <<ml-apis,Machine Learning APIs>>
@@ -38,4 +39,5 @@ include::{es-repo-dir}/rollup/rollup-api.asciidoc[]
3839
include::{xes-repo-dir}/rest-api/security.asciidoc[]
3940
include::{es-repo-dir}/indices/apis/unfreeze.asciidoc[]
4041
include::{xes-repo-dir}/rest-api/watcher.asciidoc[]
42+
include::{es-repo-dir}/indices/apis/reload-analyzers.asciidoc[]
4143
include::defs.asciidoc[]

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java

+13
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.elasticsearch.index.IndexSettings;
3131
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
3232
import org.elasticsearch.index.analysis.Analysis;
33+
import org.elasticsearch.index.analysis.AnalysisMode;
3334
import org.elasticsearch.index.analysis.CharFilterFactory;
3435
import org.elasticsearch.index.analysis.CustomAnalyzer;
3536
import org.elasticsearch.index.analysis.TokenFilterFactory;
@@ -50,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
5051
private final boolean lenient;
5152
protected final Settings settings;
5253
protected final Environment environment;
54+
private final boolean updateable;
5355

5456
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
5557
String name, Settings settings) {
@@ -65,9 +67,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
6567
this.expand = settings.getAsBoolean("expand", true);
6668
this.lenient = settings.getAsBoolean("lenient", false);
6769
this.format = settings.get("format", "");
70+
this.updateable = settings.getAsBoolean("updateable", false);
6871
this.environment = env;
6972
}
7073

74+
@Override
75+
public AnalysisMode getAnalysisMode() {
76+
return this.updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
77+
}
78+
7179
@Override
7280
public TokenStream create(TokenStream tokenStream) {
7381
throw new IllegalStateException("Call createPerAnalyzerSynonymFactory to specialize this factory for an analysis chain first");
@@ -98,6 +106,11 @@ public TokenFilterFactory getSynonymFilter() {
98106
// which doesn't support stacked input tokens
99107
return IDENTITY_FILTER;
100108
}
109+
110+
@Override
111+
public AnalysisMode getAnalysisMode() {
112+
return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
113+
}
101114
};
102115
}
103116

server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java

+26-20
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@
4242
import org.elasticsearch.index.IndexService;
4343
import org.elasticsearch.index.IndexSettings;
4444
import org.elasticsearch.index.analysis.AnalysisRegistry;
45+
import org.elasticsearch.index.analysis.AnalyzerComponents;
46+
import org.elasticsearch.index.analysis.AnalyzerComponentsProvider;
4547
import org.elasticsearch.index.analysis.CharFilterFactory;
46-
import org.elasticsearch.index.analysis.CustomAnalyzer;
4748
import org.elasticsearch.index.analysis.NameOrDefinition;
4849
import org.elasticsearch.index.analysis.NamedAnalyzer;
4950
import org.elasticsearch.index.analysis.TokenFilterFactory;
@@ -261,18 +262,23 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
261262
}
262263
}
263264

264-
CustomAnalyzer customAnalyzer = null;
265-
if (analyzer instanceof CustomAnalyzer) {
266-
customAnalyzer = (CustomAnalyzer) analyzer;
267-
} else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
268-
customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
265+
// maybe unwrap analyzer from NamedAnalyzer
266+
Analyzer potentialCustomAnalyzer = analyzer;
267+
if (analyzer instanceof NamedAnalyzer) {
268+
potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
269269
}
270270

271-
if (customAnalyzer != null) {
272-
// customAnalyzer = divide charfilter, tokenizer tokenfilters
273-
CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
274-
TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
275-
TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
271+
if (potentialCustomAnalyzer instanceof AnalyzerComponentsProvider) {
272+
AnalyzerComponentsProvider customAnalyzer = (AnalyzerComponentsProvider) potentialCustomAnalyzer;
273+
// note: this is not field-name dependent in our cases so we can leave out the argument
274+
int positionIncrementGap = potentialCustomAnalyzer.getPositionIncrementGap("");
275+
int offsetGap = potentialCustomAnalyzer.getOffsetGap("");
276+
AnalyzerComponents components = customAnalyzer.getComponents();
277+
// divide charfilter, tokenizer tokenfilters
278+
CharFilterFactory[] charFilterFactories = components.getCharFilters();
279+
TokenizerFactory tokenizerFactory = components.getTokenizerFactory();
280+
TokenFilterFactory[] tokenFilterFactories = components.getTokenFilters();
281+
String tokenizerName = components.getTokenizerName();
276282

277283
String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
278284
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
@@ -298,7 +304,7 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
298304
// analyzing only tokenizer
299305
Tokenizer tokenizer = tokenizerFactory.create();
300306
tokenizer.setReader(reader);
301-
tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, includeAttributes);
307+
tokenizerTokenListCreator.analyze(tokenizer, includeAttributes, positionIncrementGap, offsetGap);
302308

303309
// analyzing each tokenfilter
304310
if (tokenFilterFactories != null) {
@@ -308,7 +314,7 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
308314
}
309315
TokenStream stream = createStackedTokenStream(request.text()[textIndex],
310316
charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
311-
tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, includeAttributes);
317+
tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, includeAttributes, positionIncrementGap, offsetGap);
312318
}
313319
}
314320
}
@@ -331,8 +337,8 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
331337
tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
332338
}
333339
}
334-
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(charFilteredLists, new AnalyzeAction.AnalyzeTokenList(
335-
customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
340+
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(charFilteredLists,
341+
new AnalyzeAction.AnalyzeTokenList(tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
336342
} else {
337343
String name;
338344
if (analyzer instanceof NamedAnalyzer) {
@@ -343,8 +349,8 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
343349

344350
TokenListCreator tokenListCreator = new TokenListCreator(maxTokenCount);
345351
for (String text : request.text()) {
346-
tokenListCreator.analyze(analyzer.tokenStream("", text), analyzer,
347-
includeAttributes);
352+
tokenListCreator.analyze(analyzer.tokenStream("", text), includeAttributes, analyzer.getPositionIncrementGap(""),
353+
analyzer.getOffsetGap(""));
348354
}
349355
detailResponse
350356
= new AnalyzeAction.DetailAnalyzeResponse(new AnalyzeAction.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
@@ -414,7 +420,7 @@ private static class TokenListCreator {
414420
tc = new TokenCounter(maxTokenCount);
415421
}
416422

417-
private void analyze(TokenStream stream, Analyzer analyzer, Set<String> includeAttributes) {
423+
private void analyze(TokenStream stream, Set<String> includeAttributes, int positionIncrementGap, int offsetGap) {
418424
try {
419425
stream.reset();
420426
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
@@ -437,8 +443,8 @@ private void analyze(TokenStream stream, Analyzer analyzer, Set<String> includeA
437443
lastOffset += offset.endOffset();
438444
lastPosition += posIncr.getPositionIncrement();
439445

440-
lastPosition += analyzer.getPositionIncrementGap("");
441-
lastOffset += analyzer.getOffsetGap("");
446+
lastPosition += positionIncrementGap;
447+
lastOffset += offsetGap;
442448

443449
} catch (IOException e) {
444450
throw new ElasticsearchException("failed to analyze", e);

server/src/main/java/org/elasticsearch/client/IndicesAdminClient.java

+1
Original file line numberDiff line numberDiff line change
@@ -818,4 +818,5 @@ public interface IndicesAdminClient extends ElasticsearchClient {
818818
* Swaps the index pointed to by an alias given all provided conditions are satisfied
819819
*/
820820
void rolloverIndex(RolloverRequest request, ActionListener<RolloverResponse> listener);
821+
821822
}

server/src/main/java/org/elasticsearch/client/Requests.java

-1
Original file line numberDiff line numberDiff line change
@@ -534,5 +534,4 @@ public static DeleteSnapshotRequest deleteSnapshotRequest(String repository, Str
534534
public static SnapshotsStatusRequest snapshotsStatusRequest(String repository) {
535535
return new SnapshotsStatusRequest(repository);
536536
}
537-
538537
}

server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,6 @@ public IndexAnalyzers build(IndexSettings indexSettings,
527527
Map<String, TokenizerFactory> tokenizerFactoryFactories,
528528
Map<String, CharFilterFactory> charFilterFactoryFactories,
529529
Map<String, TokenFilterFactory> tokenFilterFactoryFactories) {
530-
531530
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
532531
Map<String, NamedAnalyzer> normalizers = new HashMap<>();
533532
Map<String, NamedAnalyzer> whitespaceNormalizers = new HashMap<>();
@@ -569,9 +568,11 @@ public IndexAnalyzers build(IndexSettings indexSettings,
569568
return new IndexAnalyzers(analyzers, normalizers, whitespaceNormalizers);
570569
}
571570

572-
private static NamedAnalyzer produceAnalyzer(String name, AnalyzerProvider<?> analyzerFactory,
573-
Map<String, TokenFilterFactory> tokenFilters, Map<String, CharFilterFactory> charFilters,
574-
Map<String, TokenizerFactory> tokenizers) {
571+
private static NamedAnalyzer produceAnalyzer(String name,
572+
AnalyzerProvider<?> analyzerFactory,
573+
Map<String, TokenFilterFactory> tokenFilters,
574+
Map<String, CharFilterFactory> charFilters,
575+
Map<String, TokenizerFactory> tokenizers) {
575576
/*
576577
* Lucene defaults positionIncrementGap to 0 in all analyzers but
577578
* Elasticsearch defaults them to 0 only before version 2.0

0 commit comments

Comments
 (0)