Make Multiplexer inherit filter chains analysis mode #50662

Merged
merged 6 commits on Jan 8, 2020

Changes from all commits

@@ -29,6 +29,7 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -67,12 +68,15 @@ public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory token
if (preserveOriginal) {
filters.add(IDENTITY_FILTER);
}
// also merge and transfer token filter analysis modes with analyzer
AnalysisMode mode = AnalysisMode.ALL;
for (String filter : filterNames) {
String[] parts = Strings.tokenizeToStringArray(filter, ",");
if (parts.length == 1) {
TokenFilterFactory factory = resolveFilterFactory(allFilters, parts[0]);
factory = factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, previousTokenFilters, allFilters);
filters.add(factory);
mode = mode.merge(factory.getAnalysisMode());
} else {
List<TokenFilterFactory> existingChain = new ArrayList<>(previousTokenFilters);
List<TokenFilterFactory> chain = new ArrayList<>();
@@ -81,10 +85,12 @@ public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory token
factory = factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, existingChain, allFilters);
chain.add(factory);
existingChain.add(factory);
mode = mode.merge(factory.getAnalysisMode());
}
filters.add(chainFilters(filter, chain));
}
}
final AnalysisMode analysisMode = mode;

return new TokenFilterFactory() {
@Override
@@ -105,6 +111,11 @@ public TokenStream create(TokenStream tokenStream) {
public TokenFilterFactory getSynonymFilter() {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}

@Override
public AnalysisMode getAnalysisMode() {
return analysisMode;
}
};
}

@@ -78,5 +78,5 @@ public String getReadableName() {
* <li>INDEX_TIME.merge(SEARCH_TIME) throws an {@link IllegalStateException}</li>
* </ul>
*/
abstract AnalysisMode merge(AnalysisMode other);
public abstract AnalysisMode merge(AnalysisMode other);
}
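
As context for the change above, the following is a minimal, self-contained sketch (not the PR code, and not the real org.elasticsearch.index.analysis.AnalysisMode class) that reproduces only the merge semantics documented in the javadoc above. It illustrates why a multiplexer that wraps a search-time-only filter, such as an updateable synonym filter, must itself report SEARCH_TIME:

    // Simplified stand-in for AnalysisMode; the class name and main method are hypothetical.
    public class AnalysisModeMergeSketch {

        enum AnalysisMode {
            INDEX_TIME, SEARCH_TIME, ALL;

            AnalysisMode merge(AnalysisMode other) {
                if (this == ALL) {
                    return other;                 // ALL is compatible with any mode
                }
                if (other == ALL || this == other) {
                    return this;
                }
                // e.g. INDEX_TIME.merge(SEARCH_TIME), as documented above
                throw new IllegalStateException("cannot merge " + this + " with " + other);
            }
        }

        public static void main(String[] args) {
            // A lowercase filter runs in ALL modes; an updateable synonym filter is SEARCH_TIME only.
            AnalysisMode mode = AnalysisMode.ALL;
            for (AnalysisMode filterMode : new AnalysisMode[] { AnalysisMode.ALL, AnalysisMode.SEARCH_TIME }) {
                mode = mode.merge(filterMode);
            }
            System.out.println(mode); // SEARCH_TIME: the chain inherits its most restrictive mode
        }
    }

With the merged mode exposed through getAnalysisMode() on the multiplexer's anonymous TokenFilterFactory (first file above), an index-time analyzer built from a search-time-only chain can be rejected, which the tests below verify.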
@@ -43,35 +43,25 @@ protected Collection<Class<? extends Plugin>> getPlugins() {

public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
}
Path synonymsFile = configDir.resolve(synonymsFileName);
if (Files.exists(synonymsFile) == false) {
Files.createFile(synonymsFile);
}
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}
Path synonymsFile = setupSynonymsFile(synonymsFileName, "foo, baz");

final String indexName = "test";
final String synonymAnalyzerName = "synonym_analyzer";
final String synonymGraphAnalyzerName = "synonym_graph_analyzer";
assertAcked(client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.analyzer." + synonymGraphAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + synonymGraphAnalyzerName + ".filter", "lowercase", "synonym_graph_filter")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.synonym_graph_filter.type", "synonym_graph")
.put("analysis.filter.synonym_graph_filter.updateable", "true")
.put("analysis.filter.synonym_graph_filter.synonyms_path", synonymsFileName))
assertAcked(client().admin().indices().prepareCreate(indexName)
.setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.analyzer." + synonymGraphAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + synonymGraphAnalyzerName + ".filter", "lowercase", "synonym_graph_filter")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.synonym_graph_filter.type", "synonym_graph")
.put("analysis.filter.synonym_graph_filter.updateable", "true")
.put("analysis.filter.synonym_graph_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=" + synonymAnalyzerName));

client().prepareIndex(indexName).setId("1").setSource("field", "Foo").get();
@@ -84,8 +74,7 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException {

{
for (String analyzerName : new String[] { synonymAnalyzerName, synonymGraphAnalyzerName }) {
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName)
.get();
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName).get();
assertEquals(2, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
@@ -109,8 +98,7 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException {

{
for (String analyzerName : new String[] { synonymAnalyzerName, synonymGraphAnalyzerName }) {
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName)
.get();
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName).get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
@@ -126,8 +114,69 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
assertHitCount(response, 1L);
}

public void testSynonymsInMultiplexerUpdateable() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
Path synonymsFile = setupSynonymsFile(synonymsFileName, "foo, baz");

final String indexName = "test";
final String synonymAnalyzerName = "synonym_in_multiplexer_analyzer";
assertAcked(client().admin().indices().prepareCreate(indexName)
.setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "whitespace")
.putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "my_multiplexer")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.my_multiplexer.type", "multiplexer")
.putList("analysis.filter.my_multiplexer.filters", "synonym_filter"))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=" + synonymAnalyzerName));

client().prepareIndex(indexName).setId("1").setSource("field", "foo").get();
assertNoFailures(client().admin().indices().prepareRefresh(indexName).execute().actionGet());

SearchResponse response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);

Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(synonymAnalyzerName).get();
assertEquals(2, analyzeResponse.getTokens().size());
final Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));

// now update synonyms file and trigger reloading
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, buzz");
}
ReloadAnalyzersResponse reloadResponse = client().execute(ReloadAnalyzerAction.INSTANCE, new ReloadAnalyzersRequest(indexName))
.actionGet();
assertNoFailures(reloadResponse);
Set<String> reloadedAnalyzers = reloadResponse.getReloadDetails().get(indexName).getReloadedAnalyzers();
assertEquals(1, reloadedAnalyzers.size());
assertTrue(reloadedAnalyzers.contains(synonymAnalyzerName));

analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(synonymAnalyzerName).get();
assertEquals(3, analyzeResponse.getTokens().size());
tokens.clear();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains("buzz"));

response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 1L);
}
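
For completeness, outside the integration-test framework the reload that this test triggers via ReloadAnalyzerAction is exposed over REST as the reload search analyzers endpoint. The snippet below is a rough illustration using the low-level REST client, assuming a local 7.x cluster and an index named test; it is not part of the PR:

    import org.apache.http.HttpHost;
    import org.elasticsearch.client.Request;
    import org.elasticsearch.client.Response;
    import org.elasticsearch.client.RestClient;

    public class ReloadSearchAnalyzersSketch {
        public static void main(String[] args) throws Exception {
            try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
                // Ask the index to reload updateable search-time analyzer resources (e.g. synonym files).
                Response response = client.performRequest(new Request("POST", "/test/_reload_search_analyzers"));
                System.out.println(response.getStatusLine());
            }
        }
    }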

Contributor

Maybe also add a test asserting that a multiplexer containing updateable synonyms is rejected as an index-time analyzer as well?

public void testUpdateableSynonymsRejectedAtIndexTime() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
setupSynonymsFile(synonymsFileName, "foo, baz");
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
@@ -143,20 +192,52 @@ public void testUpdateableSynonymsRejectedAtIndexTime() throws FileNotFoundException, IOException {

final String indexName = "test";
final String analyzerName = "my_synonym_analyzer";
MapperException ex = expectThrows(MapperException.class, () -> client().admin().indices().prepareCreate(indexName)
.setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + analyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + analyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get());

assertEquals(
"Failed to parse mapping: analyzer [my_synonym_analyzer] "
+ "contains filters [synonym_filter] that are not allowed to run in all mode.",
ex.getMessage());
MapperException ex = expectThrows(MapperException.class,
Contributor

This compression of multiple settings onto single lines seems to me to make the test more difficult to follow; can we keep the indentation as it was before?

Member Author

Good point, this was not intended; it must be the new Eclipse installation doing some autoformatting.

() -> client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + analyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + analyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get());

assertEquals("Failed to parse mapping: analyzer [my_synonym_analyzer] "
+ "contains filters [synonym_filter] that are not allowed to run in all mode.", ex.getMessage());

// same for synonym filters in multiplexer chain
ex = expectThrows(MapperException.class,
() -> client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + analyzerName + ".tokenizer", "whitespace")
.putList("analysis.analyzer." + analyzerName + ".filter", "my_multiplexer")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.my_multiplexer.type", "multiplexer")
.putList("analysis.filter.my_multiplexer.filters", "synonym_filter"))
.addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get());

assertEquals("Failed to parse mapping: analyzer [my_synonym_analyzer] "
+ "contains filters [my_multiplexer] that are not allowed to run in all mode.", ex.getMessage());
}

private Path setupSynonymsFile(String synonymsFileName, String content) throws IOException {
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
}
Path synonymsFile = configDir.resolve(synonymsFileName);
if (Files.exists(synonymsFile) == false) {
Files.createFile(synonymsFile);
}
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println(content);
}
return synonymsFile;
}

}