|
19 | 19 |
|
20 | 20 | package org.elasticsearch.index.analysis;
|
21 | 21 |
|
| 22 | +import org.apache.lucene.analysis.Analyzer; |
22 | 23 | import org.apache.lucene.analysis.TokenStream;
|
23 | 24 | import org.apache.lucene.analysis.Tokenizer;
|
24 | 25 | import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
|
|
39 | 40 | import java.nio.file.Files;
|
40 | 41 | import java.nio.file.Path;
|
41 | 42 |
|
| 43 | +import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; |
| 44 | +import static org.hamcrest.CoreMatchers.containsString; |
42 | 45 | import static org.hamcrest.Matchers.equalTo;
|
43 | 46 | import static org.hamcrest.Matchers.greaterThan;
|
44 | 47 | import static org.hamcrest.Matchers.instanceOf;
|
@@ -307,4 +310,55 @@ public void testNumberFilterFactory() throws Exception {
|
307 | 310 | tokenizer.setReader(new StringReader(source));
|
308 | 311 | assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);
|
309 | 312 | }
|
| 313 | + |
| 314 | + public void testKuromojiAnalyzerUserDict() throws Exception { |
| 315 | + Settings settings = Settings.builder() |
| 316 | + .put("index.analysis.analyzer.my_analyzer.type", "kuromoji") |
| 317 | + .putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "c++,c++,w,w", "制限スピード,制限スピード,セイゲンスピード,テスト名詞") |
| 318 | + .build(); |
| 319 | + TestAnalysis analysis = createTestAnalysis(settings); |
| 320 | + Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer"); |
| 321 | + try (TokenStream stream = analyzer.tokenStream("", "制限スピード")) { |
| 322 | + assertTokenStreamContents(stream, new String[]{"制限スピード"}); |
| 323 | + } |
| 324 | + |
| 325 | + try (TokenStream stream = analyzer.tokenStream("", "c++world")) { |
| 326 | + assertTokenStreamContents(stream, new String[]{"c++", "world"}); |
| 327 | + } |
| 328 | + } |
| 329 | + |
| 330 | + public void testKuromojiAnalyzerInvalidUserDictOption() throws Exception { |
| 331 | + Settings settings = Settings.builder() |
| 332 | + .put("index.analysis.analyzer.my_analyzer.type", "kuromoji") |
| 333 | + .put("index.analysis.analyzer.my_analyzer.user_dictionary", "user_dict.txt") |
| 334 | + .putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "c++,c++,w,w") |
| 335 | + .build(); |
| 336 | + IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings)); |
| 337 | + assertThat(exc.getMessage(), containsString("It is not allowed to use [user_dictionary] in conjunction " + |
| 338 | + "with [user_dictionary_rules]")); |
| 339 | + } |
| 340 | + |
| 341 | + public void testKuromojiAnalyzerDuplicateUserDictRule() throws Exception { |
| 342 | + Settings settings = Settings.builder() |
| 343 | + .put("index.analysis.analyzer.my_analyzer.type", "kuromoji") |
| 344 | + .putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", |
| 345 | + "c++,c++,w,w", "#comment", "制限スピード,制限スピード,セイゲンスピード,テスト名詞", "制限スピード,制限スピード,セイゲンスピード,テスト名詞") |
| 346 | + .build(); |
| 347 | + IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings)); |
| 348 | + assertThat(exc.getMessage(), containsString("[制限スピード] in user dictionary at line [3]")); |
| 349 | + } |
| 350 | + |
| 351 | + private TestAnalysis createTestAnalysis(Settings analysisSettings) throws IOException { |
| 352 | + InputStream dict = KuromojiAnalysisTests.class.getResourceAsStream("user_dict.txt"); |
| 353 | + Path home = createTempDir(); |
| 354 | + Path config = home.resolve("config"); |
| 355 | + Files.createDirectory(config); |
| 356 | + Files.copy(dict, config.resolve("user_dict.txt")); |
| 357 | + Settings settings = Settings.builder() |
| 358 | + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) |
| 359 | + .put(Environment.PATH_HOME_SETTING.getKey(), home) |
| 360 | + .put(analysisSettings) |
| 361 | + .build(); |
| 362 | + return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new AnalysisKuromojiPlugin()); |
| 363 | + } |
310 | 364 | }
|
0 commit comments