|
19 | 19 |
|
20 | 20 | package org.elasticsearch.index.analysis;
|
21 | 21 |
|
| 22 | +import org.apache.lucene.analysis.BaseTokenStreamTestCase; |
| 23 | +import org.apache.lucene.analysis.Tokenizer; |
| 24 | +import org.apache.lucene.analysis.core.WhitespaceTokenizer; |
22 | 25 | import org.elasticsearch.Version;
|
23 | 26 | import org.elasticsearch.cluster.metadata.IndexMetaData;
|
24 | 27 | import org.elasticsearch.common.settings.Settings;
|
25 | 28 | import org.elasticsearch.index.Index;
|
26 | 29 | import org.elasticsearch.plugin.analysis.AnalysisPhoneticPlugin;
|
27 | 30 | import org.elasticsearch.test.ESTestCase;
|
28 | 31 | import org.hamcrest.MatcherAssert;
|
| 32 | +import org.junit.Before; |
29 | 33 |
|
30 | 34 | import java.io.IOException;
|
| 35 | +import java.io.StringReader; |
31 | 36 |
|
32 | 37 | import static org.hamcrest.Matchers.instanceOf;
|
33 | 38 |
|
34 | 39 | public class SimplePhoneticAnalysisTests extends ESTestCase {
|
35 |
| - public void testPhoneticTokenFilterFactory() throws IOException { |
| 40 | + |
| 41 | + private TestAnalysis analysis; |
| 42 | + |
| 43 | + @Before |
| 44 | + public void setup() throws IOException { |
36 | 45 | String yaml = "/org/elasticsearch/index/analysis/phonetic-1.yml";
|
37 | 46 | Settings settings = Settings.builder().loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
|
38 | 47 | .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
39 | 48 | .build();
|
40 |
| - TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisPhoneticPlugin()); |
| 49 | + this.analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisPhoneticPlugin()); |
| 50 | + } |
| 51 | + |
| 52 | + public void testPhoneticTokenFilterFactory() throws IOException { |
41 | 53 | TokenFilterFactory filterFactory = analysis.tokenFilter.get("phonetic");
|
42 | 54 | MatcherAssert.assertThat(filterFactory, instanceOf(PhoneticTokenFilterFactory.class));
|
43 | 55 | }
|
| 56 | + |
| 57 | + public void testPhoneticTokenFilterBeiderMorseNoLanguage() throws IOException { |
| 58 | + TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilter"); |
| 59 | + Tokenizer tokenizer = new WhitespaceTokenizer(); |
| 60 | + tokenizer.setReader(new StringReader("ABADIAS")); |
| 61 | + String[] expected = new String[] { "abYdias", "abYdios", "abadia", "abadiaS", "abadias", "abadio", "abadioS", "abadios", "abodia", |
| 62 | + "abodiaS", "abodias", "abodio", "abodioS", "abodios", "avadias", "avadios", "avodias", "avodios", "obadia", "obadiaS", |
| 63 | + "obadias", "obadio", "obadioS", "obadios", "obodia", "obodiaS", "obodias", "obodioS" }; |
| 64 | + BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected); |
| 65 | + } |
| 66 | + |
| 67 | + public void testPhoneticTokenFilterBeiderMorseWithLanguage() throws IOException { |
| 68 | + TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilterfrench"); |
| 69 | + Tokenizer tokenizer = new WhitespaceTokenizer(); |
| 70 | + tokenizer.setReader(new StringReader("Rimbault")); |
| 71 | + String[] expected = new String[] { "rimbD", "rimbDlt", "rimba", "rimbalt", "rimbo", "rimbolt", "rimbu", "rimbult", "rmbD", "rmbDlt", |
| 72 | + "rmba", "rmbalt", "rmbo", "rmbolt", "rmbu", "rmbult" }; |
| 73 | + BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected); |
| 74 | + } |
44 | 75 | }
|
0 commit comments