Skip to content

Commit 9308c0a

Browse files
committed
Register smartcn analyzer, tokenizer and tokenfilter
When the plugin starts, it should register `smartcn` analyzer, `smartcn_sentence` tokenizer and `smartcn_word` token filter. Closes #12. (cherry picked from commit b8cd4c4)
1 parent 9e780fa commit 9308c0a

File tree

4 files changed

+179
-0
lines changed

4 files changed

+179
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.indices.analysis.smartcn;
21+
22+
import org.apache.lucene.analysis.TokenStream;
23+
import org.apache.lucene.analysis.Tokenizer;
24+
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
25+
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
26+
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
27+
import org.elasticsearch.common.component.AbstractComponent;
28+
import org.elasticsearch.common.inject.Inject;
29+
import org.elasticsearch.common.lucene.Lucene;
30+
import org.elasticsearch.common.settings.Settings;
31+
import org.elasticsearch.index.analysis.*;
32+
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
33+
34+
import java.io.Reader;
35+
36+
/**
37+
* Registers indices level analysis components so, if not explicitly configured, will be shared
38+
* among all indices.
39+
*/
40+
public class SmartChineseIndicesAnalysis extends AbstractComponent {
41+
42+
@Inject
43+
public SmartChineseIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
44+
super(settings);
45+
46+
// Register smartcn analyzer
47+
indicesAnalysisService.analyzerProviderFactories().put("smartcn", new PreBuiltAnalyzerProviderFactory("smartcn", AnalyzerScope.INDICES, new SmartChineseAnalyzer(Lucene.ANALYZER_VERSION)));
48+
49+
// Register smartcn_word token filter
50+
indicesAnalysisService.tokenFilterFactories().put("smartcn_word", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
51+
@Override public String name() {
52+
return "smartcn_word";
53+
}
54+
55+
@Override public TokenStream create(TokenStream tokenStream) {
56+
return new WordTokenFilter(tokenStream);
57+
}
58+
}));
59+
60+
// Register smartcn_sentence tokenizer
61+
indicesAnalysisService.tokenizerFactories().put("smartcn_sentence", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
62+
@Override
63+
public String name() {
64+
return "smartcn_sentence";
65+
}
66+
67+
@Override
68+
public Tokenizer create(Reader reader) {
69+
return new SentenceTokenizer(reader);
70+
}
71+
}));
72+
73+
74+
}
75+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.indices.analysis.smartcn;
21+
22+
import org.elasticsearch.common.inject.AbstractModule;
23+
24+
/**
25+
*/
26+
public class SmartChineseIndicesAnalysisModule extends AbstractModule {
27+
28+
@Override
29+
protected void configure() {
30+
bind(SmartChineseIndicesAnalysis.class).asEagerSingleton();
31+
}
32+
}

src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java

+10
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,15 @@
1919

2020
package org.elasticsearch.plugin.analysis.smartcn;
2121

22+
import org.elasticsearch.common.collect.ImmutableList;
23+
import org.elasticsearch.common.inject.Module;
2224
import org.elasticsearch.index.analysis.AnalysisModule;
2325
import org.elasticsearch.index.analysis.SmartChineseAnalysisBinderProcessor;
26+
import org.elasticsearch.indices.analysis.smartcn.SmartChineseIndicesAnalysisModule;
2427
import org.elasticsearch.plugins.AbstractPlugin;
2528

29+
import java.util.Collection;
30+
2631
/**
2732
*
2833
*/
@@ -38,6 +43,11 @@ public String description() {
3843
return "Smart Chinese analysis support";
3944
}
4045

46+
@Override
47+
public Collection<Class<? extends Module>> modules() {
48+
return ImmutableList.<Class<? extends Module>>of(SmartChineseIndicesAnalysisModule.class);
49+
}
50+
4151
public void onModule(AnalysisModule module) {
4252
module.addProcessor(new SmartChineseAnalysisBinderProcessor());
4353
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Licensed to Elasticsearch (the "Author") under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Author licenses this
6+
* file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.analysis;
21+
22+
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
23+
import org.elasticsearch.test.ElasticsearchIntegrationTest;
24+
import org.junit.Test;
25+
26+
import java.util.concurrent.ExecutionException;
27+
28+
import static org.hamcrest.CoreMatchers.*;
29+
30+
@ElasticsearchIntegrationTest.ClusterScope(numNodes = 1, scope = ElasticsearchIntegrationTest.Scope.SUITE)
31+
public class SimpleSmartChineseIntegrationTests extends ElasticsearchIntegrationTest {
32+
33+
@Test
34+
public void testSmartcnAnalyzer() throws ExecutionException, InterruptedException {
35+
AnalyzeResponse response = client().admin().indices()
36+
.prepareAnalyze("叻出色").setAnalyzer("smartcn")
37+
.execute().get();
38+
39+
assertThat(response, notNullValue());
40+
assertThat(response.getTokens().size(), is(2));
41+
}
42+
43+
@Test
44+
public void testSmartcnTokenizer() throws ExecutionException, InterruptedException {
45+
AnalyzeResponse response = client().admin().indices()
46+
.prepareAnalyze("叻出色").setTokenizer("smartcn_sentence")
47+
.execute().get();
48+
49+
assertThat(response, notNullValue());
50+
assertThat(response.getTokens().size(), is(1));
51+
}
52+
53+
@Test
54+
public void testSmartcnTokenFilter() throws ExecutionException, InterruptedException {
55+
AnalyzeResponse response = client().admin().indices()
56+
.prepareAnalyze("叻出色").setTokenFilters("smartcn_word")
57+
.execute().get();
58+
59+
assertThat(response, notNullValue());
60+
assertThat(response.getTokens().size(), is(3));
61+
}
62+
}

0 commit comments

Comments
 (0)