Skip to content

Commit 5fcfad4

Browse files
committed
address review
1 parent a2af275 commit 5fcfad4

File tree

2 files changed

+15
-18
lines changed

2 files changed

+15
-18
lines changed

plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,8 @@
3030
import java.io.IOException;
3131
import java.io.Reader;
3232
import java.io.StringReader;
33-
import java.util.HashSet;
3433
import java.util.List;
3534
import java.util.Locale;
36-
import java.util.Set;
3735

3836
public class NoriTokenizerFactory extends AbstractTokenizerFactory {
3937
private static final String USER_DICT_PATH_OPTION = "user_dictionary";
@@ -49,18 +47,16 @@ public NoriTokenizerFactory(IndexSettings indexSettings, Environment env, String
4947
}
5048

5149
public static UserDictionary getUserDictionary(Environment env, Settings settings) {
50+
if (settings.get(USER_DICT_PATH_OPTION) != null && settings.get(USER_DICT_RULES_OPTION) != null) {
51+
throw new IllegalArgumentException("It is not allowed to use [" + USER_DICT_PATH_OPTION + "] in conjunction" +
52+
" with [" + USER_DICT_RULES_OPTION + "]");
53+
}
5254
List<String> ruleList = Analysis.getWordList(env, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION);
5355
StringBuilder sb = new StringBuilder();
5456
if (ruleList == null || ruleList.isEmpty()) {
5557
return null;
5658
}
57-
// check for duplicate terms
58-
Set<String> terms = new HashSet<>();
5959
for (String line : ruleList) {
60-
String[] split = line.split("\\s+");
61-
if (terms.add(split[0]) == false) {
62-
throw new IllegalArgumentException("Found duplicate term: [" + split[0] + "] in user dictionary. ");
63-
}
6460
sb.append(line).append(System.lineSeparator());
6561
}
6662
try (Reader rulesReader = new StringReader(sb.toString())) {

plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,6 @@ public void testNoriAnalyzerUserDict() throws Exception {
9292
}
9393
}
9494

95-
public void testNoriAnalyzerUserDictWithDuplicates() throws Exception {
96-
Settings settings = Settings.builder()
97-
.put("index.analysis.analyzer.my_analyzer.type", "nori")
98-
.putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "세종", "C샤프", "세종", "세종 세 종")
99-
.build();
100-
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings));
101-
assertThat(exc.getMessage(), containsString("Found duplicate term: [세종]"));
102-
103-
}
104-
10595
public void testNoriAnalyzerUserDictPath() throws Exception {
10696
Settings settings = Settings.builder()
10797
.put("index.analysis.analyzer.my_analyzer.type", "nori")
@@ -118,6 +108,17 @@ public void testNoriAnalyzerUserDictPath() throws Exception {
118108
}
119109
}
120110

111+
public void testNoriAnalyzerInvalidUserDictOption() throws Exception {
112+
Settings settings = Settings.builder()
113+
.put("index.analysis.analyzer.my_analyzer.type", "nori")
114+
.put("index.analysis.analyzer.my_analyzer.user_dictionary", "user_dict.txt")
115+
.putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "c++", "C샤프", "세종", "세종시 세종 시")
116+
.build();
117+
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings));
118+
assertThat(exc.getMessage(), containsString("It is not allowed to use [user_dictionary] in conjunction " +
119+
"with [user_dictionary_rules]"));
120+
}
121+
121122
public void testNoriTokenizer() throws Exception {
122123
Settings settings = Settings.builder()
123124
.put("index.analysis.tokenizer.my_tokenizer.type", "nori_tokenizer")

0 commit comments

Comments
 (0)