Skip to content

Commit 7faef4a

Browse files
committed
[DE-751] multi_delimiter analyzer (#545)
* multi_delimiter analyzer * fixed multi_delimiter properties
1 parent 30fb240 commit 7faef4a

File tree

6 files changed

+170
-0
lines changed

6 files changed

+170
-0
lines changed

Diff for: core/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
public enum AnalyzerType {
2727
identity,
2828
delimiter,
29+
multi_delimiter,
2930
stem,
3031
norm,
3132
ngram,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.arangodb.entity.arangosearch.AnalyzerType;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* An Analyzer capable of breaking up text into tokens using multiple delimiters.
30+
* Unlike with the delimiter Analyzer, the multi_delimiter Analyzer does not support quoting fields.
31+
*
32+
* @author Michele Rastelli
33+
* @see <a href= "https://docs.arangodb.com/devel/index-and-search/analyzers/#multi_delimiter">API Documentation</a>
34+
* @since ArangoDB 3.12
35+
*/
36+
public final class MultiDelimiterAnalyzer extends SearchAnalyzer {
37+
private MultiDelimiterAnalyzerProperties properties;
38+
39+
public MultiDelimiterAnalyzer() {
40+
setType(AnalyzerType.multi_delimiter);
41+
}
42+
43+
public MultiDelimiterAnalyzerProperties getProperties() {
44+
return properties;
45+
}
46+
47+
public void setProperties(MultiDelimiterAnalyzerProperties properties) {
48+
this.properties = properties;
49+
}
50+
51+
@Override
52+
public boolean equals(Object o) {
53+
if (this == o) return true;
54+
if (o == null || getClass() != o.getClass()) return false;
55+
if (!super.equals(o)) return false;
56+
MultiDelimiterAnalyzer that = (MultiDelimiterAnalyzer) o;
57+
return Objects.equals(properties, that.properties);
58+
}
59+
60+
@Override
61+
public int hashCode() {
62+
return Objects.hash(super.hashCode(), properties);
63+
}
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import java.util.*;
25+
26+
/**
 * Properties of the {@code multi_delimiter} Analyzer: the set of delimiter strings used to split text.
 *
 * @author Michele Rastelli
 * @since ArangoDB 3.12
 */
public final class MultiDelimiterAnalyzerProperties {

    /** Current delimiters; never {@code null} and never modifiable through the getter. */
    private Collection<String> delimiter = Collections.emptyList();

    /**
     * @return a list of strings of which each is considered as one delimiter that can be one or multiple characters
     * long. The delimiters must not overlap, which means that a delimiter cannot be a prefix of another delimiter.
     * The returned collection is read-only.
     */
    public Collection<String> getDelimiter() {
        return delimiter;
    }

    /**
     * Replaces the configured delimiters.
     * <p>
     * The given array is copied, so changing it after this call does not affect this object
     * (nor its {@link #equals(Object)} / {@link #hashCode()} results).
     *
     * @param delimiter the delimiter strings; a {@code null} array is treated as "no delimiters"
     */
    public void setDelimiter(String... delimiter) {
        if (delimiter == null) {
            this.delimiter = Collections.emptyList();
            return;
        }
        // Arrays.asList alone is a write-through view of the caller's array; copy it to detach.
        this.delimiter = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(delimiter)));
    }

    /**
     * Two instances are equal when they are of exactly the same class and hold equal delimiter collections.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        MultiDelimiterAnalyzerProperties that = (MultiDelimiterAnalyzerProperties) o;
        return Objects.equals(delimiter, that.delimiter);
    }

    @Override
    public int hashCode() {
        return Objects.hash(delimiter);
    }
}

Diff for: core/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzer.java

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
@JsonSubTypes({
4040
@JsonSubTypes.Type(name = "identity", value = IdentityAnalyzer.class),
4141
@JsonSubTypes.Type(name = "delimiter", value = DelimiterAnalyzer.class),
42+
@JsonSubTypes.Type(name = "multi_delimiter", value = MultiDelimiterAnalyzer.class),
4243
@JsonSubTypes.Type(name = "stem", value = StemAnalyzer.class),
4344
@JsonSubTypes.Type(name = "norm", value = NormAnalyzer.class),
4445
@JsonSubTypes.Type(name = "ngram", value = NGramAnalyzer.class),

Diff for: driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java

+23
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,29 @@ void delimiterAnalyzerTyped(ArangoDatabaseAsync db) throws ExecutionException, I
482482
createGetAndDeleteTypedAnalyzer(db, analyzer);
483483
}
484484

485+
@ParameterizedTest
486+
@MethodSource("asyncDbs")
487+
void multiDelimiterAnalyzerTyped(ArangoDatabaseAsync db) throws ExecutionException, InterruptedException {
488+
assumeTrue(isAtLeastVersion(3, 12));
489+
490+
String name = "test-" + UUID.randomUUID();
491+
492+
Set<AnalyzerFeature> features = new HashSet<>();
493+
features.add(AnalyzerFeature.frequency);
494+
features.add(AnalyzerFeature.norm);
495+
features.add(AnalyzerFeature.position);
496+
497+
MultiDelimiterAnalyzerProperties properties = new MultiDelimiterAnalyzerProperties();
498+
properties.setDelimiter("-", ",", "...");
499+
500+
MultiDelimiterAnalyzer analyzer = new MultiDelimiterAnalyzer();
501+
analyzer.setFeatures(features);
502+
analyzer.setName(name);
503+
analyzer.setProperties(properties);
504+
505+
createGetAndDeleteTypedAnalyzer(db, analyzer);
506+
}
507+
485508
@ParameterizedTest
486509
@MethodSource("asyncDbs")
487510
void stemAnalyzerTyped(ArangoDatabaseAsync db) throws ExecutionException, InterruptedException {

Diff for: driver/src/test/java/com/arangodb/ArangoSearchTest.java

+23
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,29 @@ void delimiterAnalyzerTyped(ArangoDatabase db) {
481481
createGetAndDeleteTypedAnalyzer(db, analyzer);
482482
}
483483

484+
@ParameterizedTest
485+
@MethodSource("dbs")
486+
void multiDelimiterAnalyzerTyped(ArangoDatabase db) {
487+
assumeTrue(isAtLeastVersion(3, 12));
488+
489+
String name = "test-" + UUID.randomUUID();
490+
491+
Set<AnalyzerFeature> features = new HashSet<>();
492+
features.add(AnalyzerFeature.frequency);
493+
features.add(AnalyzerFeature.norm);
494+
features.add(AnalyzerFeature.position);
495+
496+
MultiDelimiterAnalyzerProperties properties = new MultiDelimiterAnalyzerProperties();
497+
properties.setDelimiter("-", ",", "...");
498+
499+
MultiDelimiterAnalyzer analyzer = new MultiDelimiterAnalyzer();
500+
analyzer.setFeatures(features);
501+
analyzer.setName(name);
502+
analyzer.setProperties(properties);
503+
504+
createGetAndDeleteTypedAnalyzer(db, analyzer);
505+
}
506+
484507
@ParameterizedTest
485508
@MethodSource("dbs")
486509
void stemAnalyzerTyped(ArangoDatabase db) {

0 commit comments

Comments
 (0)