Skip to content

Commit e23444b

Browse files
authored
wildcard analyzer (#546)
1 parent 3b3ba39 commit e23444b

File tree

6 files changed

+196
-2
lines changed

6 files changed

+196
-2
lines changed

Diff for: core/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,6 @@ public enum AnalyzerType {
4141
collation,
4242
classification,
4343
nearest_neighbors,
44-
minhash
44+
minhash,
45+
wildcard
4546
}

Diff for: core/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzer.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@
5454
@JsonSubTypes.Type(name = "collation", value = CollationAnalyzer.class),
5555
@JsonSubTypes.Type(name = "classification", value = ClassificationAnalyzer.class),
5656
@JsonSubTypes.Type(name = "nearest_neighbors", value = NearestNeighborsAnalyzer.class),
57-
@JsonSubTypes.Type(name = "minhash", value = MinHashAnalyzer.class)
57+
@JsonSubTypes.Type(name = "minhash", value = MinHashAnalyzer.class),
58+
@JsonSubTypes.Type(name = "wildcard", value = WildcardAnalyzer.class)
5859
})
5960
public abstract class SearchAnalyzer {
6061
private String name;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.arangodb.entity.arangosearch.AnalyzerType;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* An Analyzer that creates n-grams to enable fast partial matching for wildcard queries if you have large string
30+
* values, especially if you want to search for suffixes or substrings in the middle of strings (infixes) as opposed to
31+
* prefixes.
32+
* It can apply an Analyzer of your choice before creating the n-grams, for example, to normalize text for
33+
* case-insensitive and accent-insensitive search.
34+
*
35+
* @author Michele Rastelli
36+
* @see <a href= "https://docs.arangodb.com/3.12/index-and-search/analyzers/#wildcard">API Documentation</a>
37+
*/
38+
public final class WildcardAnalyzer extends SearchAnalyzer {
39+
// Wildcard-specific settings; no initializer, so this stays null until setProperties is called.
private WildcardAnalyzerProperties properties;
40+
41+
/**
 * Creates the analyzer and sets its type to {@link AnalyzerType#wildcard}.
 */
public WildcardAnalyzer() {
42+
setType(AnalyzerType.wildcard);
43+
}
44+
45+
/**
 * @return the properties object held by this analyzer, or {@code null} if none has been set
 */
public WildcardAnalyzerProperties getProperties() {
46+
return properties;
47+
}
48+
49+
/**
 * @param properties the properties object to store; replaces any previously stored value
 */
public void setProperties(WildcardAnalyzerProperties properties) {
50+
this.properties = properties;
51+
}
52+
53+
// Equal only to an object of the same runtime class that also passes super.equals
// and whose properties compare equal (null-safe via Objects.equals).
@Override
54+
public boolean equals(Object o) {
55+
if (this == o) return true;
56+
if (o == null || getClass() != o.getClass()) return false;
57+
if (!super.equals(o)) return false;
58+
WildcardAnalyzer that = (WildcardAnalyzer) o;
59+
return Objects.equals(properties, that.properties);
60+
}
61+
62+
// Combines the superclass hash with the properties hash, matching what equals compares.
@Override
63+
public int hashCode() {
64+
return Objects.hash(super.hashCode(), properties);
65+
}
66+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import java.util.Objects;
25+
26+
/**
27+
* @author Michele Rastelli
28+
*/
29+
public final class WildcardAnalyzerProperties {
30+
31+
// Boxed n-gram length; null until setNgramSize is called.
private Integer ngramSize;
32+
// Nested analyzer; null until setAnalyzer is called.
// NOTE(review): presumably the analyzer applied before the n-grams are created - confirm against the server docs.
private SearchAnalyzer analyzer;
33+
34+
/**
35+
* @return unsigned integer for the n-gram length, needs to be at least 2
36+
*/
37+
public Integer getNgramSize() {
38+
return ngramSize;
39+
}
40+
41+
/**
42+
* @param ngramSize unsigned integer for the n-gram length, needs to be at least 2
43+
*/
44+
// No range check happens here: the value is stored as given.
public void setNgramSize(Integer ngramSize) {
45+
this.ngramSize = ngramSize;
46+
}
47+
48+
/**
 * @return the nested analyzer stored in these properties, or {@code null} if none has been set
 */
public SearchAnalyzer getAnalyzer() {
49+
return analyzer;
50+
}
51+
52+
/**
 * @param analyzer the nested analyzer to store; replaces any previously stored value
 */
public void setAnalyzer(SearchAnalyzer analyzer) {
53+
this.analyzer = analyzer;
54+
}
55+
56+
// Equal only to an object of the same runtime class whose ngramSize and analyzer
// both compare equal (null-safe via Objects.equals).
@Override
57+
public boolean equals(Object o) {
58+
if (this == o) return true;
59+
if (o == null || getClass() != o.getClass()) return false;
60+
WildcardAnalyzerProperties that = (WildcardAnalyzerProperties) o;
61+
return Objects.equals(ngramSize, that.ngramSize) && Objects.equals(analyzer, that.analyzer);
62+
}
63+
64+
// Hashes the same two fields that equals compares.
@Override
65+
public int hashCode() {
66+
return Objects.hash(ngramSize, analyzer);
67+
}
68+
}

Diff for: driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java

+29
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,35 @@ void MinHashAnalyzer(ArangoDatabaseAsync db) throws ExecutionException, Interrup
10561056
createGetAndDeleteTypedAnalyzer(db, analyzer);
10571057
}
10581058

1059+
// Builds a WildcardAnalyzer that wraps a NormAnalyzer and hands it to
// createGetAndDeleteTypedAnalyzer (helper defined outside this view).
@ParameterizedTest
1060+
@MethodSource("asyncDbs")
1061+
void WildcardAnalyzer(ArangoDatabaseAsync db) throws ExecutionException, InterruptedException {
1062+
// Skipped unless isAtLeastVersion(3, 12) holds.
assumeTrue(isAtLeastVersion(3, 12));
1063+
1064+
// Settings for the nested norm analyzer: locale "ru", lower case, accent flag true.
NormAnalyzerProperties properties = new NormAnalyzerProperties();
1065+
properties.setLocale("ru");
1066+
properties.setAnalyzerCase(SearchAnalyzerCase.lower);
1067+
properties.setAccent(true);
1068+
1069+
NormAnalyzer normAnalyzer = new NormAnalyzer();
1070+
normAnalyzer.setProperties(properties);
1071+
1072+
// Wildcard settings: n-gram size 3 with the norm analyzer nested inside.
WildcardAnalyzerProperties wildcardProperties = new WildcardAnalyzerProperties();
1073+
wildcardProperties.setNgramSize(3);
1074+
wildcardProperties.setAnalyzer(normAnalyzer);
1075+
1076+
Set<AnalyzerFeature> features = new HashSet<>();
1077+
features.add(AnalyzerFeature.frequency);
1078+
features.add(AnalyzerFeature.position);
1079+
1080+
WildcardAnalyzer wildcardAnalyzer = new WildcardAnalyzer();
1081+
// Random UUID suffix; presumably avoids name clashes between runs - confirm.
wildcardAnalyzer.setName("test-" + UUID.randomUUID());
1082+
wildcardAnalyzer.setProperties(wildcardProperties);
1083+
wildcardAnalyzer.setFeatures(features);
1084+
1085+
createGetAndDeleteTypedAnalyzer(db, wildcardAnalyzer);
1086+
}
1087+
10591088
@ParameterizedTest
10601089
@MethodSource("asyncDbs")
10611090
void offsetFeature(ArangoDatabaseAsync db) throws ExecutionException, InterruptedException {

Diff for: driver/src/test/java/com/arangodb/ArangoSearchTest.java

+29
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,35 @@ void MinHashAnalyzer(ArangoDatabase db) {
10551055
createGetAndDeleteTypedAnalyzer(db, analyzer);
10561056
}
10571057

1058+
// Builds a WildcardAnalyzer that wraps a NormAnalyzer and hands it to
// createGetAndDeleteTypedAnalyzer (helper defined outside this view).
@ParameterizedTest
1059+
@MethodSource("dbs")
1060+
void WildcardAnalyzer(ArangoDatabase db) {
1061+
// Skipped unless isAtLeastVersion(3, 12) holds.
assumeTrue(isAtLeastVersion(3, 12));
1062+
1063+
// Settings for the nested norm analyzer: locale "ru", lower case, accent flag true.
NormAnalyzerProperties properties = new NormAnalyzerProperties();
1064+
properties.setLocale("ru");
1065+
properties.setAnalyzerCase(SearchAnalyzerCase.lower);
1066+
properties.setAccent(true);
1067+
1068+
NormAnalyzer normAnalyzer = new NormAnalyzer();
1069+
normAnalyzer.setProperties(properties);
1070+
1071+
// Wildcard settings: n-gram size 3 with the norm analyzer nested inside.
WildcardAnalyzerProperties wildcardProperties = new WildcardAnalyzerProperties();
1072+
wildcardProperties.setNgramSize(3);
1073+
wildcardProperties.setAnalyzer(normAnalyzer);
1074+
1075+
Set<AnalyzerFeature> features = new HashSet<>();
1076+
features.add(AnalyzerFeature.frequency);
1077+
features.add(AnalyzerFeature.position);
1078+
1079+
WildcardAnalyzer wildcardAnalyzer = new WildcardAnalyzer();
1080+
// Random UUID suffix; presumably avoids name clashes between runs - confirm.
wildcardAnalyzer.setName("test-" + UUID.randomUUID());
1081+
wildcardAnalyzer.setProperties(wildcardProperties);
1082+
wildcardAnalyzer.setFeatures(features);
1083+
1084+
createGetAndDeleteTypedAnalyzer(db, wildcardAnalyzer);
1085+
}
1086+
10581087
@ParameterizedTest
10591088
@MethodSource("dbs")
10601089
void offsetFeature(ArangoDatabase db) {

0 commit comments

Comments
 (0)