From 3c8612a27a3cbe8351e3c75e3f5f5f7fea2d0f64 Mon Sep 17 00:00:00 2001 From: Michele Rastelli Date: Wed, 28 Feb 2024 14:14:23 +0100 Subject: [PATCH 1/2] multi_delimiter analyzer --- .../entity/arangosearch/AnalyzerType.java | 1 + .../analyzer/MultiDelimiterAnalyzer.java | 64 +++++++++++++++++++ .../MultiDelimiterAnalyzerProperties.java | 58 +++++++++++++++++ .../arangosearch/analyzer/SearchAnalyzer.java | 1 + .../com/arangodb/ArangoSearchAsyncTest.java | 23 +++++++ .../java/com/arangodb/ArangoSearchTest.java | 23 +++++++ 6 files changed, 170 insertions(+) create mode 100644 core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzer.java create mode 100644 core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzerProperties.java diff --git a/core/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java b/core/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java index e92b0b191..3752ab590 100644 --- a/core/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java +++ b/core/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java @@ -26,6 +26,7 @@ public enum AnalyzerType { identity, delimiter, + multi_delimiter, stem, norm, ngram, diff --git a/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzer.java b/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzer.java new file mode 100644 index 000000000..d4a77216d --- /dev/null +++ b/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzer.java @@ -0,0 +1,64 @@ +/* + * DISCLAIMER + * + * Copyright 2016 ArangoDB GmbH, Cologne, Germany + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright holder is ArangoDB GmbH, Cologne, Germany + */ + +package com.arangodb.entity.arangosearch.analyzer; + + +import com.arangodb.entity.arangosearch.AnalyzerType; + +import java.util.Objects; + +/** + * An Analyzer capable of breaking up text into tokens using multiple delimiters. + * Unlike with the delimiter Analyzer, the multi_delimiter Analyzer does not support quoting fields. + * + * @author Michele Rastelli + * @see API Documentation + * @since ArangoDB 3.12 + */ +public final class MultiDelimiterAnalyzer extends SearchAnalyzer { + private MultiDelimiterAnalyzerProperties properties; + + public MultiDelimiterAnalyzer() { + setType(AnalyzerType.multi_delimiter); + } + + public MultiDelimiterAnalyzerProperties getProperties() { + return properties; + } + + public void setProperties(MultiDelimiterAnalyzerProperties properties) { + this.properties = properties; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + MultiDelimiterAnalyzer that = (MultiDelimiterAnalyzer) o; + return Objects.equals(properties, that.properties); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), properties); + } +} diff --git a/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzerProperties.java b/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzerProperties.java new file mode 100644 index 000000000..ae8104f61 --- /dev/null +++ b/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzerProperties.java @@ -0,0 +1,58 @@ +/* + * DISCLAIMER + * + * Copyright 2016 ArangoDB GmbH, Cologne, Germany + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright holder is ArangoDB GmbH, Cologne, Germany + */ + +package com.arangodb.entity.arangosearch.analyzer; + + +import java.util.*; + +/** + * @author Michele Rastelli + * @since ArangoDB 3.12 + */ +public final class MultiDelimiterAnalyzerProperties { + + private Collection delimiters = Collections.emptyList(); + + /** + * @return a list of strings of which each is considered as one delimiter that can be one or multiple characters + * long. The delimiters must not overlap, which means that a delimiter cannot be a prefix of another delimiter. + */ + public Collection getDelimiters() { + return delimiters; + } + + public void setDelimiters(String... delimiters) { + this.delimiters = Arrays.asList(delimiters); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MultiDelimiterAnalyzerProperties that = (MultiDelimiterAnalyzerProperties) o; + return Objects.equals(delimiters, that.delimiters); + } + + @Override + public int hashCode() { + return Objects.hash(delimiters); + } +} diff --git a/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzer.java b/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzer.java index 3c3a60ba6..860b2b9fa 100644 --- a/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzer.java +++ b/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzer.java @@ -39,6 +39,7 @@ @JsonSubTypes({ @JsonSubTypes.Type(name = "identity", value = IdentityAnalyzer.class), @JsonSubTypes.Type(name = "delimiter", value = DelimiterAnalyzer.class), + @JsonSubTypes.Type(name = "multi_delimiter", value = MultiDelimiterAnalyzer.class), @JsonSubTypes.Type(name = "stem", value = StemAnalyzer.class), @JsonSubTypes.Type(name = "norm", value = NormAnalyzer.class), @JsonSubTypes.Type(name = "ngram", value = NGramAnalyzer.class), diff --git a/driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java b/driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java index 39aa7ca2e..4b83623dc 100644 --- a/driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java +++ b/driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java @@ -482,6 +482,29 @@ void delimiterAnalyzerTyped(ArangoDatabaseAsync db) throws ExecutionException, I createGetAndDeleteTypedAnalyzer(db, analyzer); } + @ParameterizedTest + @MethodSource("asyncDbs") + void multiDelimiterAnalyzerTyped(ArangoDatabaseAsync db) throws ExecutionException, InterruptedException { + assumeTrue(isAtLeastVersion(3, 12)); + + String name = "test-" + UUID.randomUUID(); + + Set features = new HashSet<>(); + features.add(AnalyzerFeature.frequency); + features.add(AnalyzerFeature.norm); + features.add(AnalyzerFeature.position); + + MultiDelimiterAnalyzerProperties properties = new MultiDelimiterAnalyzerProperties(); + properties.setDelimiters("-", ",", "..."); + + MultiDelimiterAnalyzer analyzer = new MultiDelimiterAnalyzer(); + analyzer.setFeatures(features); + analyzer.setName(name); + analyzer.setProperties(properties); + + createGetAndDeleteTypedAnalyzer(db, analyzer); + } + @ParameterizedTest @MethodSource("asyncDbs") void stemAnalyzerTyped(ArangoDatabaseAsync db) throws ExecutionException, InterruptedException { diff --git a/driver/src/test/java/com/arangodb/ArangoSearchTest.java b/driver/src/test/java/com/arangodb/ArangoSearchTest.java index a4216477b..dd58beab7 100644 --- a/driver/src/test/java/com/arangodb/ArangoSearchTest.java +++ b/driver/src/test/java/com/arangodb/ArangoSearchTest.java @@ -481,6 +481,29 @@ void delimiterAnalyzerTyped(ArangoDatabase db) { createGetAndDeleteTypedAnalyzer(db, analyzer); } + @ParameterizedTest + @MethodSource("dbs") + void multiDelimiterAnalyzerTyped(ArangoDatabase db) { + assumeTrue(isAtLeastVersion(3, 12)); + + String name = "test-" + UUID.randomUUID(); + + Set features = new HashSet<>(); + features.add(AnalyzerFeature.frequency); + features.add(AnalyzerFeature.norm); + features.add(AnalyzerFeature.position); + + MultiDelimiterAnalyzerProperties properties = new MultiDelimiterAnalyzerProperties(); + properties.setDelimiters("-", ",", "..."); + + MultiDelimiterAnalyzer analyzer = new MultiDelimiterAnalyzer(); + analyzer.setFeatures(features); + analyzer.setName(name); + analyzer.setProperties(properties); + + createGetAndDeleteTypedAnalyzer(db, analyzer); + } + @ParameterizedTest @MethodSource("dbs") void stemAnalyzerTyped(ArangoDatabase db) { From 2bc4547accf47152fb0f77b589b66476af4e83ec Mon Sep 17 00:00:00 2001 From: Michele Rastelli Date: Wed, 28 Feb 2024 15:14:01 +0100 Subject: [PATCH 2/2] fixed multi_delimiter properties --- .../analyzer/MultiDelimiterAnalyzerProperties.java | 14 +++++++------- .../java/com/arangodb/ArangoSearchAsyncTest.java | 2 +- .../test/java/com/arangodb/ArangoSearchTest.java | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzerProperties.java b/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzerProperties.java index ae8104f61..8ddd2fbe2 100644 --- a/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzerProperties.java +++ b/core/src/main/java/com/arangodb/entity/arangosearch/analyzer/MultiDelimiterAnalyzerProperties.java @@ -29,18 +29,18 @@ */ public final class MultiDelimiterAnalyzerProperties { - private Collection delimiters = Collections.emptyList(); + private Collection delimiter = Collections.emptyList(); /** * @return a list of strings of which each is considered as one delimiter that can be one or multiple characters * long. The delimiters must not overlap, which means that a delimiter cannot be a prefix of another delimiter. */ - public Collection getDelimiters() { - return delimiters; + public Collection getDelimiter() { + return delimiter; } - public void setDelimiters(String... delimiters) { - this.delimiters = Arrays.asList(delimiters); + public void setDelimiter(String... delimiter) { + this.delimiter = Arrays.asList(delimiter); } @Override @@ -48,11 +48,11 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; MultiDelimiterAnalyzerProperties that = (MultiDelimiterAnalyzerProperties) o; - return Objects.equals(delimiters, that.delimiters); + return Objects.equals(delimiter, that.delimiter); } @Override public int hashCode() { - return Objects.hash(delimiters); + return Objects.hash(delimiter); } } diff --git a/driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java b/driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java index 4b83623dc..5c5e50875 100644 --- a/driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java +++ b/driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java @@ -495,7 +495,7 @@ void multiDelimiterAnalyzerTyped(ArangoDatabaseAsync db) throws ExecutionExcepti features.add(AnalyzerFeature.position); MultiDelimiterAnalyzerProperties properties = new MultiDelimiterAnalyzerProperties(); - properties.setDelimiters("-", ",", "..."); + properties.setDelimiter("-", ",", "..."); MultiDelimiterAnalyzer analyzer = new MultiDelimiterAnalyzer(); analyzer.setFeatures(features); diff --git a/driver/src/test/java/com/arangodb/ArangoSearchTest.java b/driver/src/test/java/com/arangodb/ArangoSearchTest.java index dd58beab7..ed974cb4f 100644 --- a/driver/src/test/java/com/arangodb/ArangoSearchTest.java +++ b/driver/src/test/java/com/arangodb/ArangoSearchTest.java @@ -494,7 +494,7 @@ void multiDelimiterAnalyzerTyped(ArangoDatabase db) { features.add(AnalyzerFeature.position); MultiDelimiterAnalyzerProperties properties = new MultiDelimiterAnalyzerProperties(); - properties.setDelimiters("-", ",", "..."); + properties.setDelimiter("-", ",", "..."); MultiDelimiterAnalyzer analyzer = new MultiDelimiterAnalyzer(); analyzer.setFeatures(features);