arangodb · rashtao · Jan 12, 2022 · Nov 28, 2021 · Nov 28, 2021 · Jan 12, 2022
diff --git a/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java b/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java
@@ -24,5 +24,5 @@
  * @author Michele Rastelli
  */
 public enum AnalyzerType {
-    identity, delimiter, stem, norm, ngram, text, pipeline, stopwords, aql, geojson, geopoint
+    identity, delimiter, stem, norm, ngram, text, pipeline, stopwords, aql, geojson, geopoint, segmentation, collation
 }
diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzer.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzer.java
@@ -0,0 +1,64 @@
+/*
+ * DISCLAIMER
+ *
+ * Copyright 2016 ArangoDB GmbH, Cologne, Germany
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright holder is ArangoDB GmbH, Cologne, Germany
+ */
+
+package com.arangodb.entity.arangosearch.analyzer;
+
+
+import com.arangodb.entity.arangosearch.AnalyzerType;
+
+import java.util.Objects;
+
+/**
+ * An Analyzer capable of converting the input into a set of language-specific tokens. This makes comparisons follow the
+ * rules of the respective language, most notable in range queries against Views.
+ *
+ * @author Michele Rastelli
+ * @see <a href= "https://www.arangodb.com/docs/stable/arangosearch-analyzers.html#collation">API Documentation</a>
+ * @since ArangoDB 3.9
+ */
+public class CollationAnalyzer extends SearchAnalyzer {
+    private CollationAnalyzerProperties properties;
+
+    /** Creates the analyzer with its type preset to {@link AnalyzerType#collation}. */
+    public CollationAnalyzer() {
+        setType(AnalyzerType.collation);
+    }
+
+    /** @return the properties assigned via {@link #setProperties}, or {@code null} if none were assigned */
+    public CollationAnalyzerProperties getProperties() {
+        return properties;
+    }
+
+    /** @param properties the analyzer properties to store */
+    public void setProperties(CollationAnalyzerProperties properties) {
+        this.properties = properties;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        return this == o || (o != null && getClass() == o.getClass() && super.equals(o)
+                && Objects.equals(properties, ((CollationAnalyzer) o).properties));
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(super.hashCode(), properties);
+    }
+}
diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzerProperties.java
@@ -0,0 +1,60 @@
+/*
+ * DISCLAIMER
+ *
+ * Copyright 2016 ArangoDB GmbH, Cologne, Germany
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright holder is ArangoDB GmbH, Cologne, Germany
+ */
+
+package com.arangodb.entity.arangosearch.analyzer;
+
+
+import java.util.Objects;
+
+/**
+ * Configuration properties of a {@link CollationAnalyzer}.
+ *
+ * @author Michele Rastelli
+ * @since ArangoDB 3.9
+ */
+public class CollationAnalyzerProperties {
+    private String locale;
+
+    /**
+     * @return the locale in the format {@code language[_COUNTRY][.encoding][@variant]} (square brackets denote
+     * optional parts), e.g. {@code de.utf-8} or {@code en_US.utf-8}. Only UTF-8 encoding is meaningful in ArangoDB.
+     * The locale is forwarded to ICU without checks. An invalid locale does not prevent the creation of the Analyzer.
+     * @see <a href="https://www.arangodb.com/docs/stable/arangosearch-analyzers.html#supported-languages">Supported Languages</a>
+     */
+    public String getLocale() {
+        return locale;
+    }
+
+    /** @param locale the locale to use, in the format described at {@link #getLocale()} */
+    public void setLocale(String locale) {
+        this.locale = locale;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        return this == o || (o != null && getClass() == o.getClass()
+                && Objects.equals(locale, ((CollationAnalyzerProperties) o).locale));
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(locale);
+    }
+}
diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/NormAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/NormAnalyzerProperties.java
@@ -66,6 +66,9 @@ public SearchAnalyzerCase getAnalyzerCase() {
         return analyzerCase;
     }
 
+    /**
+     * @param analyzerCase defaults to {@link SearchAnalyzerCase#none}
+     */
     public void setAnalyzerCase(SearchAnalyzerCase analyzerCase) {
         this.analyzerCase = analyzerCase;
     }

diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzerCase.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzerCase.java
@@ -36,7 +36,7 @@ public enum SearchAnalyzerCase {
     upper,
 
     /**
-     * to not change character case (default)
+     * to not change character case
      */
     none
 }
diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzer.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzer.java
@@ -0,0 +1,66 @@
+/*
+ * DISCLAIMER
+ *
+ * Copyright 2016 ArangoDB GmbH, Cologne, Germany
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright holder is ArangoDB GmbH, Cologne, Germany
+ */
+
+package com.arangodb.entity.arangosearch.analyzer;
+
+
+import com.arangodb.entity.arangosearch.AnalyzerType;
+
+import java.util.Objects;
+
+/**
+ * An Analyzer capable of breaking up the input text into tokens in a language-agnostic manner, making it suitable for
+ * mixed language strings.
+ * It can optionally preserve all non-whitespace or all characters instead of keeping alphanumeric characters only, as
+ * well as apply case conversion.
+ *
+ * @author Michele Rastelli
+ * @see <a href= "https://www.arangodb.com/docs/stable/arangosearch-analyzers.html#segmentation">API Documentation</a>
+ * @since ArangoDB 3.9
+ */
+public class SegmentationAnalyzer extends SearchAnalyzer {
+    private SegmentationAnalyzerProperties properties;
+
+    /** Creates the analyzer with its type preset to {@link AnalyzerType#segmentation}. */
+    public SegmentationAnalyzer() {
+        setType(AnalyzerType.segmentation);
+    }
+
+    /** @return the properties assigned via {@link #setProperties}, or {@code null} if none were assigned */
+    public SegmentationAnalyzerProperties getProperties() {
+        return properties;
+    }
+
+    /** @param properties the analyzer properties to store */
+    public void setProperties(SegmentationAnalyzerProperties properties) {
+        this.properties = properties;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        return this == o || (o != null && getClass() == o.getClass() && super.equals(o)
+                && Objects.equals(properties, ((SegmentationAnalyzer) o).properties));
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(super.hashCode(), properties);
+    }
+}
diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzerProperties.java
@@ -0,0 +1,78 @@
+/*
+ * DISCLAIMER
+ *
+ * Copyright 2016 ArangoDB GmbH, Cologne, Germany
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright holder is ArangoDB GmbH, Cologne, Germany
+ */
+
+package com.arangodb.entity.arangosearch.analyzer;
+
+
+import com.arangodb.velocypack.annotations.SerializedName;
+
+import java.util.Objects;
+
+/**
+ * Configuration properties of a {@link SegmentationAnalyzer}.
+ *
+ * @author Michele Rastelli
+ * @since ArangoDB 3.9
+ */
+public class SegmentationAnalyzerProperties {
+    public enum BreakMode {
+        /** return all tokens */
+        all,
+        /** return tokens composed of alphanumeric characters only */
+        alpha,
+        /** return tokens composed of non-whitespace characters only */
+        graphic
+    }
+
+    @SerializedName("break")
+    private BreakMode breakMode;
+    @SerializedName("case")
+    private SearchAnalyzerCase analyzerCase;
+
+    public BreakMode getBreakMode() {
+        return breakMode;
+    }
+
+    /** @param breakMode defaults to {@link BreakMode#alpha} */
+    public void setBreakMode(BreakMode breakMode) {
+        this.breakMode = breakMode;
+    }
+
+    public SearchAnalyzerCase getAnalyzerCase() {
+        return analyzerCase;
+    }
+
+    /** @param analyzerCase defaults to {@link SearchAnalyzerCase#lower} */
+    public void setAnalyzerCase(SearchAnalyzerCase analyzerCase) {
+        this.analyzerCase = analyzerCase;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        return this == o || (o != null && getClass() == o.getClass()
+                && breakMode == ((SegmentationAnalyzerProperties) o).breakMode
+                && analyzerCase == ((SegmentationAnalyzerProperties) o).analyzerCase);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(breakMode, analyzerCase);
+    }
+}
diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzer.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzer.java
@@ -26,14 +26,6 @@
 import java.util.Objects;
 
 /**
- * WARNING:
- * The implementation of Stopwords analyzer is not final in ArangoDB 3.8.0, so using it might result in unpredictable behavior.
- * This will be fixed in ArangoDB 3.8.1 and will have a different API.
- * Any usage of the current Java driver API related to it is therefore discouraged.
- * See related <a href="https://github.com/arangodb/arangodb-java-driver/issues/394">bug report</a>
- * <p>
- * <p>
- * <p>
  * An Analyzer capable of removing specified tokens from the input.
  *
  * @author Michele Rastelli

diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/TextAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/TextAnalyzerProperties.java
@@ -80,6 +80,9 @@ public SearchAnalyzerCase getAnalyzerCase() {
         return analyzerCase;
     }
 
+    /**
+     * @param analyzerCase defaults to {@link SearchAnalyzerCase#lower}
+     */
     public void setAnalyzerCase(SearchAnalyzerCase analyzerCase) {
         this.analyzerCase = analyzerCase;
     }

diff --git a/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java b/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java
@@ -43,18 +43,7 @@
 import com.arangodb.entity.arangosearch.PrimarySort;
 import com.arangodb.entity.arangosearch.StoreValuesType;
 import com.arangodb.entity.arangosearch.StoredValue;
-import com.arangodb.entity.arangosearch.analyzer.AQLAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.DelimiterAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.GeoJSONAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.GeoPointAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.IdentityAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.NGramAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.NormAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.PipelineAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.SearchAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.StemAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.StopwordsAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.TextAnalyzer;
+import com.arangodb.entity.arangosearch.analyzer.*;
 import com.arangodb.model.CollectionSchema;
 import com.arangodb.model.ZKDIndexOptions;
 import com.arangodb.velocypack.VPackDeserializer;
@@ -120,6 +109,10 @@ public class VPackDeserializers {
                 return context.deserialize(vpack, GeoJSONAnalyzer.class);
             case geopoint:
                 return context.deserialize(vpack, GeoPointAnalyzer.class);
+            case segmentation:
+                return context.deserialize(vpack, SegmentationAnalyzer.class);
+            case collation:
+                return context.deserialize(vpack, CollationAnalyzer.class);
             default:
                 throw new IllegalArgumentException("Unknown analyzer type: " + type);
         }
-Original file line number
+Diff line change
@@ -36,7 +36,7 @@ public enum SearchAnalyzerCase {
         upper,
         /**
-         * to not change character case (default)
+         * to not change character case
          */
         none
     }