Clean up split strings by comma method

jasontedor · jasontedor · commit b01832ca44e8 · 2017-12-08T12:18:31.000-05:00
We have the methods Strings#splitStringByCommaToArray and Strings#splitStringByCommaToSet. It is not obvious that the former leaves whitespace and the latter trims it. We also have Strings#tokenizeToStringArray, which tokenizes a string to an array and trims whitespace. It seems the right thing to do here is to rename Strings#splitStringByCommaToSet to Strings#tokenizeByCommaToSet so that its name is aligned with the other method that tokenizes by a delimiter and trims whitespace. We also clean up the code here, removing an unneeded split-by-delimiter-to-set method. Relates #27715
diff --git a/core/src/main/java/org/elasticsearch/common/Strings.java b/core/src/main/java/org/elasticsearch/common/Strings.java
@@ -41,6 +41,7 @@
 import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.TreeSet;
+import java.util.function.Supplier;
 
 import static java.util.Collections.unmodifiableSet;
 import static org.elasticsearch.common.util.set.Sets.newHashSet;
@@ -410,62 +411,27 @@ public static String[] toStringArray(Collection<String> collection) {
         return collection.toArray(new String[collection.size()]);
     }
 
-    public static Set<String> splitStringByCommaToSet(final String s) {
-        return splitStringToSet(s, ',');
-    }
-
-    public static String[] splitStringByCommaToArray(final String s) {
-        if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
-        else return s.split(",");
+    /**
+     * Tokenize the specified string by commas to a set, trimming whitespace and ignoring empty tokens.
+     *
+     * @param s the string to tokenize
+     * @return the set of tokens
+     */
+    public static Set<String> tokenizeByCommaToSet(final String s) {
+        if (s == null) return Collections.emptySet();
+        return tokenizeToCollection(s, ",", HashSet::new);
     }
 
     /**
-     * A convenience method for splitting a delimited string into
-     * a set and trimming leading and trailing whitespace from all
-     * split strings.
+     * Split the specified string by commas to an array. Whitespace is not trimmed from the split values.
      *
      * @param s the string to split
-     * @param c the delimiter to split on
-     * @return the set of split strings
-     */
-    public static Set<String> splitStringToSet(final String s, final char c) {
-        if (s == null || s.isEmpty()) {
-            return Collections.emptySet();
-        }
-        final char[] chars = s.toCharArray();
-        int count = 1;
-        for (final char x : chars) {
-            if (x == c) {
-                count++;
-            }
-        }
-        final Set<String> result = new HashSet<>(count);
-        final int len = chars.length;
-        int start = 0;  // starting index in chars of the current substring.
-        int pos = 0;    // current index in chars.
-        int end = 0; // the position of the end of the current token
-        for (; pos < len; pos++) {
-            if (chars[pos] == c) {
-                int size = end - start;
-                if (size > 0) { // only add non empty strings
-                    result.add(new String(chars, start, size));
-                }
-                start = pos + 1;
-                end = start;
-            } else if (Character.isWhitespace(chars[pos])) {
-                if (start == pos) {
-                    // skip over preceding whitespace
-                    start++;
-                }
-            } else {
-                end = pos + 1;
-            }
-        }
-        int size = end - start;
-        if (size > 0) {
-            result.add(new String(chars, start, size));
-        }
-        return result;
+     * @return the array of split values
+     * @see String#split(String)
+     */
+    public static String[] splitStringByCommaToArray(final String s) {
+        if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
+        else return s.split(",");
     }
 
     /**
@@ -499,56 +465,43 @@ public static String[] split(String toSplit, String delimiter) {
      * tokens. A delimiter is always a single character; for multi-character
      * delimiters, consider using <code>delimitedListToStringArray</code>
      *
-     * @param str        the String to tokenize
+     * @param s          the String to tokenize
      * @param delimiters the delimiter characters, assembled as String
      *                   (each of those characters is individually considered as delimiter).
      * @return an array of the tokens
      * @see java.util.StringTokenizer
      * @see java.lang.String#trim()
      * @see #delimitedListToStringArray
      */
-    public static String[] tokenizeToStringArray(String str, String delimiters) {
-        return tokenizeToStringArray(str, delimiters, true, true);
+    public static String[] tokenizeToStringArray(final String s, final String delimiters) {
+        return toStringArray(tokenizeToCollection(s, delimiters, ArrayList::new));
     }
 
     /**
-     * Tokenize the given String into a String array via a StringTokenizer.
-     * <p>The given delimiters string is supposed to consist of any number of
-     * delimiter characters. Each of those characters can be used to separate
-     * tokens. A delimiter is always a single character; for multi-character
-     * delimiters, consider using <code>delimitedListToStringArray</code>
+     * Tokenizes the specified string to a collection using the specified delimiters as the token delimiters. This method trims whitespace
+     * from tokens and ignores empty tokens.
      *
-     * @param str               the String to tokenize
-     * @param delimiters        the delimiter characters, assembled as String
-     *                          (each of those characters is individually considered as delimiter)
-     * @param trimTokens        trim the tokens via String's <code>trim</code>
-     * @param ignoreEmptyTokens omit empty tokens from the result array
-     *                          (only applies to tokens that are empty after trimming; StringTokenizer
-     *                          will not consider subsequent delimiters as token in the first place).
-     * @return an array of the tokens (<code>null</code> if the input String
-     *         was <code>null</code>)
+     * @param s          the string to tokenize.
+     * @param delimiters the token delimiters
+     * @param supplier   a collection supplier
+     * @param <T>        the type of the collection
+     * @return the tokens, or {@code null} if the specified string is {@code null}
      * @see java.util.StringTokenizer
-     * @see java.lang.String#trim()
-     * @see #delimitedListToStringArray
      */
-    public static String[] tokenizeToStringArray(
-            String str, String delimiters, boolean trimTokens, boolean ignoreEmptyTokens) {
-
-        if (str == null) {
+    private static <T extends Collection<String>> T tokenizeToCollection(
+            final String s, final String delimiters, final Supplier<T> supplier) {
+        if (s == null) {
             return null;
         }
-        StringTokenizer st = new StringTokenizer(str, delimiters);
-        List<String> tokens = new ArrayList<>();
-        while (st.hasMoreTokens()) {
-            String token = st.nextToken();
-            if (trimTokens) {
-                token = token.trim();
-            }
-            if (!ignoreEmptyTokens || token.length() > 0) {
+        final StringTokenizer tokenizer = new StringTokenizer(s, delimiters);
+        final T tokens = supplier.get();
+        while (tokenizer.hasMoreTokens()) {
+            final String token = tokenizer.nextToken().trim();
+            if (token.length() > 0) {
                 tokens.add(token);
             }
         }
-        return toStringArray(tokens);
+        return tokens;
     }
 
     /**
diff --git a/core/src/main/java/org/elasticsearch/rest/AbstractRestChannel.java b/core/src/main/java/org/elasticsearch/rest/AbstractRestChannel.java
@@ -94,7 +94,7 @@ public XContentBuilder newBuilder(@Nullable XContentType requestContentType, boo
         Set<String> includes = Collections.emptySet();
         Set<String> excludes = Collections.emptySet();
         if (useFiltering) {
-            Set<String> filters = Strings.splitStringByCommaToSet(filterPath);
+            Set<String> filters = Strings.tokenizeByCommaToSet(filterPath);
             includes = filters.stream().filter(INCLUDE_FILTER).collect(toSet());
             excludes = filters.stream().filter(EXCLUDE_FILTER).map(f -> f.substring(1)).collect(toSet());
         }
diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesInfoAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesInfoAction.java
@@ -76,7 +76,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
         // still, /_nodes/_local (or any other node id) should work and be treated as usual
         // this means one must differentiate between allowed metrics and arbitrary node ids in the same place
         if (request.hasParam("nodeId") && !request.hasParam("metrics")) {
-            Set<String> metricsOrNodeIds = Strings.splitStringByCommaToSet(request.param("nodeId", "_all"));
+            Set<String> metricsOrNodeIds = Strings.tokenizeByCommaToSet(request.param("nodeId", "_all"));
             boolean isMetricsOnly = ALLOWED_METRICS.containsAll(metricsOrNodeIds);
             if (isMetricsOnly) {
                 nodeIds = new String[]{"_all"};
@@ -87,7 +87,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
             }
         } else {
             nodeIds = Strings.splitStringByCommaToArray(request.param("nodeId", "_all"));
-            metrics = Strings.splitStringByCommaToSet(request.param("metrics", "_all"));
+            metrics = Strings.tokenizeByCommaToSet(request.param("metrics", "_all"));
         }
 
         final NodesInfoRequest nodesInfoRequest = new NodesInfoRequest(nodeIds);
diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java
@@ -92,7 +92,7 @@ public String getName() {
     @Override
     public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
         String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
 
         NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodesIds);
         nodesStatsRequest.timeout(request.param("timeout"));
@@ -134,7 +134,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
 
             // check for index specific metrics
             if (metrics.contains("indices")) {
-                Set<String> indexMetrics = Strings.splitStringByCommaToSet(request.param("index_metric", "_all"));
+                Set<String> indexMetrics = Strings.tokenizeByCommaToSet(request.param("index_metric", "_all"));
                 if (indexMetrics.size() == 1 && indexMetrics.contains("_all")) {
                     nodesStatsRequest.indices(CommonStatsFlags.ALL);
                 } else {
diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesUsageAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesUsageAction.java
@@ -56,7 +56,7 @@ public RestNodesUsageAction(Settings settings, RestController controller) {
     @Override
     protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
         String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
 
         NodesUsageRequest nodesUsageRequest = new NodesUsageRequest(nodesIds);
         nodesUsageRequest.timeout(request.param("timeout"));
diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestIndicesStatsAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestIndicesStatsAction.java
@@ -91,7 +91,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
         indicesStatsRequest.indices(Strings.splitStringByCommaToArray(request.param("index")));
         indicesStatsRequest.types(Strings.splitStringByCommaToArray(request.param("types")));
 
-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
         // short cut, if no metrics have been specified in URI
         if (metrics.size() == 1 && metrics.contains("_all")) {
             indicesStatsRequest.all();
diff --git a/core/src/test/java/org/elasticsearch/common/StringsTests.java b/core/src/test/java/org/elasticsearch/common/StringsTests.java
@@ -90,30 +90,15 @@ public void testToStringToXContent() {
     }
 
     public void testSplitStringToSet() {
-        assertEquals(Strings.splitStringByCommaToSet(null), Sets.newHashSet());
-        assertEquals(Strings.splitStringByCommaToSet(""), Sets.newHashSet());
-        assertEquals(Strings.splitStringByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet(" a ,  b, c  "), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
-        assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringByCommaToSet("   a   "), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringByCommaToSet("   aa   "), Sets.newHashSet("aa"));
-        assertEquals(Strings.splitStringByCommaToSet("   "), Sets.newHashSet());
-
-        assertEquals(Strings.splitStringToSet(null, ' '), Sets.newHashSet());
-        assertEquals(Strings.splitStringToSet("", ' '), Sets.newHashSet());
-        assertEquals(Strings.splitStringToSet("a b c", ' '), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringToSet("a, b, c", ' '), Sets.newHashSet("a,","b,","c"));
-        assertEquals(Strings.splitStringToSet(" a   b c  ", ' '), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringToSet("  a   b   c  ", ' '), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringToSet("aa bb cc", ' '), Sets.newHashSet("aa","bb","cc"));
-        assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringToSet("    a    ", ' '), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringToSet(" a   ", ' '), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringToSet("a   ", ' '), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringToSet("   aa   ", ' '), Sets.newHashSet("aa"));
-        assertEquals(Strings.splitStringToSet("aa   ", ' '), Sets.newHashSet("aa"));
-        assertEquals(Strings.splitStringToSet("   ", ' '), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet(null), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet(""), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet(" a ,  b, c  "), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
+        assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
+        assertEquals(Strings.tokenizeByCommaToSet("   a   "), Sets.newHashSet("a"));
+        assertEquals(Strings.tokenizeByCommaToSet("   aa   "), Sets.newHashSet("aa"));
+        assertEquals(Strings.tokenizeByCommaToSet("   "), Sets.newHashSet());
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -94,7 +94,7 @@ public XContentBuilder newBuilder(@Nullable XContentType requestContentType, boo`
`94`	`94`	`Set<String> includes = Collections.emptySet();`
`95`	`95`	`Set<String> excludes = Collections.emptySet();`
`96`	`96`	`if (useFiltering) {`
`97`		`- Set<String> filters = Strings.splitStringByCommaToSet(filterPath);`
	`97`	`+ Set<String> filters = Strings.tokenizeByCommaToSet(filterPath);`
`98`	`98`	`includes = filters.stream().filter(INCLUDE_FILTER).collect(toSet());`
`99`	`99`	`excludes = filters.stream().filter(EXCLUDE_FILTER).map(f -> f.substring(1)).collect(toSet());`
`100`	`100`	`}`