Skip to content

Commit b01832c

Browse files
committed
Cleanup split strings by comma method
We have some methods Strings#splitStringByCommaToArray and Strings#splitStringByCommaToSet. It is not obvious that the former leaves whitespace and the latter trims it. We also have Strings#tokenizeToStringArray which tokenizes a string to an array, and trims whitespace. It seems the right thing to do here is to rename Strings#splitStringByCommaToSet to Strings#tokenizeByCommaToSet so that its name is aligned with another method that tokenizes by a delimiter and trims whitespace. We also cleanup the code here, removing an unneeded splitting by delimiter to set method. Relates #27715
1 parent 4a620de commit b01832c

File tree

7 files changed

+53
-115
lines changed

7 files changed

+53
-115
lines changed

core/src/main/java/org/elasticsearch/common/Strings.java

Lines changed: 36 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import java.util.Set;
4242
import java.util.StringTokenizer;
4343
import java.util.TreeSet;
44+
import java.util.function.Supplier;
4445

4546
import static java.util.Collections.unmodifiableSet;
4647
import static org.elasticsearch.common.util.set.Sets.newHashSet;
@@ -410,62 +411,27 @@ public static String[] toStringArray(Collection<String> collection) {
410411
return collection.toArray(new String[collection.size()]);
411412
}
412413

413-
public static Set<String> splitStringByCommaToSet(final String s) {
414-
return splitStringToSet(s, ',');
415-
}
416-
417-
public static String[] splitStringByCommaToArray(final String s) {
418-
if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
419-
else return s.split(",");
414+
/**
415+
* Tokenize the specified string by commas to a set, trimming whitespace and ignoring empty tokens.
416+
*
417+
* @param s the string to tokenize
418+
* @return the set of tokens
419+
*/
420+
public static Set<String> tokenizeByCommaToSet(final String s) {
421+
if (s == null) return Collections.emptySet();
422+
return tokenizeToCollection(s, ",", HashSet::new);
420423
}
421424

422425
/**
423-
* A convenience method for splitting a delimited string into
424-
* a set and trimming leading and trailing whitespace from all
425-
* split strings.
426+
* Split the specified string by commas to an array.
426427
*
427428
* @param s the string to split
428-
* @param c the delimiter to split on
429-
* @return the set of split strings
430-
*/
431-
public static Set<String> splitStringToSet(final String s, final char c) {
432-
if (s == null || s.isEmpty()) {
433-
return Collections.emptySet();
434-
}
435-
final char[] chars = s.toCharArray();
436-
int count = 1;
437-
for (final char x : chars) {
438-
if (x == c) {
439-
count++;
440-
}
441-
}
442-
final Set<String> result = new HashSet<>(count);
443-
final int len = chars.length;
444-
int start = 0; // starting index in chars of the current substring.
445-
int pos = 0; // current index in chars.
446-
int end = 0; // the position of the end of the current token
447-
for (; pos < len; pos++) {
448-
if (chars[pos] == c) {
449-
int size = end - start;
450-
if (size > 0) { // only add non empty strings
451-
result.add(new String(chars, start, size));
452-
}
453-
start = pos + 1;
454-
end = start;
455-
} else if (Character.isWhitespace(chars[pos])) {
456-
if (start == pos) {
457-
// skip over preceding whitespace
458-
start++;
459-
}
460-
} else {
461-
end = pos + 1;
462-
}
463-
}
464-
int size = end - start;
465-
if (size > 0) {
466-
result.add(new String(chars, start, size));
467-
}
468-
return result;
429+
* @return the array of split values
430+
* @see String#split(String)
431+
*/
432+
public static String[] splitStringByCommaToArray(final String s) {
433+
if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
434+
else return s.split(",");
469435
}
470436

471437
/**
@@ -499,56 +465,43 @@ public static String[] split(String toSplit, String delimiter) {
499465
* tokens. A delimiter is always a single character; for multi-character
500466
* delimiters, consider using <code>delimitedListToStringArray</code>
501467
*
502-
* @param str the String to tokenize
468+
* @param s the String to tokenize
503469
* @param delimiters the delimiter characters, assembled as String
504470
* (each of those characters is individually considered as delimiter).
505471
* @return an array of the tokens
506472
* @see java.util.StringTokenizer
507473
* @see java.lang.String#trim()
508474
* @see #delimitedListToStringArray
509475
*/
510-
public static String[] tokenizeToStringArray(String str, String delimiters) {
511-
return tokenizeToStringArray(str, delimiters, true, true);
476+
public static String[] tokenizeToStringArray(final String s, final String delimiters) {
477+
return toStringArray(tokenizeToCollection(s, delimiters, ArrayList::new));
512478
}
513479

514480
/**
515-
* Tokenize the given String into a String array via a StringTokenizer.
516-
* <p>The given delimiters string is supposed to consist of any number of
517-
* delimiter characters. Each of those characters can be used to separate
518-
* tokens. A delimiter is always a single character; for multi-character
519-
* delimiters, consider using <code>delimitedListToStringArray</code>
481+
* Tokenizes the specified string to a collection using the specified delimiters as the token delimiters. This method trims whitespace
482+
* from tokens and ignores empty tokens.
520483
*
521-
* @param str the String to tokenize
522-
* @param delimiters the delimiter characters, assembled as String
523-
* (each of those characters is individually considered as delimiter)
524-
* @param trimTokens trim the tokens via String's <code>trim</code>
525-
* @param ignoreEmptyTokens omit empty tokens from the result array
526-
* (only applies to tokens that are empty after trimming; StringTokenizer
527-
* will not consider subsequent delimiters as token in the first place).
528-
* @return an array of the tokens (<code>null</code> if the input String
529-
* was <code>null</code>)
484+
* @param s the string to tokenize.
485+
* @param delimiters the token delimiters
486+
* @param supplier a collection supplier
487+
* @param <T> the type of the collection
488+
* @return the tokens
530489
* @see java.util.StringTokenizer
531-
* @see java.lang.String#trim()
532-
* @see #delimitedListToStringArray
533490
*/
534-
public static String[] tokenizeToStringArray(
535-
String str, String delimiters, boolean trimTokens, boolean ignoreEmptyTokens) {
536-
537-
if (str == null) {
491+
private static <T extends Collection<String>> T tokenizeToCollection(
492+
final String s, final String delimiters, final Supplier<T> supplier) {
493+
if (s == null) {
538494
return null;
539495
}
540-
StringTokenizer st = new StringTokenizer(str, delimiters);
541-
List<String> tokens = new ArrayList<>();
542-
while (st.hasMoreTokens()) {
543-
String token = st.nextToken();
544-
if (trimTokens) {
545-
token = token.trim();
546-
}
547-
if (!ignoreEmptyTokens || token.length() > 0) {
496+
final StringTokenizer tokenizer = new StringTokenizer(s, delimiters);
497+
final T tokens = supplier.get();
498+
while (tokenizer.hasMoreTokens()) {
499+
final String token = tokenizer.nextToken().trim();
500+
if (token.length() > 0) {
548501
tokens.add(token);
549502
}
550503
}
551-
return toStringArray(tokens);
504+
return tokens;
552505
}
553506

554507
/**

core/src/main/java/org/elasticsearch/rest/AbstractRestChannel.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ public XContentBuilder newBuilder(@Nullable XContentType requestContentType, boo
9494
Set<String> includes = Collections.emptySet();
9595
Set<String> excludes = Collections.emptySet();
9696
if (useFiltering) {
97-
Set<String> filters = Strings.splitStringByCommaToSet(filterPath);
97+
Set<String> filters = Strings.tokenizeByCommaToSet(filterPath);
9898
includes = filters.stream().filter(INCLUDE_FILTER).collect(toSet());
9999
excludes = filters.stream().filter(EXCLUDE_FILTER).map(f -> f.substring(1)).collect(toSet());
100100
}

core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesInfoAction.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
7676
// still, /_nodes/_local (or any other node id) should work and be treated as usual
7777
// this means one must differentiate between allowed metrics and arbitrary node ids in the same place
7878
if (request.hasParam("nodeId") && !request.hasParam("metrics")) {
79-
Set<String> metricsOrNodeIds = Strings.splitStringByCommaToSet(request.param("nodeId", "_all"));
79+
Set<String> metricsOrNodeIds = Strings.tokenizeByCommaToSet(request.param("nodeId", "_all"));
8080
boolean isMetricsOnly = ALLOWED_METRICS.containsAll(metricsOrNodeIds);
8181
if (isMetricsOnly) {
8282
nodeIds = new String[]{"_all"};
@@ -87,7 +87,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
8787
}
8888
} else {
8989
nodeIds = Strings.splitStringByCommaToArray(request.param("nodeId", "_all"));
90-
metrics = Strings.splitStringByCommaToSet(request.param("metrics", "_all"));
90+
metrics = Strings.tokenizeByCommaToSet(request.param("metrics", "_all"));
9191
}
9292

9393
final NodesInfoRequest nodesInfoRequest = new NodesInfoRequest(nodeIds);

core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public String getName() {
9292
@Override
9393
public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
9494
String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
95-
Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
95+
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
9696

9797
NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodesIds);
9898
nodesStatsRequest.timeout(request.param("timeout"));
@@ -134,7 +134,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
134134

135135
// check for index specific metrics
136136
if (metrics.contains("indices")) {
137-
Set<String> indexMetrics = Strings.splitStringByCommaToSet(request.param("index_metric", "_all"));
137+
Set<String> indexMetrics = Strings.tokenizeByCommaToSet(request.param("index_metric", "_all"));
138138
if (indexMetrics.size() == 1 && indexMetrics.contains("_all")) {
139139
nodesStatsRequest.indices(CommonStatsFlags.ALL);
140140
} else {

core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesUsageAction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ public RestNodesUsageAction(Settings settings, RestController controller) {
5656
@Override
5757
protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
5858
String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
59-
Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
59+
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
6060

6161
NodesUsageRequest nodesUsageRequest = new NodesUsageRequest(nodesIds);
6262
nodesUsageRequest.timeout(request.param("timeout"));

core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestIndicesStatsAction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
9191
indicesStatsRequest.indices(Strings.splitStringByCommaToArray(request.param("index")));
9292
indicesStatsRequest.types(Strings.splitStringByCommaToArray(request.param("types")));
9393

94-
Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
94+
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
9595
// short cut, if no metrics have been specified in URI
9696
if (metrics.size() == 1 && metrics.contains("_all")) {
9797
indicesStatsRequest.all();

core/src/test/java/org/elasticsearch/common/StringsTests.java

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -90,30 +90,15 @@ public void testToStringToXContent() {
9090
}
9191

9292
public void testSplitStringToSet() {
93-
assertEquals(Strings.splitStringByCommaToSet(null), Sets.newHashSet());
94-
assertEquals(Strings.splitStringByCommaToSet(""), Sets.newHashSet());
95-
assertEquals(Strings.splitStringByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
96-
assertEquals(Strings.splitStringByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
97-
assertEquals(Strings.splitStringByCommaToSet(" a , b, c "), Sets.newHashSet("a","b","c"));
98-
assertEquals(Strings.splitStringByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
99-
assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
100-
assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
101-
assertEquals(Strings.splitStringByCommaToSet(" aa "), Sets.newHashSet("aa"));
102-
assertEquals(Strings.splitStringByCommaToSet(" "), Sets.newHashSet());
103-
104-
assertEquals(Strings.splitStringToSet(null, ' '), Sets.newHashSet());
105-
assertEquals(Strings.splitStringToSet("", ' '), Sets.newHashSet());
106-
assertEquals(Strings.splitStringToSet("a b c", ' '), Sets.newHashSet("a","b","c"));
107-
assertEquals(Strings.splitStringToSet("a, b, c", ' '), Sets.newHashSet("a,","b,","c"));
108-
assertEquals(Strings.splitStringToSet(" a b c ", ' '), Sets.newHashSet("a","b","c"));
109-
assertEquals(Strings.splitStringToSet(" a b c ", ' '), Sets.newHashSet("a","b","c"));
110-
assertEquals(Strings.splitStringToSet("aa bb cc", ' '), Sets.newHashSet("aa","bb","cc"));
111-
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
112-
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
113-
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
114-
assertEquals(Strings.splitStringToSet("a ", ' '), Sets.newHashSet("a"));
115-
assertEquals(Strings.splitStringToSet(" aa ", ' '), Sets.newHashSet("aa"));
116-
assertEquals(Strings.splitStringToSet("aa ", ' '), Sets.newHashSet("aa"));
117-
assertEquals(Strings.splitStringToSet(" ", ' '), Sets.newHashSet());
93+
assertEquals(Strings.tokenizeByCommaToSet(null), Sets.newHashSet());
94+
assertEquals(Strings.tokenizeByCommaToSet(""), Sets.newHashSet());
95+
assertEquals(Strings.tokenizeByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
96+
assertEquals(Strings.tokenizeByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
97+
assertEquals(Strings.tokenizeByCommaToSet(" a , b, c "), Sets.newHashSet("a","b","c"));
98+
assertEquals(Strings.tokenizeByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
99+
assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
100+
assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
101+
assertEquals(Strings.tokenizeByCommaToSet(" aa "), Sets.newHashSet("aa"));
102+
assertEquals(Strings.tokenizeByCommaToSet(" "), Sets.newHashSet());
118103
}
119104
}

0 commit comments

Comments
 (0)