Skip to content

Cleanup split strings by comma method #27715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 8, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 36 additions & 83 deletions core/src/main/java/org/elasticsearch/common/Strings.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.function.Supplier;

import static java.util.Collections.unmodifiableSet;
import static org.elasticsearch.common.util.set.Sets.newHashSet;
Expand Down Expand Up @@ -410,62 +411,27 @@ public static String[] toStringArray(Collection<String> collection) {
return collection.toArray(new String[collection.size()]);
}

public static Set<String> splitStringByCommaToSet(final String s) {
return splitStringToSet(s, ',');
}

public static String[] splitStringByCommaToArray(final String s) {
if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
else return s.split(",");
/**
* Tokenize the specified string by commas to a set, trimming whitespace and ignoring empty tokens.
*
* @param s the string to tokenize
* @return the set of tokens
*/
public static Set<String> tokenizeByCommaToSet(final String s) {
if (s == null) return Collections.emptySet();
return tokenizeToCollection(s, ",", HashSet::new);
}

/**
* A convenience method for splitting a delimited string into
* a set and trimming leading and trailing whitespace from all
* split strings.
* Split the specified string by commas to an array.
*
* @param s the string to split
* @param c the delimiter to split on
* @return the set of split strings
*/
public static Set<String> splitStringToSet(final String s, final char c) {
if (s == null || s.isEmpty()) {
return Collections.emptySet();
}
final char[] chars = s.toCharArray();
int count = 1;
for (final char x : chars) {
if (x == c) {
count++;
}
}
final Set<String> result = new HashSet<>(count);
final int len = chars.length;
int start = 0; // starting index in chars of the current substring.
int pos = 0; // current index in chars.
int end = 0; // the position of the end of the current token
for (; pos < len; pos++) {
if (chars[pos] == c) {
int size = end - start;
if (size > 0) { // only add non empty strings
result.add(new String(chars, start, size));
}
start = pos + 1;
end = start;
} else if (Character.isWhitespace(chars[pos])) {
if (start == pos) {
// skip over preceding whitespace
start++;
}
} else {
end = pos + 1;
}
}
int size = end - start;
if (size > 0) {
result.add(new String(chars, start, size));
}
return result;
* @return the array of split values
* @see String#split(String)
*/
public static String[] splitStringByCommaToArray(final String s) {
if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
else return s.split(",");
}

/**
Expand Down Expand Up @@ -499,56 +465,43 @@ public static String[] split(String toSplit, String delimiter) {
* tokens. A delimiter is always a single character; for multi-character
* delimiters, consider using <code>delimitedListToStringArray</code>
*
* @param str the String to tokenize
* @param s the String to tokenize
* @param delimiters the delimiter characters, assembled as String
* (each of those characters is individually considered as delimiter).
* @return an array of the tokens
* @see java.util.StringTokenizer
* @see java.lang.String#trim()
* @see #delimitedListToStringArray
*/
public static String[] tokenizeToStringArray(String str, String delimiters) {
return tokenizeToStringArray(str, delimiters, true, true);
public static String[] tokenizeToStringArray(final String s, final String delimiters) {
return toStringArray(tokenizeToCollection(s, delimiters, ArrayList::new));
}

/**
* Tokenize the given String into a String array via a StringTokenizer.
* <p>The given delimiters string is supposed to consist of any number of
* delimiter characters. Each of those characters can be used to separate
* tokens. A delimiter is always a single character; for multi-character
* delimiters, consider using <code>delimitedListToStringArray</code>
* Tokenizes the specified string to a collection using the specified delimiters as the token delimiters. This method trims whitespace
* from tokens and ignores empty tokens.
*
* @param str the String to tokenize
* @param delimiters the delimiter characters, assembled as String
* (each of those characters is individually considered as delimiter)
* @param trimTokens trim the tokens via String's <code>trim</code>
* @param ignoreEmptyTokens omit empty tokens from the result array
* (only applies to tokens that are empty after trimming; StringTokenizer
* will not consider subsequent delimiters as token in the first place).
* @return an array of the tokens (<code>null</code> if the input String
* was <code>null</code>)
* @param s the string to tokenize.
* @param delimiters the token delimiters
* @param supplier a collection supplier
* @param <T> the type of the collection
* @return the tokens
* @see java.util.StringTokenizer
* @see java.lang.String#trim()
* @see #delimitedListToStringArray
*/
public static String[] tokenizeToStringArray(
String str, String delimiters, boolean trimTokens, boolean ignoreEmptyTokens) {

if (str == null) {
private static <T extends Collection<String>> T tokenizeToCollection(
final String s, final String delimiters, final Supplier<T> supplier) {
if (s == null) {
return null;
}
StringTokenizer st = new StringTokenizer(str, delimiters);
List<String> tokens = new ArrayList<>();
while (st.hasMoreTokens()) {
String token = st.nextToken();
if (trimTokens) {
token = token.trim();
}
if (!ignoreEmptyTokens || token.length() > 0) {
final StringTokenizer tokenizer = new StringTokenizer(s, delimiters);
final T tokens = supplier.get();
while (tokenizer.hasMoreTokens()) {
final String token = tokenizer.nextToken().trim();
if (token.length() > 0) {
tokens.add(token);
}
}
return toStringArray(tokens);
return tokens;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public XContentBuilder newBuilder(@Nullable XContentType requestContentType, boo
Set<String> includes = Collections.emptySet();
Set<String> excludes = Collections.emptySet();
if (useFiltering) {
Set<String> filters = Strings.splitStringByCommaToSet(filterPath);
Set<String> filters = Strings.tokenizeByCommaToSet(filterPath);
includes = filters.stream().filter(INCLUDE_FILTER).collect(toSet());
excludes = filters.stream().filter(EXCLUDE_FILTER).map(f -> f.substring(1)).collect(toSet());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
// still, /_nodes/_local (or any other node id) should work and be treated as usual
// this means one must differentiate between allowed metrics and arbitrary node ids in the same place
if (request.hasParam("nodeId") && !request.hasParam("metrics")) {
Set<String> metricsOrNodeIds = Strings.splitStringByCommaToSet(request.param("nodeId", "_all"));
Set<String> metricsOrNodeIds = Strings.tokenizeByCommaToSet(request.param("nodeId", "_all"));
boolean isMetricsOnly = ALLOWED_METRICS.containsAll(metricsOrNodeIds);
if (isMetricsOnly) {
nodeIds = new String[]{"_all"};
Expand All @@ -87,7 +87,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
}
} else {
nodeIds = Strings.splitStringByCommaToArray(request.param("nodeId", "_all"));
metrics = Strings.splitStringByCommaToSet(request.param("metrics", "_all"));
metrics = Strings.tokenizeByCommaToSet(request.param("metrics", "_all"));
}

final NodesInfoRequest nodesInfoRequest = new NodesInfoRequest(nodeIds);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public String getName() {
@Override
public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));

NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodesIds);
nodesStatsRequest.timeout(request.param("timeout"));
Expand Down Expand Up @@ -134,7 +134,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC

// check for index specific metrics
if (metrics.contains("indices")) {
Set<String> indexMetrics = Strings.splitStringByCommaToSet(request.param("index_metric", "_all"));
Set<String> indexMetrics = Strings.tokenizeByCommaToSet(request.param("index_metric", "_all"));
if (indexMetrics.size() == 1 && indexMetrics.contains("_all")) {
nodesStatsRequest.indices(CommonStatsFlags.ALL);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public RestNodesUsageAction(Settings settings, RestController controller) {
@Override
protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));

NodesUsageRequest nodesUsageRequest = new NodesUsageRequest(nodesIds);
nodesUsageRequest.timeout(request.param("timeout"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
indicesStatsRequest.indices(Strings.splitStringByCommaToArray(request.param("index")));
indicesStatsRequest.types(Strings.splitStringByCommaToArray(request.param("types")));

Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
// short cut, if no metrics have been specified in URI
if (metrics.size() == 1 && metrics.contains("_all")) {
indicesStatsRequest.all();
Expand Down
35 changes: 10 additions & 25 deletions core/src/test/java/org/elasticsearch/common/StringsTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,30 +90,15 @@ public void testToStringToXContent() {
}

public void testSplitStringToSet() {
assertEquals(Strings.splitStringByCommaToSet(null), Sets.newHashSet());
assertEquals(Strings.splitStringByCommaToSet(""), Sets.newHashSet());
assertEquals(Strings.splitStringByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringByCommaToSet(" a , b, c "), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
assertEquals(Strings.splitStringByCommaToSet(" aa "), Sets.newHashSet("aa"));
assertEquals(Strings.splitStringByCommaToSet(" "), Sets.newHashSet());

assertEquals(Strings.splitStringToSet(null, ' '), Sets.newHashSet());
assertEquals(Strings.splitStringToSet("", ' '), Sets.newHashSet());
assertEquals(Strings.splitStringToSet("a b c", ' '), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringToSet("a, b, c", ' '), Sets.newHashSet("a,","b,","c"));
assertEquals(Strings.splitStringToSet(" a b c ", ' '), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringToSet(" a b c ", ' '), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringToSet("aa bb cc", ' '), Sets.newHashSet("aa","bb","cc"));
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
assertEquals(Strings.splitStringToSet("a ", ' '), Sets.newHashSet("a"));
assertEquals(Strings.splitStringToSet(" aa ", ' '), Sets.newHashSet("aa"));
assertEquals(Strings.splitStringToSet("aa ", ' '), Sets.newHashSet("aa"));
assertEquals(Strings.splitStringToSet(" ", ' '), Sets.newHashSet());
assertEquals(Strings.tokenizeByCommaToSet(null), Sets.newHashSet());
assertEquals(Strings.tokenizeByCommaToSet(""), Sets.newHashSet());
assertEquals(Strings.tokenizeByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
assertEquals(Strings.tokenizeByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
assertEquals(Strings.tokenizeByCommaToSet(" a , b, c "), Sets.newHashSet("a","b","c"));
assertEquals(Strings.tokenizeByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
assertEquals(Strings.tokenizeByCommaToSet(" aa "), Sets.newHashSet("aa"));
assertEquals(Strings.tokenizeByCommaToSet(" "), Sets.newHashSet());
}
}