|
41 | 41 | import java.util.Set;
|
42 | 42 | import java.util.StringTokenizer;
|
43 | 43 | import java.util.TreeSet;
|
| 44 | +import java.util.function.Supplier; |
44 | 45 |
|
45 | 46 | import static java.util.Collections.unmodifiableSet;
|
46 | 47 | import static org.elasticsearch.common.util.set.Sets.newHashSet;
|
@@ -410,62 +411,27 @@ public static String[] toStringArray(Collection<String> collection) {
|
410 | 411 | return collection.toArray(new String[collection.size()]);
|
411 | 412 | }
|
412 | 413 |
|
413 |
| - public static Set<String> splitStringByCommaToSet(final String s) { |
414 |
| - return splitStringToSet(s, ','); |
415 |
| - } |
416 |
| - |
417 |
| - public static String[] splitStringByCommaToArray(final String s) { |
418 |
| - if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY; |
419 |
| - else return s.split(","); |
| 414 | + /** |
| 415 | + * Tokenize the specified string by commas to a set, trimming whitespace and ignoring empty tokens. |
| 416 | + * |
| 417 | + * @param s the string to tokenize, may be {@code null} |
| 418 | + * @return the set of tokens; the immutable {@link Collections#emptySet()} if {@code s} is {@code null} |
| 419 | + */ |
| 420 | + public static Set<String> tokenizeByCommaToSet(final String s) { |
| 421 | + if (s == null) return Collections.emptySet(); |
| 422 | + return tokenizeToCollection(s, ",", HashSet::new); |
420 | 423 | }
|
421 | 424 |
|
422 | 425 | /**
|
423 |
| - * A convenience method for splitting a delimited string into |
424 |
| - * a set and trimming leading and trailing whitespace from all |
425 |
| - * split strings. |
| 426 | + * Split the specified string by commas to an array. |
426 | 427 | *
|
427 | 428 | * @param s the string to split
|
428 |
| - * @param c the delimiter to split on |
429 |
| - * @return the set of split strings |
430 |
| - */ |
431 |
| - public static Set<String> splitStringToSet(final String s, final char c) { |
432 |
| - if (s == null || s.isEmpty()) { |
433 |
| - return Collections.emptySet(); |
434 |
| - } |
435 |
| - final char[] chars = s.toCharArray(); |
436 |
| - int count = 1; |
437 |
| - for (final char x : chars) { |
438 |
| - if (x == c) { |
439 |
| - count++; |
440 |
| - } |
441 |
| - } |
442 |
| - final Set<String> result = new HashSet<>(count); |
443 |
| - final int len = chars.length; |
444 |
| - int start = 0; // starting index in chars of the current substring. |
445 |
| - int pos = 0; // current index in chars. |
446 |
| - int end = 0; // the position of the end of the current token |
447 |
| - for (; pos < len; pos++) { |
448 |
| - if (chars[pos] == c) { |
449 |
| - int size = end - start; |
450 |
| - if (size > 0) { // only add non empty strings |
451 |
| - result.add(new String(chars, start, size)); |
452 |
| - } |
453 |
| - start = pos + 1; |
454 |
| - end = start; |
455 |
| - } else if (Character.isWhitespace(chars[pos])) { |
456 |
| - if (start == pos) { |
457 |
| - // skip over preceding whitespace |
458 |
| - start++; |
459 |
| - } |
460 |
| - } else { |
461 |
| - end = pos + 1; |
462 |
| - } |
463 |
| - } |
464 |
| - int size = end - start; |
465 |
| - if (size > 0) { |
466 |
| - result.add(new String(chars, start, size)); |
467 |
| - } |
468 |
| - return result; |
| 429 | + * @return the array of split values |
| 430 | + * @see String#split(String) |
| 431 | + */ |
| 432 | + public static String[] splitStringByCommaToArray(final String s) { |
| 433 | + if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY; // nothing to split |
| 434 | + else return s.split(","); // values are not trimmed; String#split drops trailing empty strings |
469 | 435 | }
|
470 | 436 |
|
471 | 437 | /**
|
@@ -499,56 +465,43 @@ public static String[] split(String toSplit, String delimiter) {
|
499 | 465 | * tokens. A delimiter is always a single character; for multi-character
|
500 | 466 | * delimiters, consider using <code>delimitedListToStringArray</code>
|
501 | 467 | *
|
502 |
| - * @param str the String to tokenize |
| 468 | + * @param s the String to tokenize |
503 | 469 | * @param delimiters the delimiter characters, assembled as String
|
504 | 470 | * (each of those characters is individually considered as delimiter).
|
505 | 471 | * @return an array of the tokens
|
506 | 472 | * @see java.util.StringTokenizer
|
507 | 473 | * @see java.lang.String#trim()
|
508 | 474 | * @see #delimitedListToStringArray
|
509 | 475 | */
|
510 |
| - public static String[] tokenizeToStringArray(String str, String delimiters) { |
511 |
| - return tokenizeToStringArray(str, delimiters, true, true); |
| 476 | + public static String[] tokenizeToStringArray(final String s, final String delimiters) { |
| 477 | + return s == null ? null : toStringArray(tokenizeToCollection(s, delimiters, ArrayList::new)); // avoid NPE on null input |
512 | 478 | }
|
513 | 479 |
|
514 | 480 | /**
|
515 |
| - * Tokenize the given String into a String array via a StringTokenizer. |
516 |
| - * <p>The given delimiters string is supposed to consist of any number of |
517 |
| - * delimiter characters. Each of those characters can be used to separate |
518 |
| - * tokens. A delimiter is always a single character; for multi-character |
519 |
| - * delimiters, consider using <code>delimitedListToStringArray</code> |
| 481 | + * Tokenizes the specified string to a collection using the specified delimiters as the token delimiters. This method trims whitespace |
| 482 | + * from tokens and ignores empty tokens. |
520 | 483 | *
|
521 |
| - * @param str the String to tokenize |
522 |
| - * @param delimiters the delimiter characters, assembled as String |
523 |
| - * (each of those characters is individually considered as delimiter) |
524 |
| - * @param trimTokens trim the tokens via String's <code>trim</code> |
525 |
| - * @param ignoreEmptyTokens omit empty tokens from the result array |
526 |
| - * (only applies to tokens that are empty after trimming; StringTokenizer |
527 |
| - * will not consider subsequent delimiters as token in the first place). |
528 |
| - * @return an array of the tokens (<code>null</code> if the input String |
529 |
| - * was <code>null</code>) |
| 484 | + * @param s the string to tokenize |
| 485 | + * @param delimiters the token delimiters |
| 486 | + * @param supplier a collection supplier |
| 487 | + * @param <T> the type of the collection |
| 488 | + * @return the tokens, or {@code null} if {@code s} is {@code null} |
530 | 489 | * @see java.util.StringTokenizer
|
531 |
| - * @see java.lang.String#trim() |
532 |
| - * @see #delimitedListToStringArray |
533 | 490 | */
|
534 |
| - public static String[] tokenizeToStringArray( |
535 |
| - String str, String delimiters, boolean trimTokens, boolean ignoreEmptyTokens) { |
536 |
| - |
537 |
| - if (str == null) { |
| 491 | + private static <T extends Collection<String>> T tokenizeToCollection( |
| 492 | + final String s, final String delimiters, final Supplier<T> supplier) { |
| 493 | + if (s == null) { // null in, null out: the supplier is never invoked |
538 | 494 | return null;
|
539 | 495 | }
|
540 |
| - StringTokenizer st = new StringTokenizer(str, delimiters); |
541 |
| - List<String> tokens = new ArrayList<>(); |
542 |
| - while (st.hasMoreTokens()) { |
543 |
| - String token = st.nextToken(); |
544 |
| - if (trimTokens) { |
545 |
| - token = token.trim(); |
546 |
| - } |
547 |
| - if (!ignoreEmptyTokens || token.length() > 0) { |
| 496 | + final StringTokenizer tokenizer = new StringTokenizer(s, delimiters); |
| 497 | + final T tokens = supplier.get(); // the caller chooses the concrete collection type |
| 498 | + while (tokenizer.hasMoreTokens()) { |
| 499 | + final String token = tokenizer.nextToken().trim(); // tokens are always trimmed |
| 500 | + if (token.length() > 0) { // skip tokens that are empty after trimming |
548 | 501 | tokens.add(token);
|
549 | 502 | }
|
550 | 503 | }
|
551 |
| - return toStringArray(tokens); |
| 504 | + return tokens; |
552 | 505 | }
|
553 | 506 |
|
554 | 507 | /**
|
|
0 commit comments