Skip to content

Commit 9cecc31

Browse files
committed
Shortcut simple patterns ending in * (#43904)
When profiling a call to `AllocationService#reroute()` in a large cluster containing allocation filters of the form `node-name-*` I observed a nontrivial amount of time spent in `Regex#simpleMatch` due to these allocation filters. Patterns ending in a wildcard are not uncommon, and this change treats them as a special case in `Regex#simpleMatch` in order to shave a bit of time off this calculation. It also uses `String#regionMatches()` to avoid an allocation in the case that the pattern's only wildcard is at the start. Microbenchmark results before this change: Result "org.elasticsearch.common.regex.RegexStartsWithBenchmark.performSimpleMatch": 1113.839 ±(99.9%) 6.338 ns/op [Average] (min, avg, max) = (1102.388, 1113.839, 1135.783), stdev = 9.486 CI (99.9%): [1107.502, 1120.177] (assumes normal distribution) Microbenchmark results with this change applied: Result "org.elasticsearch.common.regex.RegexStartsWithBenchmark.performSimpleMatch": 433.190 ±(99.9%) 0.644 ns/op [Average] (min, avg, max) = (431.518, 433.190, 435.456), stdev = 0.964 CI (99.9%): [432.546, 433.833] (assumes normal distribution) The microbenchmark in question was: @fork(3) @WarmUp(iterations = 10) @measurement(iterations = 10) @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @SuppressWarnings("unused") //invoked by benchmarking framework public class RegexStartsWithBenchmark { private static final String testString = "abcdefghijklmnopqrstuvwxyz"; private static final String[] patterns; static { patterns = new String[testString.length() + 1]; for (int i = 0; i <= testString.length(); i++) { patterns[i] = testString.substring(0, i) + "*"; } } @benchmark public void performSimpleMatch() { for (int i = 0; i < patterns.length; i++) { Regex.simpleMatch(patterns[i], testString); } } }
1 parent 3317169 commit 9cecc31

File tree

2 files changed

+28
-7
lines changed

2 files changed

+28
-7
lines changed

server/src/main/java/org/elasticsearch/common/regex/Regex.java

+8-7
Original file line numberDiff line numberDiff line change
@@ -88,22 +88,23 @@ public static boolean simpleMatch(String pattern, String str) {
8888
if (pattern == null || str == null) {
8989
return false;
9090
}
91-
int firstIndex = pattern.indexOf('*');
91+
final int firstIndex = pattern.indexOf('*');
9292
if (firstIndex == -1) {
9393
return pattern.equals(str);
9494
}
9595
if (firstIndex == 0) {
9696
if (pattern.length() == 1) {
9797
return true;
9898
}
99-
int nextIndex = pattern.indexOf('*', firstIndex + 1);
99+
final int nextIndex = pattern.indexOf('*', firstIndex + 1);
100100
if (nextIndex == -1) {
101-
return str.endsWith(pattern.substring(1));
101+
// str.endsWith(pattern.substring(1)), but avoiding the construction of pattern.substring(1):
102+
return str.regionMatches(str.length() - pattern.length() + 1, pattern, 1, pattern.length() - 1);
102103
} else if (nextIndex == 1) {
103104
// Double wildcard "**" - skipping the first "*"
104105
return simpleMatch(pattern.substring(1), str);
105106
}
106-
String part = pattern.substring(1, nextIndex);
107+
final String part = pattern.substring(1, nextIndex);
107108
int partIndex = str.indexOf(part);
108109
while (partIndex != -1) {
109110
if (simpleMatch(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) {
@@ -113,9 +114,9 @@ public static boolean simpleMatch(String pattern, String str) {
113114
}
114115
return false;
115116
}
116-
return (str.length() >= firstIndex &&
117-
pattern.substring(0, firstIndex).equals(str.substring(0, firstIndex)) &&
118-
simpleMatch(pattern.substring(firstIndex), str.substring(firstIndex)));
117+
return str.regionMatches(0, pattern, 0, firstIndex)
118+
&& (firstIndex == pattern.length() - 1 // only wildcard in pattern is at the end, so no need to look at the rest of the string
119+
|| simpleMatch(pattern.substring(firstIndex), str.substring(firstIndex)));
119120
}
120121

121122
/**

server/src/test/java/org/elasticsearch/common/regex/RegexTests.java

+20
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,24 @@ public void testDoubleWildcardMatch() {
6363
assertTrue(Regex.simpleMatch("fff*******ddd", "fffabcddd"));
6464
assertFalse(Regex.simpleMatch("fff******ddd", "fffabcdd"));
6565
}
66+
67+
public void testSimpleMatch() {
68+
for (int i = 0; i < 1000; i++) {
69+
final String matchingString = randomAlphaOfLength(between(0, 50));
70+
71+
// construct a pattern that matches this string by repeatedly replacing random substrings with '*' characters
72+
String pattern = matchingString;
73+
for (int shrink = between(0, 5); shrink > 0; shrink--) {
74+
final int shrinkStart = between(0, pattern.length());
75+
final int shrinkEnd = between(shrinkStart, pattern.length());
76+
pattern = pattern.substring(0, shrinkStart) + "*" + pattern.substring(shrinkEnd);
77+
}
78+
assertTrue("[" + pattern + "] should match [" + matchingString + "]", Regex.simpleMatch(pattern, matchingString));
79+
80+
// construct a pattern that does not match this string by inserting a non-matching character (a digit)
81+
final int insertPos = between(0, pattern.length());
82+
pattern = pattern.substring(0, insertPos) + between(0, 9) + pattern.substring(insertPos);
83+
assertFalse("[" + pattern + "] should not match [" + matchingString + "]", Regex.simpleMatch(pattern, matchingString));
84+
}
85+
}
6686
}

0 commit comments

Comments
 (0)