Skip to content

Commit 661bf53

Browse files
committed
literal: fix reverse suffix optimization
This commit fixes a bug where the reverse suffix literal optimization wasn't quite right. It was too eagerly skipping past parts of the input without verifying that there was no match. We fix this by being more careful about where each literal search begins: we keep track of the starting position of the last literal matched, and subsequent literal searches then start immediately after that position. This is necessary in particular when the suffix literal can have overlapping matches. e.g., searching `000` in `0000` can match at either position 0 or 1, but searching `abc` in `abcd` can only match at position 0. This was initially reported as a bug against ripgrep: BurntSushi/ripgrep#1203
1 parent 60d087a commit 661bf53

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

src/exec.rs

+9-4
Original file line numberDiff line numberDiff line change
@@ -745,12 +745,13 @@ impl<'c> ExecNoSync<'c> {
745745
debug_assert!(lcs.len() >= 1);
746746
let mut start = original_start;
747747
let mut end = start;
748+
let mut last_literal_match = 0;
748749
while end <= text.len() {
749-
start = end;
750-
end += match lcs.find(&text[end..]) {
750+
last_literal_match += match lcs.find(&text[last_literal_match..]) {
751751
None => return Some(NoMatch(text.len())),
752-
Some(start) => start + lcs.len(),
752+
Some(i) => i,
753753
};
754+
end = last_literal_match + lcs.len();
754755
match dfa::Fsm::reverse(
755756
&self.ro.dfa_reverse,
756757
self.cache,
@@ -760,7 +761,11 @@ impl<'c> ExecNoSync<'c> {
760761
) {
761762
Match(0) | NoMatch(0) => return None,
762763
Match(s) => return Some(Match((s + start, end))),
763-
NoMatch(_) => continue,
764+
NoMatch(i) => {
765+
start = i;
766+
last_literal_match += 1;
767+
continue;
768+
}
764769
Quit => return Some(Quit),
765770
};
766771
}

tests/regression.rs

+3
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1)));
8282
ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false);
8383
ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false);
8484

85+
// See: https://github.com/BurntSushi/ripgrep/issues/1203
86+
ismatch!(wat1, r"[0-4][0-4][0-4]000", "153.230000", true);
87+
8588
// See: https://github.com/rust-lang/regex/issues/334
8689
mat!(captures_after_dfa_premature_end, r"a(b*(X|$))?", "abcbX",
8790
Some((0, 1)), None, None);

0 commit comments

Comments
 (0)