Skip to content

Commit cd8f6eb

Browse files
committed
compiler: fix RegexSet bug
When compiling a RegexSet, it was possible for the jump locations to become incorrect if the last regex in the set had a starting location that didn't correspond to the beginning of its program. This can happen in simple cases like when your set consists of the regexes `a` and `β`. In particular, the program for `β` is:

    0: Bytes(\xB2) (goto 2)
    1: Bytes(\xCE) (goto 0)
    2: MATCH

where the entry point is `1` instead of `0`. To fix this, we compile a set of regexes similarly to how we compile `a|β`, where we handle the holes produced by sub-expressions correctly.

Fixes #353
1 parent 68bc958 commit cd8f6eb

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

Diff for: src/compile.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -178,18 +178,19 @@ impl Compiler {
178178
}
179179
self.fill_to_next(dotstar_patch.hole);
180180

181+
let mut prev_hole = Hole::None;
181182
for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
183+
self.fill_to_next(prev_hole);
182184
let split = self.push_split_hole();
183185
let Patch { hole, entry } = try!(self.c_capture(0, expr));
184186
self.fill_to_next(hole);
185187
self.compiled.matches.push(self.insts.len());
186188
self.push_compiled(Inst::Match(i));
187-
188-
let next = self.insts.len();
189-
self.fill_split(split, Some(entry), Some(next));
189+
prev_hole = self.fill_split(split, Some(entry), None);
190190
}
191191
let i = exprs.len() - 1;
192-
let Patch { hole, .. } = try!(self.c_capture(0, &exprs[i]));
192+
let Patch { hole, entry } = try!(self.c_capture(0, &exprs[i]));
193+
self.fill(prev_hole, entry);
193194
self.fill_to_next(hole);
194195
self.compiled.matches.push(self.insts.len());
195196
self.push_compiled(Inst::Match(i));

Diff for: tests/set.rs

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ matset!(set14, &[r".*", "a"], "zzzzzz", 0);
1515
matset!(set15, &[r"\ba\b"], "hello a bye", 0);
1616
matset!(set16, &["a"], "a", 0);
1717
matset!(set17, &[".*a"], "a", 0);
18+
matset!(set18, &["a", "β"], "β", 1);
1819

1920
nomatset!(nset1, &["a", "a"], "b");
2021
nomatset!(nset2, &["^foo", "bar$"], "bar foo");

0 commit comments

Comments
 (0)