Skip to content

Commit 25eece1

Browse files
committed
Fix a bug in capture with match
Fixes rust-lang#557
1 parent 60d087a commit 25eece1

File tree

3 files changed

+44
-19
lines changed

3 files changed

+44
-19
lines changed

src/backtrack.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
9898
slots: &'s mut [Slot],
9999
input: I,
100100
start: usize,
101+
end: usize,
101102
) -> bool {
102103
let mut cache = cache.borrow_mut();
103104
let cache = &mut cache.backtrack;
@@ -109,7 +110,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
109110
slots: slots,
110111
m: cache,
111112
};
112-
b.exec_(start)
113+
b.exec_(start, end)
113114
}
114115

115116
/// Clears the cache such that the backtracking engine can be executed
@@ -147,7 +148,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
147148

148149
/// Start backtracking at the given position in the input, but also look
149150
/// for literal prefixes.
150-
fn exec_(&mut self, mut at: InputAt) -> bool {
151+
fn exec_(&mut self, mut at: InputAt, end: usize) -> bool {
151152
self.clear();
152153
// If this is an anchored regex at the beginning of the input, then
153154
// we're either already done or we only need to try backtracking once.
@@ -170,7 +171,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
170171
if matched && self.prog.matches.len() == 1 {
171172
return true;
172173
}
173-
if at.is_end() {
174+
if at.pos() == end {
174175
break;
175176
}
176177
at = self.input.at(at.next_pos());

src/exec.rs

Lines changed: 36 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010

1111
use std::cell::RefCell;
1212
use std::collections::HashMap;
13-
use std::cmp;
1413
use std::sync::Arc;
1514

1615
use thread_local::CachedThreadLocal;
@@ -877,13 +876,7 @@ impl<'c> ExecNoSync<'c> {
877876
match_start: usize,
878877
match_end: usize,
879878
) -> Option<(usize, usize)> {
880-
// We can't use match_end directly, because we may need to examine one
881-
// "character" after the end of a match for lookahead operators. We
882-
// need to move two characters beyond the end, since some look-around
883-
// operations may falsely assume a premature end of text otherwise.
884-
let e = cmp::min(
885-
next_utf8(text, next_utf8(text, match_end)), text.len());
886-
self.captures_nfa(slots, &text[..e], match_start)
879+
self.captures_nfa_type_with_end(MatchNfaType::Auto, slots, text, match_start, match_end)
887880
}
888881

889882
/// Like find_nfa, but fills in captures.
@@ -905,8 +898,19 @@ impl<'c> ExecNoSync<'c> {
905898
slots: &mut [Slot],
906899
text: &[u8],
907900
start: usize,
901+
) -> Option<(usize, usize)>{
902+
self.captures_nfa_type_with_end(ty, slots, text, start, text.len())
903+
}
904+
905+
fn captures_nfa_type_with_end(
906+
&self,
907+
ty: MatchNfaType,
908+
slots: &mut [Slot],
909+
text: &[u8],
910+
start: usize,
911+
end: usize,
908912
) -> Option<(usize, usize)> {
909-
if self.exec_nfa(ty, &mut [false], slots, false, text, start) {
913+
if self.exec_nfa_with_end(ty, &mut [false], slots, false, text, start, end) {
910914
match (slots[0], slots[1]) {
911915
(Some(s), Some(e)) => Some((s, e)),
912916
_ => None,
@@ -917,13 +921,26 @@ impl<'c> ExecNoSync<'c> {
917921
}
918922

919923
fn exec_nfa(
924+
&self,
925+
ty: MatchNfaType,
926+
matches: &mut [bool],
927+
slots: &mut [Slot],
928+
quit_after_match: bool,
929+
text: &[u8],
930+
start: usize,
931+
) -> bool {
932+
self.exec_nfa_with_end(ty, matches, slots, quit_after_match, text, start, text.len())
933+
}
934+
935+
fn exec_nfa_with_end(
920936
&self,
921937
mut ty: MatchNfaType,
922938
matches: &mut [bool],
923939
slots: &mut [Slot],
924940
quit_after_match: bool,
925941
text: &[u8],
926942
start: usize,
943+
end: usize,
927944
) -> bool {
928945
use self::MatchNfaType::*;
929946
if let Auto = ty {
@@ -935,10 +952,10 @@ impl<'c> ExecNoSync<'c> {
935952
}
936953
match ty {
937954
Auto => unreachable!(),
938-
Backtrack => self.exec_backtrack(matches, slots, text, start),
955+
Backtrack => self.exec_backtrack(matches, slots, text, start, end),
939956
PikeVM => {
940957
self.exec_pikevm(
941-
matches, slots, quit_after_match, text, start)
958+
matches, slots, quit_after_match, text, start, end)
942959
}
943960
}
944961
}
@@ -951,6 +968,7 @@ impl<'c> ExecNoSync<'c> {
951968
quit_after_match: bool,
952969
text: &[u8],
953970
start: usize,
971+
end: usize,
954972
) -> bool {
955973
if self.ro.nfa.uses_bytes() {
956974
pikevm::Fsm::exec(
@@ -960,7 +978,8 @@ impl<'c> ExecNoSync<'c> {
960978
slots,
961979
quit_after_match,
962980
ByteInput::new(text, self.ro.nfa.only_utf8),
963-
start)
981+
start,
982+
end)
964983
} else {
965984
pikevm::Fsm::exec(
966985
&self.ro.nfa,
@@ -969,7 +988,8 @@ impl<'c> ExecNoSync<'c> {
969988
slots,
970989
quit_after_match,
971990
CharInput::new(text),
972-
start)
991+
start,
992+
end)
973993
}
974994
}
975995

@@ -980,6 +1000,7 @@ impl<'c> ExecNoSync<'c> {
9801000
slots: &mut [Slot],
9811001
text: &[u8],
9821002
start: usize,
1003+
end: usize
9831004
) -> bool {
9841005
if self.ro.nfa.uses_bytes() {
9851006
backtrack::Bounded::exec(
@@ -988,15 +1009,15 @@ impl<'c> ExecNoSync<'c> {
9881009
matches,
9891010
slots,
9901011
ByteInput::new(text, self.ro.nfa.only_utf8),
991-
start)
1012+
start, end)
9921013
} else {
9931014
backtrack::Bounded::exec(
9941015
&self.ro.nfa,
9951016
self.cache,
9961017
matches,
9971018
slots,
9981019
CharInput::new(text),
999-
start)
1020+
start, end)
10001021
}
10011022
}
10021023

src/pikevm.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@ impl<'r, I: Input> Fsm<'r, I> {
107107
quit_after_match: bool,
108108
input: I,
109109
start: usize,
110+
end: usize,
110111
) -> bool {
111112
let mut cache = cache.borrow_mut();
112113
let cache = &mut cache.pikevm;
@@ -124,6 +125,7 @@ impl<'r, I: Input> Fsm<'r, I> {
124125
slots,
125126
quit_after_match,
126127
at,
128+
end,
127129
)
128130
}
129131

@@ -135,6 +137,7 @@ impl<'r, I: Input> Fsm<'r, I> {
135137
slots: &mut [Slot],
136138
quit_after_match: bool,
137139
mut at: InputAt,
140+
end: usize,
138141
) -> bool {
139142
let mut matched = false;
140143
let mut all_matched = false;
@@ -212,7 +215,7 @@ impl<'r, I: Input> Fsm<'r, I> {
212215
}
213216
}
214217
}
215-
if at.is_end() {
218+
if at.pos() == end {
216219
break;
217220
}
218221
at = at_next;

0 commit comments

Comments
 (0)