Skip to content

Commit 7297f23

Browse files
committed
Auto merge of #343 - BurntSushi:fixes, r=BurntSushi
Fixes: This PR contains a series of commits that fix several minor bugs. Fixes #321, Fixes #334, Fixes #326, Fixes #333, Fixes #338
2 parents 7dfa895 + 9ae9418 commit 7297f23

File tree

7 files changed

+89
-15
lines changed

7 files changed

+89
-15
lines changed

Diff for: regex-syntax/src/literals.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
819819
let n = cmp::min(lits.limit_size, min as usize);
820820
let es = iter::repeat(e.clone()).take(n).collect();
821821
f(&Concat(es), lits);
822-
if n < min as usize {
822+
if n < min as usize || lits.contains_empty() {
823823
lits.cut();
824824
}
825825
}
@@ -1156,8 +1156,9 @@ mod tests {
11561156

11571157
// Test regexes with empty assertions.
11581158
test_lit!(pfx_empty1, prefixes, "^a", M("a"));
1159-
test_lit!(pfx_empty2, prefixes, "^abc", M("abc"));
1160-
test_lit!(pfx_empty3, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));
1159+
test_lit!(pfx_empty2, prefixes, "a${2}", C("a"));
1160+
test_lit!(pfx_empty3, prefixes, "^abc", M("abc"));
1161+
test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));
11611162

11621163
// Make sure some curious regexes have no prefixes.
11631164
test_lit!(pfx_nothing1, prefixes, ".");
@@ -1306,6 +1307,7 @@ mod tests {
13061307

13071308
// Test regexes with empty assertions.
13081309
test_lit!(sfx_empty1, suffixes, "a$", M("a"));
1310+
test_lit!(sfx_empty2, suffixes, "${2}a", C("a"));
13091311

13101312
// Make sure some curious regexes have no suffixes.
13111313
test_lit!(sfx_nothing1, suffixes, ".");

Diff for: src/exec.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -850,9 +850,12 @@ impl<'c> ExecNoSync<'c> {
850850
match_start: usize,
851851
match_end: usize,
852852
) -> Option<(usize, usize)> {
853-
// We can't use match_end directly, because we may need to examine
854-
// one "character" after the end of a match for lookahead operators.
855-
let e = cmp::min(next_utf8(text, match_end), text.len());
853+
// We can't use match_end directly, because we may need to examine one
854+
// "character" after the end of a match for lookahead operators. We
855+
// need to move two characters beyond the end, since some look-around
856+
// operations may falsely assume a premature end of text otherwise.
857+
let e = cmp::min(
858+
next_utf8(text, next_utf8(text, match_end)), text.len());
856859
self.captures_nfa(slots, &text[..e], match_start)
857860
}
858861

Diff for: src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
161161
# }
162162
```
163163
164+
If you wish to match against whitespace in this mode, you can still use `\s`,
165+
`\n`, `\t`, etc. For escaping a single space character, you can use its hex
166+
character code `\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`.
167+
164168
# Example: match multiple regular expressions simultaneously
165169
166170
This demonstrates how to use a `RegexSet` to match multiple (possibly

Diff for: src/re_builder.rs

-4
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,6 @@ impl RegexBuilder {
115115
}
116116

117117
/// Set the value for the Unicode (`u`) flag.
118-
///
119-
/// For byte based regular expressions, this is disabled by default.
120118
pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
121119
self.0.unicode = yes;
122120
self
@@ -228,8 +226,6 @@ impl RegexSetBuilder {
228226
}
229227

230228
/// Set the value for the Unicode (`u`) flag.
231-
///
232-
/// For byte based regular expressions, this is disabled by default.
233229
pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
234230
self.0.unicode = yes;
235231
self

Diff for: src/re_bytes.rs

+32-5
Original file line numberDiff line numberDiff line change
@@ -427,12 +427,23 @@ impl Regex {
427427
/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
428428
/// would produce the same result. To write a literal `$` use `$$`.
429429
///
430-
/// If `$name` isn't a valid capture group (whether the name doesn't exist
431-
/// or isn't a valid index), then it is replaced with the empty string.
430+
/// Sometimes the replacement string requires use of curly braces to
431+
/// delineate a capture group replacement and surrounding literal text.
432+
/// For example, if we wanted to join two words together with an
433+
/// underscore:
432434
///
433-
/// The longest possible name is used. e.g., `$1a` looks up the capture
434-
/// group named `1a` and not the capture group at index `1`. To exert more
435-
/// precise control over the name, use braces, e.g., `${1}a`.
435+
/// ```rust
436+
/// # extern crate regex; use regex::bytes::Regex;
437+
/// # fn main() {
438+
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
439+
/// let result = re.replace(b"deep fried", &b"${first}_$second"[..]);
440+
/// assert_eq!(result, &b"deep_fried"[..]);
441+
/// # }
442+
/// ```
443+
///
444+
/// Without the curly braces, the capture group name `first_` would be
445+
/// used, and since it doesn't exist, it would be replaced with the empty
446+
/// string.
436447
///
437448
/// Finally, sometimes you just want to replace a literal string with no
438449
/// regard for capturing group expansion. This can be done by wrapping a
@@ -778,6 +789,22 @@ impl<'t> Captures<'t> {
778789
/// Returns the match associated with the capture group at index `i`. If
779790
/// `i` does not correspond to a capture group, or if the capture group
780791
/// did not participate in the match, then `None` is returned.
792+
///
793+
/// # Examples
794+
///
795+
/// Get the text of the match with a default of an empty string if this
796+
/// group didn't participate in the match:
797+
///
798+
/// ```rust
799+
/// # use regex::bytes::Regex;
800+
/// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
801+
/// let caps = re.captures(b"abc123").unwrap();
802+
///
803+
/// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes());
804+
/// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes());
805+
/// assert_eq!(text1, &b"123"[..]);
806+
/// assert_eq!(text2, &b""[..]);
807+
/// ```
781808
pub fn get(&self, i: usize) -> Option<Match<'t>> {
782809
self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e))
783810
}

Diff for: src/re_unicode.rs

+34
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,24 @@ impl Regex {
501501
/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
502502
/// would produce the same result. To write a literal `$` use `$$`.
503503
///
504+
/// Sometimes the replacement string requires use of curly braces to
505+
/// delineate a capture group replacement and surrounding literal text.
506+
/// For example, if we wanted to join two words together with an
507+
/// underscore:
508+
///
509+
/// ```rust
510+
/// # extern crate regex; use regex::Regex;
511+
/// # fn main() {
512+
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
513+
/// let result = re.replace("deep fried", "${first}_$second");
514+
/// assert_eq!(result, "deep_fried");
515+
/// # }
516+
/// ```
517+
///
518+
/// Without the curly braces, the capture group name `first_` would be
519+
/// used, and since it doesn't exist, it would be replaced with the empty
520+
/// string.
521+
///
504522
/// Finally, sometimes you just want to replace a literal string with no
505523
/// regard for capturing group expansion. This can be done by wrapping a
506524
/// string with `NoExpand`:
@@ -916,6 +934,22 @@ impl<'t> Captures<'t> {
916934
/// Returns the match associated with the capture group at index `i`. If
917935
/// `i` does not correspond to a capture group, or if the capture group
918936
/// did not participate in the match, then `None` is returned.
937+
///
938+
/// # Examples
939+
///
940+
/// Get the text of the match with a default of an empty string if this
941+
/// group didn't participate in the match:
942+
///
943+
/// ```rust
944+
/// # use regex::Regex;
945+
/// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
946+
/// let caps = re.captures("abc123").unwrap();
947+
///
948+
/// let text1 = caps.get(1).map_or("", |m| m.as_str());
949+
/// let text2 = caps.get(2).map_or("", |m| m.as_str());
950+
/// assert_eq!(text1, "123");
951+
/// assert_eq!(text2, "");
952+
/// ```
919953
pub fn get(&self, i: usize) -> Option<Match<'t>> {
920954
self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e))
921955
}

Diff for: tests/regression.rs

+8
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,11 @@ mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4)));
8282
mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4)));
8383
mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4)));
8484
mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1)));
85+
86+
// See: https://github.com/rust-lang/regex/issues/321
87+
ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false);
88+
ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false);
89+
90+
// See: https://github.com/rust-lang/regex/issues/334
91+
mat!(captures_after_dfa_premature_end, r"a(b*(X|$))?", "abcbX",
92+
Some((0, 1)), None, None);

0 commit comments

Comments
 (0)