Skip to content

Fixes #343

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 18, 2017
Merged

Fixes #343

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions regex-syntax/src/literals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
let n = cmp::min(lits.limit_size, min as usize);
let es = iter::repeat(e.clone()).take(n).collect();
f(&Concat(es), lits);
if n < min as usize {
if n < min as usize || lits.contains_empty() {
lits.cut();
}
}
Expand Down Expand Up @@ -1156,8 +1156,9 @@ mod tests {

// Test regexes with empty assertions.
test_lit!(pfx_empty1, prefixes, "^a", M("a"));
test_lit!(pfx_empty2, prefixes, "^abc", M("abc"));
test_lit!(pfx_empty3, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));
test_lit!(pfx_empty2, prefixes, "a${2}", C("a"));
test_lit!(pfx_empty3, prefixes, "^abc", M("abc"));
test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));

// Make sure some curious regexes have no prefixes.
test_lit!(pfx_nothing1, prefixes, ".");
Expand Down Expand Up @@ -1306,6 +1307,7 @@ mod tests {

// Test regexes with empty assertions.
test_lit!(sfx_empty1, suffixes, "a$", M("a"));
test_lit!(sfx_empty2, suffixes, "${2}a", C("a"));

// Make sure some curious regexes have no suffixes.
test_lit!(sfx_nothing1, suffixes, ".");
Expand Down
9 changes: 6 additions & 3 deletions src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -850,9 +850,12 @@ impl<'c> ExecNoSync<'c> {
match_start: usize,
match_end: usize,
) -> Option<(usize, usize)> {
// We can't use match_end directly, because we may need to examine
// one "character" after the end of a match for lookahead operators.
let e = cmp::min(next_utf8(text, match_end), text.len());
// We can't use match_end directly, because we may need to examine one
// "character" after the end of a match for lookahead operators. We
// need to move two characters beyond the end, since some look-around
// operations may falsely assume a premature end of text otherwise.
let e = cmp::min(
next_utf8(text, next_utf8(text, match_end)), text.len());
self.captures_nfa(slots, &text[..e], match_start)
}

Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
# }
```

If you wish to match against whitespace in this mode, you can still use `\s`,
`\n`, `\t`, etc. For escaping a single space character, you can use its hex
character code `\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`.

# Example: match multiple regular expressions simultaneously

This demonstrates how to use a `RegexSet` to match multiple (possibly
Expand Down
4 changes: 0 additions & 4 deletions src/re_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@ impl RegexBuilder {
}

/// Set the value for the Unicode (`u`) flag.
///
/// For byte based regular expressions, this is disabled by default.
pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
self.0.unicode = yes;
self
Expand Down Expand Up @@ -228,8 +226,6 @@ impl RegexSetBuilder {
}

/// Set the value for the Unicode (`u`) flag.
///
/// For byte based regular expressions, this is disabled by default.
pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.0.unicode = yes;
self
Expand Down
37 changes: 32 additions & 5 deletions src/re_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,12 +427,23 @@ impl Regex {
/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
/// would produce the same result. To write a literal `$` use `$$`.
///
/// If `$name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
/// Sometimes the replacement string requires use of curly braces to
/// delineate a capture group replacement and surrounding literal text.
/// For example, if we wanted to join two words together with an
/// underscore:
///
/// The longest possible name is used. e.g., `$1a` looks up the capture
/// group named `1a` and not the capture group at index `1`. To exert more
/// precise control over the name, use braces, e.g., `${1}a`.
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace(b"deep fried", &b"${first}_$second"[..]);
/// assert_eq!(result, &b"deep_fried"[..]);
/// # }
/// ```
///
/// Without the curly braces, the capture group name `first_` would be
/// used, and since it doesn't exist, it would be replaced with the empty
/// string.
///
/// Finally, sometimes you just want to replace a literal string with no
/// regard for capturing group expansion. This can be done by wrapping a
Expand Down Expand Up @@ -778,6 +789,22 @@ impl<'t> Captures<'t> {
/// Returns the match associated with the capture group at index `i`. If
/// `i` does not correspond to a capture group, or if the capture group
/// did not participate in the match, then `None` is returned.
///
/// # Examples
///
/// Get the text of the match with a default of an empty string if this
/// group didn't participate in the match:
///
/// ```rust
/// # use regex::bytes::Regex;
/// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
/// let caps = re.captures(b"abc123").unwrap();
///
/// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes());
/// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes());
/// assert_eq!(text1, &b"123"[..]);
/// assert_eq!(text2, &b""[..]);
/// ```
pub fn get(&self, i: usize) -> Option<Match<'t>> {
    // Ask `locs` for the offset pair recorded for group `i`. When a pair
    // is present, bundle it with `self.text` into a `Match`; otherwise
    // pass the `None` straight through.
    match self.locs.pos(i) {
        Some((start, end)) => Some(Match::new(self.text, start, end)),
        None => None,
    }
}
Expand Down
34 changes: 34 additions & 0 deletions src/re_unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,24 @@ impl Regex {
/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
/// would produce the same result. To write a literal `$` use `$$`.
///
/// Sometimes the replacement string requires use of curly braces to
/// delineate a capture group replacement and surrounding literal text.
/// For example, if we wanted to join two words together with an
/// underscore:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace("deep fried", "${first}_$second");
/// assert_eq!(result, "deep_fried");
/// # }
/// ```
///
/// Without the curly braces, the capture group name `first_` would be
/// used, and since it doesn't exist, it would be replaced with the empty
/// string.
///
/// Finally, sometimes you just want to replace a literal string with no
/// regard for capturing group expansion. This can be done by wrapping a
/// string with `NoExpand`:
Expand Down Expand Up @@ -916,6 +934,22 @@ impl<'t> Captures<'t> {
/// Returns the match associated with the capture group at index `i`. If
/// `i` does not correspond to a capture group, or if the capture group
/// did not participate in the match, then `None` is returned.
///
/// # Examples
///
/// Get the text of the match with a default of an empty string if this
/// group didn't participate in the match:
///
/// ```rust
/// # use regex::Regex;
/// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
/// let caps = re.captures("abc123").unwrap();
///
/// let text1 = caps.get(1).map_or("", |m| m.as_str());
/// let text2 = caps.get(2).map_or("", |m| m.as_str());
/// assert_eq!(text1, "123");
/// assert_eq!(text2, "");
/// ```
pub fn get(&self, i: usize) -> Option<Match<'t>> {
    // `locs.pos(i)` yields the (start, end) offsets for group `i` when
    // they are available; only then is a `Match` over `self.text` built.
    match self.locs.pos(i) {
        Some((begin, finish)) => Some(Match::new(self.text, begin, finish)),
        None => None,
    }
}
Expand Down
8 changes: 8 additions & 0 deletions tests/regression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,11 @@ mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4)));
mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4)));
mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4)));
mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1)));

// See: https://github.com/rust-lang/regex/issues/321
ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false);
ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false);

// See: https://github.com/rust-lang/regex/issues/334
mat!(captures_after_dfa_premature_end, r"a(b*(X|$))?", "abcbX",
Some((0, 1)), None, None);