Skip to content

rollup recent PRs + miscellaneous fixes #567

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
1.1.3 (2019-03-30)
==================
This releases fixes a few bugs and adds a performance improvement when a regex
is a simple alternation of literals.

Performance improvements:

* [OPT #566](https://github.com/rust-lang/regex/pull/566):
Upgrades `aho-corasick` to 0.7 and uses it for `foo|bar|...|quux` regexes.

Bug fixes:

* [BUG #527](https://github.com/rust-lang/regex/issues/527):
Fix a bug where the parser would panic on patterns like `((?x))`.
* [BUG #555](https://github.com/rust-lang/regex/issues/555):
Fix a bug where the parser would panic on patterns like `(?m){1,1}`.
* [BUG #557](https://github.com/rust-lang/regex/issues/557):
Fix a bug where captures could lead to an incorrect match.


1.1.2 (2019-02-27)
==================
This release fixes a bug found in the fix introduced in 1.1.1.
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ lazy_static = "1"
# For property based tests.
quickcheck = { version = "0.7", default-features = false }
# For generating random test data.
rand = "0.5"
rand = "0.6.5"

[features]
default = ["use_std"]
Expand Down
9 changes: 1 addition & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Add this to your `Cargo.toml`:
regex = "1"
```

and this to your crate root:
and this to your crate root (if you're using Rust 2015):

```rust
extern crate regex;
Expand All @@ -42,8 +42,6 @@ Here's a simple example that matches a date in YYYY-MM-DD format and prints the
year, month and day:

```rust
extern crate regex;

use regex::Regex;

fn main() {
Expand All @@ -66,8 +64,6 @@ If you have lots of dates in text that you'd like to iterate over, then it's
easy to adapt the above example with an iterator:

```rust
extern crate regex;

use regex::Regex;

const TO_SEARCH: &'static str = "
Expand Down Expand Up @@ -112,9 +108,6 @@ regular expressions are compiled exactly once.
For example:

```rust
#[macro_use] extern crate lazy_static;
extern crate regex;

use regex::Regex;

fn some_helper_function(text: &str) -> bool {
Expand Down
18 changes: 15 additions & 3 deletions ci/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,19 @@ if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
(cd bench && ./run rust --no-run --verbose)

# Test minimal versions.
cargo +nightly generate-lockfile -Z minimal-versions
cargo build --verbose
cargo test --verbose
#
# For now, we remove this check, because it doesn't seem possible to convince
# some maintainers of *core* crates that this is a worthwhile test to add.
# In particular, this test uncovers any *incorrect* dependency specification
# in the chain of dependencies.
#
# We might consider figuring out how to migrate off of rand in order to get
# this check working. (This will be hard, since it either requires dropping
# quickcheck or migrating quickcheck off of rand, which is just probably
# not practical.)
#
# So frustrating.
# cargo +nightly generate-lockfile -Z minimal-versions
# cargo build --verbose
# cargo test --verbose
fi
26 changes: 26 additions & 0 deletions regex-capi/ctest/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,31 @@ bool test_regex_set_options() {
return passed;
}

bool test_escape() {
bool passed = true;

const char *pattern = "^[a-z]+.*$";
const char *expected_escaped = "\\^\\[a\\-z\\]\\+\\.\\*\\$";

const char *escaped = rure_escape_must(pattern);
if (!escaped) {
if (DEBUG) {
fprintf(stderr,
"[test_captures] expected escaped, but got no escaped\n");
}
passed = false;
} else if (strcmp(escaped, expected_escaped) != 0) {
if (DEBUG) {
fprintf(stderr,
"[test_captures] expected \"%s\", but got \"%s\"\n",
expected_escaped, escaped);
}
passed = false;
}
rure_cstring_free((char *) escaped);
return passed;
}

void run_test(bool (test)(), const char *name, bool *passed) {
if (!test()) {
*passed = false;
Expand All @@ -557,6 +582,7 @@ int main() {
run_test(test_regex_set_options, "test_regex_set_options", &passed);
run_test(test_regex_set_match_start, "test_regex_set_match_start",
&passed);
run_test(test_escape, "test_escape", &passed);

if (!passed) {
exit(1);
Expand Down
23 changes: 23 additions & 0 deletions regex-capi/include/rure.h
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,29 @@ void rure_error_free(rure_error *err);
*/
const char *rure_error_message(rure_error *err);

/*
* rure_escape_must returns a NUL terminated string where all meta characters
* have been escaped. If escaping fails for any reason, an error message is
* printed to stderr and the process is aborted.
*
* The pattern given should be in UTF-8. For convenience, this accepts a C
* string, which means the pattern cannot contain a NUL byte. These correspond
* to the only two failure conditions of this function. That is, if the caller
* guarantees that the given pattern is valid UTF-8 and does not contain a
* NUL byte, then this is guaranteed to succeed (modulo out-of-memory errors).
*
* The pointer returned must not be freed directly. Instead, it should be freed
* by calling rure_cstring_free.
*/
const char *rure_escape_must(const char *pattern);

/*
* rure_cstring_free frees the string given.
*
* This must be called at most once per string.
*/
void rure_cstring_free(char *s);

#ifdef __cplusplus
}
#endif
Expand Down
5 changes: 4 additions & 1 deletion regex-capi/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use ::std::ffi;
use ::std::ffi::CString;
use ::std::fmt;
use ::std::str;
Expand All @@ -16,6 +17,7 @@ pub enum ErrorKind {
None,
Str(str::Utf8Error),
Regex(regex::Error),
Nul(ffi::NulError),
}

impl Error {
Expand All @@ -29,7 +31,7 @@ impl Error {
pub fn is_err(&self) -> bool {
match self.kind {
ErrorKind::None => false,
ErrorKind::Str(_) | ErrorKind::Regex(_) => true,
ErrorKind::Str(_) | ErrorKind::Regex(_) | ErrorKind::Nul(_) => true,
}
}
}
Expand All @@ -40,6 +42,7 @@ impl fmt::Display for Error {
ErrorKind::None => write!(f, "no error"),
ErrorKind::Str(ref e) => e.fmt(f),
ErrorKind::Regex(ref e) => e.fmt(f),
ErrorKind::Nul(ref e) => e.fmt(f),
}
}
}
Expand Down
3 changes: 0 additions & 3 deletions regex-capi/src/macros.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

macro_rules! ffi_fn {
(fn $name:ident($($arg:ident: $arg_ty:ty),*,) -> $ret:ty $body:block) => {
ffi_fn!(fn $name($($arg: $arg_ty),*) -> $ret $body);
Expand Down Expand Up @@ -35,5 +34,3 @@ macro_rules! ffi_fn {
ffi_fn!(fn $name($($arg: $arg_ty),*) -> () $body);
};
}


60 changes: 60 additions & 0 deletions regex-capi/src/rure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -570,3 +570,63 @@ ffi_fn! {
unsafe { (*re).len() }
}
}

ffi_fn! {
fn rure_escape_must(pattern: *const c_char) -> *const c_char {
let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
let pat = pattern as *const u8;
let mut err = Error::new(ErrorKind::None);
let esc = rure_escape(pat, len, &mut err);
if err.is_err() {
let _ = writeln!(&mut io::stderr(), "{}", err);
let _ = writeln!(
&mut io::stderr(), "aborting from rure_escape_must");
unsafe { abort() }
}
esc
}
}

/// A helper function that implements fallible escaping in a way that returns
/// an error if escaping failed.
///
/// This should ideally be exposed, but it needs API design work. In
/// particular, this should not return a C string, but a `const uint8_t *`
/// instead, since it may contain a NUL byte.
fn rure_escape(
pattern: *const u8,
length: size_t,
error: *mut Error
) -> *const c_char {
let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) };
let str_pat = match str::from_utf8(pat) {
Ok(val) => val,
Err(err) => {
unsafe {
if !error.is_null() {
*error = Error::new(ErrorKind::Str(err));
}
return ptr::null();
}
}
};
let esc_pat = regex::escape(str_pat);
let c_esc_pat = match CString::new(esc_pat) {
Ok(val) => val,
Err(err) => {
unsafe {
if !error.is_null() {
*error = Error::new(ErrorKind::Nul(err));
}
return ptr::null();
}
}
};
c_esc_pat.into_raw() as *const c_char
}

ffi_fn! {
fn rure_cstring_free(s: *mut c_char) {
unsafe { CString::from_raw(s); }
}
}
19 changes: 19 additions & 0 deletions regex-syntax/src/ast/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,13 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
ast::ErrorKind::RepetitionMissing,
)),
};
match ast {
Ast::Empty(_) | Ast::Flags(_) => return Err(self.error(
self.span(),
ast::ErrorKind::RepetitionMissing,
)),
_ => {}
}
if !self.bump_and_bump_space() {
return Err(self.error(
Span::new(start, self.pos()),
Expand Down Expand Up @@ -3124,6 +3131,18 @@ bar
ast: Box::new(lit('a', 0)),
})));

assert_eq!(
parser(r"(?i){0}").parse().unwrap_err(),
TestError {
span: span(4..4),
kind: ast::ErrorKind::RepetitionMissing,
});
assert_eq!(
parser(r"(?m){1,1}").parse().unwrap_err(),
TestError {
span: span(4..4),
kind: ast::ErrorKind::RepetitionMissing,
});
assert_eq!(
parser(r"a{").parse().unwrap_err(),
TestError {
Expand Down
19 changes: 14 additions & 5 deletions regex-syntax/src/hir/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,6 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
type Err = Error;

fn finish(self) -> Result<Hir> {
if self.trans().stack.borrow().is_empty() {
// This can happen if the Ast given consists of a single set of
// flags. e.g., `(?i)`. /shrug
return Ok(Hir::empty());
}
// ... otherwise, we should have exactly one HIR on the stack.
assert_eq!(self.trans().stack.borrow().len(), 1);
Ok(self.pop().unwrap().unwrap_expr())
Expand Down Expand Up @@ -287,6 +282,16 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
}
Ast::Flags(ref x) => {
self.set_flags(&x.flags);
// Flags in the AST are generally considered directives and
// not actual sub-expressions. However, they can be used in
// the concrete syntax like `((?i))`, and we need some kind of
// indication of an expression there, and Empty is the correct
// choice.
//
// There can also be things like `(?i)+`, but we rule those out
// in the parser. In the future, we might allow them for
// consistency sake.
self.push(HirFrame::Expr(Hir::empty()));
}
Ast::Literal(ref x) => {
self.push(HirFrame::Expr(self.hir_literal(x)?));
Expand Down Expand Up @@ -1547,6 +1552,10 @@ mod tests {
hir_group_name(2, "foo", hir_lit("b")),
hir_group(3, hir_lit("c")),
]));
assert_eq!(t("()"), hir_group(1, Hir::empty()));
assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
}

#[test]
Expand Down
7 changes: 4 additions & 3 deletions src/backtrack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
slots: &'s mut [Slot],
input: I,
start: usize,
end: usize,
) -> bool {
let mut cache = cache.borrow_mut();
let cache = &mut cache.backtrack;
Expand All @@ -109,7 +110,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
slots: slots,
m: cache,
};
b.exec_(start)
b.exec_(start, end)
}

/// Clears the cache such that the backtracking engine can be executed
Expand Down Expand Up @@ -147,7 +148,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {

/// Start backtracking at the given position in the input, but also look
/// for literal prefixes.
fn exec_(&mut self, mut at: InputAt) -> bool {
fn exec_(&mut self, mut at: InputAt, end: usize) -> bool {
self.clear();
// If this is an anchored regex at the beginning of the input, then
// we're either already done or we only need to try backtracking once.
Expand All @@ -170,7 +171,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
if matched && self.prog.matches.len() == 1 {
return true;
}
if at.is_end() {
if at.pos() == end {
break;
}
at = self.input.at(at.next_pos());
Expand Down
Loading