diff --git a/HACKING.md b/HACKING.md index 9556de6ecc..a106c4b337 100644 --- a/HACKING.md +++ b/HACKING.md @@ -270,9 +270,9 @@ N.B. To run tests for the `regex!` macro, use: The benchmarking in this crate is made up of many micro-benchmarks. Currently, there are two primary sets of benchmarks: the benchmarks that were adopted -at this library's inception (in `benches/src/misc.rs`) and a newer set of +at this library's inception (in `bench/src/misc.rs`) and a newer set of benchmarks meant to test various optimizations. Specifically, the latter set -contain some analysis and are in `benches/src/sherlock.rs`. Also, the latter +contain some analysis and are in `bench/src/sherlock.rs`. Also, the latter set are all executed on the same lengthy input whereas the former benchmarks are executed on strings of varying length. @@ -299,20 +299,20 @@ library benchmarks (especially RE2). If you're hacking on one of the matching engines and just want to see benchmarks, then all you need to run is: - $ ./run-bench rust + $ ./bench/run rust If you want to compare your results with older benchmarks, then try: - $ ./run-bench rust | tee old + $ ./bench/run rust | tee old $ ... make it faster - $ ./run-bench rust | tee new - $ cargo-benchcmp old new --improvements + $ ./bench/run rust | tee new + $ cargo benchcmp old new --improvements The `cargo-benchcmp` utility is available here: https://github.com/BurntSushi/cargo-benchcmp -The `run-bench` utility can run benchmarks for PCRE and Oniguruma too. See -`./run-bench --help`. +The `./bench/run` utility can run benchmarks for PCRE and Oniguruma too. See +`./bench/run --help`. ## Dev Docs diff --git a/bench/Cargo.toml b/bench/Cargo.toml index 8875fed487..2448636887 100644 --- a/bench/Cargo.toml +++ b/bench/Cargo.toml @@ -40,8 +40,7 @@ bench = false # Doing anything else will probably result in weird "duplicate definition" # compiler errors. # -# Tip: use the run-bench script in the root of this repository to run -# benchmarks.
+# Tip: use the `bench/run` script (in this directory) to run benchmarks. [features] re-pcre1 = ["libpcre-sys"] re-pcre2 = [] diff --git a/bench/src/bench.rs b/bench/src/bench.rs index a45079edc0..92e780e6b4 100644 --- a/bench/src/bench.rs +++ b/bench/src/bench.rs @@ -236,6 +236,41 @@ macro_rules! bench_find { } } +// USAGE: bench_captures!(name, pattern, groups, haystack); +// +// CONTRACT: +// Given: +// name : ident, the desired benchmarking function name +// pattern : ::Regex, the regular expression to be executed +// groups : usize, the number of capture groups +// haystack : String, the string to search +// bench_captures will benchmark how fast re.captures() produces +// the capture groups in question. +macro_rules! bench_captures { + ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { + + #[cfg(feature = "re-rust")] + #[bench] + fn $name(b: &mut Bencher) { + use std::sync::Mutex; + + lazy_static! { + static ref RE: Mutex<Regex> = Mutex::new($pattern); + static ref TEXT: Mutex<String> = Mutex::new(text!($haystack)); + }; + let re = RE.lock().unwrap(); + let text = TEXT.lock().unwrap(); + b.bytes = text.len() as u64; + b.iter(|| { + match re.captures(&text) { + None => assert!(false, "no captures"), + Some(caps) => assert_eq!($count + 1, caps.len()), + } + }); + } + } +} + mod ffi; mod misc; mod regexdna; diff --git a/bench/src/misc.rs b/bench/src/misc.rs index 86f93c4878..859b59c259 100644 --- a/bench/src/misc.rs +++ b/bench/src/misc.rs @@ -191,3 +191,85 @@ macro_rules! reallyhard2 { () => (r"\w+\s+Holmes") } bench_match!(reallyhard2_1K, reallyhard2!(), get_text(TXT_1K, reallyhard2_suffix())); + + +// +// Benchmarks to justify the short-haystack NFA fallthrough optimization +// implemented by `read_captures_at` in regex/src/exec.rs. See github issue +// #348. +// +// The procedure used to try to determine the right hardcoded cutoff +// for the short-haystack optimization in issue #348 is as follows.
+// +// ``` +// > cd bench +// > cargo bench --features re-rust short_hay | tee dfa-nfa.res +// > # modify the `MatchType::Dfa` branch in exec.rs:read_captures_at +// > # to just execute the nfa +// > cargo bench --features re-rust short_hay | tee nfa-only.res +// > cargo benchcmp dfa-nfa.res nfa-only.res +// ``` +// +// The expected result is that short inputs will go faster under +// the nfa-only mode, but at some turnover point the dfa-nfa mode +// will start to win again. Unfortunately, that is not what happened. +// Instead there was no noticeable change in the bench results, so +// I've opted to just do the more conservative anchor optimization. +// +bench_captures!(short_haystack_1x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + String::from("aaaabbbbccccbbbdddd")); +bench_captures!(short_haystack_2x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(2).collect::<String>(), + repeat("dddd").take(2).collect::<String>(), + )); +bench_captures!(short_haystack_3x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(3).collect::<String>(), + repeat("dddd").take(3).collect::<String>(), + )); +bench_captures!(short_haystack_4x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(4).collect::<String>(), + repeat("dddd").take(4).collect::<String>(), + )); +bench_captures!(short_haystack_10x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(10).collect::<String>(), + repeat("dddd").take(10).collect::<String>(), + )); +bench_captures!(short_haystack_100x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(100).collect::<String>(), + repeat("dddd").take(100).collect::<String>(), + )); +bench_captures!(short_haystack_1000x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(1000).collect::<String>(), + repeat("dddd").take(1000).collect::<String>(), + ));
+bench_captures!(short_haystack_10000x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(10000).collect::<String>(), + repeat("dddd").take(10000).collect::<String>(), + )); +bench_captures!(short_haystack_100000x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(100000).collect::<String>(), + repeat("dddd").take(100000).collect::<String>(), + )); +bench_captures!(short_haystack_1000000x, + Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 2, + format!("{}bbbbccccbbb{}", + repeat("aaaa").take(1000000).collect::<String>(), + repeat("dddd").take(1000000).collect::<String>(), + )); diff --git a/src/exec.rs b/src/exec.rs index 458e47d3b0..d12a725cf0 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -554,12 +554,16 @@ impl<'c> RegularExpression for ExecNoSync<'c> { }) } MatchType::Dfa => { - match self.find_dfa_forward(text, start) { - dfa::Result::Match((s, e)) => { - self.captures_nfa_with_match(slots, text, s, e) + if self.ro.nfa.is_anchored_start { + self.captures_nfa(slots, text, start) + } else { + match self.find_dfa_forward(text, start) { + dfa::Result::Match((s, e)) => { + self.captures_nfa_with_match(slots, text, s, e) + } + dfa::Result::NoMatch(_) => None, + dfa::Result::Quit => self.captures_nfa(slots, text, start), } - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.captures_nfa(slots, text, start), } } MatchType::DfaAnchoredReverse => {