-
Notifications
You must be signed in to change notification settings - Fork 461
/
Copy pathbench.rs
330 lines (309 loc) · 10.4 KB
/
bench.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Enable the benchmarking harness.
#![feature(test)]
#[macro_use]
extern crate lazy_static;
#[cfg(not(any(feature = "re-rust", feature = "re-rust-bytes")))]
extern crate libc;
#[cfg(feature = "re-pcre1")]
extern crate libpcre_sys;
#[cfg(feature = "re-onig")]
extern crate onig;
#[cfg(any(
feature = "re-rust",
feature = "re-rust-bytes",
))]
extern crate regex;
#[cfg(feature = "re-rust")]
extern crate regex_syntax;
extern crate test;
#[cfg(feature = "re-onig")]
pub use ffi::onig::Regex;
#[cfg(feature = "re-pcre1")]
pub use ffi::pcre1::Regex;
#[cfg(feature = "re-pcre2")]
pub use ffi::pcre2::Regex;
#[cfg(any(
feature = "re-stdcpp",
feature = "re-boost",
))]
pub use ffi::stdcpp::Regex;
#[cfg(feature = "re-re2")]
pub use ffi::re2::Regex;
#[cfg(feature = "re-dphobos")]
pub use ffi::d_phobos::Regex;
#[cfg(feature = "re-rust")]
pub use regex::{Regex, RegexSet};
#[cfg(feature = "re-rust-bytes")]
pub use regex::bytes::{Regex, RegexSet};
#[cfg(feature = "re-tcl")]
pub use ffi::tcl::Regex;
// Usage: regex!(pattern)
//
// Compiles `pattern` into a ::Regex and unwraps the result, so a pattern that
// fails to compile aborts the benchmark with a panic.
//
// The pattern is first converted with `to_owned()` and then handed to
// `::Regex::new` by reference.
//
// Because of macro scoping rules, this definition is only visible to the
// modules declared after it in this file. That lets the same benchmark
// definitions be reused for both native and dynamic regexes.
macro_rules! regex {
    ($re:expr) => {{
        let pattern = $re.to_owned();
        ::Regex::new(&pattern).unwrap()
    }};
}
// Usage: text!(haystack)
//
// Builds a ::Text from an owned string.
//
// This macro is called on every input searched in every benchmark. It is
// called exactly once per benchmark and its time is not included in the
// benchmark timing.
//
// The text given to the macro is always a String, which is guaranteed to be
// valid UTF-8.
//
// The return type should be an owned value that can deref to whatever the
// regex accepts in its `is_match` and `find_iter` methods.
#[cfg(feature = "re-tcl")]
// Tcl flavour of `text!`: wraps the haystack in an `ffi::tcl::Text`.
//
// The constructor is reached through an absolute path because this macro is
// expanded inside other modules of the crate.
macro_rules! text {
    ($haystack:expr) => {
        ::ffi::tcl::Text::new($haystack)
    };
}
#[cfg(feature = "re-rust-bytes")]
// Byte-oriented flavour of `text!`: consumes the haystack, which must be a
// `String`, and yields its UTF-8 bytes as a `Vec<u8>`.
macro_rules! text {
    ($haystack:expr) => {
        String::into_bytes($haystack)
    };
}
#[cfg(any(
feature = "re-onig",
feature = "re-pcre1",
feature = "re-pcre2",
feature = "re-stdcpp",
feature = "re-boost",
feature = "re-re2",
feature = "re-dphobos",
feature = "re-rust",
))]
// Pass-through flavour of `text!`: the haystack expression is used exactly
// as written, with no conversion.
macro_rules! text {
    ($haystack:expr) => {
        $haystack
    };
}
// The type of the value yielded by the `text!` macro defined above.
// Tcl: the `text!` macro for this feature builds an `ffi::tcl::Text`.
#[cfg(feature = "re-tcl")]
type Text = ffi::tcl::Text;
// Byte-oriented Rust regexes: the `text!` macro for this feature converts the
// haystack String into its bytes.
#[cfg(feature = "re-rust-bytes")]
type Text = Vec<u8>;
// All remaining engines: the `text!` macro for these features returns the
// haystack unchanged, so the searched value is the String itself.
#[cfg(any(
feature = "re-onig",
feature = "re-pcre1",
feature = "re-pcre2",
feature = "re-stdcpp",
feature = "re-boost",
feature = "re-re2",
feature = "re-dphobos",
feature = "re-rust",
))]
type Text = String;
// Macros for writing benchmarks easily. We provide macros for benchmarking
// matches, non-matches and for finding all successive non-overlapping matches
// in a string (including a check that the count is correct).
// USAGE: bench_match!(name, pattern, haystack)
//
// Defines a benchmark that measures how quickly a regex reports a match on
// `haystack`. The benchmark fails if the regex does NOT match.
//
// This is shorthand for `bench_is_match!` with an expected result of `true`
// and with the pattern compiled through `regex!`.
//
//   name     - identifier used as the benchmark's function name.
//   pattern  - the regular expression, as a &'static str.
//   haystack - the String to search.
macro_rules! bench_match {
    ($bench:ident, $re:expr, $text:expr) => {
        bench_is_match!($bench, true, regex!($re), $text);
    };
}
// USAGE: bench_not_match!(name, pattern, haystack)
//
// Defines a benchmark that measures how quickly a regex reports that it does
// not match `haystack`. The benchmark fails if the regex DOES match.
//
// This is shorthand for `bench_is_match!` with an expected result of `false`
// and with the pattern compiled through `regex!`.
//
//   name     - identifier used as the benchmark's function name.
//   pattern  - the regular expression, as a &'static str.
//   haystack - the String to search.
macro_rules! bench_not_match {
    ($bench:ident, $re:expr, $text:expr) => {
        bench_is_match!($bench, false, regex!($re), $text);
    };
}
// USAGE: bench_is_match!(name, is_match, regex, haystack)
//
// Defines a #[bench] function that times `is_match` on `haystack` and panics
// as soon as the reported result differs from the expected one.
//
//   name     - identifier used as the benchmark's function name.
//   is_match - bool: whether the regex is expected to match the haystack.
//   regex    - an already-built ::Regex (not a pattern string).
//   haystack - the String to search; it is passed through `text!`.
//
// The regex and the haystack are each built once and stored in statics.
macro_rules! bench_is_match {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use std::sync::Mutex;
            // The obvious approach would be to build the regex right here,
            // before calling b.iter(). The benchmark harness, however,
            // appears to invoke this whole function several times. That does
            // not distort the reported timings, but recompiling the regex on
            // every invocation makes a full benchmark run much slower, so
            // the regex and the haystack live in lazily initialised statics.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let compiled = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            b.bytes = input.len() as u64;
            b.iter(|| match (compiled.is_match(&input), $is_match) {
                (false, true) => panic!("expected match, got not match"),
                (true, false) => panic!("expected no match, got match"),
                _ => {}
            });
        }
    };
}
// USAGE: bench_find!(name, pattern, count, haystack)
//
// Defines a #[bench] function that times counting every successive
// non-overlapping match of `pattern` in `haystack`. The benchmark fails if
// the number of matches found differs from `count`.
//
//   name     - identifier used as the benchmark's function name.
//   pattern  - the regular expression, as a &'static str; compiled via
//              `regex!`.
//   count    - the expected number of matches.
//   haystack - the String to search; it is passed through `text!`.
//
// The regex and the haystack are each built once and stored in statics.
macro_rules! bench_find {
    ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use std::sync::Mutex;
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new(regex!($pattern));
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let compiled = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            b.bytes = input.len() as u64;
            b.iter(|| {
                let found = compiled.find_iter(&input).count();
                assert_eq!($count, found)
            });
        }
    };
}
// USAGE: bench_captures!(name, pattern, groups, haystack);
//
// Defines a #[bench] function that times `captures()` on `haystack`. The
// generated function is only compiled when the "re-rust" feature is enabled.
//
//   name     - identifier used as the benchmark's function name.
//   pattern  - an already-built ::Regex (not a pattern string).
//   groups   - usize: the number of capture groups expected.
//   haystack - the String to search; it is passed through `text!`.
//
// The benchmark fails if the regex produces no captures at all, or if the
// number of captures reported is not `groups + 1`.
macro_rules! bench_captures {
    ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => {
        #[cfg(feature = "re-rust")]
        #[bench]
        fn $name(b: &mut Bencher) {
            use std::sync::Mutex;
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($pattern);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let compiled = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            b.bytes = input.len() as u64;
            b.iter(|| {
                let caps = match compiled.captures(&input) {
                    Some(caps) => caps,
                    None => panic!("no captures"),
                };
                assert_eq!($count + 1, caps.len())
            });
        }
    };
}
// USAGE: bench_is_match_set!(name, is_match, regex, haystack)
//
// Defines a #[bench] function that times `is_match` of a regex set on
// `haystack` and panics as soon as the result differs from the expected one.
//
//   name     - identifier used as the benchmark's function name.
//   is_match - bool: whether the set is expected to match the haystack.
//   regex    - an already-built RegexSet.
//   haystack - the String to search; it is passed through `text!`.
//
// The set and the haystack are each built once and stored in statics.
macro_rules! bench_is_match_set {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use std::sync::Mutex;
            lazy_static! {
                static ref RE: Mutex<RegexSet> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let set = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            b.bytes = input.len() as u64;
            b.iter(|| match (set.is_match(&input), $is_match) {
                (false, true) => panic!("expected match, got not match"),
                (true, false) => panic!("expected no match, got match"),
                _ => {}
            });
        }
    };
}
// USAGE: bench_matches_set!(name, is_match, regex, haystack)
//
// Defines a #[bench] function that times `matches` of a regex set on
// `haystack`, then checks `matched_any()` on the result and panics as soon as
// it differs from the expected value.
//
//   name     - identifier used as the benchmark's function name.
//   is_match - bool: whether any regex in the set is expected to match.
//   regex    - an already-built RegexSet.
//   haystack - the String to search; it is passed through `text!`.
//
// The set and the haystack are each built once and stored in statics.
macro_rules! bench_matches_set {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use std::sync::Mutex;
            lazy_static! {
                static ref RE: Mutex<RegexSet> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let set = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            b.bytes = input.len() as u64;
            b.iter(|| match (set.matches(&input).matched_any(), $is_match) {
                (false, true) => panic!("expected match, got not match"),
                (true, false) => panic!("expected no match, got match"),
                _ => {}
            });
        }
    };
}
mod ffi;
mod misc;
mod regexdna;
mod sherlock;