Skip to content

Commit 4e3a107

Browse files
mkrupcale authored and BurntSushi committed
bench: add boost
This commit adds a new `re-boost` feature that enables benchmarking Boost's regex implementation. Closes rust-lang#459
1 parent 00a66de commit 4e3a107

File tree

10 files changed

+86
-27
lines changed

10 files changed

+86
-27
lines changed

bench/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,9 @@ re-pcre1 = ["libpcre-sys"]
4747
re-pcre2 = []
4848
re-onig = ["onig"]
4949
re-stdcpp = []
50-
re-re2 = []
5150
libcxx = []
51+
re-boost = []
52+
re-re2 = []
5253
re-dphobos = []
5354
re-dphobos-dmd = ["re-dphobos"]
5455
re-dphobos-dmd-ct = ["re-dphobos-dmd"]

bench/build.rs

+9
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,15 @@ fn main() {
3535
.compile("libcstdcpp.a");
3636
}
3737
}
38+
if env::var("CARGO_FEATURE_RE_BOOST").is_ok() {
39+
// Boost.Regex is a C++ library, so we need to compile our shim layer.
40+
cc::Build::new()
41+
.cpp(true)
42+
.file("src/ffi/stdcpp.cpp")
43+
.define("USE_BOOST", None)
44+
.compile("libcboost.a");
45+
println!("cargo:rustc-link-lib=boost_regex");
46+
}
3847
if env::var("CARGO_FEATURE_RE_RE2").is_ok() {
3948
// RE2 is a C++ library, so we need to compile our shim layer.
4049
cc::Build::new()

bench/compile

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22

33
exec cargo build \
44
--release \
5-
--features 're-stdcpp re-re2 re-onig re-pcre1 re-pcre2 re-rust re-rust-bytes re-tcl re-dphobos-dmd re-dphobos-ldc' \
5+
--features 're-stdcpp re-boost re-re2 re-onig re-pcre1 re-pcre2 re-rust re-rust-bytes re-tcl re-dphobos-dmd re-dphobos-ldc' \
66
"$@"

bench/run

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
usage() {
4-
echo "Usage: $(basename $0) [dphobos-dmd | dphobos-ldc | dphobos-dmd-ct | dphobos-ldc-ct | rust | rust-bytes | pcre1 | pcre2 | stdcpp | stdcpp-libcxx | re2 | onig | tcl ]" >&2
4+
echo "Usage: $(basename $0) [dphobos-dmd | dphobos-ldc | dphobos-dmd-ct | dphobos-ldc-ct | rust | rust-bytes | pcre1 | pcre2 | stdcpp | stdcpp-libcxx | boost | re2 | onig | tcl ]" >&2
55
exit 1
66
}
77

@@ -36,6 +36,9 @@ case $which in
3636
stdcpp-libcxx)
3737
exec cargo bench --bench bench --features 're-stdcpp libcxx' "$@"
3838
;;
39+
boost)
40+
exec cargo bench --bench bench --features re-boost "$@"
41+
;;
3942
re2)
4043
exec cargo bench --bench bench --features re-re2 "$@"
4144
;;

bench/src/bench.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,16 @@ extern crate regex;
2828
extern crate regex_syntax;
2929
extern crate test;
3030

31-
3231
#[cfg(feature = "re-onig")]
3332
pub use ffi::onig::Regex;
3433
#[cfg(feature = "re-pcre1")]
3534
pub use ffi::pcre1::Regex;
3635
#[cfg(feature = "re-pcre2")]
3736
pub use ffi::pcre2::Regex;
38-
#[cfg(feature = "re-stdcpp")]
37+
#[cfg(any(
38+
feature = "re-stdcpp",
39+
feature = "re-boost",
40+
))]
3941
pub use ffi::stdcpp::Regex;
4042
#[cfg(feature = "re-re2")]
4143
pub use ffi::re2::Regex;
@@ -93,6 +95,7 @@ macro_rules! text {
9395
feature = "re-pcre1",
9496
feature = "re-pcre2",
9597
feature = "re-stdcpp",
98+
feature = "re-boost",
9699
feature = "re-re2",
97100
feature = "re-dphobos",
98101
feature = "re-rust",
@@ -111,6 +114,7 @@ type Text = Vec<u8>;
111114
feature = "re-pcre1",
112115
feature = "re-pcre2",
113116
feature = "re-stdcpp",
117+
feature = "re-boost",
114118
feature = "re-re2",
115119
feature = "re-dphobos",
116120
feature = "re-rust",

bench/src/ffi/mod.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@ pub mod onig;
2020
pub mod pcre1;
2121
#[cfg(feature = "re-pcre2")]
2222
pub mod pcre2;
23-
#[cfg(feature = "re-stdcpp")]
23+
#[cfg(any(
24+
feature = "re-stdcpp",
25+
feature = "re-boost",
26+
))]
2427
pub mod stdcpp;
2528
#[cfg(feature = "re-re2")]
2629
pub mod re2;

bench/src/ffi/stdcpp.cpp

+26-15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
1+
#ifdef USE_BOOST
2+
#include <boost/regex.hpp>
3+
#else
14
#include <regex>
5+
#endif
26

37
extern "C" {
8+
9+
#ifdef USE_BOOST
10+
namespace regex_ns = boost;
11+
#else
12+
namespace regex_ns = std;
13+
#endif
14+
415
typedef void stdcpp_regexp;
516

617
typedef struct stdcpp_string {
@@ -9,34 +20,34 @@ extern "C" {
920
} stdcpp_string;
1021

1122
stdcpp_regexp* stdcpp_regexp_new(stdcpp_string pat) {
12-
return reinterpret_cast<stdcpp_regexp*>(new std::regex(pat.text,
13-
pat.len,
14-
std::regex::optimize));
23+
return reinterpret_cast<stdcpp_regexp*>(new regex_ns::regex(pat.text,
24+
pat.len,
25+
regex_ns::regex::optimize));
1526
}
1627

1728
void stdcpp_regexp_free(stdcpp_regexp *re) {
18-
delete reinterpret_cast<std::regex*>(re);
29+
delete reinterpret_cast<regex_ns::regex*>(re);
1930
}
2031

2132
bool stdcpp_regexp_match(stdcpp_regexp *re, stdcpp_string text,
2233
int startpos, int endpos) {
23-
std::regex cpp_re(*reinterpret_cast<std::regex*>(re));
24-
return std::regex_search(text.text + startpos, text.text + endpos,
25-
cpp_re);
34+
regex_ns::regex cpp_re(*reinterpret_cast<regex_ns::regex*>(re));
35+
return regex_ns::regex_search(text.text + startpos, text.text + endpos,
36+
cpp_re);
2637
}
2738

2839
bool stdcpp_regexp_find(stdcpp_regexp *re, stdcpp_string text,
2940
int startpos, int endpos,
3041
int *match_start, int *match_end) {
31-
std::regex cpp_re(*reinterpret_cast<std::regex*>(re));
32-
std::cmatch result;
33-
bool matched;
34-
matched = std::regex_search(text.text + startpos, text.text + endpos,
35-
result, cpp_re);
36-
if (matched) {
42+
regex_ns::regex cpp_re(*reinterpret_cast<regex_ns::regex*>(re));
43+
regex_ns::cmatch result;
44+
bool matched;
45+
matched = regex_ns::regex_search(text.text + startpos, text.text + endpos,
46+
result, cpp_re);
47+
if (matched) {
3748
*match_start = result[0].first - text.text;
3849
*match_end = *match_start + result.length(0);
39-
}
40-
return matched;
50+
}
51+
return matched;
4152
}
4253
}

bench/src/main.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,15 @@ fn count_pcre2(pat: &str, haystack: &str) -> usize {
135135
Regex::new(pat).unwrap().find_iter(haystack).count()
136136
}
137137

138+
#[cfg(not(any(
139+
feature = "re-stdcpp",
140+
feature = "re-boost",
141+
)))]
138142
nada!("re-stdcpp", count_stdcpp);
139-
#[cfg(feature = "re-stdcpp")]
143+
#[cfg(any(
144+
feature = "re-stdcpp",
145+
feature = "re-boost",
146+
))]
140147
fn count_stdcpp(pat: &str, haystack: &str) -> usize {
141148
use ffi::stdcpp::Regex;
142149
Regex::new(pat).unwrap().find_iter(haystack).count()

bench/src/misc.rs

+2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ bench_match!(match_class_in_range, "[ac]", {
4646
});
4747

4848
#[cfg(not(feature = "re-rust-bytes"))]
49+
// std C++ does not support unicode character classes
4950
#[cfg(not(feature = "re-stdcpp"))]
51+
#[cfg(not(feature = "re-boost"))]
5052
#[cfg(not(feature = "re-tcl"))]
5153
bench_match!(match_class_unicode, r"\p{L}", {
5254
format!("{}a", repeat("☃5☃5").take(20).collect::<String>())

bench/src/sherlock.rs

+24-5
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,14 @@ sherlock!(the_whitespace, r"the\s+\w+", 5410);
106106
#[cfg(not(feature = "re-pcre1"))]
107107
#[cfg(not(feature = "re-pcre2"))]
108108
#[cfg(not(feature = "re-stdcpp"))]
109+
#[cfg(not(feature = "re-boost"))]
109110
#[cfg(not(feature = "re-tcl"))]
110111
sherlock!(everything_greedy, r".*", 13053);
111112
// std::regex . does not match \r
112-
#[cfg(feature = "re-stdcpp")]
113+
#[cfg(any(
114+
feature = "re-stdcpp",
115+
feature = "re-boost",
116+
))]
113117
sherlock!(everything_greedy, r"[^\n]*", 13053);
114118
#[cfg(not(feature = "re-dphobos"))]
115119
#[cfg(not(feature = "re-onig"))]
@@ -122,24 +126,34 @@ sherlock!(everything_greedy_nl, r"(?s).*", 1);
122126

123127
// How fast can we match every letter? This also defeats any clever prefix
124128
// tricks.
129+
// std C++ does not support unicode character classes
125130
#[cfg(not(feature = "re-stdcpp"))]
131+
#[cfg(not(feature = "re-boost"))]
126132
#[cfg(not(feature = "re-tcl"))]
127133
sherlock!(letters, r"\p{L}", 447160);
128134

135+
// std C++ does not support unicode character classes
129136
#[cfg(not(feature = "re-stdcpp"))]
137+
#[cfg(not(feature = "re-boost"))]
130138
#[cfg(not(feature = "re-tcl"))]
131139
sherlock!(letters_upper, r"\p{Lu}", 14180);
132140

141+
// std C++ does not support unicode character classes
133142
#[cfg(not(feature = "re-stdcpp"))]
143+
#[cfg(not(feature = "re-boost"))]
134144
#[cfg(not(feature = "re-tcl"))]
135145
sherlock!(letters_lower, r"\p{Ll}", 432980);
136146

137147
// Similarly, for words.
138-
#[cfg(not(feature = "re-re2"))]
139148
#[cfg(not(feature = "re-stdcpp"))]
149+
#[cfg(not(feature = "re-boost"))]
150+
#[cfg(not(feature = "re-re2"))]
140151
sherlock!(words, r"\w+", 109214);
141-
#[cfg(feature = "re-re2")]
142-
#[cfg(feature = "re-stdcpp")]
152+
#[cfg(any(
153+
feature = "re-stdcpp",
154+
feature = "re-boost",
155+
feature = "re-re2",
156+
))]
143157
sherlock!(words, r"\w+", 109222); // hmm, why does RE2 diverge here?
144158

145159
// Find complete words before Holmes. The `\w` defeats any prefix
@@ -162,6 +176,7 @@ sherlock!(holmes_cochar_watson, r"Holmes.{0,25}Watson|Watson.{0,25}Holmes", 7);
162176
#[cfg(not(feature = "re-pcre1"))]
163177
#[cfg(not(feature = "re-pcre2"))]
164178
#[cfg(not(feature = "re-stdcpp"))]
179+
#[cfg(not(feature = "re-boost"))]
165180
#[cfg(not(feature = "re-tcl"))]
166181
sherlock!(
167182
holmes_coword_watson,
@@ -178,13 +193,17 @@ sherlock!(quotes, r#"["'][^"']{0,30}[?!.]["']"#, 767);
178193
// lazy DFA the entire way.
179194
// std C++ does not support multiline until C++17 nor the inline modifier syntax
180195
#[cfg(not(feature = "re-stdcpp"))]
196+
#[cfg(not(feature = "re-boost"))]
181197
#[cfg(not(feature = "re-dphobos"))]
182198
sherlock!(
183199
line_boundary_sherlock_holmes,
184200
r"(?m)^Sherlock Holmes|Sherlock Holmes$",
185201
34);
186202
// D matches both \r\n and \n as EOL
187-
#[cfg(feature = "re-dphobos")]
203+
#[cfg(any(
204+
feature = "re-boost",
205+
feature = "re-dphobos",
206+
))]
188207
sherlock!(
189208
line_boundary_sherlock_holmes,
190209
r"(?m)^Sherlock Holmes|Sherlock Holmes$",

0 commit comments

Comments
 (0)