Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 80929bf

Browse files
addisoncrump authored and BurntSushi committed May 18, 2023
fuzz: add syntactic structurally aware fuzzers
This makes use of the new 'arbitrary' feature in 'regex-syntax' to make fuzzing much more targeted and complete. Closes #848
1 parent 966fa4f commit 80929bf

File tree

6 files changed

+179
-1
lines changed

6 files changed

+179
-1
lines changed
 

‎fuzz/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
target
22
corpus
33
artifacts
4+
coverage

‎fuzz/Cargo.toml

+24-1
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
[package]
22
name = "regex-fuzz"
33
version = "0.0.0"
4-
authors = ["David Korczynski <david@adalogics.com>"]
4+
authors = [
5+
"The Rust Project Developers",
6+
"David Korczynski <david@adalogics.com>",
7+
"Addison Crump <me@addisoncrump.info>",
8+
"Andrew Gallant <jamslam@gmail.com>",
9+
]
510
publish = false
611
edition = "2021"
712

813
[package.metadata]
914
cargo-fuzz = true
1015

1116
[dependencies]
17+
arbitrary = { version = "1.3.0", features = ["derive"] }
1218
libfuzzer-sys = { version = "0.4.1", features = ["arbitrary-derive"] }
1319
regex = { path = ".." }
1420
regex-automata = { path = "../regex-automata" }
1521
regex-lite = { path = "../regex-lite" }
22+
regex-syntax = { path = "../regex-syntax", features = ["arbitrary"] }
1623

1724
# Prevent this from interfering with workspaces
1825
[workspace]
@@ -34,6 +41,22 @@ path = "fuzz_targets/fuzz_regex_automata_deserialize_dense_dfa.rs"
3441
name = "fuzz_regex_automata_deserialize_sparse_dfa"
3542
path = "fuzz_targets/fuzz_regex_automata_deserialize_sparse_dfa.rs"
3643

44+
[[bin]]
45+
name = "ast_roundtrip"
46+
path = "fuzz_targets/ast_roundtrip.rs"
47+
48+
[[bin]]
49+
name = "ast_fuzz_match"
50+
path = "fuzz_targets/ast_fuzz_match.rs"
51+
52+
[[bin]]
53+
name = "ast_fuzz_regex"
54+
path = "fuzz_targets/ast_fuzz_regex.rs"
55+
56+
[[bin]]
57+
name = "ast_fuzz_match_bytes"
58+
path = "fuzz_targets/ast_fuzz_match_bytes.rs"
59+
3760
[profile.release]
3861
opt-level = 3
3962
debug = true

‎fuzz/fuzz_targets/ast_fuzz_match.rs

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#![no_main]
2+
3+
use {
4+
libfuzzer_sys::fuzz_target, regex::RegexBuilder, regex_syntax::ast::Ast,
5+
};
6+
7+
/// Structured input for this fuzz target: a regex syntax tree and the text
/// to search. Both fields are produced by the derived `Arbitrary` impl.
#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
8+
struct FuzzData {
9+
// Syntax tree that the fuzz target prints back into a pattern string.
ast: Ast,
10+
// Text the compiled regex is run against.
haystack: String,
11+
}
12+
13+
/// Hand-written `Debug` that reports the AST through its `Display` output
/// (the printed pattern) instead of a structural dump of the tree.
impl std::fmt::Debug for FuzzData {
14+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
15+
let mut builder = f.debug_struct("FuzzData");
16+
// `{}` uses the AST's `Display` impl; the resulting `String` is what
// gets recorded under the "ast" field.
builder.field("ast", &format!("{}", self.ast));
17+
builder.field("haystack", &self.haystack);
18+
builder.finish()
19+
}
20+
}
21+
22+
// Fuzz entry point: print the generated AST as a pattern, compile it with
// `regex::RegexBuilder`, and run the search APIs over the haystack.
fuzz_target!(|data: FuzzData| {
23+
let pattern = format!("{}", data.ast);
24+
// Compilation is capped via `size_limit(1<<20)`; a pattern that fails to
// build is skipped rather than treated as a failure.
let Ok(re) = RegexBuilder::new(&pattern).size_limit(1<<20).build() else {
25+
return
26+
};
27+
// The results below are discarded; the calls are made only to exercise
// the three search entry points.
re.is_match(&data.haystack);
28+
re.find(&data.haystack);
29+
re.captures(&data.haystack).map_or(0, |c| c.len());
30+
});

‎fuzz/fuzz_targets/ast_fuzz_match_bytes.rs

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#![no_main]
2+
3+
use {
4+
libfuzzer_sys::fuzz_target, regex::bytes::RegexBuilder,
5+
regex_syntax::ast::Ast,
6+
};
7+
8+
/// Structured input for this fuzz target: a regex syntax tree and the bytes
/// to search. Both fields are produced by the derived `Arbitrary` impl.
#[derive(arbitrary::Arbitrary, Eq, PartialEq)]
9+
struct FuzzData {
10+
// Syntax tree that the fuzz target prints back into a pattern string.
ast: Ast,
11+
// Raw bytes the compiled regex is run against.
haystack: Vec<u8>,
12+
}
13+
14+
/// Hand-written `Debug` that reports the AST through its `Display` output
/// (the printed pattern) instead of a structural dump of the tree.
impl std::fmt::Debug for FuzzData {
15+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
16+
let mut builder = f.debug_struct("FuzzData");
17+
// `{}` uses the AST's `Display` impl; the resulting `String` is what
// gets recorded under the "ast" field.
builder.field("ast", &format!("{}", self.ast));
18+
builder.field("haystack", &self.haystack);
19+
builder.finish()
20+
}
21+
}
22+
23+
// Fuzz entry point: print the generated AST as a pattern, compile it with
// `regex::bytes::RegexBuilder`, and run the search APIs over the byte haystack.
fuzz_target!(|data: FuzzData| {
24+
let pattern = format!("{}", data.ast);
25+
// Compilation is capped via `size_limit(1<<20)`; a pattern that fails to
// build is skipped rather than treated as a failure.
let Ok(re) = RegexBuilder::new(&pattern).size_limit(1<<20).build() else {
26+
return
27+
};
28+
// The results below are discarded; the calls are made only to exercise
// the three search entry points.
re.is_match(&data.haystack);
29+
re.find(&data.haystack);
30+
re.captures(&data.haystack).map_or(0, |c| c.len());
31+
});

‎fuzz/fuzz_targets/ast_fuzz_regex.rs

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#![no_main]
2+
3+
use {
4+
libfuzzer_sys::fuzz_target, regex::RegexBuilder, regex_syntax::ast::Ast,
5+
};
6+
7+
/// Structured input for this fuzz target: a single regex syntax tree
/// produced by the derived `Arbitrary` impl.
#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
8+
struct FuzzData {
9+
// Syntax tree that the fuzz target prints back into a pattern string.
ast: Ast,
10+
}
11+
12+
/// Hand-written `Debug` that reports the AST through its `Display` output
/// (the printed pattern) instead of a structural dump of the tree.
impl std::fmt::Debug for FuzzData {
13+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
14+
let mut builder = f.debug_struct("FuzzData");
15+
// `{}` uses the AST's `Display` impl; the resulting `String` is what
// gets recorded under the "ast" field.
builder.field("ast", &format!("{}", self.ast));
16+
builder.finish()
17+
}
18+
}
19+
20+
// Fuzz entry point: print the generated AST as a pattern and attempt to
// compile it. No search is performed in this target.
fuzz_target!(|data: FuzzData| {
21+
let pattern = format!("{}", data.ast);
22+
// `.ok()` discards the build result, so both success and a build error
// are accepted outcomes here.
RegexBuilder::new(&pattern).size_limit(1 << 20).build().ok();
23+
});

‎fuzz/fuzz_targets/ast_roundtrip.rs

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#![no_main]
2+
3+
use {
4+
libfuzzer_sys::{fuzz_target, Corpus},
5+
regex_syntax::ast::{
6+
parse::Parser, visit, Ast, Flag, Group, GroupKind, SetFlags, Visitor,
7+
},
8+
};
9+
10+
/// Structured input for this fuzz target: a single regex syntax tree
/// produced by the derived `Arbitrary` impl.
#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
11+
struct FuzzData {
12+
// Syntax tree that the fuzz target prints and then parses back.
ast: Ast,
13+
}
14+
15+
/// Hand-written `Debug` that reports the AST through its `Display` output
/// (the printed pattern) instead of a structural dump of the tree.
impl std::fmt::Debug for FuzzData {
16+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
17+
let mut builder = f.debug_struct("FuzzData");
18+
// `{}` uses the AST's `Display` impl; the resulting `String` is what
// gets recorded under the "ast" field.
builder.field("ast", &format!("{}", self.ast));
19+
builder.finish()
20+
}
21+
}
22+
23+
/// Stateless AST visitor whose `Visitor` impl fails when it encounters a
/// flag set that turns on `Flag::IgnoreWhitespace`.
struct VerboseVisitor;
24+
25+
/// Walks an AST and reports `Err(())` as soon as a node enables the
/// `IgnoreWhitespace` flag; otherwise the traversal finishes with `Ok(())`.
impl Visitor for VerboseVisitor {
26+
type Output = ();
27+
type Err = ();
28+
29+
// Nothing is accumulated during the walk, so finishing always succeeds.
fn finish(self) -> Result<Self::Output, Self::Err> {
30+
Ok(())
31+
}
32+
33+
// Inspects a single node. Only two node shapes carry flags that are checked
// here: a standalone flags item (`Ast::Flags`) and a non-capturing group
// with inline flags. `flag_state` yields an `Option`, and a missing state is
// treated as "not enabled" via `unwrap_or(false)`.
fn visit_pre(&mut self, ast: &Ast) -> Result<Self::Output, Self::Err> {
34+
match ast {
35+
Ast::Flags(SetFlags { flags, .. })
36+
| Ast::Group(Group {
37+
kind: GroupKind::NonCapturing(flags), ..
38+
}) if flags
39+
.flag_state(Flag::IgnoreWhitespace)
40+
.unwrap_or(false) =>
41+
{
42+
Err(())
43+
}
44+
// Every other node is accepted.
_ => Ok(()),
45+
}
46+
}
47+
}
48+
49+
// Fuzz entry point: checks that parsing, printing, and parsing again yields
// an equal AST. The returned `Corpus` value tells libFuzzer whether the
// input should be kept.
fuzz_target!(|data: FuzzData| -> Corpus {
50+
let pattern = format!("{}", data.ast);
51+
// A generated AST may print to something the parser refuses; that is not
// a failure, and the input is still kept.
let Ok(ast) = Parser::new().parse(&pattern) else {
52+
return Corpus::Keep;
53+
};
54+
// Inputs that turn on `IgnoreWhitespace` are rejected before the
// round-trip comparison.
// NOTE(review): presumably because printing does not round-trip in that
// mode -- confirm.
if visit(&ast, VerboseVisitor).is_err() {
55+
return Corpus::Reject;
56+
}
57+
// Re-print the parsed AST and parse it again; `unwrap` panics (and so
// reports a finding) if the printed form does not parse.
let ast2 = Parser::new().parse(&ast.to_string()).unwrap();
58+
assert_eq!(
59+
ast,
60+
ast2,
61+
"Found difference:\
62+
\nleft: {:?}\
63+
\nright: {:?}\
64+
\nIf these two match, then there was a parsing difference; \
65+
maybe non-determinism?",
66+
ast.to_string(),
67+
ast2.to_string()
68+
);
69+
Corpus::Keep
70+
});

0 commit comments

Comments
 (0)
Please sign in to comment.