Skip to content

Commit

Permalink
fuzz: add syntactic structurally aware fuzzers
Browse files Browse the repository at this point in the history
This makes use of the new 'arbitrary' feature in 'regex-syntax' to make
fuzzing much more targeted and complete.

Closes #848
  • Loading branch information
addisoncrump authored and BurntSushi committed May 24, 2023
1 parent 06f3ec2 commit a83bdd2
Show file tree
Hide file tree
Showing 6 changed files with 179 additions and 1 deletion.
1 change: 1 addition & 0 deletions fuzz/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
target
corpus
artifacts
coverage
25 changes: 24 additions & 1 deletion fuzz/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
[package]
name = "regex-fuzz"
version = "0.0.0"
authors = ["David Korczynski <[email protected]>"]
authors = [
"The Rust Project Developers",
"David Korczynski <[email protected]>",
"Addison Crump <[email protected]>",
"Andrew Gallant <[email protected]>",
]
publish = false
edition = "2021"

[package.metadata]
cargo-fuzz = true

[dependencies]
arbitrary = { version = "1.3.0", features = ["derive"] }
libfuzzer-sys = { version = "0.4.1", features = ["arbitrary-derive"] }
regex = { path = ".." }
regex-automata = { path = "../regex-automata" }
regex-lite = { path = "../regex-lite" }
regex-syntax = { path = "../regex-syntax", features = ["arbitrary"] }

# Prevent this from interfering with workspaces
[workspace]
Expand All @@ -34,6 +41,22 @@ path = "fuzz_targets/fuzz_regex_automata_deserialize_dense_dfa.rs"
name = "fuzz_regex_automata_deserialize_sparse_dfa"
path = "fuzz_targets/fuzz_regex_automata_deserialize_sparse_dfa.rs"

[[bin]]
name = "ast_roundtrip"
path = "fuzz_targets/ast_roundtrip.rs"

[[bin]]
name = "ast_fuzz_match"
path = "fuzz_targets/ast_fuzz_match.rs"

[[bin]]
name = "ast_fuzz_regex"
path = "fuzz_targets/ast_fuzz_regex.rs"

[[bin]]
name = "ast_fuzz_match_bytes"
path = "fuzz_targets/ast_fuzz_match_bytes.rs"

[profile.release]
opt-level = 3
debug = true
Expand Down
30 changes: 30 additions & 0 deletions fuzz/fuzz_targets/ast_fuzz_match.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#![no_main]

use {
libfuzzer_sys::fuzz_target, regex::RegexBuilder, regex_syntax::ast::Ast,
};

/// Structured input for this fuzz target.
///
/// Instances are produced from the raw fuzzer bytes through the derived
/// `arbitrary::Arbitrary` implementation. Field order is kept as-is because
/// the derive consumes input in declaration order.
#[derive(arbitrary::Arbitrary, PartialEq, Eq)]
struct FuzzData {
    /// Syntax tree that gets printed back into a pattern string.
    ast: Ast,
    /// Text the compiled regex is run against.
    haystack: String,
}

impl std::fmt::Debug for FuzzData {
    /// Formats the input as a `FuzzData { ast, haystack }` debug struct.
    ///
    /// The `ast` field is shown as the string produced by the AST's
    /// `Display` impl, not as the AST's own structure.
    // NOTE(review): presumably so a crash report shows the pattern text
    // rather than a deep tree dump; confirm intent.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let printed_ast = self.ast.to_string();
        f.debug_struct("FuzzData")
            .field("ast", &printed_ast)
            .field("haystack", &self.haystack)
            .finish()
    }
}

// Fuzz entry point: print the generated AST into a pattern, try to compile
// it with `size_limit(1 << 20)`, and, if compilation succeeds, run the three
// search routines over the haystack. Patterns that fail to build are
// skipped. All search results are discarded.
fuzz_target!(|data: FuzzData| {
    let FuzzData { ast, haystack } = data;
    let compiled = RegexBuilder::new(&ast.to_string())
        .size_limit(1 << 20)
        .build();
    if let Ok(regex) = compiled {
        let _ = regex.is_match(&haystack);
        let _ = regex.find(&haystack);
        let _ = regex.captures(&haystack).map_or(0, |caps| caps.len());
    }
});
31 changes: 31 additions & 0 deletions fuzz/fuzz_targets/ast_fuzz_match_bytes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#![no_main]

use {
libfuzzer_sys::fuzz_target, regex::bytes::RegexBuilder,
regex_syntax::ast::Ast,
};

/// Structured input for the byte-oriented fuzz target.
///
/// Instances are produced from the raw fuzzer bytes through the derived
/// `arbitrary::Arbitrary` implementation. Field order is kept as-is because
/// the derive consumes input in declaration order.
#[derive(arbitrary::Arbitrary, PartialEq, Eq)]
struct FuzzData {
    /// Syntax tree that gets printed back into a pattern string.
    ast: Ast,
    /// Bytes the compiled regex is run against.
    haystack: Vec<u8>,
}

impl std::fmt::Debug for FuzzData {
    /// Formats the input as a `FuzzData { ast, haystack }` debug struct.
    ///
    /// The `ast` field is shown as the string produced by the AST's
    /// `Display` impl; `haystack` uses the byte vector's own `Debug` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let printed_ast = self.ast.to_string();
        f.debug_struct("FuzzData")
            .field("ast", &printed_ast)
            .field("haystack", &self.haystack)
            .finish()
    }
}

// Fuzz entry point for `regex::bytes`: print the generated AST into a
// pattern, try to compile it with `size_limit(1 << 20)`, and, if compilation
// succeeds, run the three search routines over the byte haystack. Patterns
// that fail to build are skipped. All search results are discarded.
fuzz_target!(|data: FuzzData| {
    let FuzzData { ast, haystack } = data;
    let compiled = RegexBuilder::new(&ast.to_string())
        .size_limit(1 << 20)
        .build();
    if let Ok(regex) = compiled {
        let _ = regex.is_match(&haystack);
        let _ = regex.find(&haystack);
        let _ = regex.captures(&haystack).map_or(0, |caps| caps.len());
    }
});
23 changes: 23 additions & 0 deletions fuzz/fuzz_targets/ast_fuzz_regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#![no_main]

use {
libfuzzer_sys::fuzz_target, regex::RegexBuilder, regex_syntax::ast::Ast,
};

/// Structured input for the compile-only fuzz target.
///
/// Instances are produced from the raw fuzzer bytes through the derived
/// `arbitrary::Arbitrary` implementation.
#[derive(arbitrary::Arbitrary, PartialEq, Eq)]
struct FuzzData {
    /// Syntax tree that gets printed back into a pattern string.
    ast: Ast,
}

impl std::fmt::Debug for FuzzData {
    /// Formats the input as a `FuzzData { ast }` debug struct, with `ast`
    /// shown as the string produced by the AST's `Display` impl.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let printed_ast = self.ast.to_string();
        f.debug_struct("FuzzData").field("ast", &printed_ast).finish()
    }
}

// Fuzz entry point: print the generated AST into a pattern and attempt to
// compile it with `size_limit(1 << 20)`. Only the act of building is
// exercised; both the compiled regex and any build error are dropped.
fuzz_target!(|data: FuzzData| {
    let printed = data.ast.to_string();
    let outcome = RegexBuilder::new(&printed).size_limit(1 << 20).build();
    let _ = outcome.ok();
});
70 changes: 70 additions & 0 deletions fuzz/fuzz_targets/ast_roundtrip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#![no_main]

use {
libfuzzer_sys::{fuzz_target, Corpus},
regex_syntax::ast::{
parse::Parser, visit, Ast, Flag, Group, GroupKind, SetFlags, Visitor,
},
};

/// Structured input for the print/parse round-trip fuzz target.
///
/// Instances are produced from the raw fuzzer bytes through the derived
/// `arbitrary::Arbitrary` implementation.
#[derive(arbitrary::Arbitrary, PartialEq, Eq)]
struct FuzzData {
    /// Syntax tree whose printed form is fed back to the parser.
    ast: Ast,
}

impl std::fmt::Debug for FuzzData {
    /// Formats the input as a `FuzzData { ast }` debug struct, with `ast`
    /// shown as the string produced by the AST's `Display` impl.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let printed_ast = self.ast.to_string();
        f.debug_struct("FuzzData").field("ast", &printed_ast).finish()
    }
}

/// AST visitor that fails as soon as it reaches a node whose flags report
/// `Flag::IgnoreWhitespace` as enabled.
///
/// It carries no state and yields `()` on success; the only information it
/// conveys is whether the walk completed (`Ok`) or was aborted (`Err`).
struct VerboseVisitor;

impl Visitor for VerboseVisitor {
    type Output = ();
    type Err = ();

    /// Nothing is accumulated during the walk, so finishing always succeeds.
    fn finish(self) -> Result<Self::Output, Self::Err> {
        Ok(())
    }

    /// Inspects each node before its children are visited.
    ///
    /// Only two node shapes carry flags that are examined here: a
    /// free-standing flag-setting item (`Ast::Flags`) and a non-capturing
    /// group (`GroupKind::NonCapturing`). Every other node passes through.
    ///
    /// Returns `Err(())` when `flag_state(Flag::IgnoreWhitespace)` yields
    /// `Some(true)` for such a node, and `Ok(())` otherwise.
    fn visit_pre(&mut self, ast: &Ast) -> Result<Self::Output, Self::Err> {
        let flags = match ast {
            Ast::Flags(SetFlags { flags, .. }) => flags,
            Ast::Group(Group {
                kind: GroupKind::NonCapturing(flags),
                ..
            }) => flags,
            _ => return Ok(()),
        };
        // A missing flag (`None`) is treated the same as a disabled one.
        if flags.flag_state(Flag::IgnoreWhitespace).unwrap_or(false) {
            return Err(());
        }
        Ok(())
    }
}

// Fuzz entry point: checks that printing a parsed AST and parsing the result
// again yields an equal AST.
//
// Steps:
//   1. Print the generated AST and parse that text. If it does not parse,
//      there is nothing to compare and the input is kept.
//   2. Walk the parsed AST with `VerboseVisitor`; if the walk fails, the
//      input is rejected from the corpus.
//      NOTE(review): presumably ignore-whitespace patterns are excluded
//      because their printed form is not expected to round-trip; confirm.
//   3. Print the parsed AST, parse it again (this parse must succeed), and
//      assert that both ASTs are equal.
fuzz_target!(|data: FuzzData| -> Corpus {
    let printed = data.ast.to_string();
    let first = match Parser::new().parse(&printed) {
        Ok(parsed) => parsed,
        Err(_) => return Corpus::Keep,
    };
    if visit(&first, VerboseVisitor).is_err() {
        return Corpus::Reject;
    }
    let reprinted = first.to_string();
    let second = Parser::new().parse(&reprinted).unwrap();
    assert_eq!(
        first,
        second,
        "Found difference:\
         \nleft: {:?}\
         \nright: {:?}\
         \nIf these two match, then there was a parsing difference; \
         maybe non-determinism?",
        first.to_string(),
        second.to_string()
    );
    Corpus::Keep
});

0 comments on commit a83bdd2

Please sign in to comment.