fuzz: add AST-driven fuzz targets

This patch only touches the fuzz/ crate:

  * fuzz/.gitignore: additionally ignores `coverage`.
  * fuzz/Cargo.toml: adds `arbitrary` (feature `derive`) and `regex-syntax`
    (feature `arbitrary`) as dependencies, expands `authors`, and registers
    four new [[bin]] targets.
  * ast_fuzz_regex: renders an arbitrary `Ast` to a pattern string and tries
    to build a `Regex` with a size limit of 1 << 20; build errors are
    discarded.
  * ast_fuzz_match / ast_fuzz_match_bytes: same build step, then runs
    `is_match`, `find` and `captures` against an arbitrary `String` /
    `Vec<u8>` haystack. Patterns that fail to build are skipped.
  * ast_roundtrip: renders an arbitrary `Ast`, parses it, renders and parses
    it again, and asserts that both parsed ASTs are equal. Inputs whose
    parsed AST turns on the `IgnoreWhitespace` flag (via a flags item or a
    non-capturing group) are rejected from the corpus.

NOTE(review): generic arguments that had been stripped from this patch were
restored (`Vec<u8>` in ast_fuzz_match_bytes.rs, the `Result<..>` return types
in ast_roundtrip.rs). Indentation was re-created; confirm it against the
upstream files.
NOTE(review): the `authors` strings in fuzz/Cargo.toml end with a trailing
space, which suggests `<email>` parts were stripped as well. They could not be
reconstructed here; restore them from the upstream file before applying.
NOTE(review): the `index` blob ids are copied unchanged and were not
recomputed.

diff --git a/fuzz/.gitignore b/fuzz/.gitignore
index a0925114d..1a45eee77 100644
--- a/fuzz/.gitignore
+++ b/fuzz/.gitignore
@@ -1,3 +1,4 @@
 target
 corpus
 artifacts
+coverage
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index eabe9691d..3fd2b8a3a 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -1,7 +1,12 @@
 [package]
 name = "regex-fuzz"
 version = "0.0.0"
-authors = ["David Korczynski "]
+authors = [
+  "The Rust Project Developers",
+  "David Korczynski ",
+  "Addison Crump ",
+  "Andrew Gallant ",
+]
 publish = false
 edition = "2021"
 
@@ -9,10 +14,12 @@ edition = "2021"
 cargo-fuzz = true
 
 [dependencies]
+arbitrary = { version = "1.3.0", features = ["derive"] }
 libfuzzer-sys = { version = "0.4.1", features = ["arbitrary-derive"] }
 regex = { path = ".." }
 regex-automata = { path = "../regex-automata" }
 regex-lite = { path = "../regex-lite" }
+regex-syntax = { path = "../regex-syntax", features = ["arbitrary"] }
 
 # Prevent this from interfering with workspaces
 [workspace]
@@ -34,6 +41,22 @@ path = "fuzz_targets/fuzz_regex_automata_deserialize_dense_dfa.rs"
 name = "fuzz_regex_automata_deserialize_sparse_dfa"
 path = "fuzz_targets/fuzz_regex_automata_deserialize_sparse_dfa.rs"
 
+[[bin]]
+name = "ast_roundtrip"
+path = "fuzz_targets/ast_roundtrip.rs"
+
+[[bin]]
+name = "ast_fuzz_match"
+path = "fuzz_targets/ast_fuzz_match.rs"
+
+[[bin]]
+name = "ast_fuzz_regex"
+path = "fuzz_targets/ast_fuzz_regex.rs"
+
+[[bin]]
+name = "ast_fuzz_match_bytes"
+path = "fuzz_targets/ast_fuzz_match_bytes.rs"
+
 [profile.release]
 opt-level = 3
 debug = true
diff --git a/fuzz/fuzz_targets/ast_fuzz_match.rs b/fuzz/fuzz_targets/ast_fuzz_match.rs
new file mode 100644
index 000000000..4565244cc
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_fuzz_match.rs
@@ -0,0 +1,30 @@
+#![no_main]
+
+use {
+    libfuzzer_sys::fuzz_target, regex::RegexBuilder, regex_syntax::ast::Ast,
+};
+
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+    haystack: String,
+}
+
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &format!("{}", self.ast));
+        builder.field("haystack", &self.haystack);
+        builder.finish()
+    }
+}
+
+fuzz_target!(|data: FuzzData| {
+    let pattern = format!("{}", data.ast);
+    let Ok(re) = RegexBuilder::new(&pattern).size_limit(1<<20).build() else {
+        return
+    };
+    re.is_match(&data.haystack);
+    re.find(&data.haystack);
+    re.captures(&data.haystack).map_or(0, |c| c.len());
+});
diff --git a/fuzz/fuzz_targets/ast_fuzz_match_bytes.rs b/fuzz/fuzz_targets/ast_fuzz_match_bytes.rs
new file mode 100644
index 000000000..3f586d04f
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_fuzz_match_bytes.rs
@@ -0,0 +1,31 @@
+#![no_main]
+
+use {
+    libfuzzer_sys::fuzz_target, regex::bytes::RegexBuilder,
+    regex_syntax::ast::Ast,
+};
+
+#[derive(arbitrary::Arbitrary, Eq, PartialEq)]
+struct FuzzData {
+    ast: Ast,
+    haystack: Vec<u8>,
+}
+
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &format!("{}", self.ast));
+        builder.field("haystack", &self.haystack);
+        builder.finish()
+    }
+}
+
+fuzz_target!(|data: FuzzData| {
+    let pattern = format!("{}", data.ast);
+    let Ok(re) = RegexBuilder::new(&pattern).size_limit(1<<20).build() else {
+        return
+    };
+    re.is_match(&data.haystack);
+    re.find(&data.haystack);
+    re.captures(&data.haystack).map_or(0, |c| c.len());
+});
diff --git a/fuzz/fuzz_targets/ast_fuzz_regex.rs b/fuzz/fuzz_targets/ast_fuzz_regex.rs
new file mode 100644
index 000000000..c4ba7e0a9
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_fuzz_regex.rs
@@ -0,0 +1,23 @@
+#![no_main]
+
+use {
+    libfuzzer_sys::fuzz_target, regex::RegexBuilder, regex_syntax::ast::Ast,
+};
+
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+}
+
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &format!("{}", self.ast));
+        builder.finish()
+    }
+}
+
+fuzz_target!(|data: FuzzData| {
+    let pattern = format!("{}", data.ast);
+    RegexBuilder::new(&pattern).size_limit(1 << 20).build().ok();
+});
diff --git a/fuzz/fuzz_targets/ast_roundtrip.rs b/fuzz/fuzz_targets/ast_roundtrip.rs
new file mode 100644
index 000000000..90e6b5099
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_roundtrip.rs
@@ -0,0 +1,70 @@
+#![no_main]
+
+use {
+    libfuzzer_sys::{fuzz_target, Corpus},
+    regex_syntax::ast::{
+        parse::Parser, visit, Ast, Flag, Group, GroupKind, SetFlags, Visitor,
+    },
+};
+
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+}
+
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &format!("{}", self.ast));
+        builder.finish()
+    }
+}
+
+struct VerboseVisitor;
+
+impl Visitor for VerboseVisitor {
+    type Output = ();
+    type Err = ();
+
+    fn finish(self) -> Result<Self::Output, Self::Err> {
+        Ok(())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> Result<(), Self::Err> {
+        match ast {
+            Ast::Flags(SetFlags { flags, .. })
+            | Ast::Group(Group {
+                kind: GroupKind::NonCapturing(flags), ..
+            }) if flags
+                .flag_state(Flag::IgnoreWhitespace)
+                .unwrap_or(false) =>
+            {
+                Err(())
+            }
+            _ => Ok(()),
+        }
+    }
+}
+
+fuzz_target!(|data: FuzzData| -> Corpus {
+    let pattern = format!("{}", data.ast);
+    let Ok(ast) = Parser::new().parse(&pattern) else {
+        return Corpus::Keep;
+    };
+    if visit(&ast, VerboseVisitor).is_err() {
+        return Corpus::Reject;
+    }
+    let ast2 = Parser::new().parse(&ast.to_string()).unwrap();
+    assert_eq!(
+        ast,
+        ast2,
+        "Found difference:\
+         \nleft: {:?}\
+         \nright: {:?}\
+         \nIf these two match, then there was a parsing difference; \
+         maybe non-determinism?",
+        ast.to_string(),
+        ast2.to_string()
+    );
+    Corpus::Keep
+});