Skip to content

Commit

Permalink
Unicode-preserving mutators (#1542)
Browse files Browse the repository at this point in the history
* create the string classification stage

* modify API to pre-group

* preserving mutator

* more meaningful test

* subproperty mutators + some fixes

* document, finalise, integrate with libafl_libfuzzer

* add example, fix for weird range select

* fix for introspection

* fix fuzzer build

* speed optimisation: allow, but do not require, stacking

* property => category

* token replacement

* fixup: rare case where rust does not agree on valid character

* fix CI again

* again again

* take two: dynamic unicode discovery

* oops

* fix: last byte is never selected

* opt: bias to smaller unicode categories

* fix test

* opt: precompute regions and fix tests

* cache and allow stacking

* document and update libafl_libfuzzer

* oops, use reverse

* fix bolts clippy error

* fixup part 2

* clippy

* part 2

* clippy warning allow

* clippy complaint

* use alloc not std

---------

Co-authored-by: toka <[email protected]>
  • Loading branch information
addisoncrump and tokatoka committed Nov 21, 2023
1 parent 0018b7c commit b6f0f78
Show file tree
Hide file tree
Showing 16 changed files with 1,037 additions and 14 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ jobs:
run: command -v llvm-config-15 && clang-15 -v
- name: Add nightly rustfmt and clippy
run: rustup toolchain install nightly --component rustfmt --component clippy --component miri --allow-downgrade
- name: Install ucd-generate
run: cargo install -f ucd-generate
- uses: actions/checkout@v3
- uses: Swatinem/rust-cache@v2

Expand Down Expand Up @@ -135,6 +137,8 @@ jobs:
run: command -v llvm-config-15 && clang-15 -v
- name: Install cargo-hack
run: curl -LsSf https://github.com/taiki-e/cargo-hack/releases/latest/download/cargo-hack-x86_64-unknown-linux-gnu.tar.gz | tar xzf - -C ~/.cargo/bin
- name: Install ucd-generate
run: cargo install -f ucd-generate
- name: Add nightly
run: rustup toolchain install nightly --allow-downgrade
- uses: actions/checkout@v3
Expand Down Expand Up @@ -222,6 +226,8 @@ jobs:
- name: Install cxxbridge
if: runner.os == 'macOS'
run: cargo install cxxbridge-cmd
- name: Install ucd-generate
run: cargo install -f ucd-generate
- name: Install python (macOS)
# Removing macOS things already installed in CI against failed linking
if: runner.os == 'macOS'
Expand Down Expand Up @@ -384,6 +390,8 @@ jobs:
toolchain: stable
- name: Add nightly rustfmt and clippy
run: rustup toolchain install nightly --component rustfmt --component clippy --allow-downgrade
- name: Install ucd-generate
run: cargo install -f ucd-generate
- name: Install deps
run: brew install z3 gtk+3
- name: Install cxxbridge
Expand Down Expand Up @@ -453,6 +461,7 @@ jobs:
freebsd-version
. "$HOME/.cargo/env"
rustup toolchain install nightly
cargo install -f ucd-generate
export LLVM_CONFIG=/usr/local/bin/llvm-config16
pwd
ls -lah
Expand Down
1 change: 1 addition & 0 deletions fuzzers/baby_fuzzer_unicode/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
libpng-*
24 changes: 24 additions & 0 deletions fuzzers/baby_fuzzer_unicode/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[package]
name = "baby_fuzzer_unicode"
version = "0.10.0"
authors = ["Andrea Fioraldi <[email protected]>", "Dominik Maier <[email protected]>"]
edition = "2021"

[features]
default = ["std"]
tui = []
std = []

[profile.dev]
panic = "abort"

[profile.release]
panic = "abort"
lto = true
codegen-units = 1
opt-level = 3
debug = true

[dependencies]
libafl = { path = "../../libafl/", features = ["unicode"] }
libafl_bolts = { path = "../../libafl_bolts/" }
15 changes: 15 additions & 0 deletions fuzzers/baby_fuzzer_unicode/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Baby fuzzer: unicode

This is a minimalistic example of how to create a LibAFL-based fuzzer.

It runs on a single core until a crash occurs and then exits.

The tested program is a simple Rust function without any instrumentation.
For real fuzzing, you will want to add some sort of coverage or other feedback.

You can run this example using `cargo run`, and you can enable the TUI feature by running `cargo run --features tui`.

## Unicode

This fuzzer uses mutators which preserve unicode properties. For programs which have string-heavy inputs, you may
consider using the same strategy.
138 changes: 138 additions & 0 deletions fuzzers/baby_fuzzer_unicode/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#[cfg(windows)]
use std::ptr::write_volatile;
use std::{path::PathBuf, ptr::write};

#[cfg(feature = "tui")]
use libafl::monitors::tui::{ui::TuiUI, TuiMonitor};
#[cfg(not(feature = "tui"))]
use libafl::monitors::SimpleMonitor;
use libafl::{
corpus::{InMemoryCorpus, OnDiskCorpus},
events::SimpleEventManager,
executors::{inprocess::InProcessExecutor, ExitKind},
feedbacks::{CrashFeedback, MaxMapFeedback},
fuzzer::{Fuzzer, StdFuzzer},
inputs::{BytesInput, HasTargetBytes},
mutators::{StdScheduledMutator, StringCategoryRandMutator, StringSubcategoryRandMutator},
observers::StdMapObserver,
schedulers::QueueScheduler,
stages::{mutational::StdMutationalStage, StringIdentificationStage},
state::StdState,
Evaluator,
};
use libafl_bolts::{current_nanos, rands::StdRand, tuples::tuple_list, AsSlice};

/// Coverage map with explicit assignments due to the lack of instrumentation
static mut SIGNALS: [u8; 64] = [0; 64];
/// Raw pointer to the first byte of `SIGNALS`; `signals_set` writes through it and `main`
/// hands it to the map observer
static mut SIGNALS_PTR: *mut u8 = unsafe { SIGNALS.as_mut_ptr() };

/// Record a hit at position `idx` of the signals map
fn signals_set(idx: usize) {
    // SAFETY: `SIGNALS_PTR` is initialised from `SIGNALS.as_mut_ptr()`; the caller is
    // responsible for keeping `idx` inside the map.
    unsafe {
        let slot = SIGNALS_PTR.add(idx);
        write(slot, 1);
    }
}

/// Runs the baby unicode fuzzer: builds the harness, state, monitor, executor and stages by
/// hand, then runs the fuzzing loop with the string category/subcategory mutators.
#[allow(clippy::similar_names, clippy::manual_assert)]
pub fn main() {
// The closure that we want to fuzz: it counts how many leading bytes of the input
// match `goal` and sets one signal per matching byte
let mut harness = |input: &BytesInput| {
let target = input.target_bytes();
let buf = target.as_slice();
let goal = b"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
let mut i = 0;
for _ in buf.iter().zip(goal).take_while(|(b, c)| b == c) {
signals_set(i);
i += 1;
}
// The whole goal was matched: trigger the artificial bug
if i == goal.len() {
#[cfg(unix)]
panic!("Artificial bug triggered =)");

// On Windows the bug is a volatile write to the null pointer instead of a panic
#[cfg(windows)]
unsafe {
write_volatile(0 as *mut u32, 0);
}
}
ExitKind::Ok
};

// Create an observation channel using the signals map
let observer = unsafe { StdMapObserver::from_mut_ptr("signals", SIGNALS_PTR, SIGNALS.len()) };

// Feedback to rate the interestingness of an input
let mut feedback = MaxMapFeedback::new(&observer);

// A feedback to choose if an input is a solution or not
let mut objective = CrashFeedback::new();

// create a State from scratch
let mut state = StdState::new(
// RNG
StdRand::with_seed(current_nanos()),
// Corpus that will be evolved, we keep it in memory for performance
InMemoryCorpus::new(),
// Corpus in which we store solutions (crashes in this example),
// on disk so the user can get them after stopping the fuzzer
OnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(),
// States of the feedbacks.
// The feedbacks can report the data that should persist in the State.
&mut feedback,
// Same for objective feedbacks
&mut objective,
)
.unwrap();

// The Monitor trait defines how the fuzzer stats are displayed to the user
#[cfg(not(feature = "tui"))]
let mon = SimpleMonitor::new(|s| println!("{s}"));
#[cfg(feature = "tui")]
let ui = TuiUI::with_version(String::from("Baby Fuzzer"), String::from("0.0.1"), false);
#[cfg(feature = "tui")]
let mon = TuiMonitor::new(ui);

// The event manager handles the various events generated during the fuzzing loop
// such as the notification of the addition of a new item to the corpus
let mut mgr = SimpleEventManager::new(mon);

// A queue policy to get testcases from the corpus
let scheduler = QueueScheduler::new();

// A fuzzer with feedbacks and a corpus scheduler
let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);

// Create the executor for an in-process function with just one observer
let mut executor = InProcessExecutor::new(
&mut harness,
tuple_list!(observer),
&mut fuzzer,
&mut state,
&mut mgr,
)
.expect("Failed to create the Executor");

// Evaluate one initial input consisting of the single byte `a`
fuzzer
.evaluate_input(
&mut state,
&mut executor,
&mut mgr,
BytesInput::new(vec![b'a']),
)
.unwrap();

// Set up a scheduled mutator built from the unicode-preserving string mutators.
// NOTE(review): the subcategory mutator is listed four times, presumably to make it
// more likely to be picked than the category mutator - confirm.
let mutator = StdScheduledMutator::new(tuple_list!(
StringCategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator
));
// The string identification stage is placed before the mutational stage.
// NOTE(review): presumably it produces the string metadata the mutators rely on - confirm.
let mut stages = tuple_list!(
StringIdentificationStage::new(),
StdMutationalStage::transforming(mutator)
);

fuzzer
.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)
.expect("Error in the fuzzing loop");
}
9 changes: 8 additions & 1 deletion libafl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ concolic_mutation = ["z3"]
## Enable the fancy TuiMonitor for a terminal UI using crossterm
tui_monitor = ["ratatui", "crossterm"]

## Enables `StringIdentificationStage` and associated mutators, which allow for mutations which preserve the Unicode property data
unicode = ["libafl_bolts/alloc", "ahash/std", "serde/rc", "bitvec"]


#! ## LibAFL-Bolts Features

Expand Down Expand Up @@ -129,7 +132,9 @@ agpl = ["nautilus"]
nautilus = ["grammartec", "std", "serde_json/std"]

[build-dependencies]
reqwest = { version = "0.11", features = ["blocking"] }
rustversion = "1.0"
zip = "0.6"

[dev-dependencies]
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
Expand Down Expand Up @@ -175,7 +180,9 @@ z3 = { version = "0.12.0", features = ["static-link-z3"], optional = true } # fo
pyo3 = { version = "0.18", optional = true, features = ["serde", "macros"] }
concat-idents = { version = "1.1.3", optional = true }

libcasr = { version = "2.7", optional = true}
libcasr = { version = "2.7", optional = true }

bitvec = { version = "1.0", optional = true, features = ["serde"] } # used for string range storage

# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
serial_test = { version = "2", optional = true, default-features = false, features = ["logging"] }
Expand Down
59 changes: 57 additions & 2 deletions libafl/build.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,69 @@
use std::error::Error;

/// Build-script entry point used when compiling with a nightly toolchain.
///
/// Emits the `nightly` cfg and, when the `unicode` feature is enabled, generates the Unicode
/// category tables through `build_unicode_property_map`.
#[rustversion::nightly]
fn main() {
fn main() -> Result<(), Box<dyn Error>> {
// Ask cargo to re-run this script when build.rs changes
println!("cargo:rerun-if-changed=build.rs");
// Make `cfg(nightly)` available to the crate being built
println!("cargo:rustc-cfg=nightly");
#[cfg(feature = "unicode")]
{
build_unicode_property_map()?;
}
Ok(())
}

/// Build-script entry point used when compiling with a non-nightly toolchain.
///
/// Rejects configurations that need nightly and, when the `unicode` feature is enabled,
/// generates the Unicode category tables through `build_unicode_property_map`.
#[rustversion::not(nightly)]
fn main() {
fn main() -> Result<(), Box<dyn Error>> {
// Ask cargo to re-run this script when build.rs changes
println!("cargo:rerun-if-changed=build.rs");
// The assertion only holds when neither the `docrs` cfg nor the `nautilus` feature is set.
// NOTE(review): the message mentions only `nautilus`, yet `docrs` alone also trips it - confirm
// that this is intended.
assert!(
cfg!(all(not(docrs), not(feature = "nautilus"))),
"The 'nautilus' feature of libafl requires a nightly compiler"
);
#[cfg(feature = "unicode")]
{
build_unicode_property_map()?;
}
Ok(())
}

/// Downloads the latest Unicode Character Database and generates the general-category tables.
///
/// The UCD archive is fetched from unicode.org, unpacked into `$OUT_DIR/ucd-dir`, and handed to
/// the external `ucd-generate` tool, whose standard output is written to
/// `$OUT_DIR/unicode_categories.rs`.
///
/// # Errors
///
/// Returns an error if the download fails or answers with a non-success HTTP status, if the
/// archive cannot be written or unpacked, or if `ucd-generate` cannot be started or exits with a
/// failure status.
///
/// # Panics
///
/// Panics if `OUT_DIR` is not set, which cargo always does for build scripts.
#[cfg(feature = "unicode")]
fn build_unicode_property_map() -> Result<(), Box<dyn Error>> {
    use std::{
        env,
        fs::{self, File},
        path::PathBuf,
        process::{Command, Stdio},
    };

    let out_dir =
        PathBuf::from(env::var_os("OUT_DIR").expect("cargo sets OUT_DIR for build scripts"));
    let ucd_dir = out_dir.join("ucd-dir");
    let generated_file = out_dir.join("unicode_categories.rs");

    fs::create_dir_all(&ucd_dir)?;

    // Reject 4xx/5xx answers up front: otherwise an HTML error page would be stored as
    // `ucd.zip` and only fail later with a misleading "invalid zip archive" error.
    let archive = reqwest::blocking::get("https://www.unicode.org/Public/zipped/latest/UCD.zip")?
        .error_for_status()?
        .bytes()?;

    // The body is already fully buffered in memory, so it can be written out in one go.
    let zip_path = ucd_dir.join("ucd.zip");
    fs::write(&zip_path, &archive)?;
    drop(archive);

    let mut zip_file = zip::ZipArchive::new(File::open(&zip_path)?)?;
    zip_file.extract(&ucd_dir)?;
    drop(zip_file);

    // The archive is no longer needed once its contents are on disk.
    fs::remove_file(zip_path)?;

    let status = Command::new("ucd-generate")
        .arg("general-category")
        .arg(ucd_dir.as_os_str())
        .stdout(Stdio::from(File::create(generated_file)?))
        .status()
        .map_err(|e| {
            format!(
                "failed to run `ucd-generate` (install it with `cargo install ucd-generate`): {e}"
            )
        })?;
    if !status.success() {
        return Err(format!("`ucd-generate general-category` failed with {status}").into());
    }

    Ok(())
}
5 changes: 5 additions & 0 deletions libafl/src/mutators/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ pub use grimoire::*;
pub mod tuneable;
pub use tuneable::*;

#[cfg(feature = "unicode")]
pub mod string;
#[cfg(feature = "unicode")]
pub use string::*;

#[cfg(feature = "nautilus")]
pub mod nautilus;
use alloc::vec::Vec;
Expand Down
Loading

0 comments on commit b6f0f78

Please sign in to comment.