[fuzz] Add a new fuzz target using the meta-differential oracle

This change adds the `differential-meta` target to the list of fuzz targets. I expect that sometime soon this could replace the other `differential*` targets, as it checks almost everything those targets check. The major missing piece is that it currently only chooses single-instruction modules instead of also generating arbitrary modules using `wasm-smith`. This change also adds the concept of an ignorable error: some differential engines will choke on certain inputs (e.g., `wasmi` might have an old opcode mapping), and we do not want to flag those failures as fuzz bugs. We wrap such errors in `DiffIgnoreError` and use a new helper trait, `DiffIgnorable`, to downcast and inspect the `anyhow` error so that we panic only on non-ignorable errors; ignorable errors are converted to one of the `arbitrary::Error` variants, which we already ignore.
abrown · Aug 10, 2022 · b28b757 · b28b757
1 parent 7de4ab0
commit b28b757
Show file tree

Hide file tree

Showing 7 changed files with 186 additions and 15 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs
@@ -17,10 +17,13 @@ pub mod diff_wasmtime;
 pub mod dummy;
 pub mod engine;
 
-use crate::generators;
+use self::engine::DiffInstance;
+use crate::generators::{self, DiffValue};
 use arbitrary::Arbitrary;
 use log::debug;
 use std::cell::Cell;
+use std::collections::hash_map::DefaultHasher;
+use std::hash::Hasher;
 use std::rc::Rc;
 use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
 use std::sync::{Arc, Condvar, Mutex};
@@ -243,17 +246,18 @@ fn compile_module(
             if let generators::InstanceAllocationStrategy::Pooling { .. } =
                 &config.wasmtime.strategy
             {
-                // When using the pooling allocator, accept failures to compile when arbitrary
-                // table element limits have been exceeded as there is currently no way
-                // to constrain the generated module table types.
+                // When using the pooling allocator, accept failures to compile
+                // when arbitrary table element limits have been exceeded as
+                // there is currently no way to constrain the generated module
+                // table types.
                 let string = e.to_string();
                 if string.contains("minimum element size") {
                     return None;
                 }
 
                 // Allow modules-failing-to-compile which exceed the requested
                 // size for each instance. This is something that is difficult
-                // to control and ensure it always suceeds, so we simply have a
+                // to control and ensure it always succeeds, so we simply have a
                 // "random" instance size limit and if a module doesn't fit we
                 // move on to the next fuzz input.
                 if string.contains("instance allocation for this module requires") {
@@ -266,6 +270,10 @@ fn compile_module(
     }
 }
 
+// TODO: we should implement tracing versions of these dummy imports that record
+// a trace of the order that imported functions were called in and with what
+// values. Like the results of exported functions, calls to imports should also
+// yield the same values for each configuration, and we should assert that.
 fn instantiate_with_dummy(store: &mut Store<StoreLimits>, module: &Module) -> Option<Instance> {
     // Creation of imports can fail due to resource limit constraints, and then
     // instantiation can naturally fail for a number of reasons as well. Bundle
@@ -311,6 +319,38 @@ fn instantiate_with_dummy(store: &mut Store<StoreLimits>, module: &Module) -> Op
     panic!("failed to instantiate: {:?}", e);
 }
 
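+/// Evaluate `name` with `args` on both instances; assert equal results and, if both are hashable, equal instance hashes.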
+pub fn differential(
+    lhs: &mut dyn DiffInstance,
+    rhs: &mut dyn DiffInstance,
+    name: &str,
+    args: &[DiffValue],
+) -> anyhow::Result<()> {
+    log::debug!("Evaluating: {}({:?})", name, args);
+    let lhs_results = lhs.evaluate(name, args)?;
+    log::debug!(" -> results on {}: {:?}", lhs.name(), &lhs_results);
+    let rhs_results = rhs.evaluate(name, args)?;
+    log::debug!(" -> results on {}: {:?}", rhs.name(), &rhs_results);
+    assert_eq!(lhs_results, rhs_results);
+
+    let hash = |i: &mut dyn DiffInstance| -> anyhow::Result<u64> {
+        let mut hasher = DefaultHasher::new();
+        i.hash(&mut hasher)?;
+        Ok(hasher.finish())
+    };
+
+    if lhs.is_hashable() && rhs.is_hashable() {
+        log::debug!("Hashing instances:");
+        let lhs_hash = hash(lhs)?;
+        log::debug!(" -> hash of {}: {:?}", lhs.name(), lhs_hash);
+        let rhs_hash = hash(rhs)?;
+        log::debug!(" -> hash of {}: {:?}", rhs.name(), rhs_hash);
+        assert_eq!(lhs_hash, rhs_hash);
+    }
+
+    Ok(())
+}
+
 /// Instantiate the given Wasm module with each `Config` and call all of its
 /// exports. Modulo OOM, non-canonical NaNs, and usage of Wasm features that are
 /// or aren't enabled for different configs, we should get the same results when

diff --git a/crates/fuzzing/src/oracles/diff_wasmi.rs b/crates/fuzzing/src/oracles/diff_wasmi.rs
@@ -2,9 +2,11 @@
 
 use crate::generators::{DiffValue, ModuleFeatures};
 use crate::oracles::engine::{DiffEngine, DiffInstance};
-use anyhow::{bail, Context, Result};
+use anyhow::{bail, Context, Result, anyhow};
 use std::hash::Hash;
 
+use super::engine::DiffIgnoreError;
+
 /// A wrapper for `wasmi` as a [`DiffEngine`].
 pub struct WasmiEngine;
 
@@ -27,8 +29,12 @@ impl WasmiEngine {
 
 impl DiffEngine for WasmiEngine {
     fn instantiate(&self, wasm: &[u8]) -> Result<Box<dyn DiffInstance>> {
-        let module =
-            wasmi::Module::from_buffer(wasm).context("unable to validate module in wasmi")?;
+        let module = wasmi::Module::from_buffer(wasm).map_err(|e| match e {
+            // Ignore `wasmi` validation errors; some opcodes are not
+            // supported (TODO).
+            wasmi::Error::Validation(e) => anyhow!(DiffIgnoreError(e)),
+            e => anyhow!(e),
+        })?;
         let instance = wasmi::ModuleInstance::new(&module, &wasmi::ImportsBuilder::default())
             .context("unable to instantiate module in wasmi")?;
         let instance = instance.assert_no_start();

diff --git a/crates/fuzzing/src/oracles/diff_wasmtime.rs b/crates/fuzzing/src/oracles/diff_wasmtime.rs
@@ -32,11 +32,11 @@ impl WasmtimeEngine {
     pub fn arbitrary_with_features(
         u: &mut Unstructured<'_>,
         features: &ModuleFeatures,
-    ) -> Result<Box<Self>> {
+    ) -> arbitrary::Result<Box<Self>> {
         let mut config: generators::Config = u.arbitrary()?;
         config.set_differential_config();
         config.set_features(features);
-        WasmtimeEngine::new(&config)
+        Ok(WasmtimeEngine::new(&config).unwrap())
     }
 
     /// Construct a new Wasmtime engine with a randomly-generated configuration

diff --git a/crates/fuzzing/src/oracles/engine.rs b/crates/fuzzing/src/oracles/engine.rs
@@ -2,7 +2,6 @@
 
 use crate::generators::{DiffValue, ModuleFeatures};
 use crate::oracles::{diff_wasmi::WasmiEngine, diff_wasmtime::WasmtimeEngine};
-use anyhow::{bail, Ok};
 use arbitrary::Unstructured;
 use std::collections::hash_map::DefaultHasher;
 
@@ -13,7 +12,7 @@ pub fn choose(
     u: &mut Unstructured<'_>,
     features: &ModuleFeatures,
     wasmtime_engine: &WasmtimeEngine,
-) -> anyhow::Result<Box<dyn DiffEngine>> {
+) -> arbitrary::Result<Box<dyn DiffEngine>> {
     // Filter out any engines that cannot match the given configuration.
     let mut engines: Vec<Box<dyn DiffEngine>> = vec![];
     if let Result::Ok(e) = WasmtimeEngine::arbitrary_with_compatible_config(u, wasmtime_engine) {
@@ -32,7 +31,8 @@ pub fn choose(
         let index: usize = u.int_in_range(0..=engines.len() - 1)?;
         Ok(engines.swap_remove(index))
     } else {
-        bail!("no engines to pick from")
+        panic!("no engines to pick from");
+        // Err(arbitrary::Error::EmptyChoose)
     }
 }
 
@@ -66,6 +66,43 @@ pub trait DiffInstance {
     fn hash(&self, state: &mut DefaultHasher) -> anyhow::Result<()>;
 }
 
+/// For errors that we want to ignore (not fuzz bugs), we can wrap them in this
+/// structure.
+#[derive(Debug)]
+pub struct DiffIgnoreError(pub String);
+impl std::fmt::Display for DiffIgnoreError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "this error should be ignored by fuzzing: {}", self.0)
+    }
+}
+impl std::error::Error for DiffIgnoreError {}
+
+/// This trait adds a handy way to ignore [`DiffIgnoreError`] during fuzzing.
+pub trait DiffIgnorable<T> {
+    /// Like `Result::expect`, but ignores all [`DiffIgnoreError`]s by logging
+    /// the error and converting it to an `arbitrary` error.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the value is an `Err` but not a [`DiffIgnoreError`].
+    fn expect_or_ignore(self, message: &str) -> arbitrary::Result<T>;
+}
+impl <T> DiffIgnorable<T> for anyhow::Result<T> {
+    fn expect_or_ignore(self, message: &str) -> arbitrary::Result<T> {
+        match self {
+            Ok(t) => Ok(t),
+            Err(e) => {
+                if let Some(ignorable) = e.downcast_ref::<DiffIgnoreError>() {
+                    println!("ignoring error: {}", ignorable);
+                    Err(arbitrary::Error::IncorrectFormat)
+                } else {
+                    panic!("{}: {}", message, e);
+                }
+            }
+        }
+    }
+}
+
 /// Extract the signatures of any exported functions in a Wasm module.
 ///
 /// This is useful for evaluating each exported function with different values.

diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
@@ -48,6 +48,13 @@ path = "fuzz_targets/differential.rs"
 test = false
 doc = false
 
+[[bin]]
+name = "differential-meta"
+path = "fuzz_targets/differential_meta.rs"
+test = false
+doc = false
+
+
 [[bin]]
 name = "differential_spec"
 path = "fuzz_targets/differential_spec.rs"

diff --git a/fuzz/fuzz_targets/differential_meta.rs b/fuzz/fuzz_targets/differential_meta.rs
@@ -0,0 +1,81 @@
+#![no_main]
+
+use libfuzzer_sys::arbitrary::{Result, Unstructured};
+use libfuzzer_sys::fuzz_target;
+use wasmtime_fuzzing::generators::{DiffValue, SingleInstModule};
+use wasmtime_fuzzing::oracles::engine::{get_exported_function_signatures, DiffEngine, DiffIgnorable};
+use wasmtime_fuzzing::oracles::{diff_wasmtime, differential, engine};
+
+const NUM_INVOCATIONS: usize = 5;
+
+fuzz_target!(|data: &[u8]| {
+    // Errors from `run` mean `data` was too short or an engine error was
+    // deliberately ignored; neither indicates a bug, so we discard them.
+    drop(run(&data));
+});
+
+fn run(data: &[u8]) -> Result<()> {
+    let mut u = Unstructured::new(data);
+
+    // Generate the Wasm module. TODO eventually, this should pick between the
+    // single-instruction and wasm-smith modules, but currently the wasm-smith
+    // module generation will eat up all of the random data, leaving none for
+    // the remaining decisions that follow (e.g., choosing an engine, generating
+    // arguments).
+    let module: &SingleInstModule = u.arbitrary()?;
+    let wasm = module.to_bytes();
+    let features = module.to_features();
+
+    // Choose a right-hand side Wasm engine--this will always be Wasmtime. The
+    // order (execute `lhs` first, then `rhs`) is important because, in some
+    // cases (e.g., OCaml spec interpreter), both sides register signal
+    // handlers; Wasmtime uses these signal handlers for catching various
+    // WebAssembly failures. On certain OSes (e.g. Linux x86_64), the signal
+    // handlers interfere, observable as an uncaught `SIGSEGV`--not even caught
+    // by libFuzzer. By always running Wasmtime second, its signal handlers are
+    // registered most recently and they catch failures appropriately. We create
+    // `rhs` first, however, so we have the option of creating a compatible
+    // Wasmtime engine (e.g., pooling allocator memory differences).
+    let rhs = diff_wasmtime::WasmtimeEngine::arbitrary_with_features(&mut u, &features)?;
+
+    // Choose a left-hand side Wasm engine.
+    let lhs = engine::choose(&mut u, &features, &rhs)?;
+
+    // Instantiate each engine and try each exported function with various
+    // values.
+    let mut lhs_instance = lhs
+        .instantiate(&module.to_bytes()).expect_or_ignore("failed to instantiate `lhs` module")?;
+    let mut rhs_instance = rhs
+        .instantiate(&module.to_bytes())
+        .expect("failed to instantiate `rhs` module");
+    for (name, signature) in get_exported_function_signatures(&wasm)
+        .expect("failed to extract exported function signatures")
+    {
+        let mut invocations = 0;
+        loop {
+            let arguments = signature
+                .params
+                .iter()
+                .map(|&t| DiffValue::arbitrary_of_type(&mut u, t.into()))
+                .collect::<Result<Vec<_>>>()?;
+            differential(
+                lhs_instance.as_mut(),
+                rhs_instance.as_mut(),
+                &name,
+                &arguments,
+            )
+            .expect("failed to run differential evaluation");
+
+            // We evaluate the same function with different arguments until we
+            // hit a predetermined limit or we run out of unstructured data--it
+            // does not make sense to re-evaluate the same arguments over and
+            // over.
+            invocations += 1;
+            if invocations > NUM_INVOCATIONS || u.is_empty() {
+                break;
+            }
+        }
+    }
+
+    Ok(())
+}