diff --git a/Cargo.lock b/Cargo.lock index 01dbccdd546a..d68b8db21918 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -288,6 +288,27 @@ dependencies = [ "winapi", ] +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "either" version = "1.13.0" @@ -892,6 +913,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scanner" +version = "0.0.0" +dependencies = [ + "csv", + "serde", + "strum", + "strum_macros", +] + [[package]] name = "scopeguard" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index 1b4933c5bdcf..68b5bcc20ff3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ members = [ "library/std", "tools/compiletest", "tools/build-kani", + "tools/scanner", "kani-driver", "kani-compiler", "kani_metadata", diff --git a/scripts/std-analysis.sh b/scripts/std-analysis.sh new file mode 100755 index 000000000000..87ac991cb00d --- /dev/null +++ b/scripts/std-analysis.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT + +# Collect some metrics related to the crates that compose the standard library. +# +# Files generated so far: +# +# - ${crate}_scan_overall.csv: Summary of function metrics, such as safe vs unsafe. +# - ${crate}_scan_input_tys.csv: Detailed information about the inputs' type of each +# function found in this crate. +# +# How we collect metrics: +# +# - Compile the standard library using the `scan` tool to collect some metrics.
+# - After compilation, move all CSV files that were generated by the scanner, +# to the results folder. +set -eu + +# Test for platform +PLATFORM=$(uname -sp) +if [[ $PLATFORM == "Linux x86_64" ]] +then + TARGET="x86_64-unknown-linux-gnu" + # 'env' necessary to avoid bash built-in 'time' + WRAPPER="env time -v" +elif [[ $PLATFORM == "Darwin i386" ]] +then + TARGET="x86_64-apple-darwin" + # mac 'time' doesn't have -v + WRAPPER="time" +elif [[ $PLATFORM == "Darwin arm" ]] +then + TARGET="aarch64-apple-darwin" + # mac 'time' doesn't have -v + WRAPPER="time" +else + echo + echo "Std-Lib codegen regression only works on Linux or OSX x86 platforms, skipping..." + echo + exit 0 +fi + +# Get Kani root +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +KANI_DIR=$(dirname "$SCRIPT_DIR") + +echo "-------------------------------------------------------" +echo "-- Starting analysis of the Rust standard library... --" +echo "-------------------------------------------------------" + +echo "-- Build scanner" +cd $KANI_DIR +cargo build -p scanner + +echo "-- Build std" +cd /tmp +if [ -d std_lib_analysis ] +then + rm -rf std_lib_analysis +fi +cargo new std_lib_analysis --lib +cd std_lib_analysis + +echo ' +pub fn dummy() { +} +' > src/lib.rs + +# Use same nightly toolchain used to build Kani +cp ${KANI_DIR}/rust-toolchain.toml . 
+ +export RUST_BACKTRACE=1 +export RUSTC_LOG=error + +RUST_FLAGS=( + "-Cpanic=abort" + "-Zalways-encode-mir" +) +export RUSTFLAGS="${RUST_FLAGS[@]}" +export RUSTC="$KANI_DIR/target/debug/scan" +# Compile rust with our extension +$WRAPPER cargo build --verbose -Z build-std --lib --target $TARGET + +echo "-- Process results" + +# Move files to results folder +results=/tmp/std_lib_analysis/results +mkdir $results +find /tmp/std_lib_analysis/target -name "*.csv" -exec mv {} $results \; + +# Create a summary table +summary=$results/summary.csv + +# write header +echo -n "crate," > $summary +tr -d "[:digit:],;" < $results/alloc_scan_overall.csv \ + | tr -s '\n' ',' >> $summary +echo "" >> $summary + +# write body +for f in $results/*overall.csv; do + # Join all crate summaries into one table + fname=$(basename $f) + crate=${fname%_scan_overall.csv} + echo -n "$crate," >> $summary + tr -d [:alpha:]_,; < $f | tr -s '\n' ',' \ + >> $summary + echo "" >> $summary +done + +echo "-------------------------------------------------------" +echo "Finished analysis successfully..." 
+echo "- See results at ${results}"
+echo "-------------------------------------------------------"
diff --git a/tests/perf/s2n-quic b/tests/perf/s2n-quic
index 71f8d9f5aafb..2d5e891f3fdc 160000
--- a/tests/perf/s2n-quic
+++ b/tests/perf/s2n-quic
@@ -1 +1 @@
-Subproject commit 71f8d9f5aafbf59f31ad85eeb7b4b67a7564a685
+Subproject commit 2d5e891f3fdc8a88b2d457baceedea5751efaa0d
diff --git a/tests/script-based-pre/tool-scanner/config.yml b/tests/script-based-pre/tool-scanner/config.yml
new file mode 100644
index 000000000000..6fd2895971a4
--- /dev/null
+++ b/tests/script-based-pre/tool-scanner/config.yml
@@ -0,0 +1,4 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+script: scanner-test.sh
+expected: scanner-test.expected
diff --git a/tests/script-based-pre/tool-scanner/scanner-test.expected b/tests/script-based-pre/tool-scanner/scanner-test.expected
new file mode 100644
index 000000000000..c8f9af0ef1b7
--- /dev/null
+++ b/tests/script-based-pre/tool-scanner/scanner-test.expected
@@ -0,0 +1,6 @@
+2 test_scan_fn_loops.csv
+16 test_scan_functions.csv
+5 test_scan_input_tys.csv
+14 test_scan_overall.csv
+3 test_scan_recursion.csv
+5 test_scan_unsafe_ops.csv
diff --git a/tests/script-based-pre/tool-scanner/scanner-test.sh b/tests/script-based-pre/tool-scanner/scanner-test.sh
new file mode 100755
index 000000000000..2cd5a33a3f8e
--- /dev/null
+++ b/tests/script-based-pre/tool-scanner/scanner-test.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+
+set -e
+
+# Run this inside a tmp folder in the current directory
+OUT_DIR=output_dir
+# Ensure output folder is clean
+rm -rf "${OUT_DIR}"
+mkdir "${OUT_DIR}"
+# Copy the original source to the output folder since the CSV files are written next to it
+cp test.rs "${OUT_DIR}"
+pushd "${OUT_DIR}"
+
+cargo run -p scanner test.rs --crate-type lib
+wc -l *csv
+
+popd
+rm -rf "${OUT_DIR}"
diff --git a/tests/script-based-pre/tool-scanner/test.rs b/tests/script-based-pre/tool-scanner/test.rs
new file mode 100644
index 000000000000..24b346e535b5
--- /dev/null
+++ b/tests/script-based-pre/tool-scanner/test.rs
@@ -0,0 +1,77 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+//
+//! Sanity check for the utility tool `scanner`.
+
+pub fn check_outer_coercion() {
+    assert!(false);
+}
+
+unsafe fn do_nothing() {}
+
+pub fn generic<T: Default>() -> T {
+    unsafe { do_nothing() };
+    T::default()
+}
+
+pub struct RecursiveType {
+    pub inner: Option<*const RecursiveType>,
+}
+
+pub enum RecursiveEnum {
+    Base,
+    Recursion(Box<RecursiveEnum>),
+    RefCell(std::cell::RefCell<f32>),
+}
+
+pub fn recursive_type(input1: RecursiveType, input2: RecursiveEnum) {
+    let _ = (input1, input2);
+}
+
+pub fn with_iterator(input: &[usize]) -> usize {
+    input
+        .iter()
+        .copied()
+        .find(|e| *e == 0)
+        .unwrap_or_else(|| input.iter().fold(0, |acc, i| acc + 1))
+}
+
+static mut COUNTER: Option<usize> = Some(0);
+static OK: bool = true;
+
+pub unsafe fn next_id() -> usize {
+    let sum = COUNTER.unwrap() + 1;
+    COUNTER = Some(sum);
+    sum
+}
+
+pub unsafe fn current_id() -> usize {
+    COUNTER.unwrap()
+}
+
+pub fn ok() -> bool {
+    OK
+}
+
+pub unsafe fn raw_to_ref<'a, T>(raw: *const T) -> &'a T {
+    &*raw
+}
+
+pub fn recursion_begin(stop: bool) {
+    if !stop {
+        recursion_tail()
+    }
+}
+
+pub fn recursion_tail() {
+    recursion_begin(false);
+    not_recursive();
+}
+
+pub fn start_recursion() {
+    recursion_begin(true);
+}
+
+pub fn not_recursive() {
+    let _ = ok();
+}
diff --git a/tools/scanner/Cargo.toml b/tools/scanner/Cargo.toml
new file mode 100644
index 000000000000..edbd330bea47
--- /dev/null
+++ b/tools/scanner/Cargo.toml
@@ -0,0 +1,23 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+
+
+[package]
+name = "scanner"
+description = "A rustc extension used to scan rust features in a crate"
+version = "0.0.0"
+edition = "2021"
+license = "MIT OR Apache-2.0"
+publish = false
+
+[dependencies]
+csv = "1.3"
+serde = {version = "1", features = ["derive"]}
+strum = "0.26"
+strum_macros = "0.26"
+
+[package.metadata.rust-analyzer]
+# This crate uses rustc crates.
+# More info: https://github.com/rust-analyzer/rust-analyzer/pull/7891
+rustc_private = true
+
diff --git a/tools/scanner/build.rs b/tools/scanner/build.rs
new file mode 100644
index 000000000000..775a0f507a45
--- /dev/null
+++ b/tools/scanner/build.rs
@@ -0,0 +1,26 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use std::env;
+use std::path::PathBuf;
+
+macro_rules! path_str {
+    ($input:expr) => {
+        String::from(
+            $input
+                .iter()
+                .collect::<PathBuf>()
+                .to_str()
+                .unwrap_or_else(|| panic!("Invalid path {}", stringify!($input))),
+        )
+    };
+}
+
+/// Configure the compiler to properly link the scanner binary with rustc's library.
+pub fn main() {
+    // Add rustup to the rpath in order to properly link with the correct rustc version.
+    let rustup_home = env::var("RUSTUP_HOME").expect("RUSTUP_HOME must be set (build via rustup)");
+    let rustup_tc = env::var("RUSTUP_TOOLCHAIN").expect("RUSTUP_TOOLCHAIN must be set by rustup");
+    let rustup_lib = path_str!([&rustup_home, "toolchains", &rustup_tc, "lib"]);
+    println!("cargo:rustc-link-arg-bin=scan=-Wl,-rpath,{rustup_lib}");
+}
diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs
new file mode 100644
index 000000000000..c376af9662f8
--- /dev/null
+++ b/tools/scanner/src/analysis.rs
@@ -0,0 +1,629 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! Provide different static analyses to be performed in the crate under compilation
+
+use crate::info;
+use csv::WriterBuilder;
+use serde::{ser::SerializeStruct, Serialize, Serializer};
+use stable_mir::mir::mono::Instance;
+use stable_mir::mir::visit::{Location, PlaceContext, PlaceRef};
+use stable_mir::mir::{
+    Body, MirVisitor, Mutability, ProjectionElem, Safety, Terminator, TerminatorKind,
+};
+use stable_mir::ty::{AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
+use stable_mir::visitor::{Visitable, Visitor};
+use stable_mir::{CrateDef, CrateItem};
+use std::collections::{HashMap, HashSet};
+use std::ops::ControlFlow;
+use std::path::{Path, PathBuf};
+
+#[derive(Clone, Debug)]
+pub struct OverallStats {
+    /// The key and value of each counter.
+    counters: Vec<(&'static str, usize)>,
+    /// TODO: Group stats per function.
+    fn_stats: HashMap<CrateItem, FnStats>,
+}
+
+#[derive(Clone, Debug, Serialize)]
+struct FnStats {
+    name: String,
+    is_unsafe: Option<bool>,
+    has_unsafe_ops: Option<bool>,
+    has_unsupported_input: Option<bool>,
+    has_loop: Option<bool>,
+}
+
+impl FnStats {
+    fn new(fn_item: CrateItem) -> FnStats {
+        FnStats {
+            name: fn_item.name(),
+            is_unsafe: None,
+            has_unsafe_ops: None,
+            has_unsupported_input: None,
+            // TODO: Implement this.
+            has_loop: None,
+        }
+    }
+}
+
+impl OverallStats {
+    pub fn new() -> OverallStats {
+        let all_items = stable_mir::all_local_items();
+        let fn_stats: HashMap<_, _> = all_items
+            .into_iter()
+            .filter_map(|item| item.ty().kind().is_fn().then_some((item, FnStats::new(item))))
+            .collect();
+        let counters = vec![("total_fns", fn_stats.len())];
+        OverallStats { counters, fn_stats }
+    }
+
+    pub fn store_csv(&self, base_path: PathBuf, file_stem: &str) {
+        let filename = format!("{}_overall", file_stem);
+        let mut out_path = base_path.parent().map_or(PathBuf::default(), Path::to_path_buf);
+        out_path.set_file_name(filename);
+        dump_csv(out_path, &self.counters);
+
+        let filename = format!("{}_functions", file_stem);
+        let mut out_path = base_path.parent().map_or(PathBuf::default(), Path::to_path_buf);
+        out_path.set_file_name(filename);
+        dump_csv(out_path, &self.fn_stats.values().collect::<Vec<_>>());
+    }
+
+    /// Iterate over all functions defined in this crate and log generic vs monomorphic.
+    pub fn generic_fns(&mut self) {
+        let all_items = stable_mir::all_local_items();
+        let fn_items =
+            all_items.into_iter().filter(|item| item.ty().kind().is_fn()).collect::<Vec<_>>();
+        let (mono_fns, generics) = fn_items
+            .iter()
+            .partition::<Vec<_>, _>(|fn_item| Instance::try_from(**fn_item).is_ok());
+        self.counters
+            .extend_from_slice(&[("generic_fns", generics.len()), ("mono_fns", mono_fns.len())]);
+    }
+
+    /// Iterate over all functions defined in this crate and log safe vs unsafe.
+    pub fn safe_fns(&mut self, _base_filename: PathBuf) {
+        let all_items = stable_mir::all_local_items();
+        let (unsafe_fns, safe_fns) = all_items
+            .into_iter()
+            .filter_map(|item| {
+                let kind = item.ty().kind();
+                if !kind.is_fn() {
+                    return None;
+                };
+                let fn_sig = kind.fn_sig().unwrap();
+                let is_unsafe = fn_sig.skip_binder().safety == Safety::Unsafe;
+                self.fn_stats.get_mut(&item).unwrap().is_unsafe = Some(is_unsafe);
+                Some((item, is_unsafe))
+            })
+            .partition::<Vec<_>, _>(|(_, is_unsafe)| *is_unsafe);
+        self.counters
+            .extend_from_slice(&[("safe_fns", safe_fns.len()), ("unsafe_fns", unsafe_fns.len())]);
+    }
+
+    /// Iterate over all functions defined in this crate and log the ones with unsupported inputs.
+    pub fn supported_inputs(&mut self, filename: PathBuf) {
+        let all_items = stable_mir::all_local_items();
+        let (supported, unsupported) = all_items
+            .into_iter()
+            .filter_map(|item| {
+                let kind = item.ty().kind();
+                if !kind.is_fn() {
+                    return None;
+                };
+                let fn_sig = kind.fn_sig().unwrap();
+                let props = FnInputProps::new(item.name()).collect(fn_sig.skip_binder().inputs());
+                self.fn_stats.get_mut(&item).unwrap().has_unsupported_input =
+                    Some(!props.is_supported());
+                Some(props)
+            })
+            .partition::<Vec<_>, _>(|props| props.is_supported());
+        self.counters.extend_from_slice(&[
+            ("supported_inputs", supported.len()),
+            ("unsupported_inputs", unsupported.len()),
+        ]);
+        dump_csv(filename, &unsupported);
+    }
+
+    /// Iterate over all functions defined in this crate and log any unsafe operation.
+    pub fn unsafe_operations(&mut self, filename: PathBuf) {
+        let all_items = stable_mir::all_local_items();
+        let (has_unsafe, no_unsafe) = all_items
+            .into_iter()
+            .filter_map(|item| {
+                let kind = item.ty().kind();
+                if !kind.is_fn() {
+                    return None;
+                };
+                let unsafe_ops = FnUnsafeOperations::new(item.name()).collect(&item.body());
+                let fn_sig = kind.fn_sig().unwrap();
+                let is_unsafe = fn_sig.skip_binder().safety == Safety::Unsafe;
+                self.fn_stats.get_mut(&item).unwrap().has_unsafe_ops =
+                    Some(unsafe_ops.has_unsafe());
+                Some((is_unsafe, unsafe_ops))
+            })
+            .partition::<Vec<_>, _>(|(_, props)| props.has_unsafe());
+        self.counters.extend_from_slice(&[
+            ("has_unsafe_ops", has_unsafe.len()),
+            ("no_unsafe_ops", no_unsafe.len()),
+            ("safe_abstractions", has_unsafe.iter().filter(|(is_unsafe, _)| !is_unsafe).count()),
+        ]);
+        dump_csv(filename, &has_unsafe.into_iter().map(|(_, props)| props).collect::<Vec<_>>());
+    }
+
+    /// Iterate over all functions defined in this crate and log any loop / "hidden" loop.
+    ///
+    /// A hidden loop is a call to an iterator function that has a loop inside.
+    pub fn loops(&mut self, filename: PathBuf) {
+        let all_items = stable_mir::all_local_items();
+        let (has_loops, no_loops) = all_items
+            .into_iter()
+            .filter_map(|item| {
+                let kind = item.ty().kind();
+                if !kind.is_fn() {
+                    return None;
+                };
+                Some(FnLoops::new(item.name()).collect(&item.body()))
+            })
+            .partition::<Vec<_>, _>(|props| props.has_loops());
+        self.counters
+            .extend_from_slice(&[("has_loops", has_loops.len()), ("no_loops", no_loops.len())]);
+        dump_csv(filename, &has_loops);
+    }
+
+    /// Create a call graph for this crate and try to find recursive calls.
+    pub fn recursion(&mut self, filename: PathBuf) {
+        let all_items = stable_mir::all_local_items();
+        let recursions = Recursion::collect(&all_items);
+        self.counters.extend_from_slice(&[
+            ("with_recursion", recursions.with_recursion.len()),
+            ("recursive_fns", recursions.recursive_fns.len()),
+        ]);
+        dump_csv(
+            filename,
+            &recursions
+                .with_recursion
+                .iter()
+                .map(|def| {
+                    (
+                        def.name(),
+                        if recursions.recursive_fns.contains(def) { "recursive" } else { "" },
+                    )
+                })
+                .collect::<Vec<_>>(),
+        );
+    }
+}
+
+macro_rules! fn_props {
+    ($(#[$attr:meta])*
+    struct $name:ident {
+        $(
+            $(#[$prop_attr:meta])*
+            $prop:ident,
+        )+
+    }) => {
+        #[derive(Debug)]
+        struct $name {
+            fn_name: String,
+            $($(#[$prop_attr])* $prop: usize,)+
+        }
+
+        impl $name {
+            const fn num_props() -> usize {
+                [$(stringify!($prop),)+].len()
+            }
+
+            fn new(fn_name: String) -> Self {
+                Self { fn_name, $($prop: 0,)+}
+            }
+        }
+
+        /// Need to manually implement this, since CSV serializer does not support map (i.e.: flatten).
+        impl Serialize for $name {
+            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+            where
+                S: Serializer,
+            {
+                let mut state = serializer.serialize_struct(stringify!($name), Self::num_props() + 1)?;
+                state.serialize_field("fn_name", &self.fn_name)?;
+                $(state.serialize_field(stringify!($prop), &self.$prop)?;)+
+                state.end()
+            }
+        }
+    };
+}
+
+fn_props! {
+    struct FnInputProps {
+        boxes,
+        closures,
+        coroutines,
+        floats,
+        fn_defs,
+        fn_ptrs,
+        generics,
+        interior_muts,
+        raw_ptrs,
+        recursive_types,
+        mut_refs,
+        simd,
+        unions,
+    }
+}
+
+impl FnInputProps {
+    pub fn collect(mut self, inputs: &[Ty]) -> FnInputProps {
+        for input in inputs {
+            let mut visitor = TypeVisitor { metrics: &mut self, visited: HashSet::new() };
+            let _ = visitor.visit_ty(input);
+        }
+        self
+    }
+
+    pub fn is_supported(&self) -> bool {
+        (self.closures
+            + self.coroutines
+            + self.floats
+            + self.fn_defs
+            + self.fn_ptrs
+            + self.interior_muts
+            + self.raw_ptrs
+            + self.recursive_types
+            + self.mut_refs)
+            == 0
+    }
+}
+
+struct TypeVisitor<'a> {
+    metrics: &'a mut FnInputProps,
+    visited: HashSet<Ty>,
+}
+
+impl<'a> TypeVisitor<'a> {
+    pub fn visit_variants(&mut self, def: AdtDef, _args: &GenericArgs) -> ControlFlow<()> {
+        for variant in def.variants_iter() {
+            for field in variant.fields() {
+                self.visit_ty(&field.ty())?
+            }
+        }
+        ControlFlow::Continue(())
+    }
+}
+
+impl<'a> Visitor for TypeVisitor<'a> {
+    type Break = ();
+
+    fn visit_ty(&mut self, ty: &Ty) -> ControlFlow<Self::Break> {
+        if self.visited.contains(ty) {
+            self.metrics.recursive_types += 1;
+            ControlFlow::Continue(())
+        } else {
+            self.visited.insert(*ty);
+            let kind = ty.kind();
+            match kind {
+                TyKind::Alias(..) => {}
+                TyKind::Param(_) => self.metrics.generics += 1,
+                TyKind::RigidTy(rigid) => match rigid {
+                    RigidTy::Coroutine(..) => self.metrics.coroutines += 1,
+                    RigidTy::Closure(..) => self.metrics.closures += 1,
+                    RigidTy::FnDef(..) => self.metrics.fn_defs += 1,
+                    RigidTy::FnPtr(..) => self.metrics.fn_ptrs += 1,
+                    RigidTy::Float(..) => self.metrics.floats += 1,
+                    RigidTy::RawPtr(..) => self.metrics.raw_ptrs += 1,
+                    RigidTy::Ref(_, _, Mutability::Mut) => self.metrics.mut_refs += 1,
+                    RigidTy::Adt(def, args) => match def.kind() {
+                        AdtKind::Union => self.metrics.unions += 1,
+                        _ => {
+                            let name = def.name();
+                            if def.is_box() {
+                                self.metrics.boxes += 1;
+                            } else if name.ends_with("UnsafeCell") {
+                                self.metrics.interior_muts += 1;
+                            } else {
+                                self.visit_variants(def, &args)?;
+                            }
+                        }
+                    },
+                    _ => {}
+                },
+                kind => unreachable!("Expected rigid type, but found: {kind:?}"),
+            }
+            ty.super_visit(self)
+        }
+    }
+}
+
+fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
+    out_path.set_extension("csv");
+    info(format!("Write file: {out_path:?}"));
+    let mut writer = WriterBuilder::new().delimiter(b';').from_path(&out_path).unwrap();
+    for d in data {
+        writer.serialize(d).unwrap();
+    }
+}
+
+fn_props! {
+    struct FnUnsafeOperations {
+        inline_assembly,
+        /// Dereference a raw pointer.
+        /// This is also counted when we access a static variable since it gets translated to a raw pointer.
+        unsafe_dereference,
+        /// Call an unsafe function or method.
+        unsafe_call,
+        /// Access or modify a mutable static variable.
+        unsafe_static_access,
+        /// Access fields of unions.
+        unsafe_union_access,
+    }
+}
+
+impl FnUnsafeOperations {
+    pub fn collect(self, body: &Body) -> FnUnsafeOperations {
+        let mut visitor = BodyVisitor { props: self, body };
+        visitor.visit_body(body);
+        visitor.props
+    }
+
+    pub fn has_unsafe(&self) -> bool {
+        (self.inline_assembly
+            + self.unsafe_static_access
+            + self.unsafe_dereference
+            + self.unsafe_union_access
+            + self.unsafe_call)
+            > 0
+    }
+}
+
+struct BodyVisitor<'a> {
+    props: FnUnsafeOperations,
+    body: &'a Body,
+}
+
+impl<'a> MirVisitor for BodyVisitor<'a> {
+    fn visit_terminator(&mut self, term: &Terminator, location: Location) {
+        match &term.kind {
+            TerminatorKind::Call { func, .. } => {
+                let fn_sig = func.ty(self.body.locals()).unwrap().kind().fn_sig().unwrap();
+                if fn_sig.value.safety == Safety::Unsafe {
+                    self.props.unsafe_call += 1;
+                }
+            }
+            TerminatorKind::InlineAsm { .. } => self.props.inline_assembly += 1,
+            _ => { /* safe */ }
+        }
+        self.super_terminator(term, location)
+    }
+
+    fn visit_projection_elem(
+        &mut self,
+        place: PlaceRef,
+        elem: &ProjectionElem,
+        ptx: PlaceContext,
+        location: Location,
+    ) {
+        match elem {
+            ProjectionElem::Deref => {
+                if place.ty(self.body.locals()).unwrap().kind().is_raw_ptr() {
+                    self.props.unsafe_dereference += 1;
+                }
+            }
+            ProjectionElem::Field(_, ty) => {
+                if ty.kind().is_union() {
+                    self.props.unsafe_union_access += 1;
+                }
+            }
+            ProjectionElem::Downcast(_) => {}
+            ProjectionElem::OpaqueCast(_) => {}
+            ProjectionElem::Subtype(_) => {}
+            ProjectionElem::Index(_)
+            | ProjectionElem::ConstantIndex { .. }
+            | ProjectionElem::Subslice { .. } => { /* safe */ }
+        }
+        self.super_projection_elem(elem, ptx, location)
+    }
+
+    fn visit_mir_const(&mut self, constant: &MirConst, location: Location) {
+        if constant.ty().kind().is_raw_ptr() {
+            self.props.unsafe_static_access += 1;
+        }
+        self.super_mir_const(constant, location)
+    }
+}
+
+fn_props! {
+    struct FnLoops {
+        iterators,
+        nested_loops,
+        /// TODO: Collect loops.
+        loops,
+    }
+}
+
+impl FnLoops {
+    pub fn collect(self, body: &Body) -> FnLoops {
+        let mut visitor = IteratorVisitor { props: self, body };
+        visitor.visit_body(body);
+        visitor.props
+    }
+
+    pub fn has_loops(&self) -> bool {
+        (self.iterators + self.loops + self.nested_loops) > 0
+    }
+}
+
+/// Try to find hidden loops by looking for calls to Iterator functions that have a loop in them.
+///
+/// Note that this will not find a loop, if the iterator is called inside a closure.
+/// Run with -C opt-level 2 to help with this issue (i.e.: inline).
+struct IteratorVisitor<'a> { + props: FnLoops, + body: &'a Body, +} + +impl<'a> MirVisitor for IteratorVisitor<'a> { + fn visit_terminator(&mut self, term: &Terminator, location: Location) { + if let TerminatorKind::Call { func, .. } = &term.kind { + let kind = func.ty(self.body.locals()).unwrap().kind(); + if let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind { + let fullname = def.name(); + let names = fullname.split("::").collect::>(); + if let [.., s_last, last] = names.as_slice() { + if *s_last == "Iterator" + && [ + "for_each", + "collect", + "advance_by", + "all", + "any", + "partition", + "partition_in_place", + "fold", + "try_fold", + "spec_fold", + "spec_try_fold", + "try_for_each", + "for_each", + "try_reduce", + "reduce", + "find", + "find_map", + "try_find", + "position", + "rposition", + "nth", + "count", + "last", + "find", + ] + .contains(last) + { + self.props.iterators += 1; + } + } + } + } + self.super_terminator(term, location) + } +} + +#[derive(Debug, Default)] +struct Recursion { + /// Collect the functions that may lead to a recursion loop. + /// I.e., for the following control flow graph: + /// ```dot + /// A -> B + /// B -> C + /// C -> [B, D] + /// ``` + /// this field value would contain A, B, and C since they can all lead to a recursion. + with_recursion: HashSet, + /// Collect the functions that are part of a recursion loop. + /// For the following control flow graph: + /// ```dot + /// A -> [B, C] + /// B -> B + /// C -> D + /// D -> [C, E] + /// ``` + /// The recursive functions would be B, C, and D. 
+ recursive_fns: HashSet, +} + +impl Recursion { + pub fn collect<'a>(items: impl IntoIterator) -> Recursion { + let call_graph = items + .into_iter() + .filter_map(|item| { + if let TyKind::RigidTy(RigidTy::FnDef(def, _)) = item.ty().kind() { + let body = item.body(); + let mut visitor = FnCallVisitor { body: &body, fns: vec![] }; + visitor.visit_body(&body); + Some((def, visitor.fns)) + } else { + None + } + }) + .collect::>(); + let mut recursions = Recursion::default(); + recursions.analyze(call_graph); + recursions + } + + /// DFS post-order traversal to collect all loops in our control flow graph. + /// We only include direct call recursions which can only happen within a crate. + /// + /// # How it works + /// + /// Given a call graph, [(fn_def, [fn_def]*)]*, enqueue all existing nodes together with the + /// graph distance. + /// Keep track of the current path and the visiting status of each node. + /// For those that we have visited once, store whether a loop is reachable from them. + fn analyze(&mut self, call_graph: HashMap>) { + #[derive(Copy, Clone, PartialEq, Eq)] + enum Status { + ToVisit, + Visiting, + Visited, + } + let mut visit_status = HashMap::::new(); + let mut queue: Vec<_> = call_graph.keys().map(|node| (*node, 0)).collect(); + let mut path: Vec = vec![]; + while let Some((next, level)) = queue.last().copied() { + match visit_status.get(&next).unwrap_or(&Status::ToVisit) { + Status::ToVisit => { + assert_eq!(path.len(), level); + path.push(next); + visit_status.insert(next, Status::Visiting); + let next_level = level + 1; + if let Some(callees) = call_graph.get(&next) { + queue.extend(callees.iter().map(|callee| (*callee, next_level))); + } + } + Status::Visiting => { + if level < path.len() { + // We have visited all callees in this node. + visit_status.insert(next, Status::Visited); + path.pop(); + } else { + // Found a loop. 
+ let mut in_loop = false; + for def in &path { + in_loop |= *def == next; + if in_loop { + self.recursive_fns.insert(*def); + } + self.with_recursion.insert(*def); + } + } + queue.pop(); + } + Status::Visited => { + queue.pop(); + if self.with_recursion.contains(&next) { + self.with_recursion.extend(&path); + } + } + } + } + } +} + +struct FnCallVisitor<'a> { + body: &'a Body, + fns: Vec, +} + +impl<'a> MirVisitor for FnCallVisitor<'a> { + fn visit_terminator(&mut self, term: &Terminator, location: Location) { + if let TerminatorKind::Call { func, .. } = &term.kind { + let kind = func.ty(self.body.locals()).unwrap().kind(); + if let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind { + self.fns.push(def); + } + } + self.super_terminator(term, location) + } +} diff --git a/tools/scanner/src/bin/scan.rs b/tools/scanner/src/bin/scan.rs new file mode 100644 index 000000000000..92b5319ec780 --- /dev/null +++ b/tools/scanner/src/bin/scan.rs @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Modifications Copyright Kani Contributors +// See GitHub history for details. + +// This is a modified version of project-stable-mir `test-drive` +// + +//! Provide a binary that can be used as a replacement to rustc. +//! +//! Besides executing the regular compilation, this binary will run a few static analyses. +//! +//! The result for each analysis will be stored in a file with the same prefix as an object file, +//! together with the name of the analysis. +//! +//! Look at each analysis documentation to see which files an analysis produces. + +use scanner::run_all; +use std::process::ExitCode; + +// ---- Arguments that should be parsed by the test-driver (w/ "scan" prefix) +/// Enable verbose mode. +const VERBOSE_ARG: &str = "--scan-verbose"; + +/// This is a wrapper that can be used to replace rustc. 
+fn main() -> ExitCode {
+    let args = std::env::args();
+    let (scan_args, rustc_args): (Vec<String>, _) = args.partition(|arg| arg.starts_with("--scan"));
+    let verbose = scan_args.contains(&VERBOSE_ARG.to_string());
+    run_all(rustc_args, verbose)
+}
diff --git a/tools/scanner/src/lib.rs b/tools/scanner/src/lib.rs
new file mode 100644
index 000000000000..7f9555781ccf
--- /dev/null
+++ b/tools/scanner/src/lib.rs
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+//
+// Modifications Copyright Kani Contributors
+// See GitHub history for details.
+
+// This is a modified version of project-stable-mir `test-drive`
+//
+
+//! This library provides different ways of scanning a crate.
+
+#![feature(rustc_private)]
+
+mod analysis;
+
+extern crate rustc_driver;
+extern crate rustc_interface;
+extern crate rustc_middle;
+extern crate rustc_session;
+#[macro_use]
+extern crate rustc_smir;
+extern crate stable_mir;
+
+use crate::analysis::OverallStats;
+use rustc_middle::ty::TyCtxt;
+use rustc_session::config::OutputType;
+use rustc_smir::{run_with_tcx, rustc_internal};
+use stable_mir::CompilerError;
+use std::ops::ControlFlow;
+use std::path::{Path, PathBuf};
+use std::process::ExitCode;
+use std::sync::atomic::{AtomicBool, Ordering};
+use strum::IntoEnumIterator;
+use strum_macros::{AsRefStr, EnumIter};
+
+// Use a static variable for simplicity.
+static VERBOSE: AtomicBool = AtomicBool::new(false);
+
+pub fn run_all(rustc_args: Vec<String>, verbose: bool) -> ExitCode {
+    run_analyses(rustc_args, &Analysis::iter().collect::<Vec<_>>(), verbose)
+}
+
+/// Executes a compilation and runs the analyses that were requested.
+pub fn run_analyses(rustc_args: Vec, analyses: &[Analysis], verbose: bool) -> ExitCode { + VERBOSE.store(verbose, Ordering::Relaxed); + let result = run_with_tcx!(rustc_args, |tcx| analyze_crate(tcx, analyses)); + if result.is_ok() || matches!(result, Err(CompilerError::Skipped)) { + ExitCode::SUCCESS + } else { + ExitCode::FAILURE + } +} + +#[derive(AsRefStr, EnumIter, Debug, PartialEq)] +#[strum(serialize_all = "snake_case")] +pub enum Analysis { + /// Collect information about generic functions. + MonoFns, + /// Collect information about function safety. + SafeFns, + /// Collect information about function inputs. + InputTys, + /// Collect information about unsafe operations. + UnsafeOps, + /// Collect information about loops inside a function. + FnLoops, + /// Collect information about recursion via direct calls. + Recursion, +} + +fn info(msg: String) { + if VERBOSE.load(Ordering::Relaxed) { + eprintln!("[INFO] {}", msg); + } +} + +/// This function invoke the required analyses in the given order. +fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> { + let object_file = tcx.output_filenames(()).path(OutputType::Object); + let base_path = object_file.as_path().to_path_buf(); + // Use name for now to make it more friendly. Change to base_path.file_stem() to avoid conflict. 
+ // let file_stem = base_path.file_stem().unwrap(); + let file_stem = format!("{}_scan", stable_mir::local_crate().name); + let mut crate_stats = OverallStats::new(); + for analysis in analyses { + let filename = format!("{}_{}", file_stem, analysis.as_ref()); + let mut out_path = base_path.parent().map_or(PathBuf::default(), Path::to_path_buf); + out_path.set_file_name(filename); + match analysis { + Analysis::MonoFns => { + crate_stats.generic_fns(); + } + Analysis::SafeFns => { + crate_stats.safe_fns(out_path); + } + Analysis::InputTys => crate_stats.supported_inputs(out_path), + Analysis::UnsafeOps => crate_stats.unsafe_operations(out_path), + Analysis::FnLoops => crate_stats.loops(out_path), + Analysis::Recursion => crate_stats.recursion(out_path), + } + } + crate_stats.store_csv(base_path, &file_stem); + ControlFlow::<()>::Continue(()) +}