From 3c0e11763fd7e1e7b7439ebcfd4b97ceb609d679 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 29 Aug 2024 19:24:31 +0200 Subject: [PATCH] interpret: reset padding during validation --- .../src/const_eval/machine.rs | 27 +- .../rustc_const_eval/src/interpret/machine.rs | 12 +- .../rustc_const_eval/src/interpret/memory.rs | 11 +- .../rustc_const_eval/src/interpret/mod.rs | 2 +- .../rustc_const_eval/src/interpret/place.rs | 17 +- .../src/interpret/validity.rs | 283 ++++++++++++++++-- .../rustc_const_eval/src/interpret/visitor.rs | 12 + .../src/util/check_validity_requirement.rs | 2 +- src/tools/miri/src/machine.rs | 14 + .../miri/tests/fail/uninit/padding-enum.rs | 23 ++ .../tests/fail/uninit/padding-enum.stderr | 15 + .../miri/tests/fail/uninit/padding-pair.rs | 25 ++ .../tests/fail/uninit/padding-pair.stderr | 15 + .../fail/uninit/padding-struct-in-union.rs | 32 ++ .../uninit/padding-struct-in-union.stderr | 15 + .../miri/tests/fail/uninit/padding-struct.rs | 11 + .../tests/fail/uninit/padding-struct.stderr | 15 + .../miri/tests/fail/uninit/padding-union.rs | 14 + .../tests/fail/uninit/padding-union.stderr | 15 + .../{ => uninit}/transmute-pair-uninit.rs | 11 +- .../{ => uninit}/transmute-pair-uninit.stderr | 0 src/tools/miri/tests/pass/enums.rs | 38 +++ 22 files changed, 569 insertions(+), 40 deletions(-) create mode 100644 src/tools/miri/tests/fail/uninit/padding-enum.rs create mode 100644 src/tools/miri/tests/fail/uninit/padding-enum.stderr create mode 100644 src/tools/miri/tests/fail/uninit/padding-pair.rs create mode 100644 src/tools/miri/tests/fail/uninit/padding-pair.stderr create mode 100644 src/tools/miri/tests/fail/uninit/padding-struct-in-union.rs create mode 100644 src/tools/miri/tests/fail/uninit/padding-struct-in-union.stderr create mode 100644 src/tools/miri/tests/fail/uninit/padding-struct.rs create mode 100644 src/tools/miri/tests/fail/uninit/padding-struct.stderr create mode 100644 src/tools/miri/tests/fail/uninit/padding-union.rs create mode 100644 src/tools/miri/tests/fail/uninit/padding-union.stderr rename src/tools/miri/tests/fail/{ => uninit}/transmute-pair-uninit.rs (61%) rename src/tools/miri/tests/fail/{ => uninit}/transmute-pair-uninit.stderr (100%) diff --git a/compiler/rustc_const_eval/src/const_eval/machine.rs b/compiler/rustc_const_eval/src/const_eval/machine.rs index 9c1fef095f552..7405ca09342da 100644 --- a/compiler/rustc_const_eval/src/const_eval/machine.rs +++ b/compiler/rustc_const_eval/src/const_eval/machine.rs @@ -1,16 +1,16 @@ -use std::borrow::Borrow; +use std::borrow::{Borrow, Cow}; use std::fmt; use std::hash::Hash; use std::ops::ControlFlow; use rustc_ast::Mutability; -use rustc_data_structures::fx::{FxIndexMap, IndexEntry}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap, IndexEntry}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_hir::{self as hir, LangItem, CRATE_HIR_ID}; use rustc_middle::mir::AssertMessage; use rustc_middle::query::TyCtxtAt; use rustc_middle::ty::layout::{FnAbiOf, TyAndLayout}; -use rustc_middle::ty::{self, TyCtxt}; +use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::{bug, mir}; use rustc_span::symbol::{sym, Symbol}; use rustc_span::Span; @@ -24,8 +24,8 @@ use crate::fluent_generated as fluent; use crate::interpret::{ self, compile_time_machine, err_ub, throw_exhaust, throw_inval, throw_ub_custom, throw_unsup, throw_unsup_format, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, Frame, - GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, Scalar, - StackPopCleanup, + GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, + RangeSet, Scalar, StackPopCleanup, }; /// When hitting this many interpreted terminators we emit a deny by default lint @@ -65,6 +65,9 @@ pub struct CompileTimeMachine<'tcx> { /// storing the result in the given `AllocId`. /// Used to prevent reads from a static's base allocation, as that may allow for self-initialization loops. pub(crate) static_root_ids: Option<(AllocId, LocalDefId)>, + + /// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes). + union_data_ranges: FxHashMap, RangeSet>, } #[derive(Copy, Clone)] @@ -99,6 +102,7 @@ impl<'tcx> CompileTimeMachine<'tcx> { can_access_mut_global, check_alignment, static_root_ids: None, + union_data_ranges: FxHashMap::default(), } } } @@ -766,6 +770,19 @@ impl<'tcx> interpret::Machine<'tcx> for CompileTimeMachine<'tcx> { } Ok(()) } + + fn cached_union_data_range<'e>( + ecx: &'e mut InterpCx<'tcx, Self>, + ty: Ty<'tcx>, + compute_range: impl FnOnce() -> RangeSet, + ) -> Cow<'e, RangeSet> { + if ecx.tcx.sess.opts.unstable_opts.extra_const_ub_checks { + Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range)) + } else { + // Don't bother caching, we're only doing one validation at the end anyway. + Cow::Owned(compute_range()) + } + } } // Please do not add any code below the above `Machine` trait impl. I (oli-obk) plan more cleanups diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs index 88453245b8444..8a4c81b283be4 100644 --- a/compiler/rustc_const_eval/src/interpret/machine.rs +++ b/compiler/rustc_const_eval/src/interpret/machine.rs @@ -10,6 +10,7 @@ use rustc_apfloat::{Float, FloatConvert}; use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece}; use rustc_middle::query::TyCtxtAt; use rustc_middle::ty::layout::TyAndLayout; +use rustc_middle::ty::Ty; use rustc_middle::{mir, ty}; use rustc_span::def_id::DefId; use rustc_span::Span; @@ -19,7 +20,7 @@ use rustc_target::spec::abi::Abi as CallAbi; use super::{ throw_unsup, throw_unsup_format, AllocBytes, AllocId, AllocKind, AllocRange, Allocation, ConstAllocation, CtfeProvenance, FnArg, Frame, ImmTy, InterpCx, InterpResult, MPlaceTy, - MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, CTFE_ALLOC_SALT, + MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, RangeSet, CTFE_ALLOC_SALT, }; /// Data returned by [`Machine::after_stack_pop`], and consumed by @@ -588,6 +589,15 @@ pub trait Machine<'tcx>: Sized { ecx: &InterpCx<'tcx, Self>, instance: Option>, ) -> usize; + + fn cached_union_data_range<'e>( + _ecx: &'e mut InterpCx<'tcx, Self>, + _ty: Ty<'tcx>, + compute_range: impl FnOnce() -> RangeSet, + ) -> Cow<'e, RangeSet> { + // Default to no caching. + Cow::Owned(compute_range()) + } } /// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index 5d3e448393301..7f27e4ea1322e 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -1136,8 +1136,17 @@ impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> self.write_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size), val) } + /// Mark the given sub-range (relative to this allocation reference) as uninitialized. + pub fn write_uninit(&mut self, range: AllocRange) -> InterpResult<'tcx> { + let range = self.range.subrange(range); + Ok(self + .alloc + .write_uninit(&self.tcx, range) + .map_err(|e| e.to_interp_error(self.alloc_id))?) + } + /// Mark the entire referenced range as uninitialized - pub fn write_uninit(&mut self) -> InterpResult<'tcx> { + pub fn write_uninit_full(&mut self) -> InterpResult<'tcx> { Ok(self .alloc .write_uninit(&self.tcx, self.range) diff --git a/compiler/rustc_const_eval/src/interpret/mod.rs b/compiler/rustc_const_eval/src/interpret/mod.rs index c1e46cc3a15b4..561d681f804a1 100644 --- a/compiler/rustc_const_eval/src/interpret/mod.rs +++ b/compiler/rustc_const_eval/src/interpret/mod.rs @@ -39,5 +39,5 @@ use self::place::{MemPlace, Place}; pub use self::projection::{OffsetMode, Projectable}; pub use self::stack::{Frame, FrameInfo, LocalState, StackPopCleanup, StackPopInfo}; pub(crate) use self::util::create_static_alloc; -pub use self::validity::{CtfeValidationMode, RefTracking}; +pub use self::validity::{CtfeValidationMode, RangeSet, RefTracking}; pub use self::visitor::ValueVisitor; diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs index 9652ebadb855f..d9028f595dca3 100644 --- a/compiler/rustc_const_eval/src/interpret/place.rs +++ b/compiler/rustc_const_eval/src/interpret/place.rs @@ -602,10 +602,11 @@ where if M::enforce_validity(self, dest.layout()) { // Data got changed, better make sure it matches the type! + // Also needed to reset padding. self.validate_operand( &dest.to_place(), M::enforce_validity_recursively(self, dest.layout()), - /*reset_provenance*/ true, + /*reset_provenance_and_padding*/ true, )?; } @@ -701,9 +702,11 @@ where // fields do not match the `ScalarPair` components. alloc.write_scalar(alloc_range(Size::ZERO, a_val.size()), a_val)?; - alloc.write_scalar(alloc_range(b_offset, b_val.size()), b_val) + alloc.write_scalar(alloc_range(b_offset, b_val.size()), b_val)?; + // We don't have to reset padding here, `write_immediate` will anyway do a validation run. + Ok(()) } - Immediate::Uninit => alloc.write_uninit(), + Immediate::Uninit => alloc.write_uninit_full(), } } @@ -720,7 +723,7 @@ where // Zero-sized access return Ok(()); }; - alloc.write_uninit()?; + alloc.write_uninit_full()?; } } Ok(()) @@ -812,17 +815,17 @@ where // Given that there were two typed copies, we have to ensure this is valid at both types, // and we have to ensure this loses provenance and padding according to both types. // But if the types are identical, we only do one pass. - if src.layout().ty != dest.layout().ty { + if allow_transmute && src.layout().ty != dest.layout().ty { self.validate_operand( &dest.transmute(src.layout(), self)?, M::enforce_validity_recursively(self, src.layout()), - /*reset_provenance*/ true, + /*reset_provenance_and_padding*/ true, )?; } self.validate_operand( &dest, M::enforce_validity_recursively(self, dest.layout()), - /*reset_provenance*/ true, + /*reset_provenance_and_padding*/ true, )?; } diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs index 953831a55e5ed..1797e0378d008 100644 --- a/compiler/rustc_const_eval/src/interpret/validity.rs +++ b/compiler/rustc_const_eval/src/interpret/validity.rs @@ -4,6 +4,7 @@ //! That's useful because it means other passes (e.g. promotion) can rely on `const`s //! to be const-safe. +use std::borrow::Cow; use std::fmt::Write; use std::hash::Hash; use std::num::NonZero; @@ -16,14 +17,14 @@ use rustc_hir as hir; use rustc_middle::bug; use rustc_middle::mir::interpret::ValidationErrorKind::{self, *}; use rustc_middle::mir::interpret::{ - ExpectedKind, InterpError, InvalidMetaKind, Misalignment, PointerKind, Provenance, + alloc_range, ExpectedKind, InterpError, InvalidMetaKind, Misalignment, PointerKind, Provenance, UnsupportedOpInfo, ValidationErrorInfo, }; -use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; -use rustc_middle::ty::{self, Ty}; +use rustc_middle::ty::layout::{LayoutCx, LayoutOf, TyAndLayout}; +use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_span::symbol::{sym, Symbol}; use rustc_target::abi::{ - Abi, FieldIdx, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange, + Abi, FieldIdx, FieldsShape, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange, }; use tracing::trace; @@ -125,6 +126,7 @@ pub enum PathElem { EnumTag, CoroutineTag, DynDowncast, + Vtable, } /// Extra things to check for during validation of CTFE results. @@ -204,11 +206,56 @@ fn write_path(out: &mut String, path: &[PathElem]) { // not the root. Deref => write!(out, "."), DynDowncast => write!(out, "."), + Vtable => write!(out, "."), } .unwrap() } } +/// Represents a set of `Size` values as a sorted list of ranges. +// These are (offset, length) pairs, and they are sorted and mutually disjoint, +// and never adjacent (i.e. there's always a gap between two of them). +#[derive(Debug, Clone)] +pub struct RangeSet(Vec<(Size, Size)>); + +impl RangeSet { + fn add_range(&mut self, offset: Size, size: Size) { + let v = &mut self.0; + // We scan for a partition point where the left partition is all the elements that end + // strictly before we start. Those are elements that are too "low" to merge with us. + let idx = + v.partition_point(|&(other_offset, other_size)| other_offset + other_size < offset); + // Now we want to either merge with the first element of the second partition, or insert ourselves before that. + if let Some(&(other_offset, other_size)) = v.get(idx) + && offset + size >= other_offset + { + // Their end is >= our start (otherwise it would not be in the 2nd partition) and + // our end is >= their start. This means we can merge the ranges. + let new_start = other_offset.min(offset); + let mut new_end = (other_offset + other_size).max(offset + size); + // We grew to the right, so merge with overlapping/adjacent elements. + // (We also may have grown to the left, but that can never make us adjacent with + // anything there since we selected the first such candidate via `partition_point`.) + let mut scan_right = 1; + while let Some(&(next_offset, next_size)) = v.get(idx + scan_right) + && new_end >= next_offset + { + // Increase our size to absorb the next element. + new_end = new_end.max(next_offset + next_size); + // Look at the next element. + scan_right += 1; + } + // Update the element we grew. + v[idx] = (new_start, new_end - new_start); + // Remove the elements we absorbed. + drop(v.drain((idx + 1)..(idx + scan_right))); + } else { + // Insert new element. + v.insert(idx, (offset, size)); + } + } +} + struct ValidityVisitor<'rt, 'tcx, M: Machine<'tcx>> { /// The `path` may be pushed to, but the part that is present when a function /// starts must not be changed! `visit_fields` and `visit_array` rely on @@ -220,7 +267,12 @@ struct ValidityVisitor<'rt, 'tcx, M: Machine<'tcx>> { ecx: &'rt mut InterpCx<'tcx, M>, /// Whether provenance should be reset outside of pointers (emulating the effect of a typed /// copy). - reset_provenance: bool, + reset_provenance_and_padding: bool, + /// This tracks which byte ranges in this value contain data; the remaining bytes are padding. + /// The ideal representation here would be pointer-length pairs, but to keep things more compact + /// we only store a (range) set of offsets -- the base pointer is the same throughout the entire + /// visit, after all. + data_bytes: Option, } impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { @@ -290,8 +342,14 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { // arrays/slices ty::Array(..) | ty::Slice(..) => PathElem::ArrayElem(field), + // dyn* vtables + ty::Dynamic(_, _, ty::DynKind::DynStar) if field == 1 => PathElem::Vtable, + // dyn traits - ty::Dynamic(..) => PathElem::DynDowncast, + ty::Dynamic(..) => { + assert_eq!(field, 0); + PathElem::DynDowncast + } // nothing else has an aggregate layout _ => bug!("aggregate_field_path_elem: got non-aggregate type {:?}", layout.ty), @@ -350,7 +408,7 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { let imm = self.read_immediate(val, expected)?; // Reset provenance: ensure slice tail metadata does not preserve provenance, // and ensure all pointers do not preserve partial provenance. - if self.reset_provenance { + if self.reset_provenance_and_padding { if matches!(imm.layout.abi, Abi::Scalar(..)) { // A thin pointer. If it has provenance, we don't have to do anything. // If it does not, ensure we clear the provenance in memory. @@ -364,6 +422,8 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { // is a perf hotspot it's just not worth the effort. self.ecx.write_immediate_no_validate(*imm, val)?; } + // The entire thing is data, not padding. + self.add_data_range_place(val); } // Now turn it into a place. self.ecx.ref_to_mplace(&imm) @@ -608,8 +668,9 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { value: format!("{scalar:x}"), } ); - if self.reset_provenance { + if self.reset_provenance_and_padding { self.ecx.clear_provenance(value)?; + self.add_data_range_place(value); } Ok(true) } @@ -622,8 +683,9 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { value: format!("{scalar:x}"), } ); - if self.reset_provenance { + if self.reset_provenance_and_padding { self.ecx.clear_provenance(value)?; + self.add_data_range_place(value); } Ok(true) } @@ -638,8 +700,9 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { ExpectedKind::Int }, )?; - if self.reset_provenance { + if self.reset_provenance_and_padding { self.ecx.clear_provenance(value)?; + self.add_data_range_place(value); } Ok(true) } @@ -673,12 +736,13 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { throw_validation_failure!(self.path, NullFnPtr); } } - if self.reset_provenance { + if self.reset_provenance_and_padding { // Make sure we do not preserve partial provenance. This matches the thin // pointer handling in `deref_pointer`. if matches!(scalar, Scalar::Int(..)) { self.ecx.clear_provenance(value)?; } + self.add_data_range_place(value); } Ok(true) } @@ -774,6 +838,155 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { true } } + + /// Add the given pointer-length pair to the "data" range of this visit. + fn add_data_range(&mut self, ptr: Pointer>, size: Size) { + if let Some(data_bytes) = self.data_bytes.as_mut() { + // We only have to store the offset, the rest is the same for all pointers here. + let (_prov, offset) = ptr.into_parts(); + // Add this. + data_bytes.add_range(offset, size); + }; + } + + /// Add the entire given place to the "data" range of this visit. + fn add_data_range_place(&mut self, place: &PlaceTy<'tcx, M::Provenance>) { + // Only sized places can be added this way. + debug_assert!(place.layout.abi.is_sized()); + if let Some(data_bytes) = self.data_bytes.as_mut() { + let offset = Self::data_range_offset(self.ecx, place); + data_bytes.add_range(offset, place.layout.size); + } + } + + /// Convert a place into the offset it starts at, for the purpose of data_range tracking. + /// Must only be called if `data_bytes` is `Some(_)`. + fn data_range_offset(ecx: &InterpCx<'tcx, M>, place: &PlaceTy<'tcx, M::Provenance>) -> Size { + // The presence of `data_bytes` implies that our place is in memory. + let ptr = ecx + .place_to_op(place) + .expect("place must be in memory") + .as_mplace_or_imm() + .expect_left("place must be in memory") + .ptr(); + let (_prov, offset) = ptr.into_parts(); + offset + } + + fn reset_padding(&mut self, place: &PlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { + let Some(data_bytes) = self.data_bytes.as_mut() else { return Ok(()) }; + // Our value must be in memory, otherwise we would not have set up `data_bytes`. + let mplace = self.ecx.force_allocation(place)?; + // Determine starting offset and size. + let (_prov, start_offset) = mplace.ptr().into_parts(); + let (size, _align) = self + .ecx + .size_and_align_of_mplace(&mplace)? + .unwrap_or((mplace.layout.size, mplace.layout.align.abi)); + // If there is no padding at all, we can skip the rest: check for + // a single data range covering the entire value. + if data_bytes.0 == &[(start_offset, size)] { + return Ok(()); + } + // Get a handle for the allocation. Do this only once, to avoid looking up the same + // allocation over and over again. (Though to be fair, iterating the value already does + // exactly that.) + let Some(mut alloc) = self.ecx.get_ptr_alloc_mut(mplace.ptr(), size)? else { + // A ZST, no padding to clear. + return Ok(()); + }; + // Add a "finalizer" data range at the end, so that the iteration below finds all gaps + // between ranges. + data_bytes.0.push((start_offset + size, Size::ZERO)); + // Iterate, and reset gaps. + let mut padding_cleared_until = start_offset; + for &(offset, size) in data_bytes.0.iter() { + assert!( + offset >= padding_cleared_until, + "reset_padding on {}: previous field ended at offset {}, next field starts at {} (and has a size of {} bytes)", + mplace.layout.ty, + (padding_cleared_until - start_offset).bytes(), + (offset - start_offset).bytes(), + size.bytes(), + ); + if offset > padding_cleared_until { + // We found padding. Adjust the range to be relative to `alloc`, and make it uninit. + let padding_start = padding_cleared_until - start_offset; + let padding_size = offset - padding_cleared_until; + let range = alloc_range(padding_start, padding_size); + trace!("reset_padding on {}: resetting padding range {range:?}", mplace.layout.ty); + alloc.write_uninit(range)?; + } + padding_cleared_until = offset + size; + } + assert!(padding_cleared_until == start_offset + size); + Ok(()) + } + + /// Computes the data range of this union type: + /// which bytes are inside a field (i.e., not padding.) + fn union_data_range<'e>( + ecx: &'e mut InterpCx<'tcx, M>, + layout: TyAndLayout<'tcx>, + ) -> Cow<'e, RangeSet> { + assert!(layout.ty.is_union()); + assert!(layout.abi.is_sized(), "there are no unsized unions"); + let layout_cx = LayoutCx { tcx: *ecx.tcx, param_env: ecx.param_env }; + return M::cached_union_data_range(ecx, layout.ty, || { + let mut out = RangeSet(Vec::new()); + union_data_range_(&layout_cx, layout, Size::ZERO, &mut out); + out + }); + + /// Helper for recursive traversal: add data ranges of the given type to `out`. + fn union_data_range_<'tcx>( + cx: &LayoutCx<'tcx, TyCtxt<'tcx>>, + layout: TyAndLayout<'tcx>, + base_offset: Size, + out: &mut RangeSet, + ) { + // Just recursively add all the fields of everything to the output. + match &layout.fields { + FieldsShape::Primitive => { + out.add_range(base_offset, layout.size); + } + &FieldsShape::Union(fields) => { + // Currently, all fields start at offset 0. + for field in 0..fields.get() { + let field = layout.field(cx, field); + union_data_range_(cx, field, base_offset, out); + } + } + &FieldsShape::Array { stride, count } => { + let elem = layout.field(cx, 0); + for idx in 0..count { + // This repeats the same computation for every array elements... but the alternative + // is to allocate temporary storage for a dedicated `out` set for the array element, + // and replicating that N times. Is that better? + union_data_range_(cx, elem, base_offset + idx * stride, out); + } + } + FieldsShape::Arbitrary { offsets, .. } => { + for (field, &offset) in offsets.iter_enumerated() { + let field = layout.field(cx, field.as_usize()); + union_data_range_(cx, field, base_offset + offset, out); + } + } + } + // Don't forget potential other variants. + match &layout.variants { + Variants::Single { .. } => { + // Fully handled above. + } + Variants::Multiple { variants, .. } => { + for variant in variants.indices() { + let variant = layout.for_variant(cx, variant); + union_data_range_(cx, variant, base_offset, out); + } + } + } + } + } } /// Returns whether the allocation is mutable, and whether it's actually a static. @@ -890,6 +1103,16 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, } } } + if self.reset_provenance_and_padding + && let Some(data_bytes) = self.data_bytes.as_mut() + { + let base_offset = Self::data_range_offset(self.ecx, val); + // Determine and add data range for this union. + let union_data_range = Self::union_data_range(self.ecx, val.layout); + for &(offset, size) in union_data_range.0.iter() { + data_bytes.add_range(base_offset + offset, size); + } + } Ok(()) } @@ -1013,10 +1236,12 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, // Don't forget that these are all non-pointer types, and thus do not preserve // provenance. - if self.reset_provenance { + if self.reset_provenance_and_padding { // We can't share this with above as above, we might be looking at read-only memory. let mut alloc = self.ecx.get_ptr_alloc_mut(mplace.ptr(), size)?.expect("we already excluded size 0"); alloc.clear_provenance()?; + // Also, mark this as containing data, not padding. + self.add_data_range(mplace.ptr(), size); } } // Fast path for arrays and slices of ZSTs. We only need to check a single ZST element @@ -1096,14 +1321,28 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { path: Vec, ref_tracking: Option<&mut RefTracking, Vec>>, ctfe_mode: Option, - reset_provenance: bool, + reset_provenance_and_padding: bool, ) -> InterpResult<'tcx> { trace!("validate_operand_internal: {:?}, {:?}", *val, val.layout.ty); // Run the visitor. match self.run_for_validation(|ecx| { - let mut v = ValidityVisitor { path, ref_tracking, ctfe_mode, ecx, reset_provenance }; - v.visit_value(val) + let reset_padding = reset_provenance_and_padding && { + // Check if `val` is actually stored in memory. If not, padding is not even + // represented and we need not reset it. + ecx.place_to_op(val)?.as_mplace_or_imm().is_left() + }; + let mut v = ValidityVisitor { + path, + ref_tracking, + ctfe_mode, + ecx, + reset_provenance_and_padding, + data_bytes: reset_padding.then_some(RangeSet(Vec::new())), + }; + v.visit_value(val)?; + v.reset_padding(val)?; + InterpResult::Ok(()) }) { Ok(()) => Ok(()), // Pass through validation failures and "invalid program" issues. @@ -1163,13 +1402,19 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { &mut self, val: &PlaceTy<'tcx, M::Provenance>, recursive: bool, - reset_provenance: bool, + reset_provenance_and_padding: bool, ) -> InterpResult<'tcx> { // Note that we *could* actually be in CTFE here with `-Zextra-const-ub-checks`, but it's // still correct to not use `ctfe_mode`: that mode is for validation of the final constant // value, it rules out things like `UnsafeCell` in awkward places. if !recursive { - return self.validate_operand_internal(val, vec![], None, None, reset_provenance); + return self.validate_operand_internal( + val, + vec![], + None, + None, + reset_provenance_and_padding, + ); } // Do a recursive check. let mut ref_tracking = RefTracking::empty(); @@ -1178,7 +1423,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { vec![], Some(&mut ref_tracking), None, - reset_provenance, + reset_provenance_and_padding, )?; while let Some((mplace, path)) = ref_tracking.todo.pop() { // Things behind reference do *not* have the provenance reset. @@ -1187,7 +1432,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { path, Some(&mut ref_tracking), None, - /*reset_provenance*/ false, + /*reset_provenance_and_padding*/ false, )?; } Ok(()) diff --git a/compiler/rustc_const_eval/src/interpret/visitor.rs b/compiler/rustc_const_eval/src/interpret/visitor.rs index e67136bd69ef5..249fb1c5c65fc 100644 --- a/compiler/rustc_const_eval/src/interpret/visitor.rs +++ b/compiler/rustc_const_eval/src/interpret/visitor.rs @@ -5,6 +5,7 @@ use std::num::NonZero; use rustc_index::IndexVec; use rustc_middle::mir::interpret::InterpResult; +use rustc_middle::ty::layout::LayoutOf; use rustc_middle::ty::{self, Ty}; use rustc_target::abi::{FieldIdx, FieldsShape, VariantIdx, Variants}; use tracing::trace; @@ -104,6 +105,17 @@ pub trait ValueVisitor<'tcx, M: Machine<'tcx>>: Sized { // DynStar types. Very different from a dyn type (but strangely part of the // same variant in `TyKind`): These are pairs where the 2nd component is the // vtable, and the first component is the data (which must be ptr-sized). + + // First make sure the vtable can be read at its type. + // The type of this vtable is fake, it claims to be a reference to some actual memory but that isn't true. + // So we transmute it to a raw pointer. + let raw_ptr_ty = Ty::new_mut_ptr(*self.ecx().tcx, self.ecx().tcx.types.unit); + let raw_ptr_ty = self.ecx().layout_of(raw_ptr_ty)?; + let vtable_field = + self.ecx().project_field(v, 1)?.transmute(raw_ptr_ty, self.ecx())?; + self.visit_field(v, 1, &vtable_field)?; + + // Then unpack the first field, and continue. let data = self.ecx().unpack_dyn_star(v, data)?; return self.visit_field(v, 0, &data); } diff --git a/compiler/rustc_const_eval/src/util/check_validity_requirement.rs b/compiler/rustc_const_eval/src/util/check_validity_requirement.rs index 5e424190d07dd..26fd59bc6ef77 100644 --- a/compiler/rustc_const_eval/src/util/check_validity_requirement.rs +++ b/compiler/rustc_const_eval/src/util/check_validity_requirement.rs @@ -69,7 +69,7 @@ fn might_permit_raw_init_strict<'tcx>( .validate_operand( &allocated.into(), /*recursive*/ false, - /*reset_provenance*/ false, + /*reset_provenance_and_padding*/ false, ) .is_ok()) } diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 264b7b269de72..6e64ff8ac7e77 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1,6 +1,7 @@ //! Global machine state as well as implementation of the interpreter engine //! `Machine` trait. +use std::borrow::Cow; use std::cell::RefCell; use std::collections::hash_map::Entry; use std::fmt; @@ -571,6 +572,9 @@ pub struct MiriMachine<'tcx> { /// Invariant: the promised alignment will never be less than the native alignment of the /// allocation. pub(crate) symbolic_alignment: RefCell>, + + /// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes). + union_data_ranges: FxHashMap, RangeSet>, } impl<'tcx> MiriMachine<'tcx> { @@ -713,6 +717,7 @@ impl<'tcx> MiriMachine<'tcx> { allocation_spans: RefCell::new(FxHashMap::default()), const_cache: RefCell::new(FxHashMap::default()), symbolic_alignment: RefCell::new(FxHashMap::default()), + union_data_ranges: FxHashMap::default(), } } @@ -825,6 +830,7 @@ impl VisitProvenance for MiriMachine<'_> { allocation_spans: _, const_cache: _, symbolic_alignment: _, + union_data_ranges: _, } = self; threads.visit_provenance(visit); @@ -1602,4 +1608,12 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { ecx.machine.rng.borrow_mut().gen::() % ADDRS_PER_ANON_GLOBAL } } + + fn cached_union_data_range<'e>( + ecx: &'e mut InterpCx<'tcx, Self>, + ty: Ty<'tcx>, + compute_range: impl FnOnce() -> RangeSet, + ) -> Cow<'e, RangeSet> { + Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range)) + } } diff --git a/src/tools/miri/tests/fail/uninit/padding-enum.rs b/src/tools/miri/tests/fail/uninit/padding-enum.rs new file mode 100644 index 0000000000000..0ab9be275815d --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-enum.rs @@ -0,0 +1,23 @@ +use std::mem; + +// We have three fields to avoid the ScalarPair optimization. +#[allow(unused)] +enum E { + None, + Some(&'static (), &'static (), usize), +} + +fn main() { unsafe { + let mut p: mem::MaybeUninit = mem::MaybeUninit::zeroed(); + // The copy when `E` is returned from `transmute` should destroy padding + // (even when we use `write_unaligned`, which under the hood uses an untyped copy). + p.as_mut_ptr().write_unaligned(mem::transmute((0usize, 0usize, 0usize))); + // This is a `None`, so everything but the discriminant is padding. + assert!(matches!(*p.as_ptr(), E::None)); + + // Turns out the discriminant is (currently) stored + // in the 2nd pointer, so the first half is padding. + let c = &p as *const _ as *const u8; + let _val = *c.add(0); // Get the padding byte. + //~^ERROR: uninitialized +} } diff --git a/src/tools/miri/tests/fail/uninit/padding-enum.stderr b/src/tools/miri/tests/fail/uninit/padding-enum.stderr new file mode 100644 index 0000000000000..a507a5d49bc2c --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-enum.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> $DIR/padding-enum.rs:LL:CC + | +LL | let _val = *c.add(0); // Get the padding byte. + | ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-enum.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-pair.rs b/src/tools/miri/tests/fail/uninit/padding-pair.rs new file mode 100644 index 0000000000000..c8c00b3c65a06 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-pair.rs @@ -0,0 +1,25 @@ +#![feature(core_intrinsics)] + +use std::mem::{self, MaybeUninit}; + +fn main() { + // This constructs a `(usize, bool)` pair: 9 bytes initialized, the rest not. + // Ensure that these 9 bytes are indeed initialized, and the rest is indeed not. + // This should be the case even if we write into previously initialized storage. + let mut x: MaybeUninit> = MaybeUninit::zeroed(); + let z = std::intrinsics::add_with_overflow(0usize, 0usize); + unsafe { x.as_mut_ptr().cast::<(usize, bool)>().write(z) }; + // Now read this bytewise. There should be (`ptr_size + 1`) def bytes followed by + // (`ptr_size - 1`) undef bytes (the padding after the bool) in there. + let z: *const u8 = &x as *const _ as *const _; + let first_undef = mem::size_of::() as isize + 1; + for i in 0..first_undef { + let byte = unsafe { *z.offset(i) }; + assert_eq!(byte, 0); + } + let v = unsafe { *z.offset(first_undef) }; + //~^ ERROR: uninitialized + if v == 0 { + println!("it is zero"); + } +} diff --git a/src/tools/miri/tests/fail/uninit/padding-pair.stderr b/src/tools/miri/tests/fail/uninit/padding-pair.stderr new file mode 100644 index 0000000000000..d35934d83d58f --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-pair.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> $DIR/padding-pair.rs:LL:CC + | +LL | let v = unsafe { *z.offset(first_undef) }; + | ^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-pair.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-struct-in-union.rs b/src/tools/miri/tests/fail/uninit/padding-struct-in-union.rs new file mode 100644 index 0000000000000..132b85828362d --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-struct-in-union.rs @@ -0,0 +1,32 @@ +#[repr(C)] +#[derive(Debug, Copy, Clone)] +struct Foo { + val16: u16, + // Padding bytes go here! + val32: u32, +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +struct Bar { + bytes: [u8; 8], +} + +#[repr(C)] +union FooBar { + foo: Foo, + bar: Bar, +} + +pub fn main() { + // Initialize as u8 to ensure padding bytes are zeroed. + let mut foobar = FooBar { bar: Bar { bytes: [0u8; 8] } }; + // Reading either field is ok. + let _val = unsafe { (foobar.foo, foobar.bar) }; + // Does this assignment copy the uninitialized padding bytes + // over the initialized padding bytes? miri doesn't seem to think so. + foobar.foo = Foo { val16: 1, val32: 2 }; + // This resets the padding to uninit. + let _val = unsafe { (foobar.foo, foobar.bar) }; + //~^ ERROR: uninitialized +} diff --git a/src/tools/miri/tests/fail/uninit/padding-struct-in-union.stderr b/src/tools/miri/tests/fail/uninit/padding-struct-in-union.stderr new file mode 100644 index 0000000000000..e122249af16e5 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-struct-in-union.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value at .bytes[2]: encountered uninitialized memory, but expected an integer + --> $DIR/padding-struct-in-union.rs:LL:CC + | +LL | let _val = unsafe { (foobar.foo, foobar.bar) }; + | ^^^^^^^^^^ constructing invalid value at .bytes[2]: encountered uninitialized memory, but expected an integer + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-struct-in-union.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-struct.rs b/src/tools/miri/tests/fail/uninit/padding-struct.rs new file mode 100644 index 0000000000000..dd3be50343902 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-struct.rs @@ -0,0 +1,11 @@ +use std::mem; + +#[repr(C)] +struct Pair(u8, u16); + +fn main() { unsafe { + let p: Pair = mem::transmute(0u32); // The copy when `Pair` is returned from `transmute` should destroy padding. + let c = &p as *const _ as *const u8; + let _val = *c.add(1); // Get the padding byte. + //~^ERROR: uninitialized +} } diff --git a/src/tools/miri/tests/fail/uninit/padding-struct.stderr b/src/tools/miri/tests/fail/uninit/padding-struct.stderr new file mode 100644 index 0000000000000..8dc40a482ac52 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-struct.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> $DIR/padding-struct.rs:LL:CC + | +LL | let _val = *c.add(1); // Get the padding byte. + | ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-struct.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-union.rs b/src/tools/miri/tests/fail/uninit/padding-union.rs new file mode 100644 index 0000000000000..2e9e0a40d6c68 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-union.rs @@ -0,0 +1,14 @@ +use std::mem; + +#[allow(unused)] +#[repr(C)] +union U { + field: (u8, u16), +} + +fn main() { unsafe { + let p: U = mem::transmute(0u32); // The copy when `U` is returned from `transmute` should destroy padding. + let c = &p as *const _ as *const [u8; 4]; + let _val = *c; // Read the entire thing, definitely contains the padding byte. + //~^ERROR: uninitialized +} } diff --git a/src/tools/miri/tests/fail/uninit/padding-union.stderr b/src/tools/miri/tests/fail/uninit/padding-union.stderr new file mode 100644 index 0000000000000..04002da4f195c --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-union.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value at [1]: encountered uninitialized memory, but expected an integer + --> $DIR/padding-union.rs:LL:CC + | +LL | let _val = *c; // Read the entire thing, definitely contains the padding byte. + | ^^ constructing invalid value at [1]: encountered uninitialized memory, but expected an integer + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-union.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/transmute-pair-uninit.rs b/src/tools/miri/tests/fail/uninit/transmute-pair-uninit.rs similarity index 61% rename from src/tools/miri/tests/fail/transmute-pair-uninit.rs rename to src/tools/miri/tests/fail/uninit/transmute-pair-uninit.rs index bc95f3cb7ad3a..0ba5520a54468 100644 --- a/src/tools/miri/tests/fail/transmute-pair-uninit.rs +++ b/src/tools/miri/tests/fail/uninit/transmute-pair-uninit.rs @@ -1,16 +1,17 @@ #![feature(core_intrinsics)] -use std::mem; +use std::mem::{self, MaybeUninit}; fn main() { - let x: Option> = unsafe { + // This constructs a `(usize, bool)` pair: 9 bytes initialized, the rest not. + // Ensure that these 9 bytes are indeed initialized, and the rest is indeed not. + let x: MaybeUninit> = unsafe { let z = std::intrinsics::add_with_overflow(0usize, 0usize); - std::mem::transmute::<(usize, bool), Option>>(z) + std::mem::transmute::<(usize, bool), MaybeUninit>>(z) }; - let y = &x; // Now read this bytewise. There should be (`ptr_size + 1`) def bytes followed by // (`ptr_size - 1`) undef bytes (the padding after the bool) in there. - let z: *const u8 = y as *const _ as *const _; + let z: *const u8 = &x as *const _ as *const _; let first_undef = mem::size_of::() as isize + 1; for i in 0..first_undef { let byte = unsafe { *z.offset(i) }; diff --git a/src/tools/miri/tests/fail/transmute-pair-uninit.stderr b/src/tools/miri/tests/fail/uninit/transmute-pair-uninit.stderr similarity index 100% rename from src/tools/miri/tests/fail/transmute-pair-uninit.stderr rename to src/tools/miri/tests/fail/uninit/transmute-pair-uninit.stderr diff --git a/src/tools/miri/tests/pass/enums.rs b/src/tools/miri/tests/pass/enums.rs index 1dafef025e958..9fc61f07c047f 100644 --- a/src/tools/miri/tests/pass/enums.rs +++ b/src/tools/miri/tests/pass/enums.rs @@ -132,6 +132,43 @@ fn overaligned_casts() { assert_eq!(aligned as u8, 0); } +// This hits a corner case in the logic for clearing padding on typed copies. +fn padding_clear_corner_case() { + #[allow(unused)] + #[derive(Copy, Clone)] + #[repr(C)] + pub struct Decoded { + /// The scaled mantissa. + pub mant: u64, + /// The lower error range. + pub minus: u64, + /// The upper error range. + pub plus: u64, + /// The shared exponent in base 2. + pub exp: i16, + /// True when the error range is inclusive. + /// + /// In IEEE 754, this is true when the original mantissa was even. + pub inclusive: bool, + } + + #[allow(unused)] + #[derive(Copy, Clone)] + pub enum FullDecoded { + /// Not-a-number. + Nan, + /// Infinities, either positive or negative. + Infinite, + /// Zero, either positive or negative. + Zero, + /// Finite numbers with further decoded fields. + Finite(Decoded), + } + + let val = FullDecoded::Finite(Decoded { mant: 0, minus: 0, plus: 0, exp: 0, inclusive: false }); + let _val2 = val; // trigger typed copy +} + fn main() { test(MyEnum::MyEmptyVariant); test(MyEnum::MyNewtypeVariant(42)); @@ -141,4 +178,5 @@ fn main() { discriminant_overflow(); more_discriminant_overflow(); overaligned_casts(); + padding_clear_corner_case(); }