Skip to content

Commit

Permalink
[WIP] KnownLayout::try_cast_from
Browse files Browse the repository at this point in the history
TODO: Fix miri-symbolic-alignment issue: rust-lang/miri#3068
  • Loading branch information
joshlf committed Sep 21, 2023
1 parent 94f3f03 commit 710a9c5
Show file tree
Hide file tree
Showing 3 changed files with 256 additions and 13 deletions.
171 changes: 167 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,27 @@ use core::{
NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, Wrapping,
},
ops::{Deref, DerefMut},
ptr, slice,
ptr::{self, NonNull},
slice,
};

#[cfg(feature = "alloc")]
extern crate alloc;
#[cfg(feature = "alloc")]
use {
alloc::{boxed::Box, vec::Vec},
core::{alloc::Layout, ptr::NonNull},
core::alloc::Layout,
};

// For each polyfill, as soon as the corresponding feature is stable, the
// polyfill import will be unused because method/function resolution will prefer
// the inherent method/function over a trait method/function. Thus, we suppress
// the `unused_imports` warning.
//
// See the documentation on `util::polyfills` for more information.
#[allow(unused_imports)]
use crate::util::polyfills::{NonNullExt as _, NonNullSliceExt as _};

// This is a hack to allow zerocopy-derive derives to work in this crate. They
// assume that zerocopy is linked as an extern crate, so they access items from
// it as `zerocopy::Xxx`. This makes that still work.
Expand Down Expand Up @@ -300,8 +310,11 @@ impl SizeInfo {
}
}

#[cfg_attr(test, derive(Copy, Clone, Debug))]
enum _CastType {
// Selects which end of a byte range a cast should target. It is passed to
// `KnownLayout::try_cast_from` and forwarded to
// `DstLayout::_validate_cast_and_convert_metadata`.
#[doc(hidden)]
#[derive(Copy, Clone)]
#[cfg_attr(test, derive(Debug))]
// `Debug` is only derived under `cfg(test)` (see the `cfg_attr` above), so
// this now-`pub` type would otherwise trip the `missing_debug_implementations`
// lint in non-test builds.
#[allow(missing_debug_implementations)]
pub enum _CastType {
// The resulting object starts at the beginning of the byte range.
_Prefix,
// The resulting object starts `split_at` bytes into the byte range.
_Suffix,
}
Expand Down Expand Up @@ -573,12 +586,71 @@ impl DstLayout {
pub unsafe trait KnownLayout: sealed::KnownLayoutSealed {
#[doc(hidden)]
const LAYOUT: DstLayout;

/// Attempts to cast `bytes` to a pointer to `Self`, using either a prefix or
/// a suffix of `bytes` as selected by `cast_type`.
///
/// The address and length of `bytes` are checked by
/// `Self::LAYOUT._validate_cast_and_convert_metadata`; if that check fails,
/// `None` is returned. On success, returns the new pointer together with the
/// `split_at` offset reported by the check:
/// - For `_CastType::_Prefix`, the returned pointer has the same address as
///   `bytes`.
/// - For `_CastType::_Suffix`, the returned pointer's address is that of
///   `bytes` offset by `split_at` bytes.
///
/// This function only performs pointer arithmetic; it never reads or writes
/// through `bytes`, and it does not assume that `bytes` refers to a live
/// allocation. The returned pointer is only as dereferenceable as `bytes`
/// was.
///
/// TODO: Document the behavior on `isize` overflow and address space
/// wraparound.
#[doc(hidden)]
#[inline(always)]
fn try_cast_from(bytes: NonNull<[u8]>, cast_type: _CastType) -> Option<(NonNull<Self>, usize)> {
    let base = bytes.cast::<u8>();
    // The numeric address is only fed to the layout validation below; it is
    // never turned back into a pointer.
    #[allow(clippy::as_conversions)]
    let addr = base.as_ptr() as usize;
    // TODO(#67): Remove this allow. See NonNullSliceExt for more details.
    #[allow(unstable_name_collisions)]
    let (elems, split_at) =
        Self::LAYOUT._validate_cast_and_convert_metadata(addr, bytes.len(), cast_type)?;
    let slf = match cast_type {
        _CastType::_Prefix => base,
        _CastType::_Suffix => {
            // This is a safe function, so `bytes` is not guaranteed to point
            // into a live allocation; `ptr::add` would be undefined behavior
            // for an out-of-bounds offset. `wrapping_add` is always sound,
            // preserves provenance, and yields the same pointer as `add`
            // whenever the offset is in bounds. If the offset wraps around to
            // the null address, the cast is rejected instead of constructing
            // an invalid `NonNull`.
            NonNull::new(base.as_ptr().wrapping_add(split_at))?
        }
    };
    Some((Self::raw_from_ptr_len(slf, elems), split_at))
}

/// Attempts to cast the whole of `bytes` to a pointer to `Self`.
///
/// This performs a `_CastType::_Prefix` cast via `try_cast_from` and accepts
/// the result only if the reported `split_at` equals `bytes.len()`, i.e. if
/// no bytes are left over after the prefix. Returns `None` if the prefix cast
/// fails or if it leaves any trailing bytes.
///
/// TODO: Document the behavior on `isize` overflow and address space
/// wraparound.
#[doc(hidden)]
#[inline(always)]
fn try_cast_from_no_prefix_suffix(bytes: NonNull<[u8]>) -> Option<NonNull<Self>> {
    let (slf, split_at) = Self::try_cast_from(bytes, _CastType::_Prefix)?;
    // TODO(#67): Remove this allow. See NonNullSliceExt for more details.
    #[allow(unstable_name_collisions)]
    let total_len = bytes.len();
    if split_at == total_len {
        Some(slf)
    } else {
        None
    }
}

/// Converts a thin byte pointer into a pointer to `Self`, attaching `elems`
/// as the trailing-slice length where one is needed.
///
/// SAFETY: This is an obligation on implementers (`KnownLayout` is an
/// `unsafe trait`): the returned pointer has the same address and provenance
/// as `bytes`. If `Self` is a DST, the returned pointer's referent has `elems`
/// elements in its trailing slice. If `Self` is sized, `elems` is ignored.
#[doc(hidden)]
fn raw_from_ptr_len(bytes: NonNull<u8>, elems: usize) -> NonNull<Self>;
}

impl<T: KnownLayout> sealed::KnownLayoutSealed for [T] {}

// SAFETY: Delegates safety to `DstLayout::for_slice`.
unsafe impl<T: KnownLayout> KnownLayout for [T] {
    const LAYOUT: DstLayout = DstLayout::for_slice::<T>();

    // Builds a `NonNull<[T]>` that starts at `data` and has `elems` elements.
    //
    // SAFETY: `.cast` preserves address and provenance. The returned pointer
    // refers to an object with `elems` elements by construction.
    #[inline(always)]
    fn raw_from_ptr_len(data: NonNull<u8>, elems: usize) -> NonNull<Self> {
        // Reinterpret the byte pointer as a pointer to the first element.
        let first_elem: NonNull<T> = data.cast();
        // TODO(#67): Remove this allow. See NonNullExt for more details.
        #[allow(unstable_name_collisions)]
        NonNull::slice_from_raw_parts(first_elem, elems)
    }
}

#[rustfmt::skip]
Expand Down Expand Up @@ -3709,6 +3781,97 @@ mod tests {
test!(str, layout(0, 1, Some(1)));
}

#[test]
fn test_known_layout_try_cast_from_soundness() {
// This test is designed so that if `KnownLayout::try_cast_from_xxx` are
// buggy, it will manifest as unsoundness that Miri can detect.

// Exercises `T::try_cast_from` on every sub-range `start..end` of an
// `N`-byte buffer, for both cast types. Callers are expected to choose `N`
// as follows (the `test!` macro below does this):
// - If `size_of::<T>() == 0`, `N == 4`
// - Else, `N == 4 * size_of::<T>()`
fn test<const N: usize, T: KnownLayout + FromBytes>() {
let mut bytes = [MaybeUninit::<u8>::uninit(); N];
// An all-zero source buffer used to initialize the sub-range under test.
let initialized = [MaybeUninit::new(0u8); N];
for start in 0..=bytes.len() {
for end in start..=bytes.len() {
// Set all bytes to uninitialized other than those in the
// range we're going to pass to `try_cast_from`. This allows
// Miri to detect out-of-bounds reads because they read
// uninitialized memory. Without this, some out-of-bounds
// reads would still be in-bounds of `bytes`, and so might
// spuriously be accepted.
bytes = [MaybeUninit::<u8>::uninit(); N];
let bytes = &mut bytes[start..end];
// Initialize only the byte range we're going to pass to
// `try_cast_from`.
bytes.copy_from_slice(&initialized[start..end]);

// Reinterpret the now-initialized `[MaybeUninit<u8>]` sub-slice as
// `[u8]`.
let bytes = {
let bytes: *const [MaybeUninit<u8>] = bytes;
#[allow(clippy::as_conversions)]
let bytes = bytes as *const [u8];
// SAFETY: We just initialized these bytes to valid
// `u8`s.
unsafe { &*bytes }
};

for cast_type in [_CastType::_Prefix, _CastType::_Suffix] {
if let Some((slf, split_at)) =
T::try_cast_from(NonNull::from(bytes), cast_type)
{
// SAFETY: NOTE(review) - this relies on the (not yet
// documented) contract of `try_cast_from`: `slf` is
// expected to be aligned for `T` and to refer to
// `size_of::<T>()` bytes inside `bytes`, all of which
// were initialized above. `T: FromBytes`, so presumably
// any initialized bytes form a valid `T`. If
// `try_cast_from` is buggy and this does not hold, that
// is exactly what this test wants Miri to flag.
let t: T = unsafe { ptr::read(slf.as_ptr()) };

// View the bytes of the local copy `t` as a `&[u8]`.
let bytes = {
let len = mem::size_of_val(&t);
let t: *const T = &t;
// SAFETY:
// - We know `t`'s bytes are all initialized
// because we just read it from `slf`, which
// points to an initialized range of bytes. If
// there's a bug and this doesn't hold, then
// that's exactly what we're hoping Miri will
// catch!
// - Since `T: FromBytes`, `T` doesn't contain
// any `UnsafeCell`s, so it's okay for `t: T`
// and a `&[u8]` to the same memory to be
// alive concurrently.
unsafe { core::slice::from_raw_parts(t.cast::<u8>(), len) }
};

// This assertion ensures that `t`'s bytes are read
// and compared to another value, which in turn
// ensures that Miri gets a chance to notice if any
// of `t`'s bytes are uninitialized, which they
// shouldn't be (see the comment above).
let type_name = core::any::type_name::<T>();
assert_eq!(bytes, vec![0u8; bytes.len()], "type:{type_name}, start:{start}, end:{end}, cast_type:{cast_type:?}, slf:{slf:?}, split_at:{split_at}");
}
}
}
}
}

// Runs `test` for each listed type, computing the buffer size `N` from the
// type's size: 4 bytes for zero-sized types, otherwise four times the size.
macro_rules! test {
($($ty:ty),*) => {
$({
const S: usize = core::mem::size_of::<$ty>();
const N: usize = if S == 0 { 4 } else { S * 4 };
test::<N, $ty>();
})*
};
}

test!(());
test!(u8, u16, u32, u64, u128, usize, AU64);
test!(i8, i16, i32, i64, i128, isize);
test!(f32, f64);

// TODO:
// - What other conditions should we test for?
// - Leave a TODO to test with slice DSTs once we have any that implement `FromBytes`
// - In `test::<T>`, test with slices of `T` as well as just `T`
}

#[test]
fn test_object_safety() {
fn _takes_from_zeroes(_: &dyn FromZeroes) {}
Expand Down
43 changes: 34 additions & 9 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,11 +204,21 @@ macro_rules! impl_known_layout {
};
($($ty:ty),*) => { $(impl_known_layout!(@inner , => $ty);)* };
(@inner $(const $constvar:ident : $constty:ty)? , $($tyvar:ident $(: ?$optbound:ident)?)? => $ty:ty) => {
impl<$(const $constvar : $constty,)? $($tyvar $(: ?$optbound)?)?> sealed::KnownLayoutSealed for $ty {}
// SAFETY: Delegates safety to `DstLayout::for_type`.
unsafe impl<$(const $constvar : $constty,)? $($tyvar $(: ?$optbound)?)?> KnownLayout for $ty {
const LAYOUT: DstLayout = DstLayout::for_type::<$ty>();
}
// The impls are wrapped in an anonymous `const` item so that the `use` below
// is scoped to this block (NOTE(review): presumably so that repeated
// expansions do not each add a `NonNull` import to the enclosing module).
const _: () = {
use core::ptr::NonNull;

impl<$(const $constvar : $constty,)? $($tyvar $(: ?$optbound)?)?> sealed::KnownLayoutSealed for $ty {}
// SAFETY: Delegates safety to `DstLayout::for_type`.
unsafe impl<$(const $constvar : $constty,)? $($tyvar $(: ?$optbound)?)?> KnownLayout for $ty {
const LAYOUT: DstLayout = DstLayout::for_type::<$ty>();

// `_elems` is ignored: `NonNull::cast` produces a thin pointer, so there
// is no slice length to attach.
//
// SAFETY: `.cast` preserves address and provenance.
#[inline(always)]
fn raw_from_ptr_len(bytes: NonNull<u8>, _elems: usize) -> NonNull<Self> {
bytes.cast::<Self>()
}
}
};
};
}

Expand All @@ -225,10 +235,25 @@ macro_rules! impl_known_layout {
/// and this operation must preserve referent size (ie, `size_of_val_raw`).
macro_rules! unsafe_impl_known_layout {
($($tyvar:ident: ?Sized + KnownLayout =>)? #[repr($repr:ty)] $ty:ty) => {
impl<$($tyvar: ?Sized + KnownLayout)?> sealed::KnownLayoutSealed for $ty {}
unsafe impl<$($tyvar: ?Sized + KnownLayout)?> KnownLayout for $ty {
const LAYOUT: DstLayout = <$repr as KnownLayout>::LAYOUT;
}
// The impls are wrapped in an anonymous `const` item so that the `use` below
// is scoped to this block (NOTE(review): presumably so that repeated
// expansions do not each add a `NonNull` import to the enclosing module).
const _: () = {
use core::ptr::NonNull;

impl<$($tyvar: ?Sized + KnownLayout)?> sealed::KnownLayoutSealed for $ty {}
// NOTE(review): this `unsafe impl` reuses the layout of `$repr`; its
// soundness appears to rest on the promises the macro's caller makes about
// `$ty` and `$repr` - confirm against the macro's safety documentation.
unsafe impl<$($tyvar: ?Sized + KnownLayout)?> KnownLayout for $ty {
const LAYOUT: DstLayout = <$repr as KnownLayout>::LAYOUT;

// Builds the pointer as a pointer to `$repr` first, then re-types it as a
// pointer to `Self` with an `as` cast.
//
// SAFETY: All operations preserve address and provenance. Caller
// has promised that the `as` cast preserves size.
#[inline(always)]
#[allow(unused_qualifications)] // for `core::ptr::NonNull`
fn raw_from_ptr_len(bytes: NonNull<u8>, elems: usize) -> NonNull<Self> {
#[allow(clippy::as_conversions)]
let ptr = <$repr>::raw_from_ptr_len(bytes, elems).as_ptr() as *mut Self;
// SAFETY: `ptr` was converted from `bytes`, which is non-null.
unsafe { NonNull::new_unchecked(ptr) }
}
}
};
};
}

Expand Down
55 changes: 55 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,61 @@ pub(crate) const fn _round_down_to_next_multiple_of_alignment(
n & mask
}

/// Since we support multiple versions of Rust, there are often features which
/// have been stabilized in the most recent stable release which do not yet
/// exist (stably) on our MSRV. This module provides polyfills for those
/// features so that we can write more "modern" code, and just remove the
/// polyfill once our MSRV supports the corresponding feature. Without this,
/// we'd have to write worse/more verbose code and leave TODO comments sprinkled
/// throughout the codebase to update to the new pattern once it's stabilized.
///
/// Each trait is imported as `_` at the crate root; each polyfill should "just
/// work" at usage sites.
pub(crate) mod polyfills {
    use core::ptr::{self, NonNull};

    /// Stand-in for `NonNull::slice_from_raw_parts`, which was stabilized in
    /// Rust 1.70 and so is unavailable on older toolchains.
    ///
    /// TODO(#67): Once our MSRV is 1.70, remove this.
    pub(crate) trait NonNullExt<T> {
        /// Builds a non-null raw slice pointer that starts at `data` and has
        /// `len` elements.
        fn slice_from_raw_parts(data: Self, len: usize) -> NonNull<[T]>;
    }

    impl<T> NonNullExt<T> for NonNull<T> {
        #[inline(always)]
        fn slice_from_raw_parts(data: Self, len: usize) -> NonNull<[T]> {
            let fat: *mut [T] = ptr::slice_from_raw_parts_mut(data.as_ptr(), len);
            // SAFETY: `fat` has the same address as `data`, which is
            // non-null.
            unsafe { NonNull::new_unchecked(fat) }
        }
    }

    /// Stand-in for `NonNull::<[T]>::len`, which was stabilized in Rust 1.63
    /// and so is unavailable on older toolchains.
    ///
    /// TODO(#67): Once our MSRV is 1.63, remove this.
    pub(crate) trait NonNullSliceExt<T> {
        /// Returns the number of elements recorded in the slice pointer's
        /// metadata.
        fn len(&self) -> usize;
    }

    impl<T> NonNullSliceExt<T> for NonNull<[T]> {
        #[inline(always)]
        fn len(&self) -> usize {
            // Re-type the slice pointer as a slice of zero-sized `()`
            // elements; the cast keeps the element count.
            #[allow(clippy::as_conversions)]
            let units = self.as_ptr() as *const [()];
            // SAFETY:
            // - `()` has alignment 1, so `units` is trivially aligned
            // - `units` was derived from a non-null pointer
            // - a `[()]` has size 0 whatever its length, so it is sound to
            //   materialize a reference to it regardless of location
            // - pointer provenance may be an issue, but we never dereference
            let units: &[()] = unsafe { &*units };
            units.len()
        }
    }
}

#[cfg(test)]
pub(crate) mod testutil {
use core::fmt::{self, Display, Formatter};
Expand Down

0 comments on commit 710a9c5

Please sign in to comment.