diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index 93608a1ce4864..f511ca8eb7851 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -1,5 +1,4 @@ // ignore-tidy-filelength -// ignore-tidy-undocumented-unsafe //! Slice management and manipulation. //!
@@ -70,6 +69,8 @@ impl<T> [T] { #[allow(unused_attributes)] #[allow_internal_unstable(const_fn_union)] pub const fn len(&self) -> usize { + // SAFETY: this is safe because `&[T]` and `FatPtr` have the same layout. + // Only `std` can make this guarantee. unsafe { crate::ptr::Repr { rust: self }.raw.len } }
@@ -443,7 +444,8 @@ impl<T> [T] { #[unstable(feature = "slice_ptr_range", issue = "65807")] #[inline] pub fn as_ptr_range(&self) -> Range<*const T> { - // The `add` here is safe, because: + let start = self.as_ptr(); + // SAFETY: The `add` here is safe, because: // // - Both pointers are part of the same object, as pointing directly // past the object also counts.
@@ -460,7 +462,6 @@ impl<T> [T] { // the end of the address space. // // See the documentation of pointer::add. - let start = self.as_ptr(); let end = unsafe { start.add(self.len()) }; start..end }
@@ -484,8 +485,8 @@ impl<T> [T] { #[unstable(feature = "slice_ptr_range", issue = "65807")] #[inline] pub fn as_mut_ptr_range(&mut self) -> Range<*mut T> { - // See as_ptr_range() above for why `add` here is safe. let start = self.as_mut_ptr(); + // SAFETY: See as_ptr_range() above for why `add` here is safe. let end = unsafe { start.add(self.len()) }; start..end }
@@ -511,11 +512,15 @@ impl<T> [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn swap(&mut self, a: usize, b: usize) { + // Can't take two mutable loans from one vector, so instead just cast + // them to their raw pointers to do the swap. + let pa: *mut T = &mut self[a]; + let pb: *mut T = &mut self[b]; + // SAFETY: `pa` and `pb` have been created from safe mutable references and refer + // to elements in the slice and therefore are guaranteed to be valid and aligned. + // Note that accessing the elements behind `a` and `b` is checked and will + // panic when out of bounds. unsafe { - // Can't take two mutable loans from one vector, so instead just cast - // them to their raw pointers to do the swap - let pa: *mut T = &mut self[a]; - let pb: *mut T = &mut self[b]; ptr::swap(pa, pb); } }
@@ -554,6 +559,24 @@ impl<T> [T] { // Use the llvm.bswap intrinsic to reverse u8s in a usize let chunk = mem::size_of::<usize>(); while i + chunk - 1 < ln / 2 { + // SAFETY: There are several things to check here: + // + // - Note that `chunk` is either 4 or 8 due to the cfg check + // above. So `chunk - 1` is positive. + // - Indexing with index `i` is fine as the loop check guarantees + // `i + chunk - 1 < ln / 2` + // <=> `i < ln / 2 - (chunk - 1) < ln / 2 < ln`. + // - Indexing with index `ln - i - chunk = ln - (i + chunk)` is fine: + // - `i + chunk > 0` is trivially true. + // - The loop check guarantees: + // `i + chunk - 1 < ln / 2` + // <=> `i + chunk ≤ ln / 2 ≤ ln`, thus subtraction does not underflow. + // - The `read_unaligned` and `write_unaligned` calls are fine: + // - `pa` points to index `i` where `i < ln / 2 - (chunk - 1)` + // (see above) and `pb` points to index `ln - i - chunk`, so + // both are at least `chunk` many bytes away from the end of `self`. + // - Any initialized memory is valid `usize`.
unsafe { let pa: *mut T = self.get_unchecked_mut(i); let pb: *mut T = self.get_unchecked_mut(ln - i - chunk);
@@ -570,6 +593,20 @@ impl<T> [T] { // Use rotate-by-16 to reverse u16s in a u32 let chunk = mem::size_of::<u32>() / 2; while i + chunk - 1 < ln / 2 { + // SAFETY: An unaligned u32 can be read from `i` if `i + 1 < ln` + // (and obviously `i < ln`), because each element is 2 bytes and + // we're reading 4. + // + // `i + chunk - 1 < ln / 2` # while condition + // `i + 2 - 1 < ln / 2` + // `i + 1 < ln / 2` + // + // Since `i + 1` is less than half the length, it must be in bounds. + // + // This also means that the condition `0 < i + chunk <= ln` is + // always respected, ensuring the `pb` pointer can be used + // safely. unsafe { let pa: *mut T = self.get_unchecked_mut(i); let pb: *mut T = self.get_unchecked_mut(ln - i - chunk);
@@ -583,8 +620,13 @@ impl<T> [T] { } while i < ln / 2 { - // Unsafe swap to avoid the bounds check in safe swap. + // SAFETY: `i` is less than half the length of the slice, so + // accessing `i` and `ln - i - 1` is safe (`i` starts at 0 and + // will not go further than `ln / 2 - 1`). + // The resulting pointers `pa` and `pb` are therefore valid and + // aligned, and can be read from and written to. unsafe { + // Unsafe swap to avoid the bounds check in safe swap. let pa: *mut T = self.get_unchecked_mut(i); let pb: *mut T = self.get_unchecked_mut(ln - i - 1); ptr::swap(pa, pb);
@@ -609,8 +651,24 @@ impl<T> [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn iter(&self) -> Iter<'_, T> { + let ptr = self.as_ptr(); + // SAFETY: There are several things here: + // + // `ptr` has been obtained by `self.as_ptr()` where `self` is a valid + // reference, thus it is non-null and safe to use and pass to + // `NonNull::new_unchecked`. + // + // Adding `self.len()` to the starting pointer gives a pointer + // at the end of `self`. `end` will never be dereferenced, only checked + // for direct pointer equality with `ptr` to check if the iterator is + // done. + // + // In the case of a ZST, the end pointer is just the start pointer plus + // the length, which also allows for the fast `ptr == end` check. + // + // See the `next_unchecked!` and `is_empty!` macros as well as the + // `post_inc_start` method for more information. unsafe { - let ptr = self.as_ptr(); assume(!ptr.is_null()); let end = if mem::size_of::<T>() == 0 {
@@ -637,8 +695,24 @@ impl<T> [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn iter_mut(&mut self) -> IterMut<'_, T> { + let ptr = self.as_mut_ptr(); + // SAFETY: There are several things here: + // + // `ptr` has been obtained by `self.as_mut_ptr()` where `self` is a valid + // reference, thus it is non-null and safe to use and pass to + // `NonNull::new_unchecked`. + // + // Adding `self.len()` to the starting pointer gives a pointer + // at the end of `self`. `end` will never be dereferenced, only checked + // for direct pointer equality with `ptr` to check if the iterator is + // done. + // + // In the case of a ZST, the end pointer is just the start pointer plus + // the length, which also allows for the fast `ptr == end` check. + // + // See the `next_unchecked!` and `is_empty!` macros as well as the + // `post_inc_start` method for more information.
unsafe { - let ptr = self.as_mut_ptr(); assume(!ptr.is_null()); let end = if mem::size_of::<T>() == 0 {
@@ -1107,6 +1181,8 @@ impl<T> [T] { let len = self.len(); let ptr = self.as_mut_ptr(); + // SAFETY: `[ptr; mid]` and `[mid; len]` are inside `self`, which + // fulfills the requirements of `from_raw_parts_mut`. unsafe { assert!(mid <= len);
@@ -1655,14 +1731,14 @@ impl<T> [T] { while size > 1 { let half = size / 2; let mid = base + half; - // mid is always in [0, size), that means mid is >= 0 and < size. - // mid >= 0: by definition - // mid < size: mid = size / 2 + size / 4 + size / 8 ... + // SAFETY: the call is made safe by the following invariants: + // - `mid >= 0`: by definition + // - `mid < size`: `mid = size / 2 + size / 4 + size / 8 ...` let cmp = f(unsafe { s.get_unchecked(mid) }); base = if cmp == Greater { base } else { mid }; size -= half; } - // base is always in [0, size) because base <= mid. + // SAFETY: base is always in [0, size) because base <= mid. let cmp = f(unsafe { s.get_unchecked(base) }); if cmp == Equal { Ok(base) } else { Err(base + (cmp == Less) as usize) } }
@@ -2120,6 +2196,21 @@ impl<T> [T] { let mut next_read: usize = 1; let mut next_write: usize = 1; + // SAFETY: the `while` condition guarantees `next_read` and `next_write` + // are less than `len`, thus are inside `self`. `prev_ptr_write` points to + // one element before `ptr_write`, but `next_write` starts at 1, so + // `prev_ptr_write` is never less than 0 and is inside the slice. + // This fulfills the requirements for dereferencing `ptr_read`, `prev_ptr_write` + // and `ptr_write`, and for using `ptr.add(next_read)`, `ptr.add(next_write - 1)` + // and `prev_ptr_write.offset(1)`. + // + // `next_write` is also incremented at most once per loop iteration, meaning + // no element is skipped when it may need to be swapped. + // + // `ptr_read` and `prev_ptr_write` never point to the same element. This + // is required for `&mut *ptr_read`, `&mut *prev_ptr_write` to be safe. + // The explanation is simply that `next_read >= next_write` is always true, + // thus `next_read > next_write - 1` is too. unsafe { // Avoid bounds checks by using raw pointers. while next_read < len {
@@ -2203,9 +2294,11 @@ impl<T> [T] { pub fn rotate_left(&mut self, mid: usize) { assert!(mid <= self.len()); let k = self.len() - mid; + let p = self.as_mut_ptr(); + // SAFETY: The range `[p.add(mid) - mid, p.add(mid) + k)` is trivially + // valid for reading and writing, as required by `ptr_rotate`. unsafe { - let p = self.as_mut_ptr(); rotate::ptr_rotate(mid, p.add(mid), k); } }
@@ -2244,9 +2337,11 @@ impl<T> [T] { pub fn rotate_right(&mut self, k: usize) { assert!(k <= self.len()); let mid = self.len() - k; + let p = self.as_mut_ptr(); + // SAFETY: The range `[p.add(mid) - mid, p.add(mid) + k)` is trivially + // valid for reading and writing, as required by `ptr_rotate`. unsafe { - let p = self.as_mut_ptr(); rotate::ptr_rotate(mid, p.add(mid), k); } }
@@ -2407,6 +2502,9 @@ impl<T> [T] { T: Copy, { assert_eq!(self.len(), src.len(), "destination and source slices have different lengths"); + // SAFETY: `self` is valid for `self.len()` elements by definition, and `src` was + // checked to have the same length. The slices cannot overlap because + // mutable references are exclusive.
unsafe { ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), self.len()); }
@@ -2460,6 +2558,8 @@ impl<T> [T] { assert!(src_end <= self.len(), "src is out of bounds"); let count = src_end - src_start; assert!(dest <= self.len() - count, "dest is out of bounds"); + // SAFETY: the conditions for `ptr::copy` have all been checked above, + // as have those for `ptr::add`. unsafe { ptr::copy(self.as_ptr().add(src_start), self.as_mut_ptr().add(dest), count); }
@@ -2515,6 +2615,9 @@ impl<T> [T] { #[stable(feature = "swap_with_slice", since = "1.27.0")] pub fn swap_with_slice(&mut self, other: &mut [T]) { assert!(self.len() == other.len(), "destination and source slices have different lengths"); + // SAFETY: `self` is valid for `self.len()` elements by definition, and `other` was + // checked to have the same length. The slices cannot overlap because + // mutable references are exclusive. unsafe { ptr::swap_nonoverlapping(self.as_mut_ptr(), other.as_mut_ptr(), self.len()); }
@@ -2546,6 +2649,8 @@ impl<T> [T] { // iterative stein’s algorithm // We should still make this `const fn` (and revert to recursive algorithm if we do) // because relying on llvm to consteval all this is… well, it makes me uncomfortable. + + // SAFETY: `a` and `b` are checked to be non-zero values. let (ctz_a, mut ctz_b) = unsafe { if a == 0 { return b;
@@ -2565,6 +2670,7 @@ impl<T> [T] { mem::swap(&mut a, &mut b); } b = b - a; + // SAFETY: `b` is checked to be non-zero. unsafe { if b == 0 { break;
@@ -2626,6 +2732,7 @@ impl<T> [T] { // First, find at what point do we split between the first and 2nd slice. Easy with // ptr.align_offset. let ptr = self.as_ptr(); + // SAFETY: See the `align_to_mut` method for the detailed safety comment. let offset = unsafe { crate::ptr::align_offset(ptr, mem::align_of::<U>()) }; if offset > self.len() { (self, &[], &[])
@@ -2685,6 +2792,13 @@ impl<T> [T] { // First, find at what point do we split between the first and 2nd slice. Easy with // ptr.align_offset. let ptr = self.as_ptr(); + // SAFETY: Here we are ensuring we will use aligned pointers for U for the + // rest of the method. This is done by passing a pointer to &[T] with an + // alignment targeted for U. + // `crate::ptr::align_offset` is called with a correctly aligned and + // valid pointer `ptr` (it comes from a reference to `self`) and with + // a size that is a power of two (since it comes from the alignment of U), + // satisfying its safety constraints. let offset = unsafe { crate::ptr::align_offset(ptr, mem::align_of::<U>()) }; if offset > self.len() { (self, &mut [], &mut [])
@@ -2810,15 +2924,13 @@ impl<T> [T] { while left != right { let mid = left + (right - left) / 2; - // SAFETY: - // When left < right, left <= mid < right. - // Therefore left always increases and right always decreases, - // and eigher of them is selected. - // In both cases left <= right is satisfied. - // Therefore if left < right in a step, - // left <= right is satisfied in the next step. - // Therefore as long as left != right, 0 <= left < right <= len is satisfied - // and if this case 0 <= mid < len is satisfied too. + // SAFETY: When `left < right`, `left <= mid < right`. + // Therefore `left` always increases and `right` always decreases, + // and either of them is selected. In both cases `left <= right` is + // satisfied. Therefore if `left < right` in a step, `left <= right` + // is satisfied in the next step. Therefore as long as `left != right`, + // `0 <= left < right <= len` is satisfied and in this case + // `0 <= mid < len` is satisfied too.
let value = unsafe { self.get_unchecked(mid) }; if pred(value) { left = mid + 1;
@@ -2938,7 +3050,8 @@ fn is_ascii(s: &[u8]) -> bool { // above. debug_assert!(offset_to_aligned <= len); - // word_ptr is the (properly aligned) usize ptr we use to read the middle chunk of the slice. + // SAFETY: word_ptr is the (properly aligned) usize ptr we use to read the + // middle chunk of the slice. let mut word_ptr = unsafe { start.add(offset_to_aligned) as *const usize }; // `byte_pos` is the byte index of `word_ptr`, used for loop end checks.
@@ -3126,11 +3239,13 @@ unsafe impl<T> SliceIndex<[T]> for usize { #[inline] fn get(self, slice: &[T]) -> Option<&T> { + // SAFETY: `self` is checked to be in bounds. if self < slice.len() { unsafe { Some(&*self.get_unchecked(slice)) } } else { None } } #[inline] fn get_mut(self, slice: &mut [T]) -> Option<&mut T> { + // SAFETY: `self` is checked to be in bounds. if self < slice.len() { unsafe { Some(&mut *self.get_unchecked_mut(slice)) } } else { None } }
@@ -3171,6 +3286,7 @@ unsafe impl<T> SliceIndex<[T]> for ops::Range<usize> { if self.start > self.end || self.end > slice.len() { None } else { + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { Some(&*self.get_unchecked(slice)) } } }
@@ -3180,6 +3296,7 @@ unsafe impl<T> SliceIndex<[T]> for ops::Range<usize> { if self.start > self.end || self.end > slice.len() { None } else { + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { Some(&mut *self.get_unchecked_mut(slice)) } } }
@@ -3208,6 +3325,7 @@ unsafe impl<T> SliceIndex<[T]> for ops::Range<usize> { } else if self.end > slice.len() { slice_end_index_len_fail(self.end, slice.len()); } + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { &*self.get_unchecked(slice) } }
@@ -3218,6 +3336,7 @@ unsafe impl<T> SliceIndex<[T]> for ops::Range<usize> { } else if self.end > slice.len() { slice_end_index_len_fail(self.end, slice.len()); } + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { &mut *self.get_unchecked_mut(slice) } } }
@@ -3290,6 +3409,7 @@ unsafe impl<T> SliceIndex<[T]> for ops::RangeFrom<usize> { if self.start > slice.len() { slice_start_index_len_fail(self.start, slice.len()); } + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { &*self.get_unchecked(slice) } }
@@ -3298,6 +3418,7 @@ unsafe impl<T> SliceIndex<[T]> for ops::RangeFrom<usize> { if self.start > slice.len() { slice_start_index_len_fail(self.start, slice.len()); } + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { &mut *self.get_unchecked_mut(slice) } } }
@@ -3543,6 +3664,9 @@ macro_rules! iterator { // Helper function for creating a slice from the iterator. #[inline(always)] fn make_slice(&self) -> &'a [T] { + // SAFETY: the iterator was created from a slice with pointer + // `self.ptr` and length `len!(self)`. This guarantees that all + // the prerequisites for `from_raw_parts` are fulfilled. unsafe { from_raw_parts(self.ptr.as_ptr(), len!(self)) } }
@@ -3601,6 +3725,11 @@ macro_rules! iterator { #[inline] fn next(&mut self) -> Option<$elem> { // could be implemented with slices, but this avoids bounds checks + + // SAFETY: `assume` calls are safe since a slice's start pointer + // must be non-null, and slices over non-ZSTs must also have a + // non-null end pointer. The call to `next_unchecked!` is safe + // since we check if the iterator is empty first. unsafe { assume(!self.ptr.as_ptr().is_null()); if mem::size_of::<T>() != 0 {
@@ -3634,14 +3763,14 @@ macro_rules! iterator { // could be (due to wrapping).
self.end = self.ptr.as_ptr(); } else { + // SAFETY: end can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr unsafe { - // End can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr self.ptr = NonNull::new_unchecked(self.end as *mut T); } } return None; } - // We are in bounds. `post_inc_start` does the right thing even for ZSTs. + // SAFETY: We are in bounds. `post_inc_start` does the right thing even for ZSTs. unsafe { self.post_inc_start(n as isize); Some(next_unchecked!(self))
@@ -3748,6 +3877,8 @@ macro_rules! iterator { let mut i = 0; while let Some(x) = self.next() { if predicate(x) { + // SAFETY: we are guaranteed to be in bounds by the loop invariant: + // when `i >= n`, `self.next()` returns `None` and the loop breaks. unsafe { assume(i < n) }; return Some(i); }
@@ -3769,6 +3900,8 @@ macro_rules! iterator { while let Some(x) = self.next_back() { i -= 1; if predicate(x) { + // SAFETY: `i` must be less than `n` since it starts at `n` + // and is only decreasing. unsafe { assume(i < n) }; return Some(i); }
@@ -3784,6 +3917,11 @@ macro_rules! iterator { #[inline] fn next_back(&mut self) -> Option<$elem> { // could be implemented with slices, but this avoids bounds checks + + // SAFETY: `assume` calls are safe since a slice's start pointer must be non-null, + // and slices over non-ZSTs must also have a non-null end pointer. + // The call to `next_back_unchecked!` is safe since we check if the iterator is + // empty first. unsafe { assume(!self.ptr.as_ptr().is_null()); if mem::size_of::<T>() != 0 {
@@ -3804,7 +3942,7 @@ macro_rules! iterator { self.end = self.ptr.as_ptr(); return None; } - // We are in bounds. `pre_dec_end` does the right thing even for ZSTs. + // SAFETY: We are in bounds. `pre_dec_end` does the right thing even for ZSTs. unsafe { self.pre_dec_end(n as isize); Some(next_back_unchecked!(self))
@@ -3999,6 +4137,9 @@ impl<'a, T> IterMut<'a, T> { /// ``` #[stable(feature = "iter_to_slice", since = "1.4.0")] pub fn into_slice(self) -> &'a mut [T] { + // SAFETY: the iterator was created from a mutable slice with pointer + // `self.ptr` and length `len!(self)`. This guarantees that all the prerequisites + // for `from_raw_parts_mut` are fulfilled. unsafe { from_raw_parts_mut(self.ptr.as_ptr(), len!(self)) } }
@@ -5568,6 +5709,8 @@ impl<T, const N: usize> FusedIterator for ArrayChunks<'_, T, N> {} #[unstable(feature = "array_chunks", issue = "74985")] unsafe impl<'a, T, const N: usize> TrustedRandomAccess for ArrayChunks<'a, T, N> { unsafe fn get_unchecked(&mut self, i: usize) -> &'a [T; N] { + // SAFETY: The safety guarantees of `get_unchecked` are transferred to + // the caller. unsafe { self.iter.get_unchecked(i) } } fn may_have_side_effect() -> bool {
@@ -6288,12 +6431,20 @@ pub unsafe fn from_raw_parts_mut<'a, T>(data: *mut T, len: usize) -> &'a mut [T] /// Converts a reference to T into a slice of length 1 (without copying). #[stable(feature = "from_ref", since = "1.28.0")] pub fn from_ref<T>(s: &T) -> &[T] { + // SAFETY: a reference is guaranteed to be valid for reads. The returned + // reference cannot be mutated as it is an immutable reference. + // `mem::size_of::<T>()` cannot be larger than `isize::MAX`. + // Thus the call to `from_raw_parts` is safe. unsafe { from_raw_parts(s, 1) } } /// Converts a reference to T into a slice of length 1 (without copying). #[stable(feature = "from_ref", since = "1.28.0")] pub fn from_mut<T>(s: &mut T) -> &mut [T] { + // SAFETY: a mutable reference is guaranteed to be valid for writes.
+ // The reference cannot be accessed by another pointer as it is a mutable reference. + // `mem::size_of::<T>()` cannot be larger than `isize::MAX`. + // Thus the call to `from_raw_parts_mut` is safe. unsafe { from_raw_parts_mut(s, 1) } }
@@ -6414,6 +6565,8 @@ where if self.as_ptr().guaranteed_eq(other.as_ptr()) { return true; } + // SAFETY: `self` and `other` are references and are thus guaranteed to be valid. + // The two slices have been checked to have the same size above. unsafe { let size = mem::size_of_val(self); memcmp(self.as_ptr() as *const u8, other.as_ptr() as *const u8, size) == 0
@@ -6516,6 +6669,9 @@ impl SliceOrd for u8 { #[inline] fn compare(left: &[Self], right: &[Self]) -> Ordering { let order = + // SAFETY: `left` and `right` are references and are thus guaranteed to be valid. + // We use the minimum of both lengths, which guarantees that both regions are + // valid for reads in that interval. unsafe { memcmp(left.as_ptr(), right.as_ptr(), cmp::min(left.len(), right.len())) }; if order == 0 { left.len().cmp(&right.len())
@@ -6590,6 +6746,10 @@ impl SliceContains for u8 { impl SliceContains for i8 { fn slice_contains(&self, x: &[Self]) -> bool { let byte = *self as u8; + // SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()` + // as `*const u8` is safe. `x.as_ptr()` comes from a reference and is thus guaranteed + // to be valid for reads for the length of the slice `x.len()`, which cannot be larger + // than `isize::MAX`. The returned slice is never mutated. let bytes: &[u8] = unsafe { from_raw_parts(x.as_ptr() as *const u8, x.len()) }; memchr::memchr(byte, bytes).is_some() }
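
Reviewer note (not part of the diff): removing `// ignore-tidy-undocumented-unsafe` at the top of the file means tidy's undocumented-unsafe check now applies to this module, which is what the `// SAFETY:` additions above satisfy. As a minimal standalone sketch of the convention, using a hypothetical function rather than code from this file:

```rust
/// Returns the first element of `v`, or 0 if `v` is empty.
/// (Hypothetical example illustrating the `// SAFETY:` comment convention.)
fn first_or_zero(v: &[u32]) -> u32 {
    if v.is_empty() {
        return 0;
    }
    // SAFETY: `v` is non-empty, so index 0 is in bounds and points to an
    // initialized `u32`; `get_unchecked(0)` therefore yields a valid reference.
    unsafe { *v.get_unchecked(0) }
}

fn main() {
    assert_eq!(first_or_zero(&[7, 8, 9]), 7);
    assert_eq!(first_or_zero(&[]), 0);
}
```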
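
The index arithmetic claimed by the new `reverse` SAFETY comment (the `usize`-chunk loop) can also be checked exhaustively for small lengths. A throwaway sketch, not part of the diff, using the same loop condition; the assertions mirror the bounds the comment argues for:

```rust
// Exhaustively verify, for small slice lengths, the bounds claimed in the
// `reverse` SAFETY comment: under `i + chunk - 1 < ln / 2`, the front chunk
// `[i, i + chunk)` and the back chunk `[ln - i - chunk, ln - i)` are both in
// bounds and disjoint.
fn main() {
    for &chunk in &[4usize, 8] {
        for ln in 0..512usize {
            let mut i = 0;
            while i + chunk - 1 < ln / 2 {
                // the front chunk stays inside the slice, so `ln - i - chunk`
                // cannot underflow either
                assert!(i + chunk <= ln);
                // the two chunks do not overlap
                assert!(i + chunk <= ln - i - chunk);
                i += chunk;
            }
        }
    }
    println!("bounds hold for all ln < 512");
}
```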
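
For the `from_ref`/`from_mut` comments near the end, the behaviour being justified is simply a one-element slice view of a reference. A short usage sketch via the public `std::slice` re-exports:

```rust
fn main() {
    let x = 5i32;
    // a shared reference viewed as a slice of length 1
    assert_eq!(std::slice::from_ref(&x), &[5]);

    let mut y = 1i32;
    // a mutable reference viewed as a slice of length 1; the write lands in `y`
    std::slice::from_mut(&mut y)[0] = 7;
    assert_eq!(y, 7);
}
```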