unicode-org · robertbastian · Jan 14, 2025 · Jan 9, 2025 · Jan 10, 2025 · Jan 10, 2025
@@ -1888,11 +1888,9 @@ where
                                     while !remaining.is_empty() {
                                         // Numeric CEs are generated for segments of
                                         // up to 254 digits.
-                                        let (head, tail) = if remaining.len() > 254 {
-                                            remaining.split_at(254)
-                                        } else {
-                                            (remaining, &b""[..])
-                                        };
+                                        let (head, tail) = remaining
+                                            .split_at_checked(254)
+                                            .unwrap_or((remaining, b""));
                                         remaining = tail;
                                         // From ICU4C CollationIterator::appendNumericSegmentCEs
                                         if head.len() <= 7 {

@@ -122,10 +122,9 @@ impl SubdivisionId {
             })
             .ok_or(ParseError::InvalidExtension)?;
         let region_len = if is_alpha { 2 } else { 3 };
-        if code_units.len() < region_len + 1 {
-            return Err(ParseError::InvalidExtension);
-        }
-        let (region_code_units, suffix_code_units) = code_units.split_at(region_len);
+        let (region_code_units, suffix_code_units) = code_units
+            .split_at_checked(region_len)
+            .ok_or(ParseError::InvalidExtension)?;
         let region =
             Region::try_from_utf8(region_code_units).map_err(|_| ParseError::InvalidExtension)?;
         let suffix = SubdivisionSuffix::try_from_utf8(suffix_code_units)?;

@@ -27,9 +27,8 @@ const fn skip_before_separator(slice: &[u8]) -> &[u8] {
     }
 
     // Notice: this slice may be empty for cases like `"en-"` or `"en--US"`
-    // MSRV 1.71/1.79: Use split_at/split_at_unchecked
     // SAFETY: end < slice.len() by while loop
-    unsafe { core::slice::from_raw_parts(slice.as_ptr(), end) }
+    unsafe { slice.split_at_unchecked(end).0 }
 }
 
 // `SubtagIterator` is a helper iterator for [`LanguageIdentifier`] and [`Locale`] parsing.
@@ -66,14 +65,8 @@ impl<'a> SubtagIterator<'a> {
 
         self.current = if result.len() < self.remaining.len() {
             // If there is more after `result`, by construction `current` starts with a separator
-            // MSRV 1.79: Use split_at_unchecked
             // SAFETY: `self.remaining` is strictly longer than `result`
-            self.remaining = unsafe {
-                core::slice::from_raw_parts(
-                    self.remaining.as_ptr().add(result.len() + 1),
-                    self.remaining.len() - (result.len() + 1),
-                )
-            };
+            self.remaining = unsafe { self.remaining.split_at_unchecked(result.len() + 1).1 };
             Some(skip_before_separator(self.remaining))
         } else {
             None

@@ -1454,14 +1454,11 @@ macro_rules! normalizer_methods {
         /// is the normalization of the whole input.
         pub fn split_normalized<'a>(&self, text: &'a str) -> (&'a str, &'a str) {
             let up_to = self.is_normalized_up_to(text);
-            if up_to <= text.len() {
-                // not using split_at_checked due to MSRV
-                text.split_at(up_to)
-            } else {
+            text.split_at_checked(up_to).unwrap_or_else(|| {
                 // Internal bug, not even GIGO, never supposed to happen
                 debug_assert!(false);
                 ("", text)
-            }
+            })
         }
 
         /// Return the index a string slice is normalized up to.
@@ -1502,14 +1499,11 @@ macro_rules! normalizer_methods {
         #[cfg(feature = "utf16_iter")]
         pub fn split_normalized_utf16<'a>(&self, text: &'a [u16]) -> (&'a [u16], &'a [u16]) {
             let up_to = self.is_normalized_utf16_up_to(text);
-            if up_to <= text.len() {
-                // not using split_at_checked due to MSRV
-                text.split_at(up_to)
-            } else {
+            text.split_at_checked(up_to).unwrap_or_else(|| {
                 // Internal bug, not even GIGO, never supposed to happen
                 debug_assert!(false);
                 (&[], text)
-            }
+            })
         }
 
         /// Return the index a slice of potentially-invalid UTF-16 is normalized up to.
@@ -1559,17 +1553,14 @@ macro_rules! normalizer_methods {
         #[cfg(feature = "utf8_iter")]
         pub fn split_normalized_utf8<'a>(&self, text: &'a [u8]) -> (&'a str, &'a [u8]) {
             let up_to = self.is_normalized_utf8_up_to(text);
-            if up_to <= text.len() {
-                // not using split_at_checked due to MSRV
-                let (head, tail) = text.split_at(up_to);
-                // SAFETY: The normalization check also checks for
-                // UTF-8 well-formedness.
-                (unsafe { core::str::from_utf8_unchecked(head) }, tail)
-            } else {
+            let (head, tail) = text.split_at_checked(up_to).unwrap_or_else(|| {
                 // Internal bug, not even GIGO, never supposed to happen
                 debug_assert!(false);
-                ("", text)
-            }
+                (&[], text)
+            });
+            // SAFETY: The normalization check also checks for
+            // UTF-8 well-formedness.
+            (unsafe { core::str::from_utf8_unchecked(head) }, tail)
         }
 
         /// Return the index a slice of potentially-invalid UTF-8 is normalized up to

@@ -434,38 +434,31 @@ impl<'a> MultiNamedPlaceholderPatternIterator<'a> {
         match self.store.find(|x| (x as usize) <= 0x07) {
             Some(0) => {
                 // Placeholder
-                let Some(&[lead, trail]) = self.store.as_bytes().get(0..2) else {
+                let Some((&[lead, trail], remainder)) = self
+                    .store
+                    .split_at_checked(2)
+                    .map(|(a, b)| (a.as_bytes(), b))
+                else {
                     return Err(MultiNamedPlaceholderError::InvalidStore);
                 };
                 debug_assert!(lead <= 7);
                 if trail > 7 {
                     return Err(MultiNamedPlaceholderError::InvalidStore);
                 }
                 let placeholder_len = (lead << 3) + trail;
-                let boundary = (placeholder_len as usize) + 2;
-                // TODO: use .split_at_checked() when available:
-                // https://github.com/rust-lang/rust/issues/119128
-                let Some(placeholder_name) = self.store.get(2..boundary) else {
+                let Some((placeholder_name, remainder)) =
+                    remainder.split_at_checked(placeholder_len as usize)
+                else {
                     return Err(MultiNamedPlaceholderError::InvalidStore);
                 };
-                let Some(remainder) = self.store.get(boundary..) else {
-                    debug_assert!(false, "should be a perfect slice");
-                    return Err(MultiNamedPlaceholderError::Unreachable);
-                };
                 self.store = remainder;
                 Ok(Some(PatternItem::Placeholder(MultiNamedPlaceholderKey(
                     placeholder_name,
                 ))))
             }
             Some(i) => {
                 // Literal
-                // TODO: use .split_at_checked() when available:
-                // https://github.com/rust-lang/rust/issues/119128
-                let Some(literal) = self.store.get(..i) else {
-                    debug_assert!(false, "should be a perfect slice");
-                    return Err(MultiNamedPlaceholderError::Unreachable);
-                };
-                let Some(remainder) = self.store.get(i..) else {
+                let Some((literal, remainder)) = self.store.split_at_checked(i) else {
                     debug_assert!(false, "should be a perfect slice");
                     return Err(MultiNamedPlaceholderError::Unreachable);
                 };

@@ -451,12 +451,7 @@ where
             })
         } else {
             let (second_byte, remainder) = remainder.split_first()?;
-            // TODO in Rust 1.80: use split_at_checked
-            let v_length = *second_byte as usize;
-            if remainder.len() < v_length {
-                return None;
-            }
-            let (v_bytes, remainder) = remainder.split_at(v_length);
+            let (v_bytes, remainder) = remainder.split_at_checked(*second_byte as usize)?;
             Some(PluralElementsUnpackedBytes {
                 lead_byte: *lead_byte,
                 v_bytes,
@@ -824,7 +819,10 @@ where
                     panic!("other value too long to be packed: {self:?}")
                 }
             };
-            let (v_bytes, specials_bytes) = remainder.split_at_mut(*second_byte as usize);
+            #[allow(clippy::unwrap_used)] // for now okay since it is mostly only during datagen
+            let (v_bytes, specials_bytes) = remainder
+                .split_at_mut_checked(*second_byte as usize)
+                .unwrap();
             builder.default.1.encode_var_ule_write(v_bytes);
             EncodeAsVarULE::<PluralElementsTupleSliceVarULE<V>>::encode_var_ule_write(
                 &specials,

@@ -801,24 +801,7 @@ See also: the [Panics](#Panics--required) section of this document.
 
 #### Special Case: `split_at`
 
-The standard library functions such as `slice::split_at` are panicky, but they are not covered by our Clippy lints.
-
-Instead of using `slice::split_at` directly, it is recommended to use a helper function, which can be saved in the file in which it is being used. Example:
-
-```rust
-/// Like slice::split_at but returns an Option instead of panicking
-#[inline]
-fn debug_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
-    if mid > slice.len() {
-        debug_assert!(false, "debug_split_at: index expected to be in range");
-        None
-    } else {
-        // Note: We're trusting the compiler to inline this and remove the assertion
-        // hiding on the top of slice::split_at: `assert(mid <= self.len())`
-        Some(slice.split_at(mid))
-    }
-}
-```
+The standard library functions such as `slice::split_at` are panicky, but they are not covered by our Clippy lints. Be careful to use `slice::split_at_checked` instead.
 
 #### Exception: Poison
 

@@ -19,7 +19,8 @@ impl fmt::Write for WriteComparator<'_> {
             return Ok(());
         }
         let cmp_len = core::cmp::min(other.len(), self.code_units.len());
-        let (this, remainder) = self.code_units.split_at(cmp_len);
+        // SAFETY: cmp_len is at most self.code_units.len()
+        let (this, remainder) = unsafe { self.code_units.split_at_unchecked(cmp_len) };
         self.code_units = remainder;
         self.result = this.cmp(other.as_bytes());
         Ok(())

@@ -3,35 +3,18 @@
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
 
 pub(crate) trait MaybeSplitAt<T> {
-    /// Like slice::split_at but returns an Option instead of panicking
-    /// if the index is out of range.
-    fn maybe_split_at(&self, mid: usize) -> Option<(&Self, &Self)>;
     /// Like slice::split_at but debug-panics and returns an empty second slice
     /// if the index is out of range.
     fn debug_split_at(&self, mid: usize) -> (&Self, &Self);
 }
 
 impl<T> MaybeSplitAt<T> for [T] {
-    #[inline]
-    fn maybe_split_at(&self, mid: usize) -> Option<(&Self, &Self)> {
-        if mid > self.len() {
-            None
-        } else {
-            // Note: We're trusting the compiler to inline this and remove the assertion
-            // hiding on the top of slice::split_at: `assert(mid <= self.len())`
-            Some(self.split_at(mid))
-        }
-    }
     #[inline]
     fn debug_split_at(&self, mid: usize) -> (&Self, &Self) {
-        if mid > self.len() {
-            debug_assert!(false, "debug_split_at: index expected to be in range");
+        self.split_at_checked(mid).unwrap_or_else(|| {
+            debug_assert!(false, "debug_split_at: {mid} expected to be in range");
             (self, &[])
-        } else {
-            // Note: We're trusting the compiler to inline this and remove the assertion
-            // hiding on the top of slice::split_at: `assert(mid <= self.len())`
-            self.split_at(mid)
-        }
+        })
     }
 }
 

@@ -345,7 +345,7 @@ pub(crate) fn get_parameterized<T: ZeroTrieWithOptions + ?Sized>(
             {
                 let (trie_span, ascii_span);
                 (trie_span, trie) = trie.debug_split_at(x);
-                (ascii_span, ascii) = ascii.maybe_split_at(x)?;
+                (ascii_span, ascii) = ascii.split_at_checked(x)?;
                 if trie_span == ascii_span {
                     // Matched a byte span
                     continue;

@@ -111,22 +111,16 @@ where
     V: VarULE + ?Sized,
 {
     fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
-        // TODO: use split_first_chunk_mut in 1.77
-        if bytes.len() < size_of::<A::ULE>() {
-            return Err(UleError::length::<Self>(bytes.len()));
-        }
-        let (sized_chunk, variable_chunk) = bytes.split_at(size_of::<A::ULE>());
+        let (sized_chunk, variable_chunk) = bytes
+            .split_at_checked(size_of::<A::ULE>())
+            .ok_or(UleError::length::<Self>(bytes.len()))?;
         A::ULE::validate_bytes(sized_chunk)?;
         V::validate_bytes(variable_chunk)?;
         Ok(())
     }
 
     unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
-        #[allow(clippy::panic)] // panic is documented in function contract
-        if bytes.len() < size_of::<A::ULE>() {
-            panic!("from_bytes_unchecked called with short slice")
-        }
-        let (_sized_chunk, variable_chunk) = bytes.split_at(size_of::<A::ULE>());
+        let (_sized_chunk, variable_chunk) = bytes.split_at_unchecked(size_of::<A::ULE>());
         // Safety: variable_chunk is a valid V because of this function's precondition: bytes is a valid Self,
         // and a valid Self contains a valid V after the space needed for A::ULE.
         let variable_ref = V::from_bytes_unchecked(variable_chunk);

@@ -316,8 +316,7 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
                 marker: PhantomData,
             };
         }
-        // MSRV Rust 1.79: Use split_at_unchecked
-        let len_bytes = slice.get_unchecked(0..F::Len::SIZE);
+        let (len_bytes, data_bytes) = unsafe { slice.split_at_unchecked(F::Len::SIZE) };
         // Safety: F::Len allows all byte sequences
         let len_ule = F::Len::slice_from_bytes_unchecked(len_bytes);
 
@@ -328,7 +327,7 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
         // whereas we're calling something that asks for `parse_bytes_with_length()`.
         // The two methods perform similar validation, with parse_bytes() validating an additional
         // 4-byte `length` header.
-        Self::from_bytes_unchecked_with_length(len_u32, slice.get_unchecked(F::Len::SIZE..))
+        Self::from_bytes_unchecked_with_length(len_u32, data_bytes)
     }
 
     /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously