diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index 668af60611b86..1cda6184d1ec6 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -43,7 +43,7 @@ #![stable(feature = "rust1", since = "1.0.0")] #[cfg(not(no_global_oom_handling))] -use core::char::{decode_utf16, REPLACEMENT_CHARACTER}; +use core::char::{decode_utf16, MAX_UTF8_LEN, REPLACEMENT_CHARACTER}; use core::fmt; use core::hash; #[cfg(not(no_global_oom_handling))] @@ -1221,7 +1221,7 @@ impl String { pub fn push(&mut self, ch: char) { match ch.len_utf8() { 1 => self.vec.push(ch as u8), - _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), + _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; MAX_UTF8_LEN]).as_bytes()), } } @@ -1528,7 +1528,7 @@ impl String { #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(&mut self, idx: usize, ch: char) { assert!(self.is_char_boundary(idx)); - let mut bits = [0; 4]; + let mut bits = [0; MAX_UTF8_LEN]; let bits = ch.encode_utf8(&mut bits).as_bytes(); unsafe { @@ -2497,7 +2497,7 @@ impl ToString for T { impl ToString for char { #[inline] fn to_string(&self) -> String { - String::from(self.encode_utf8(&mut [0; 4])) + String::from(self.encode_utf8(&mut [0; MAX_UTF8_LEN])) } } diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index 7379569dd68fe..670c8596c4b89 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1217,7 +1217,7 @@ fn test_to_uppercase_rev_iterator() { #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn test_chars_decoding() { - let mut bytes = [0; 4]; + let mut bytes = [0; std::char::MAX_UTF8_LEN]; for c in (0..0x110000).filter_map(std::char::from_u32) { let s = c.encode_utf8(&mut bytes); if Some(c) != s.chars().next() { @@ -1229,7 +1229,7 @@ fn test_chars_decoding() { #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn test_chars_rev_decoding() { - let mut bytes = [0; 4]; + let mut bytes = [0; std::char::MAX_UTF8_LEN]; for c 
in (0..0x110000).filter_map(std::char::from_u32) { let s = c.encode_utf8(&mut bytes); if Some(c) != s.chars().rev().next() { diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 0bec38a877ed5..4b62f45db4560 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -45,6 +45,16 @@ impl char { #[stable(feature = "assoc_char_consts", since = "1.52.0")] pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION; + /// The maximum number of bytes required to [encode](char::encode_utf8) a + /// `char` in UTF-8. + #[stable(feature = "max_len", since = "1.63.0")] + pub const MAX_UTF8_LEN: usize = 4; + + /// The maximum number of 16-bit code units required to + /// [encode](char::encode_utf16) a `char` in UTF-16. + #[stable(feature = "max_len", since = "1.63.0")] + pub const MAX_UTF16_LEN: usize = 2; + /// Creates an iterator over the UTF-16 encoded code points in `iter`, /// returning unpaired surrogates as `Err`s. /// diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs index 0df23e7bbe695..a00ca3d320472 100644 --- a/library/core/src/char/mod.rs +++ b/library/core/src/char/mod.rs @@ -97,6 +97,17 @@ pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER; #[stable(feature = "unicode_version", since = "1.45.0")] pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION; +/// The maximum number of bytes required to [encode](char::encode_utf8) a +/// `char` in UTF-8. Use [`char::MAX_UTF8_LEN`] instead. +#[stable(feature = "max_len", since = "1.63.0")] +pub const MAX_UTF8_LEN: usize = char::MAX_UTF8_LEN; + +/// The maximum number of 16-bit code units required to +/// [encode](char::encode_utf16) a `char` in UTF-16. Use [`char::MAX_UTF16_LEN`] +/// instead. 
+#[stable(feature = "max_len", since = "1.63.0")] +pub const MAX_UTF16_LEN: usize = char::MAX_UTF16_LEN; + /// Creates an iterator over the UTF-16 encoded code points in `iter`, returning /// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead. #[stable(feature = "decode_utf16", since = "1.9.0")] diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index 1c66089fad6e6..500eece54e9c1 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -3,7 +3,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell}; -use crate::char::EscapeDebugExtArgs; +use crate::char::{EscapeDebugExtArgs, MAX_UTF8_LEN}; use crate::marker::PhantomData; use crate::mem; use crate::num::fmt as numfmt; @@ -161,7 +161,7 @@ pub trait Write { /// ``` #[stable(feature = "fmt_write_char", since = "1.1.0")] fn write_char(&mut self, c: char) -> Result { - self.write_str(c.encode_utf8(&mut [0; 4])) + self.write_str(c.encode_utf8(&mut [0; MAX_UTF8_LEN])) } /// Glue for usage of the [`write!`] macro with implementors of this trait. 
@@ -2225,7 +2225,7 @@ impl Display for char { if f.width.is_none() && f.precision.is_none() { f.write_char(*self) } else { - f.pad(self.encode_utf8(&mut [0; 4])) + f.pad(self.encode_utf8(&mut [0; MAX_UTF8_LEN])) } } } diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 24083ee6af44f..4b47e06d6e818 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1410,7 +1410,7 @@ impl<'a> Iterator for EncodeUtf16<'a> { return Some(tmp); } - let mut buf = [0; 2]; + let mut buf = [0; char::MAX_UTF16_LEN]; self.chars.next().map(|ch| { let n = ch.encode_utf16(&mut buf).len(); if n == 2 { diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index 031fb8e8b21fb..368b8df302219 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -38,6 +38,7 @@ issue = "27721" )] +use crate::char::MAX_UTF8_LEN; use crate::cmp; use crate::fmt; use crate::slice::memchr; @@ -540,7 +541,7 @@ impl<'a> Pattern<'a> for char { #[inline] fn into_searcher(self, haystack: &'a str) -> Self::Searcher { - let mut utf8_encoded = [0; 4]; + let mut utf8_encoded = [0; MAX_UTF8_LEN]; let utf8_size = self.encode_utf8(&mut utf8_encoded).len(); CharSearcher { haystack, diff --git a/library/core/tests/char.rs b/library/core/tests/char.rs index 8542e5c70d40c..628a2959115c1 100644 --- a/library/core/tests/char.rs +++ b/library/core/tests/char.rs @@ -258,7 +258,7 @@ fn test_escape_unicode() { #[test] fn test_encode_utf8() { fn check(input: char, expect: &[u8]) { - let mut buf = [0; 4]; + let mut buf = [0; char::MAX_UTF8_LEN]; let ptr = buf.as_ptr(); let s = input.encode_utf8(&mut buf); assert_eq!(s.as_ptr() as usize, ptr as usize); @@ -275,7 +275,7 @@ fn test_encode_utf8() { #[test] fn test_encode_utf16() { fn check(input: char, expect: &[u16]) { - let mut buf = [0; 2]; + let mut buf = [0; char::MAX_UTF16_LEN]; let ptr = buf.as_mut_ptr(); let b = input.encode_utf16(&mut buf); assert_eq!(b.as_mut_ptr() as usize, ptr 
as usize); diff --git a/library/std/src/sys/windows/stdio.rs b/library/std/src/sys/windows/stdio.rs index a001d6b985823..1edc8454d599b 100644 --- a/library/std/src/sys/windows/stdio.rs +++ b/library/std/src/sys/windows/stdio.rs @@ -1,6 +1,6 @@ #![unstable(issue = "none", feature = "windows_stdio")] -use crate::char::decode_utf16; +use crate::char::{decode_utf16, MAX_UTF8_LEN}; use crate::cmp; use crate::io; use crate::os::windows::io::{FromRawHandle, IntoRawHandle}; @@ -27,7 +27,7 @@ pub struct Stderr { } struct IncompleteUtf8 { - bytes: [u8; 4], + bytes: [u8; MAX_UTF8_LEN], len: u8, } @@ -377,7 +377,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result { impl IncompleteUtf8 { pub const fn new() -> IncompleteUtf8 { - IncompleteUtf8 { bytes: [0; 4], len: 0 } + IncompleteUtf8 { bytes: [0; MAX_UTF8_LEN], len: 0 } } } diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 57fa4989358a4..5294df8dbb75b 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -205,7 +205,7 @@ impl Wtf8Buf { /// Copied from String::push /// This does **not** include the WTF-8 concatenation check. 
fn push_code_point_unchecked(&mut self, code_point: CodePoint) { - let mut bytes = [0; 4]; + let mut bytes = [0; char::MAX_UTF8_LEN]; let bytes = char::encode_utf8_raw(code_point.value, &mut bytes); self.bytes.extend_from_slice(bytes) } @@ -878,7 +878,7 @@ impl<'a> Iterator for EncodeWide<'a> { return Some(tmp); } - let mut buf = [0; 2]; + let mut buf = [0; char::MAX_UTF16_LEN]; self.code_points.next().map(|code_point| { let n = char::encode_utf16_raw(code_point.value, &mut buf).len(); if n == 2 {