From bbdf97254a2ed63d764a39e92a3adcecc49b962b Mon Sep 17 00:00:00 2001 From: Marcondiro Date: Sun, 5 May 2024 23:01:09 +0200 Subject: [PATCH] fix #124714 str.to_lowercase sigma handling --- library/alloc/src/str.rs | 10 ++++++---- library/alloc/tests/str.rs | 3 +++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs index c0d292cd20886..3e23612d0c13c 100644 --- a/library/alloc/src/str.rs +++ b/library/alloc/src/str.rs @@ -375,14 +375,16 @@ impl str { // Safety: We have written only valid ASCII to our vec let mut s = unsafe { String::from_utf8_unchecked(out) }; - for (i, c) in rest[..].char_indices() { + for (i, c) in rest.char_indices() { if c == 'Σ' { // Σ maps to σ, except at the end of a word where it maps to ς. // This is the only conditional (contextual) but language-independent mapping // in `SpecialCasing.txt`, // so hard-code it rather than have a generic "condition" mechanism. // See https://github.com/rust-lang/rust/issues/26035 - map_uppercase_sigma(rest, i, &mut s) + let out_len = self.len() - rest.len(); + let sigma_lowercase = map_uppercase_sigma(&self, i + out_len); + s.push(sigma_lowercase); } else { match conversions::to_lower(c) { [a, '\0', _] => s.push(a), @@ -400,13 +402,13 @@ impl str { } return s; - fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) { + fn map_uppercase_sigma(from: &str, i: usize) -> char { // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 // for the definition of `Final_Sigma`. 
debug_assert!('Σ'.len_utf8() == 2); let is_word_final = case_ignorable_then_cased(from[..i].chars().rev()) && !case_ignorable_then_cased(from[i + 2..].chars()); - to.push_str(if is_word_final { "ς" } else { "σ" }); + if is_word_final { 'ς' } else { 'σ' } } fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool { diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index df8a260624a28..0078f5eaa3d2b 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1848,6 +1848,9 @@ fn to_lowercase() { assert_eq!("ΑΣ'Α".to_lowercase(), "ασ'α"); assert_eq!("ΑΣ''Α".to_lowercase(), "ασ''α"); + // https://github.com/rust-lang/rust/issues/124714 + assert_eq!("abcdefghijklmnopΣ".to_lowercase(), "abcdefghijklmnopς"); + // a really long string that has it's lowercase form // even longer. this tests that implementations don't assume // an incorrect upper bound on allocations