From 30834e44d5b543bf4ead6d3786e5e4e3f5c4ebd4 Mon Sep 17 00:00:00 2001
From: Tarek Mahmoud Sayed <10833894+tarekgh@users.noreply.github.com>
Date: Tue, 14 Jan 2025 08:51:41 -0800
Subject: [PATCH] Fix Encoding regression (#111320)

* Fix Encoding regression

* Feedback addressing

* Fix the test
---
 .../src/System/Text/EncodingCharBuffer.cs      |  7 ++++++-
 .../tests/EncodingCodePages.cs                 | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingCharBuffer.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingCharBuffer.cs
index 1ba281dc68afb..71c411b0c79ca 100644
--- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingCharBuffer.cs
+++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingCharBuffer.cs
@@ -68,9 +68,14 @@ internal unsafe bool AddChar(char ch)
             return AddChar(ch, 1);
         }
 
-
         internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
         {
+            if (_chars is null)
+            {
+                _charCountResult += 2;
+                return true;
+            }
+
             // Need room for 2 chars
             if (_charEnd - _chars < 2)
             {
diff --git a/src/libraries/System.Text.Encoding.CodePages/tests/EncodingCodePages.cs b/src/libraries/System.Text.Encoding.CodePages/tests/EncodingCodePages.cs
index f075b37d522dd..e6e25c19dd145 100644
--- a/src/libraries/System.Text.Encoding.CodePages/tests/EncodingCodePages.cs
+++ b/src/libraries/System.Text.Encoding.CodePages/tests/EncodingCodePages.cs
@@ -519,6 +519,7 @@ public static void TestDefaultEncodings()
             Assert.Contains(mappedEncoding, CrossplatformDefaultEncodings().Union(CodePageInfo().Select(i => Map((int)i[0], (string)i[1]))));
 
             TestRegister1252();
+            TestMultiBytesEncodingsSupportSurrogate();
         }
 
         private static void ValidateDefaultEncodings()
@@ -639,6 +640,23 @@ public static void TestEncodingDisplayNames(int codePage, string webName, string
             Assert.All(name, c => Assert.True(c >= ' ' && c < '~' + 1, "Name: " + name + " contains character: " + c));
         }
 
+        private static void TestMultiBytesEncodingsSupportSurrogate()
+        {
+            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+
+            Encoding encoding = Encoding.GetEncoding("GB18030");
+            Assert.NotNull(encoding);
+
+            string surrogatePair = "\uD840\uDE13"; // Surrogate Pair codepoint '𠈓' \U00020213
+            byte[] expectedBytes = new byte[] { 0x95, 0x32, 0xB7, 0x37 };
+
+            Assert.Equal(expectedBytes, encoding.GetBytes(surrogatePair));
+            Assert.Equal(expectedBytes.Length, encoding.GetByteCount(surrogatePair));
+
+            Assert.Equal(surrogatePair, encoding.GetString(expectedBytes));
+            Assert.Equal(surrogatePair.Length, encoding.GetCharCount(expectedBytes));
+        }
+
         // This test is run as part of the default mappings test, since it modifies global state which that test
         // depends on.
         private static void TestRegister1252()