From 8124729fae002c870d6626b3e090e3ee6ba85347 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 7 Feb 2024 01:37:15 +0100 Subject: [PATCH 1/4] Add span-based Uri {Try}{Un}EscapeDataString overloads --- .../System.Private.Uri/src/System/UriExt.cs | 109 ++++- .../src/System/UriHelper.cs | 73 +++- .../PercentEncodingHelperTests.cs | 82 ---- ...System.Private.Uri.Functional.Tests.csproj | 1 - .../tests/FunctionalTests/UriEscapingTest.cs | 413 +++++++++++------- .../System.Runtime/ref/System.Runtime.cs | 4 + .../System/Uri.MethodsTests.cs | 54 --- 7 files changed, 420 insertions(+), 316 deletions(-) delete mode 100644 src/libraries/System.Private.Uri/tests/FunctionalTests/PercentEncodingHelperTests.cs diff --git a/src/libraries/System.Private.Uri/src/System/UriExt.cs b/src/libraries/System.Private.Uri/src/System/UriExt.cs index 1aedf02d9299d4..99eb29d11a107f 100644 --- a/src/libraries/System.Private.Uri/src/System/UriExt.cs +++ b/src/libraries/System.Private.Uri/src/System/UriExt.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; @@ -552,28 +552,99 @@ internal unsafe bool InternalIsWellFormedOriginalString() return true; } + /// Converts a string to its unescaped representation. + /// The string to unescape. + /// The unescaped representation of . public static string UnescapeDataString(string stringToUnescape) { ArgumentNullException.ThrowIfNull(stringToUnescape); - if (stringToUnescape.Length == 0) - return string.Empty; + return UnescapeDataString(stringToUnescape, stringToUnescape); + } + + /// Converts a span to its unescaped representation. + /// The span to unescape. + /// The unescaped representation of . + public static string UnescapeDataString(ReadOnlySpan charsToUnescape) + { + return UnescapeDataString(charsToUnescape, backingString: null); + } + + private static string UnescapeDataString(ReadOnlySpan charsToUnescape, string? backingString = null) + { + Debug.Assert(backingString is null || backingString.Length == charsToUnescape.Length); + + int indexOfFirstToUnescape = charsToUnescape.IndexOf('%'); + if (indexOfFirstToUnescape < 0) + { + // Nothing to unescape, just return the original value. + return backingString ?? charsToUnescape.ToString(); + } + + var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]); + + // This may throw for very large inputs. ¯\_(ツ)_/¯ + vsb.EnsureCapacity(charsToUnescape.Length - indexOfFirstToUnescape); + + UriHelper.UnescapeString( + charsToUnescape.Slice(indexOfFirstToUnescape), ref vsb, + c_DummyChar, c_DummyChar, c_DummyChar, + UnescapeMode.Unescape | UnescapeMode.UnescapeAll, + syntax: null, isQuery: false); + + string result = string.Concat(charsToUnescape.Slice(0, indexOfFirstToUnescape), vsb.AsSpan()); + vsb.Dispose(); + return result; + } + + /// Attempts to convert a span to its unescaped representation. + /// The span to unescape. + /// The output span that contains the unescaped result of the operation. + /// When this method returns, contains the number of chars that were written into . + /// if the was large enough to hold the entire result; otherwise, . + public static bool TryUnescapeDataString(ReadOnlySpan charsToUnescape, Span destination, out int charsWritten) + { + int indexOfFirstToUnescape = charsToUnescape.IndexOf('%'); + if (indexOfFirstToUnescape < 0) + { + // Nothing to unescape, just copy the original chars. + if (charsToUnescape.TryCopyTo(destination)) + { + charsWritten = charsToUnescape.Length; + return true; + } - int position = stringToUnescape.IndexOf('%'); - if (position == -1) - return stringToUnescape; + charsWritten = 0; + return false; + } var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]); - vsb.EnsureCapacity(stringToUnescape.Length); - vsb.Append(stringToUnescape.AsSpan(0, position)); + // This may throw for very large inputs. ¯\_(ツ)_/¯ + vsb.EnsureCapacity(charsToUnescape.Length - indexOfFirstToUnescape); + UriHelper.UnescapeString( - stringToUnescape, position, stringToUnescape.Length, ref vsb, + charsToUnescape.Slice(indexOfFirstToUnescape), ref vsb, c_DummyChar, c_DummyChar, c_DummyChar, UnescapeMode.Unescape | UnescapeMode.UnescapeAll, syntax: null, isQuery: false); - return vsb.ToString(); + int newLength = indexOfFirstToUnescape + vsb.Length; + Debug.Assert(newLength <= charsToUnescape.Length); + + if (destination.Length >= newLength) + { + charsToUnescape.Slice(0, indexOfFirstToUnescape).CopyTo(destination); + vsb.AsSpan().CopyTo(destination.Slice(indexOfFirstToUnescape)); + + vsb.Dispose(); + charsWritten = newLength; + return true; + } + + vsb.Dispose(); + charsWritten = 0; + return false; } // Where stringToEscape is intended to be a completely unescaped URI string. @@ -584,9 +655,27 @@ public static string EscapeUriString(string stringToEscape) => // Where stringToEscape is intended to be URI data, but not an entire URI. // This method will escape any character that is not an unreserved character, including percent signs. + + /// Converts a string to its escaped representation. + /// The string to escape. + /// The escaped representation of . public static string EscapeDataString(string stringToEscape) => UriHelper.EscapeString(stringToEscape, checkExistingEscaped: false, UriHelper.Unreserved); + /// Converts a span to its escaped representation. + /// The span to escape. + /// The escaped representation of . + public static string EscapeDataString(ReadOnlySpan charsToEscape) => + UriHelper.EscapeString(charsToEscape, checkExistingEscaped: false, UriHelper.Unreserved, backingString: null); + + /// Attempts to convert a span to its escaped representation. + /// The span to escape. + /// The output span that contains the escaped result of the operation. + /// When this method returns, contains the number of chars that were written into . + /// if the was large enough to hold the entire result; otherwise, . + public static bool TryEscapeDataString(ReadOnlySpan charsToEscape, Span destination, out int charsWritten) => + UriHelper.TryEscapeDataString(charsToEscape, destination, out charsWritten); + // // Cleans up the specified component according to Iri rules // a) Chars allowed by iri in a component are unescaped if found escaped diff --git a/src/libraries/System.Private.Uri/src/System/UriHelper.cs b/src/libraries/System.Private.Uri/src/System/UriHelper.cs index a6a92878f63877..980487163ce46e 100644 --- a/src/libraries/System.Private.Uri/src/System/UriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/UriHelper.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers; @@ -118,26 +118,79 @@ internal static unsafe bool TestForSubPath(char* selfPtr, int selfLength, char* return true; } - internal static string EscapeString(string stringToEscape, bool checkExistingEscaped, SearchValues noEscape) + public static bool TryEscapeDataString(ReadOnlySpan charsToEscape, Span destination, out int charsWritten) + { + if (destination.Length < charsToEscape.Length) + { + charsWritten = 0; + return false; + } + + int indexOfFirstToEscape = charsToEscape.IndexOfAnyExcept(Unreserved); + if (indexOfFirstToEscape < 0) + { + // Nothing to escape, just copy the original chars. + charsToEscape.CopyTo(destination); + charsWritten = charsToEscape.Length; + return true; + } + + var vsb = new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]); + + // This may throw for very large inputs. ¯\_(ツ)_/¯ + vsb.EnsureCapacity(charsToEscape.Length - indexOfFirstToEscape); + + EscapeStringToBuilder(charsToEscape.Slice(indexOfFirstToEscape), ref vsb, Unreserved, checkExistingEscaped: false); + + int newLength = checked(indexOfFirstToEscape + vsb.Length); + Debug.Assert(newLength > charsToEscape.Length); + + if (destination.Length >= newLength) + { + charsToEscape.Slice(0, indexOfFirstToEscape).CopyTo(destination); + vsb.AsSpan().CopyTo(destination.Slice(indexOfFirstToEscape)); + + vsb.Dispose(); + charsWritten = newLength; + return true; + } + + vsb.Dispose(); + charsWritten = 0; + return false; + } + + public static string EscapeString(string stringToEscape, bool checkExistingEscaped, SearchValues noEscape) { ArgumentNullException.ThrowIfNull(stringToEscape); + return EscapeString(stringToEscape, checkExistingEscaped, noEscape, stringToEscape); + } + + public static string EscapeString(ReadOnlySpan charsToEscape, bool checkExistingEscaped, SearchValues noEscape, string? backingString) + { Debug.Assert(!noEscape.Contains('%'), "Need to treat % specially; it should be part of any escaped set"); + Debug.Assert(backingString is null || backingString.Length == charsToEscape.Length); - int indexOfFirstToEscape = stringToEscape.AsSpan().IndexOfAnyExcept(noEscape); + int indexOfFirstToEscape = charsToEscape.IndexOfAnyExcept(noEscape); if (indexOfFirstToEscape < 0) { - // Nothing to escape, just return the original string. - return stringToEscape; + // Nothing to escape, just return the original value. + return backingString ?? charsToEscape.ToString(); } // Otherwise, create a ValueStringBuilder to store the escaped data into, - // append to it all of the noEscape chars we already iterated through, - // escape the rest, and return the result as a string. + // escape the rest, and concat the result with the characters we skipped above. var vsb = new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]); - vsb.Append(stringToEscape.AsSpan(0, indexOfFirstToEscape)); - EscapeStringToBuilder(stringToEscape.AsSpan(indexOfFirstToEscape), ref vsb, noEscape, checkExistingEscaped); - return vsb.ToString(); + + // This may throw for very large inputs. ¯\_(ツ)_/¯ + vsb.EnsureCapacity(charsToEscape.Length - indexOfFirstToEscape); + + EscapeStringToBuilder(charsToEscape.Slice(indexOfFirstToEscape), ref vsb, noEscape, checkExistingEscaped); + + string result = string.Concat(charsToEscape.Slice(0, indexOfFirstToEscape), vsb.AsSpan()); + vsb.Dispose(); + return result; } internal static unsafe void EscapeString(scoped ReadOnlySpan stringToEscape, ref ValueStringBuilder dest, diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/PercentEncodingHelperTests.cs b/src/libraries/System.Private.Uri/tests/FunctionalTests/PercentEncodingHelperTests.cs deleted file mode 100644 index 54161fd38afed2..00000000000000 --- a/src/libraries/System.Private.Uri/tests/FunctionalTests/PercentEncodingHelperTests.cs +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Collections.Generic; -using Xunit; - -namespace System.PrivateUri.Tests -{ - public class PercentEncodingHelperTests - { - private const string OneByteUtf8 = "%41"; // A - private const string TwoByteUtf8 = "%C3%BC"; // \u00FC - private const string ThreeByteUtf8 = "%E8%AF%B6"; // \u8BF6 - private const string FourByteUtf8 = "%F0%9F%98%80"; // \uD83D\uDE00 - - private const string InvalidOneByteUtf8 = "%FF"; - private const string OverlongTwoByteUtf8 = "%C1%81"; // A - private const string OverlongThreeByteUtf8 = "%E0%83%BC"; // \u00FC - private const string OverlongFourByteUtf8 = "%F0%88%AF%B6"; // \u8BF6; - - public static IEnumerable PercentEncodedAndDecodedUTF8Sequences() - { - static object[] Pair(string s1, string s2) => new object[] { s1, s2 }; - - yield return Pair(OneByteUtf8, "A"); - yield return Pair(TwoByteUtf8, "\u00FC"); - yield return Pair(ThreeByteUtf8, "\u8BF6"); - yield return Pair(FourByteUtf8, "\uD83D\uDE00"); - - yield return Pair(OneByteUtf8 + OneByteUtf8, "AA"); - yield return Pair(TwoByteUtf8 + TwoByteUtf8, "\u00FC\u00FC"); - yield return Pair(ThreeByteUtf8 + ThreeByteUtf8, "\u8BF6\u8BF6"); - yield return Pair(FourByteUtf8 + FourByteUtf8, "\uD83D\uDE00\uD83D\uDE00"); - - yield return Pair(OneByteUtf8 + TwoByteUtf8 + OneByteUtf8, "A\u00FCA"); - yield return Pair(TwoByteUtf8 + ThreeByteUtf8 + TwoByteUtf8, "\u00FC\u8BF6\u00FC"); - - yield return Pair(InvalidOneByteUtf8 + OneByteUtf8, InvalidOneByteUtf8 + "A"); - yield return Pair(OverlongTwoByteUtf8 + TwoByteUtf8, OverlongTwoByteUtf8 + "\u00FC"); - yield return Pair(OverlongThreeByteUtf8 + ThreeByteUtf8, OverlongThreeByteUtf8 + "\u8BF6"); - yield return Pair(OverlongFourByteUtf8 + FourByteUtf8, OverlongFourByteUtf8 + "\uD83D\uDE00"); - - yield return Pair(InvalidOneByteUtf8, InvalidOneByteUtf8); - yield return Pair(InvalidOneByteUtf8 + InvalidOneByteUtf8, InvalidOneByteUtf8 + InvalidOneByteUtf8); - yield return Pair(InvalidOneByteUtf8 + InvalidOneByteUtf8 + InvalidOneByteUtf8, InvalidOneByteUtf8 + InvalidOneByteUtf8 + InvalidOneByteUtf8); - - // 11001010 11100100 10001000 10110010 - 2-byte marker followed by 3-byte sequence - yield return Pair("%CA" + "%E4%88%B2", "%CA" + '\u4232'); - - // 4 valid UTF8 bytes followed by 5 invalid UTF8 bytes - yield return Pair("%F4%80%80%BA" + "%FD%80%80%BA%CD", "\U0010003A" + "%FD%80%80%BA%CD"); - - // BIDI char - yield return Pair("%E2%80%8E", "\u200E"); - - // Char Block: 3400..4DBF-CJK Unified Ideographs Extension A - yield return Pair("%E4%88%B2", "\u4232"); - - // BIDI char followed by a valid 3-byte UTF8 sequence (\u30AF) - yield return Pair("%E2%80%8E" + "%E3%82%AF", "\u200E" + "\u30AF"); - - // BIDI char followed by invalid UTF8 bytes - yield return Pair("%E2%80%8E" + "%F0%90%90", "\u200E" + "%F0%90%90"); - - // Input string: %98%C8%D4%F3 %D4%A8 %7A %CF%DE %41 %16 - // Valid Unicode sequences: %D4%A8 %7A %41 %16 - yield return Pair("%98%C8%D4%F3" + "%D4%A8" + "%7A" + "%CF%DE" + "%41" + "%16", - "%98%C8%D4%F3" + '\u0528' + 'z' + "%CF%DE" + 'A' + '\x16'); - - // 2-byte marker, valid 4-byte sequence, continuation byte - yield return Pair("%C6" + "%F3%BC%A1%B8" + "%B5", - "%C6" + "\U000FC878" + "%B5"); - } - - [Theory] - [MemberData(nameof(PercentEncodedAndDecodedUTF8Sequences))] - public static void UnescapeDataString_UnescapesUtf8Sequences(string stringToUnescape, string expected) - { - Assert.Equal(expected, Uri.UnescapeDataString(stringToUnescape)); - } - } -} diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj b/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj index 151547aba2ce7f..980804ded97c5c 100644 --- a/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj +++ b/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj @@ -13,7 +13,6 @@ - diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/UriEscapingTest.cs b/src/libraries/System.Private.Uri/tests/FunctionalTests/UriEscapingTest.cs index 3e9385206bfe5b..883f48a420e982 100644 --- a/src/libraries/System.Private.Uri/tests/FunctionalTests/UriEscapingTest.cs +++ b/src/libraries/System.Private.Uri/tests/FunctionalTests/UriEscapingTest.cs @@ -24,193 +24,304 @@ public class UriEscapingTest "\u6570\u636E eq '\uD840\uDC00\uD840\uDC01\uD840\uDC02\uD840\uDC03\uD869\uDED1\uD869\uDED2\uD869\uDED3" + "\uD869\uDED4\uD869\uDED5\uD869\uDED6'"; - #region EscapeDataString + #region EscapeUnescapeDataString [Fact] - public void UriEscapingDataString_JustAlphaNumeric_NothingEscaped() + public void EscapeUnescapeDataString_NullArgument() { - string output = Uri.EscapeDataString(AlphaNumeric); - Assert.Equal(AlphaNumeric, output); + AssertExtensions.Throws("stringToEscape", () => Uri.EscapeDataString(null)); + AssertExtensions.Throws("stringToUnescape", () => Uri.UnescapeDataString(null)); } - [Fact] - public void UriEscapingDataString_RFC2396Reserved_Escaped() + private static IEnumerable<(string Unescaped, string Escaped)> CombinationsWithDifferentSections(string unescaped, string escaped) { - string input = RFC2396Reserved; - string output = Uri.EscapeDataString(input); - Assert.Equal(Escape(RFC2396Reserved), output); - } + yield return (unescaped, escaped); + yield return (unescaped + unescaped, escaped + escaped); - [Fact] - public void UriEscapingDataString_RFC3986Unreserved_NothingEscaped() - { - string input = RFC3986Unreserved; - string output = Uri.EscapeDataString(input); - Assert.Equal(input, output); + foreach ((string padding, string escapedPadding) in new[] + { + (" ", "%20"), ("abc", "abc"), ("a b%", "a%20b%25"), ("\u00FC", "%C3%BC"), ("\uD83C\uDF49", "%F0%9F%8D%89") + }) + { + yield return ($"{padding}{unescaped}", $"{escapedPadding}{escaped}"); + yield return ($"{unescaped}{padding}", $"{escaped}{escapedPadding}"); + yield return ($"{padding}{unescaped}{padding}", $"{escapedPadding}{escaped}{escapedPadding}"); + yield return ($"{unescaped}{padding}{unescaped}", $"{escaped}{escapedPadding}{escaped}"); + yield return ($"{padding}{unescaped}{padding}{unescaped}{padding}", $"{escapedPadding}{escaped}{escapedPadding}{escaped}{escapedPadding}"); + } } - [Fact] - public void UriEscapingDataString_RFC3986Reserved_Escaped() + private static IEnumerable<(string Unescaped, string Escaped)> UriEscapeUnescapeDataStringTestInputs() { - string input = RFC3986Reserved; - string output = Uri.EscapeDataString(input); - Assert.Equal(Escape(RFC3986Reserved), output); - } + yield return ("", ""); + yield return ("He\\l/lo", "He%5Cl%2Flo"); - [Fact] - public void UriEscapingDataString_RFC3986ReservedWithIRI_Escaped() - { - // Note that \ and % are not officialy reserved, but we treat it as reserved. - string input = RFC3986Reserved; - string output = Uri.EscapeDataString(input); - Assert.Equal(Escape(RFC3986Reserved), output); - } + yield return (AlphaNumeric, AlphaNumeric); + yield return (RFC3986Unreserved, RFC3986Unreserved); - [Fact] - public void UriEscapingDataString_Unicode_Escaped() - { - string input = "\u30AF"; - string output = Uri.EscapeDataString(input); - Assert.Equal("%E3%82%AF", output); - } + yield return (RFC2396Reserved, EscapeAscii(RFC2396Reserved)); + yield return (RFC3986Reserved, EscapeAscii(RFC3986Reserved)); - [Fact] - public void UriEscapingDataString_UnicodeWithIRI_Escaped() - { - string input = "\u30AF"; + // Note that \ and % are not officially reserved, but we treat it as reserved. + yield return (RFC3986Reserved + "\\%", EscapeAscii(RFC3986Reserved + "\\%")); - string output = Uri.EscapeDataString(input); - Assert.Equal("%E3%82%AF", output); + yield return ("\u30AF", "%E3%82%AF"); + yield return (GB18030CertificationString1, "%E6%95%B0%E6%8D%AE%20eq%20%27%F0%A0%80%80%F0%A0%80%81%F0%A0%80%82%F0%A0%80%83%F0%AA%9B%91%F0%AA%9B%92%F0%AA%9B%93%F0%AA%9B%94%F0%AA%9B%95%F0%AA%9B%96%27"); - using (new ThreadCultureChange("zh-cn")) + // Test all ASCII that should be escaped + for (int i = 0; i < 128; i++) { - Assert.Equal(output, Uri.EscapeDataString(input)); //, "Same normalized result expected in different locales." + if (!RFC3986Unreserved.Contains((char)i)) + { + string s = new string((char)i, 42); + yield return (s, EscapeAscii(s)); + } } + + // Valid surrogate pairs + yield return ("\uD800\uDC00", "%F0%90%80%80"); + yield return ("\uD83C\uDF49", "%F0%9F%8D%89"); } - [Fact] - public void UriEscapingDataString_Unicode_SurrogatePair() + public static IEnumerable UriEscapeDataString_MemberData() { - string output = Uri.EscapeDataString(GB18030CertificationString1); - Assert.Equal( - @"%E6%95%B0%E6%8D%AE%20eq" + - "%20%27%F0%A0%80%80%F0%A0%80%81%F0%A0%80%82%F0%A0%80%83%F0%AA%9B%91" + - "%F0%AA%9B%92%F0%AA%9B%93%F0%AA%9B%94%F0%AA%9B%95%F0%AA%9B%96%27", - output); + (string Unescaped, string Escaped)[] pairs = + [ + .. UriEscapeUnescapeDataStringTestInputs(), - using (new ThreadCultureChange("zh-cn")) - { - Assert.Equal(output, Uri.EscapeDataString(GB18030CertificationString1)); //"Same normalized result expected in different locales." - } + // Invalid surrogate pairs + ("\uD800", "%EF%BF%BD"), + ("abc\uD800", "abc%EF%BF%BD"), + ("abc\uD800\uD800abc", "abc%EF%BF%BD%EF%BF%BDabc"), + ("\xD800\xD800\xDFFF", "%EF%BF%BD%F0%90%8F%BF"), + ]; + + return pairs + .SelectMany(p => CombinationsWithDifferentSections(p.Unescaped, p.Escaped)) + .Select(p => new[] { p.Unescaped, p.Escaped }); } - public static IEnumerable UriEscapeUnescapeDataString_Roundtrip_MemberData() + public static IEnumerable UriUnescapeDataString_MemberData() { - // Test the no-longer-existing "c_MaxUriBufferSize" limit of 0xFFF0, - // as well as lengths longer than the max Uri length of ushort.MaxValue. - foreach (int length in new[] { 1, 0xFFF0, 0xFFF1, ushort.MaxValue + 10 }) - { - yield return new object[] { new string('s', length), string.Concat(Enumerable.Repeat("s", length)) }; - yield return new object[] { new string('/', length), string.Concat(Enumerable.Repeat("%2F", length)) }; - } + const string OneByteUtf8 = "%41"; // A + const string TwoByteUtf8 = "%C3%BC"; // \u00FC + const string ThreeByteUtf8 = "%E8%AF%B6"; // \u8BF6 + const string FourByteUtf8 = "%F0%9F%98%80"; // \uD83D\uDE00 + + const string InvalidOneByteUtf8 = "%FF"; + const string OverlongTwoByteUtf8 = "%C1%81"; // A + const string OverlongThreeByteUtf8 = "%E0%83%BC"; // \u00FC + const string OverlongFourByteUtf8 = "%F0%88%AF%B6"; // \u8BF6; + + (string Unescaped, string Escaped)[] pairs = + [ + .. UriEscapeUnescapeDataStringTestInputs(), + + // Many combinations that include non-ASCII to test the PercentEncodingHelper + ("A", OneByteUtf8), + ("\u00FC", TwoByteUtf8), + ("\u8BF6", ThreeByteUtf8), + ("\uD83D\uDE00", FourByteUtf8), + + ("AA", OneByteUtf8 + OneByteUtf8), + ("\u00FC\u00FC", TwoByteUtf8 + TwoByteUtf8), + ("\u8BF6\u8BF6", ThreeByteUtf8 + ThreeByteUtf8), + ("\uD83D\uDE00\uD83D\uDE00", FourByteUtf8 + FourByteUtf8), + + ("A\u00FCA", OneByteUtf8 + TwoByteUtf8 + OneByteUtf8), + ("\u00FC\u8BF6\u00FC", TwoByteUtf8 + ThreeByteUtf8 + TwoByteUtf8), + + (InvalidOneByteUtf8 + "A", InvalidOneByteUtf8 + OneByteUtf8), + (OverlongTwoByteUtf8 + "\u00FC", OverlongTwoByteUtf8 + TwoByteUtf8), + (OverlongThreeByteUtf8 + "\u8BF6", OverlongThreeByteUtf8 + ThreeByteUtf8), + (OverlongFourByteUtf8 + "\uD83D\uDE00", OverlongFourByteUtf8 + FourByteUtf8), + + (InvalidOneByteUtf8, InvalidOneByteUtf8), + (InvalidOneByteUtf8 + InvalidOneByteUtf8, InvalidOneByteUtf8 + InvalidOneByteUtf8), + (InvalidOneByteUtf8 + InvalidOneByteUtf8 + InvalidOneByteUtf8, InvalidOneByteUtf8 + InvalidOneByteUtf8 + InvalidOneByteUtf8), + + // 11001010 11100100 10001000 10110010 - 2-byte marker followed by 3-byte sequence + ("%CA" + '\u4232', "%CA" + "%E4%88%B2"), + + // 4 valid UTF8 bytes followed by 5 invalid UTF8 bytes + ("\U0010003A" + "%FD%80%80%BA%CD", "%F4%80%80%BA" + "%FD%80%80%BA%CD"), + + // BIDI char + ("\u200E", "%E2%80%8E"), + + // Char Block: 3400..4DBF-CJK Unified Ideographs Extension A + ("\u4232", "%E4%88%B2"), + + // BIDI char followed by a valid 3-byte UTF8 sequence (\u30AF) + ("\u200E" + "\u30AF", "%E2%80%8E" + "%E3%82%AF"), + + // BIDI char followed by invalid UTF8 bytes + ("\u200E" + "%F0%90%90", "%E2%80%8E" + "%F0%90%90"), + + // Input string: %98%C8%D4%F3 %D4%A8 %7A %CF%DE %41 %16 + // Valid Unicode sequences: %D4%A8 %7A %41 %16 + ("%98%C8%D4%F3" + '\u0528' + 'z' + "%CF%DE" + 'A' + '\x16', "%98%C8%D4%F3" + "%D4%A8" + "%7A" + "%CF%DE" + "%41" + "%16"), + + // 2-byte marker, valid 4-byte sequence, continuation byte + ("%C6" + "\U000FC878" + "%B5", "%C6" + "%F3%BC%A1%B8" + "%B5"), + ]; + + return pairs + .SelectMany(p => CombinationsWithDifferentSections(p.Unescaped, p.Escaped)) + .Select(p => new[] { p.Unescaped, p.Escaped }); } [Theory] - [MemberData(nameof(UriEscapeUnescapeDataString_Roundtrip_MemberData))] - public void UriEscapeUnescapeDataString_Roundtrip(string input, string expectedEscaped) + [MemberData(nameof(UriEscapeDataString_MemberData))] + public void UriEscapeDataString(string unescaped, string escaped) { - string output = Uri.EscapeDataString(input); - Assert.Equal(expectedEscaped, output); - Assert.Equal(input, Uri.UnescapeDataString(output)); - } + ValidateEscape(unescaped, escaped); - #endregion EscapeDataString + using (new ThreadCultureChange("zh-cn")) + { + // Same result expected in different locales. + ValidateEscape(unescaped, escaped); + } - #region UnescapeDataString + static void ValidateEscape(string input, string expectedOutput) + { + Assert.True(input.Length <= expectedOutput.Length); - [Fact] - public void UriUnescapingDataString_JustAlphaNumeric_Unescaped() - { - string output = Uri.UnescapeDataString(Escape(AlphaNumeric)); - Assert.Equal(AlphaNumeric, output); - } + // String overload + string output = Uri.EscapeDataString(input); + Assert.Equal(expectedOutput, output); - [Fact] - public void UriUnescapingDataString_RFC2396Unreserved_Unescaped() - { - string input = RFC2396Unreserved; - string output = Uri.UnescapeDataString(Escape(input)); - Assert.Equal(input, output); - } + if (input == expectedOutput) + { + Assert.Same(input, output); + } - [Fact] - public void UriUnescapingDataString_RFC2396Reserved_Unescaped() - { - string input = RFC2396Reserved; - string output = Uri.UnescapeDataString(Escape(input)); - Assert.Equal(input, output); - } + // Span overload + output = Uri.EscapeDataString(input.AsSpan()); + Assert.Equal(expectedOutput, output); - [Fact] - public void UriUnescapingDataString_RFC3986Unreserved_Unescaped() - { - string input = RFC3986Unreserved; - string output = Uri.UnescapeDataString(Escape(input)); - Assert.Equal(input, output); - } + char[] destination = new char[expectedOutput.Length + 2]; - [Fact] - public void UriUnescapingDataString_RFC3986Reserved_Unescaped() - { - string input = RFC3986Reserved; - string output = Uri.UnescapeDataString(Escape(input)); - Assert.Equal(input, output); - } + // Exact destination size + Assert.True(Uri.TryEscapeDataString(input, destination.AsSpan(0, expectedOutput.Length), out int charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten)); - [Fact] - public void UriUnescapingDataString_RFC3986ReservedWithIRI_Unescaped() - { - // Note that \ and % are not officialy reserved, but we treat it as reserved. - string input = RFC3986Reserved; - string output = Uri.UnescapeDataString(Escape(input)); - Assert.Equal(input, output); - } + // Larger destination + Assert.True(Uri.TryEscapeDataString(input, destination.AsSpan(1), out charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(1, charsWritten)); - [Fact] - public void UriUnescapingDataString_Unicode_Unescaped() - { - string input = @"\u30AF"; - string output = Uri.UnescapeDataString(Escape(input)); - Assert.Equal(input, output); + // Destination too small + if (expectedOutput.Length > 0) + { + Assert.False(Uri.TryEscapeDataString(input, destination.AsSpan(0, expectedOutput.Length - 1), out charsWritten)); + Assert.Equal(0, charsWritten); + } + + // Overlapped source/destination + input.CopyTo(destination); + Assert.True(Uri.TryEscapeDataString(destination.AsSpan(0, input.Length), destination, out charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten)); + + // Overlapped source/destination with different starts + input.CopyTo(destination.AsSpan(1)); + Assert.True(Uri.TryEscapeDataString(destination.AsSpan(1, input.Length), destination, out charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten)); + + input.CopyTo(destination); + Assert.True(Uri.TryEscapeDataString(destination.AsSpan(0, input.Length), destination.AsSpan(1), out charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(1, charsWritten)); + } } - [Fact] - public void UriUnescapingDataString_UnicodeWithIRI_Unescaped() + [Theory] + [MemberData(nameof(UriUnescapeDataString_MemberData))] + public void UriUnescapeDataString(string unescaped, string escaped) { - string input = @"\u30AF"; - string output = Uri.UnescapeDataString(Escape(input)); - Assert.Equal(input, output); + ValidateUnescape(escaped, unescaped); using (new ThreadCultureChange("zh-cn")) { - Assert.Equal(output, Uri.UnescapeDataString(Escape(input))); // Same normalized result expected in different locales. + // Same result expected in different locales. + ValidateUnescape(escaped, unescaped); + } + + static void ValidateUnescape(string input, string expectedOutput) + { + Assert.True(input.Length >= expectedOutput.Length); + + // String overload + string output = Uri.UnescapeDataString(input); + Assert.Equal(expectedOutput, output); + + // Span overload + output = Uri.UnescapeDataString(input.AsSpan()); + Assert.Equal(expectedOutput, output); + + char[] destination = new char[input.Length + 2]; + + // Exact destination size + Assert.True(Uri.TryUnescapeDataString(input, destination.AsSpan(0, expectedOutput.Length), out int charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten)); + + // Larger destination + Assert.True(Uri.TryUnescapeDataString(input, destination.AsSpan(1), out charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(1, charsWritten)); + + // Destination too small + if (expectedOutput.Length > 0) + { + Assert.False(Uri.TryUnescapeDataString(input, destination.AsSpan(0, expectedOutput.Length - 1), out charsWritten)); + Assert.Equal(0, charsWritten); + } + + // Overlapped source/destination + input.CopyTo(destination); + Assert.True(Uri.TryUnescapeDataString(destination.AsSpan(0, input.Length), destination, out charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten)); + + // Overlapped source/destination with different starts + input.CopyTo(destination.AsSpan(1)); + Assert.True(Uri.TryUnescapeDataString(destination.AsSpan(1, input.Length), destination, out charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten)); + + input.CopyTo(destination); + Assert.True(Uri.TryUnescapeDataString(destination.AsSpan(0, input.Length), destination.AsSpan(1), out charsWritten)); + Assert.Equal(expectedOutput.Length, charsWritten); + Assert.Equal(expectedOutput, destination.AsSpan(1, charsWritten)); } } [Fact] - public void UriUnescapingDataString_Unicode_SurrogatePair() + public void UriEscapeUnescapeDataString_LongInputs() { - string escapedInput = Uri.EscapeDataString(GB18030CertificationString1); - string output = Uri.UnescapeDataString(escapedInput); - Assert.Equal(GB18030CertificationString1, output); - - using (new ThreadCultureChange("zh-cn")) + // Test the no-longer-existing "c_MaxUriBufferSize" limit of 0xFFF0, + // as well as lengths longer than the max Uri length of ushort.MaxValue. + foreach (int length in new[] { 1, 0xFFF0, 0xFFF1, ushort.MaxValue + 10 }) { - Assert.Equal(output, Uri.UnescapeDataString(escapedInput)); // Same normalized result expected in different locales. + string unescaped = new string('s', length); + string escaped = unescaped; + + Assert.Equal(Uri.EscapeDataString(unescaped), escaped); + Assert.Equal(Uri.UnescapeDataString(escaped), unescaped); + + unescaped = new string('/', length); + escaped = EscapeAscii(unescaped); + + Assert.Equal(Uri.EscapeDataString(unescaped), escaped); + Assert.Equal(Uri.UnescapeDataString(escaped), unescaped); } } - #endregion UnescapeDataString + #endregion EscapeUnescapeDataString #region EscapeUriString @@ -332,7 +443,7 @@ public void UriAbsoluteEscaping_AlphaNumeric_NoEscaping() [Fact] public void UriAbsoluteUnEscaping_AlphaNumericEscapedIriOn_UnEscaping() { - string escapedAlphaNum = Escape(AlphaNumeric); + string escapedAlphaNum = EscapeAscii(AlphaNumeric); string input = "http://" + AlphaNumeric.ToLowerInvariant() + "/" + escapedAlphaNum + "?" + escapedAlphaNum + "#" + escapedAlphaNum; string expectedOutput = "http://" + AlphaNumeric.ToLowerInvariant() + "/" + AlphaNumeric @@ -353,7 +464,7 @@ public void UriAbsoluteEscaping_RFC2396Unreserved_NoEscaping() [Fact] public void UriAbsoluteUnEscaping_RFC3986UnreservedEscaped_AllUnescaped() { - string escaped = Escape(RFC3986Unreserved); + string escaped = EscapeAscii(RFC3986Unreserved); string input = "http://" + AlphaNumeric.ToLowerInvariant() + "/" + escaped + "?" + escaped + "#" + escaped; string expectedOutput = "http://" + AlphaNumeric.ToLowerInvariant() + "/" + RFC3986Unreserved @@ -375,7 +486,7 @@ public void UriAbsoluteEscaping_RFC2396Reserved_NoEscaping() [Fact] public void UriAbsoluteUnEscaping_RFC2396ReservedEscaped_NoUnEscaping() { - string escaped = Escape(RFC2396Reserved); + string escaped = EscapeAscii(RFC2396Reserved); string input = "http://host/" + escaped + "?" + escaped + "#" + escaped; Uri testUri = new Uri(input); @@ -404,7 +515,7 @@ public void UriAbsoluteEscaping_RFC3986Reserved_NothingEscaped() [Fact] public void UriAbsoluteUnEscaping_RFC3986ReservedEscaped_NothingUnescaped() { - string escaped = Escape(RFC3986Reserved); + string escaped = EscapeAscii(RFC3986Reserved); string input = "http://host/" + escaped + "?" + escaped + "#" + escaped; Uri testUri = new Uri(input); @@ -724,28 +835,12 @@ public void UriUnescapeInvalid_ValidUtf8IncompleteUtf8AsciiIriOn_InvalidUtf8Left #region Helpers - private static readonly char[] s_hexUpperChars = { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' - }; - // Percent encode every character - private static string Escape(string input) + private static string EscapeAscii(string input) { - byte[] bytes = new byte[4]; - StringBuilder output = new StringBuilder(); - for (int index = 0; index < input.Length; index++) - { - // Non-Ascii is escaped as UTF-8 - int byteCount = Encoding.UTF8.GetBytes(input, index, 1, bytes, 0); - for (int byteIndex = 0; byteIndex < byteCount; byteIndex++) - { - output.Append("%"); - output.Append(s_hexUpperChars[(bytes[byteIndex] & 0xf0) >> 4]); - output.Append(s_hexUpperChars[bytes[byteIndex] & 0xf]); - } - } - return output.ToString(); + Assert.True(Ascii.IsValid(input)); + + return string.Concat(input.Select(c => $"%{(int)c:X2}")); } #endregion Helpers diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 4c6ffd326a686b..53bba2a3f34973 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -15836,6 +15836,8 @@ protected virtual void CheckSecurity() { } [System.ObsoleteAttribute("Uri.Escape has been deprecated and is not supported.")] protected virtual void Escape() { } public static string EscapeDataString(string stringToEscape) { throw null; } + public static string EscapeDataString(System.ReadOnlySpan charsToEscape) { throw null; } + public static bool TryEscapeDataString(System.ReadOnlySpan charsToEscape, System.Span destination, out int charsWritten) { throw null; } [System.ObsoleteAttribute("Uri.EscapeString has been deprecated. Use GetComponents() or Uri.EscapeDataString to escape a Uri component or a string.")] protected static string EscapeString(string? str) { throw null; } [System.ObsoleteAttribute("Uri.EscapeUriString can corrupt the Uri string in some cases. Consider using Uri.EscapeDataString for query string components instead.", DiagnosticId="SYSLIB0013", UrlFormat="https://aka.ms/dotnet-warnings/{0}")] @@ -15877,6 +15879,8 @@ void System.Runtime.Serialization.ISerializable.GetObjectData(System.Runtime.Ser [System.ObsoleteAttribute("Uri.Unescape has been deprecated. Use GetComponents() or Uri.UnescapeDataString() to unescape a Uri component or a string.")] protected virtual string Unescape(string path) { throw null; } public static string UnescapeDataString(string stringToUnescape) { throw null; } + public static string UnescapeDataString(System.ReadOnlySpan charsToUnescape) { throw null; } + public static bool TryUnescapeDataString(System.ReadOnlySpan charsToUnescape, System.Span destination, out int charsWritten) { throw null; } } public partial class UriBuilder { diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Uri.MethodsTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Uri.MethodsTests.cs index 5a393231204e59..cc71bf8d1d4ad4 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Uri.MethodsTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Uri.MethodsTests.cs @@ -412,60 +412,6 @@ public void EqualsTest(Uri uri1, object obj, bool expected) } } - [Theory] - [InlineData("", "")] - [InlineData("Hello", "Hello")] - [InlineData("He\\l/lo", "He%5Cl%2Flo")] - [InlineData("\uD800\uDC00", "%F0%90%80%80")] // With surrogate pair - public void EscapeDataString(string stringToEscape, string expected) - { - Assert.Equal(expected, Uri.EscapeDataString(stringToEscape)); - } - - [Fact] - public void EscapeDataString_InvalidSurrogatePairs() - { - EscapeDataString("\uD800", "%EF%BF%BD"); - EscapeDataString("abc\uD800", "abc%EF%BF%BD"); - EscapeDataString("abc\uD800\uD800abc", "abc%EF%BF%BD%EF%BF%BDabc"); - EscapeDataString("\xD800\xD800\xDFFF", "%EF%BF%BD%F0%90%8F%BF"); - } - - [Fact] - public void EscapeDataString_Long_Success() - { - string s; - const int LongCount = 65520 + 1; - - s = new string('a', LongCount); - Assert.Equal(s, Uri.EscapeDataString(s)); - - s = new string('/', LongCount); - Assert.Equal(string.Concat(Enumerable.Repeat("%2F", LongCount)), Uri.EscapeDataString(s)); - } - - [Fact] - public void EscapeDataString_NullArgument() - { - AssertExtensions.Throws("stringToEscape", () => Uri.EscapeDataString(null)); - } - - [Theory] - [InlineData("", "")] - [InlineData("Hello", "Hello")] - [InlineData("He%5Cl/lo", "He\\l/lo")] - [InlineData("%F0%90%80%80", "\uD800\uDC00")] // Surrogate pair - public void UnescapeDataString(string stringToUnEscape, string expected) - { - Assert.Equal(expected, Uri.UnescapeDataString(stringToUnEscape)); - } - - [Fact] - public void UnescapedDataString_Null_ThrowsArgumentNullException() - { - AssertExtensions.Throws("stringToUnescape", () => Uri.UnescapeDataString(null)); // StringToUnescape is null - } - [Theory] [InlineData("", "")] [InlineData("Hello", "Hello")] From e83c10998cfa5e8a843234b6f5d78048928e9787 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 7 Feb 2024 03:15:07 +0100 Subject: [PATCH 2/4] Use the new API in a few places --- .../System/Net/Http/FormUrlEncodedContent.cs | 49 +++++++++++++++---- .../HttpEnvironmentProxy.cs | 11 +---- .../src/System/Net/FtpWebRequest.cs | 8 +-- 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs b/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs index b2c6a3d9d7803d..23cda43a272c31 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs @@ -28,7 +28,8 @@ private static byte[] GetContentByteArray(IEnumerable pair in nameValueCollection) { if (builder.Length > 0) @@ -36,22 +37,52 @@ private static byte[] GetContentByteArray(IEnumerable escapedChars = builder.RawChars.Slice(builder.Length, charsWritten); + + while (true) + { + int indexOfEscapedSpace = escapedChars.IndexOf("%20", StringComparison.Ordinal); + if (indexOfEscapedSpace < 0) + { + builder.Append(escapedChars); + break; + } + + builder.Append(escapedChars.Slice(0, indexOfEscapedSpace)); + builder.Append('+'); + escapedChars = escapedChars.Slice(indexOfEscapedSpace + 3); // Skip "%20" + } + } + else + { + builder.Length += charsWritten; + } } - // Escape spaces as '+'. - return Uri.EscapeDataString(data).Replace("%20", "+"); } protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context, CancellationToken cancellationToken) => diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpEnvironmentProxy.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpEnvironmentProxy.cs index 226f0f449df50c..629b212f458107 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpEnvironmentProxy.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpEnvironmentProxy.cs @@ -170,16 +170,9 @@ private HttpEnvironmentProxy(Uri? httpProxy, Uri? httpsProxy, string? bypassList int separatorIndex = value.LastIndexOf('@'); if (separatorIndex != -1) { - string auth = value.Substring(0, separatorIndex); - // The User and password may or may not be URL encoded. - // Curl seems to accept both. To match that, - // we do opportunistic decode and we use original string if it fails. - try - { - auth = Uri.UnescapeDataString(auth); - } - catch { }; + // Curl seems to accept both. To match that, we also decode the value. + string auth = Uri.UnescapeDataString(value.AsSpan(0, separatorIndex)); value = value.Substring(separatorIndex + 1); separatorIndex = auth.IndexOf(':'); diff --git a/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs b/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs index 2f214e3d99f72b..4f55ea7b9048c7 100644 --- a/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs +++ b/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs @@ -495,17 +495,17 @@ internal FtpWebRequest(Uri uri) NetworkCredential? networkCredential = null; _uri = uri; _methodInfo = FtpMethodInfo.GetMethodInfo(WebRequestMethods.Ftp.DownloadFile); - if (!string.IsNullOrEmpty(_uri.UserInfo)) + + if (_uri.UserInfo is { Length: > 0 } userInfo) { - string userInfo = _uri.UserInfo; string username = userInfo; string password = ""; int index = userInfo.IndexOf(':'); if (index != -1) { - username = Uri.UnescapeDataString(userInfo.Substring(0, index)); + username = Uri.UnescapeDataString(userInfo.AsSpan(0, index)); index++; // skip ':' - password = Uri.UnescapeDataString(userInfo.Substring(index)); + password = Uri.UnescapeDataString(userInfo.AsSpan(index)); } networkCredential = new NetworkCredential(username, password); } From 16f70f42a7fca46358acf4e92f24f415d4234a93 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 7 Feb 2024 06:24:49 +0100 Subject: [PATCH 3/4] Write directly into the destination span when possible --- .../System.Private.Uri/src/System/UriExt.cs | 38 ++++++++++++++++--- .../src/System/UriHelper.cs | 35 +++++++++++++---- .../System/ValueStringBuilderExtensions.cs | 11 +++++- 3 files changed, 69 insertions(+), 15 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/UriExt.cs b/src/libraries/System.Private.Uri/src/System/UriExt.cs index 99eb29d11a107f..db0fa4d3261338 100644 --- a/src/libraries/System.Private.Uri/src/System/UriExt.cs +++ b/src/libraries/System.Private.Uri/src/System/UriExt.cs @@ -4,6 +4,8 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Globalization; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Text; namespace System @@ -583,7 +585,7 @@ private static string UnescapeDataString(ReadOnlySpan charsToUnescape, str var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]); - // This may throw for very large inputs. ¯\_(ツ)_/¯ + // We may throw for very large inputs (when growing the ValueStringBuilder). vsb.EnsureCapacity(charsToUnescape.Length - indexOfFirstToUnescape); UriHelper.UnescapeString( @@ -618,11 +620,25 @@ public static bool TryUnescapeDataString(ReadOnlySpan charsToUnescape, Spa return false; } - var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]); + // We may throw for very large inputs (when growing the ValueStringBuilder). + scoped ValueStringBuilder vsb; - // This may throw for very large inputs. ¯\_(ツ)_/¯ - vsb.EnsureCapacity(charsToUnescape.Length - indexOfFirstToUnescape); + // If the input and destination buffers overlap, we must take care not to overwrite parts of the input before we've processed it. + // If the buffers start at the same location, we can still use the destination as the output length is strictly <= input length. + bool overlapped = charsToUnescape.Overlaps(destination) && + !Unsafe.AreSame(ref MemoryMarshal.GetReference(charsToUnescape), ref MemoryMarshal.GetReference(destination)); + + if (overlapped) + { + vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]); + vsb.EnsureCapacity(charsToUnescape.Length - indexOfFirstToUnescape); + } + else + { + vsb = new ValueStringBuilder(destination.Slice(indexOfFirstToUnescape)); + } + // We may throw for very large inputs (when growing the ValueStringBuilder). UriHelper.UnescapeString( charsToUnescape.Slice(indexOfFirstToUnescape), ref vsb, c_DummyChar, c_DummyChar, c_DummyChar, @@ -635,9 +651,19 @@ public static bool TryUnescapeDataString(ReadOnlySpan charsToUnescape, Spa if (destination.Length >= newLength) { charsToUnescape.Slice(0, indexOfFirstToUnescape).CopyTo(destination); - vsb.AsSpan().CopyTo(destination.Slice(indexOfFirstToUnescape)); - vsb.Dispose(); + if (overlapped) + { + vsb.AsSpan().CopyTo(destination.Slice(indexOfFirstToUnescape)); + vsb.Dispose(); + } + else + { + // We are expecting the builder not to grow if the original span was large enough. + // This means that we MUST NOT over allocate anywhere in UnescapeString (e.g. append and then decrease the length). + Debug.Assert(vsb.RawChars.Overlaps(destination)); + } + charsWritten = newLength; return true; } diff --git a/src/libraries/System.Private.Uri/src/System/UriHelper.cs b/src/libraries/System.Private.Uri/src/System/UriHelper.cs index 980487163ce46e..335f862a139d0a 100644 --- a/src/libraries/System.Private.Uri/src/System/UriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/UriHelper.cs @@ -135,10 +135,21 @@ public static bool TryEscapeDataString(ReadOnlySpan charsToEscape, Span charsToEscape, Span= newLength) { charsToEscape.Slice(0, indexOfFirstToEscape).CopyTo(destination); - vsb.AsSpan().CopyTo(destination.Slice(indexOfFirstToEscape)); - vsb.Dispose(); + if (overlapped) + { + vsb.AsSpan().CopyTo(destination.Slice(indexOfFirstToEscape)); + vsb.Dispose(); + } + else + { + // We are expecting the builder not to grow if the original span was large enough. + // This means that we MUST NOT over allocate anywhere in EscapeStringToBuilder (e.g. append and then decrease the length). + Debug.Assert(vsb.RawChars.Overlaps(destination)); + } + charsWritten = newLength; return true; } @@ -183,8 +204,8 @@ public static string EscapeString(ReadOnlySpan charsToEscape, bool checkEx // escape the rest, and concat the result with the characters we skipped above. var vsb = new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]); - // This may throw for very large inputs. ¯\_(ツ)_/¯ - vsb.EnsureCapacity(charsToEscape.Length - indexOfFirstToEscape); + // We may throw for very large inputs (when growing the ValueStringBuilder). + vsb.EnsureCapacity(charsToEscape.Length); EscapeStringToBuilder(charsToEscape.Slice(indexOfFirstToEscape), ref vsb, noEscape, checkExistingEscaped); diff --git a/src/libraries/System.Private.Uri/src/System/ValueStringBuilderExtensions.cs b/src/libraries/System.Private.Uri/src/System/ValueStringBuilderExtensions.cs index c93ee77fda6a4b..37a32dbebbcba8 100644 --- a/src/libraries/System.Private.Uri/src/System/ValueStringBuilderExtensions.cs +++ b/src/libraries/System.Private.Uri/src/System/ValueStringBuilderExtensions.cs @@ -35,8 +35,15 @@ public void Append(Rune rune) [MethodImpl(MethodImplOptions.NoInlining)] private void GrowAndAppend(Rune rune) { - Grow(2); - Append(rune); + if (rune.Value <= 0xFFFF) + { + Append((char)rune.Value); + } + else + { + Grow(2); + Append(rune); + } } } } From 7b0b1d58f7b6bc2a5273c3053458044bd1114e90 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Mon, 12 Feb 2024 23:01:07 -0800 Subject: [PATCH 4/4] Update comments --- .../src/System/Net/Http/FormUrlEncodedContent.cs | 3 ++- src/libraries/System.Private.Uri/src/System/UriExt.cs | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs b/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs index 23cda43a272c31..f13a22f170fe3c 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs @@ -42,7 +42,8 @@ private static byte[] GetContentByteArray(IEnumerable charsToUnescape, Spa vsb = new ValueStringBuilder(destination.Slice(indexOfFirstToUnescape)); } - // We may throw for very large inputs (when growing the ValueStringBuilder). UriHelper.UnescapeString( charsToUnescape.Slice(indexOfFirstToUnescape), ref vsb, c_DummyChar, c_DummyChar, c_DummyChar,