Skip to content

Commit

Permalink
Add Base64url encoding/decoding (#102364)
Browse files Browse the repository at this point in the history
* Base64Url encoding, validation implementation

* Validation related updates

* Try fix perf regression in vectorized methods

* Add decoder implementation and unit tests

* Share code in place decoding code

* Add span<char> overloads with vectorization

* Generalize Span char/byte implementations

* Move ref update to runtime and other cleanup

* Generalize the AdvSimd.Arm64 vectorization added recently

* Apply suggestions from code review

Co-authored-by: Günther Foidl <[email protected]>

* Apply some feedback

* Try fix ARM failure

* Use array pool whenever applicable

* Handle '%' as url padding, add more tests and fix findings

* Fix assertion failure, apply some feedback, try fix ARM failure

* Update docs, small clean ups

* Try fix ARM failure

* Update src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlEncoder.cs

Co-authored-by: Günther Foidl <[email protected]>

* Rename StoreToDestination overloads, reuse some duplicate code

* Improve perf for Base.IsValid() overloads, exclude ARM vectorization for char(ushort) overload

* Apply feedbacks

* Revert Assert

* Fix ARM vectorization failure for char overload

* Apply suggestions from code review

Co-authored-by: Jeremy Barton <[email protected]>

* Apply suggestions from code review

Co-authored-by: Günther Foidl <[email protected]>

* Apply more feedback

* Apply suggestions from code review

Co-authored-by: Günther Foidl <[email protected]>

* Apply review comment left overs

* Apply suggestions from code review

Co-authored-by: Miha Zupan <[email protected]>

* Apply remaining feedback

* Apply suggestions from code review

Co-authored-by: Jeremy Barton <[email protected]>

* Apply the doc feedback for other API docs

* Fix Base64Url fuzzer findings

* Apply suggestions from code review

Co-authored-by: Jeremy Barton <[email protected]>

* Rename utf8 -> source/destination

* Apply feedbacks

* Apply feedback

* Apply left out feedbacks

---------

Co-authored-by: Günther Foidl <[email protected]>
Co-authored-by: Jeremy Barton <[email protected]>
Co-authored-by: Miha Zupan <[email protected]>
  • Loading branch information
4 people authored and pull[bot] committed Oct 11, 2024
1 parent e3d6ad1 commit 3251814
Show file tree
Hide file tree
Showing 19 changed files with 4,443 additions and 392 deletions.
26 changes: 8 additions & 18 deletions src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
Expand Down Expand Up @@ -273,6 +272,9 @@ public void BasicDecodingWithFinalBlockTrueKnownInputDone(string inputString, in

[Theory]
[InlineData("A", 0, 0)]
[InlineData("A===", 0, 0)]
[InlineData("A==", 0, 0)]
[InlineData("A=", 0, 0)]
[InlineData("AQ", 0, 0)]
[InlineData("AQI", 0, 0)]
[InlineData("AQIDBA", 4, 3)]
Expand All @@ -285,16 +287,18 @@ public void BasicDecodingWithFinalBlockTrueKnownInputInvalid(string inputString,
Assert.Equal(OperationStatus.InvalidData, Base64.DecodeFromUtf8(source, decodedBytes, out int consumed, out int decodedByteCount));
Assert.Equal(expectedConsumed, consumed);
Assert.Equal(expectedWritten, decodedByteCount); // expectedWritten == decodedBytes.Length
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, decodedBytes.Length, source, decodedBytes));
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, expectedWritten, source, decodedBytes));
}

[Theory]
[InlineData("\u00ecz/T", 0, 0)] // scalar code-path
[InlineData("z/Ta123\u00ec", 4, 3)]
[InlineData("\u00ecz/TpH7sqEkerqMweH1uSw==", 0, 0)] // Vector128 code-path
[InlineData("z/TpH7sqEkerqMweH1uSw\u00ec==", 20, 15)]
[InlineData("\u00ecz/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo==", 0, 0)] // Vector256 / AVX code-path
[InlineData("z/TpH7sqEkerqMweH1uSw\u5948==", 20, 15)]
[InlineData("\u5948/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo==", 0, 0)] // Vector256 / AVX code-path
[InlineData("z/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo\u00ec==", 44, 33)]
[InlineData("\u5948z+T/H7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo01234567890123456789012345678901234567890123456789==", 0, 0)] // Vector512 / Avx512Vbmi code-path
[InlineData("z/T+H7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo01234567890123456789012345678901234567890123456789\u5948==", 92, 69)]
public void BasicDecodingNonAsciiInputInvalid(string inputString, int expectedConsumed, int expectedWritten)
{
Span<byte> source = Encoding.UTF8.GetBytes(inputString);
Expand Down Expand Up @@ -749,19 +753,5 @@ public void BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes(strin
Assert.Equal(expectedWritten, decodedByteCount);
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, expectedWritten, source, decodedBytes));
}

/// <summary>
/// Produces theory rows of the form { input, expectedConsumed, expectedWritten }.
/// The first five rows are "AQ==" followed by 0..4 whitespace characters drawn from a
/// fixed-seed <see cref="Random"/>; the remaining rows repeat a four-symbol block
/// (with spaces sprinkled in) four times.
/// </summary>
public static IEnumerable<object[]> BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes_MemberData()
{
    const string WhitespaceChoices = " \n\t\r";

    // Fixed seed keeps the generated trailing whitespace identical between runs.
    Random random = new Random(42);

    int trailingCount = 0;
    while (trailingCount < 5)
    {
        char[] trailing = random.GetItems<char>(WhitespaceChoices, trailingCount);
        string input = "AQ==" + new string(trailing);
        yield return new object[] { input, 4 + trailingCount, 1 };
        trailingCount++;
    }

    string[] blocks = { "MTIz", "M TIz", "MT Iz", "MTI z", "MTIz ", "M TI z", "M T I Z " };
    foreach (string block in blocks)
    {
        // Four copies of the block: everything is consumed and 12 bytes are written.
        string repeated = block + block + block + block;
        yield return new object[] { repeated, block.Length * 4, 12 };
    }
}
}
}
14 changes: 14 additions & 0 deletions src/libraries/System.Memory/tests/Base64/Base64TestBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,5 +107,19 @@ public static IEnumerable<object[]> StringsOnlyWithCharsToBeIgnored()

string GetRepeatedChar(char charToInsert, int numberOfTimesToInsert) => new string(charToInsert, numberOfTimesToInsert);
}

/// <summary>
/// Produces theory rows of the form { input, expectedConsumed, expectedWritten }.
/// The first five rows are "AQ==" followed by 0..4 whitespace characters drawn from a
/// fixed-seed <see cref="Random"/>; the remaining rows repeat a four-symbol block
/// (with spaces sprinkled in) four times.
/// </summary>
public static IEnumerable<object[]> BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes_MemberData()
{
    const string WhitespaceChoices = " \n\t\r";

    // Fixed seed keeps the generated trailing whitespace identical between runs.
    Random random = new Random(42);

    int trailingCount = 0;
    while (trailingCount < 5)
    {
        char[] trailing = random.GetItems<char>(WhitespaceChoices, trailingCount);
        string input = "AQ==" + new string(trailing);
        yield return new object[] { input, 4 + trailingCount, 1 };
        trailingCount++;
    }

    string[] blocks = { "MTIz", "M TIz", "MT Iz", "MTI z", "MTIz ", "M TI z", "M T I Z " };
    foreach (string block in blocks)
    {
        // Four copies of the block: everything is consumed and 12 bytes are written.
        string repeated = block + block + block + block;
        yield return new object[] { repeated, block.Length * 4, 12 };
    }
}
}
}
88 changes: 84 additions & 4 deletions src/libraries/System.Memory/tests/Base64/Base64TestHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,23 @@ public static class Base64TestHelper
52, 53, 54, 55, 56, 57, 43, 47 //4..9, +, /
};

// Base64Url alphabet as ASCII codes, indexed by 6-bit value: A-Z, a-z, 0-9,
// then '-' (45) and '_' (95) in the last two positions.
public static readonly byte[] s_urlEncodingMap = {
65, 66, 67, 68, 69, 70, 71, 72, //A..H
73, 74, 75, 76, 77, 78, 79, 80, //I..P
81, 82, 83, 84, 85, 86, 87, 88, //Q..X
89, 90, 97, 98, 99, 100, 101, 102, //Y..Z, a..f
103, 104, 105, 106, 107, 108, 109, 110, //g..n
111, 112, 113, 114, 115, 116, 117, 118, //o..v
119, 120, 121, 122, 48, 49, 50, 51, //w..z, 0..3
52, 53, 54, 55, 56, 57, 45, 95 //4..9, -, _
};

// Pre-computing this table using a custom string(s_characters) and GenerateDecodingMapAndVerify (found in tests)
public static readonly sbyte[] s_decodingMap = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, //62 is placed at index 43 (for +), 63 at index 47 (for /)
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9), 64 at index 61 (for =)
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9)
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, //0-25 are placed at index 65-90 (for A-Z)
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
Expand All @@ -44,9 +55,29 @@ public static class Base64TestHelper
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

// Base64Url decoding table indexed by byte value (256 entries): each entry is the
// 6-bit value of that symbol, or -1 when the byte is not part of the URL alphabet.
public static readonly sbyte[] s_urlDecodingMap = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, //62 is placed at index 45 (for -), 63 at index 95 (for _)
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9)
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, //0-25 are placed at index 65-90 (for A-Z), 63 at index 95 (for _)
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, //26-51 are placed at index 97-122 (for a-z)
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bytes over 122 ('z') are invalid and cannot be decoded
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Hence, padding the map with -1 (255 when viewed as an unsigned byte), which indicates invalid input
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

/// <summary>
/// Returns true for the four ASCII whitespace bytes: space, tab, carriage return and line feed.
/// </summary>
public static bool IsByteToBeIgnored(byte charByte)
{
    switch (charByte)
    {
        case (byte)' ':
        case (byte)'\t':
        case (byte)'\r':
        case (byte)'\n':
            return true;
        default:
            return false;
    }
}

public const byte EncodingPad = (byte)'='; // '=', for padding
public const byte UrlEncodingPad = (byte)'%'; // '%', for url padding
public const sbyte InvalidByte = -1; // Designating -1 for invalid bytes in the decoding map

public static byte[] InvalidBytes
Expand All @@ -60,6 +91,17 @@ public static byte[] InvalidBytes
}
}

/// <summary>
/// Gets every byte value whose entry in <c>s_urlDecodingMap</c> equals <c>InvalidByte</c>.
/// </summary>
public static byte[] UrlInvalidBytes
{
    get
    {
        int[] invalidPositions = s_urlDecodingMap.FindAllIndexOf(InvalidByte);

        // Narrow each index explicitly; indices.Cast<byte>() would throw
        // InvalidCastException (cannot unbox System.Int32 as System.Byte).
        byte[] invalidBytes = new byte[invalidPositions.Length];
        for (int i = 0; i < invalidBytes.Length; i++)
        {
            invalidBytes[i] = (byte)invalidPositions[i];
        }

        return invalidBytes;
    }
}

internal static void InitializeBytes(Span<byte> bytes, int seed = 100)
{
var rnd = new Random(seed);
Expand All @@ -79,6 +121,26 @@ internal static void InitializeDecodableBytes(Span<byte> bytes, int seed = 100)
}
}

/// <summary>
/// Fills <paramref name="bytes"/> with characters picked from <c>s_urlEncodingMap</c>
/// using a <see cref="Random"/> seeded with <paramref name="seed"/>.
/// </summary>
internal static void InitializeUrlDecodableChars(Span<char> bytes, int seed = 100)
{
    Random generator = new Random(seed);
    foreach (ref char slot in bytes)
    {
        byte alphabetIndex = (byte)generator.Next(0, s_urlEncodingMap.Length);
        slot = (char)s_urlEncodingMap[alphabetIndex];
    }
}

/// <summary>
/// Fills <paramref name="bytes"/> with symbols picked from <c>s_urlEncodingMap</c>
/// using a <see cref="Random"/> seeded with <paramref name="seed"/>.
/// </summary>
internal static void InitializeUrlDecodableBytes(Span<byte> bytes, int seed = 100)
{
    Random generator = new Random(seed);
    foreach (ref byte slot in bytes)
    {
        byte alphabetIndex = (byte)generator.Next(0, s_urlEncodingMap.Length);
        slot = s_urlEncodingMap[alphabetIndex];
    }
}

[Fact]
public static void GenerateEncodingMapAndVerify()
{
Expand Down Expand Up @@ -112,16 +174,34 @@ public static int[] FindAllIndexOf<T>(this IEnumerable<T> values, T valueToFind)

// Returns true when the first expectedWritten bytes of encodedBytes, read as ASCII,
// equal Convert.ToBase64String of the first expectedConsumed bytes of source.
// NOTE(review): this span comes from a rendered diff, so both the pre-change
// (.ToArray()) and post-change declarations of each local appear below - confirm
// that only one pair remains in the actual file.
public static bool VerifyEncodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> encodedBytes)
{
string expectedText = Convert.ToBase64String(source.Slice(0, expectedConsumed).ToArray());
string encodedText = Encoding.ASCII.GetString(encodedBytes.Slice(0, expectedWritten).ToArray());
string expectedText = Convert.ToBase64String(source.Slice(0, expectedConsumed));
string encodedText = Encoding.ASCII.GetString(encodedBytes.Slice(0, expectedWritten));
return expectedText.Equals(encodedText);
}

/// <summary>
/// Checks Base64Url encoder output against a reference built from <see cref="Convert.ToBase64String(ReadOnlySpan{byte}, Base64FormattingOptions)"/>.
/// </summary>
/// <param name="expectedConsumed">Number of leading bytes of <paramref name="source"/> that were encoded.</param>
/// <param name="expectedWritten">Number of leading bytes of <paramref name="encodedBytes"/> that hold encoder output.</param>
/// <param name="source">The raw input bytes.</param>
/// <param name="encodedBytes">The encoder output, interpreted as ASCII.</param>
/// <returns>true when the output equals the reference text; otherwise false.</returns>
public static bool VerifyUrlEncodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> encodedBytes)
{
    // Reference: standard Base64 with padding dropped and '+' / '/' mapped to '-' / '_'.
    string standardBase64 = Convert.ToBase64String(source.Slice(0, expectedConsumed));
    string expected = standardBase64
        .TrimEnd('=')
        .Replace('/', '_')
        .Replace('+', '-');

    string actual = Encoding.ASCII.GetString(encodedBytes.Slice(0, expectedWritten));
    return string.Equals(expected, actual);
}

// Returns true when Convert.FromBase64String applied to the first expectedConsumed
// bytes of source (read as ASCII) yields exactly the first expectedWritten bytes
// of decodedBytes.
// NOTE(review): this span comes from a rendered diff, so both the pre-change
// (.ToArray()) and post-change declarations of sourceString appear below - confirm
// that only one remains in the actual file.
public static bool VerifyDecodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> decodedBytes)
{
string sourceString = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed).ToArray());
string sourceString = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed));
byte[] expectedBytes = Convert.FromBase64String(sourceString);
return expectedBytes.AsSpan().SequenceEqual(decodedBytes.Slice(0, expectedWritten));
}

/// <summary>
/// Checks Base64Url decoder output against a reference built from <see cref="Convert.FromBase64String(string)"/>.
/// </summary>
/// <param name="expectedConsumed">Number of leading bytes of <paramref name="source"/> that were decoded.</param>
/// <param name="expectedWritten">Number of leading bytes of <paramref name="decodedBytes"/> that hold decoder output.</param>
/// <param name="source">The Base64Url input, interpreted as ASCII.</param>
/// <param name="decodedBytes">The decoder output.</param>
/// <returns>true when the output equals the reference bytes; otherwise false.</returns>
public static bool VerifyUrlDecodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> decodedBytes)
{
    string text = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed));

    // Convert.FromBase64String needs a length that is a multiple of four,
    // so top the text up with '=' when it falls short.
    int remainder = text.Length % 4;
    if (remainder != 0)
    {
        text = text.PadRight(text.Length + (4 - remainder), '=');
    }

    // Map the URL alphabet and '%' padding back to standard Base64.
    string standardBase64 = text.Replace('_', '/').Replace('-', '+').Replace('%', '=');
    byte[] expected = Convert.FromBase64String(standardBase64);

    return expected.AsSpan().SequenceEqual(decodedBytes.Slice(0, expectedWritten));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public void BasicValidationInvalidInputLengthBytes()
} while (numBytes % 4 == 0); // ensure we have a invalid length

Span<byte> source = new byte[numBytes];
Base64TestHelper.InitializeDecodableBytes(source, numBytes);

Assert.False(Base64.IsValid(source));
Assert.False(Base64.IsValid(source, out int decodedLength));
Expand All @@ -88,10 +89,16 @@ public void BasicValidationInvalidInputLengthChars()
numBytes = rnd.Next(100, 1000 * 1000);
} while (numBytes % 4 == 0); // ensure we have a invalid length

Span<char> source = new char[numBytes];
Span<byte> source = new byte[numBytes];
Base64TestHelper.InitializeDecodableBytes(source, numBytes);
Span<char> chars = source
.ToArray()
.Select(Convert.ToChar)
.ToArray()
.AsSpan();

Assert.False(Base64.IsValid(source));
Assert.False(Base64.IsValid(source, out int decodedLength));
Assert.False(Base64.IsValid(chars));
Assert.False(Base64.IsValid(chars, out int decodedLength));
Assert.Equal(0, decodedLength);
}
}
Expand Down Expand Up @@ -267,7 +274,7 @@ public void InvalidSizeBytes(string utf8WithByteToBeIgnored)
[InlineData("Y")]
public void InvalidSizeChars(string utf8WithByteToBeIgnored)
{
byte[] utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithByteToBeIgnored);
ReadOnlySpan<char> utf8BytesWithByteToBeIgnored = utf8WithByteToBeIgnored;

Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored));
Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored, out int decodedLength));
Expand Down Expand Up @@ -329,10 +336,10 @@ public void InvalidBase64Bytes(string utf8WithByteToBeIgnored)
[InlineData(" a ")]
public void InvalidBase64Chars(string utf8WithByteToBeIgnored)
{
byte[] utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithByteToBeIgnored);
ReadOnlySpan<char> utf8CharsWithCharToBeIgnored = utf8WithByteToBeIgnored;

Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored));
Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored, out int decodedLength));
Assert.False(Base64.IsValid(utf8CharsWithCharToBeIgnored));
Assert.False(Base64.IsValid(utf8CharsWithCharToBeIgnored, out int decodedLength));
Assert.Equal(0, decodedLength);
}
}
Expand Down
Loading

0 comments on commit 3251814

Please sign in to comment.