-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add System.Text fuzzing for encoders #103968
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
53aac47
Add fuzzing for System.Text encoding classes
steveharter 8c45049
Update netfx
steveharter 94010d8
Forward to .NET Framework when Core fuzzing
steveharter 003e433
Remove .NET Framework forwarding approach; approach preserved in this…
steveharter 9ffc714
Remove additional change
steveharter 6ee823e
Remove unused #define
steveharter File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
189 changes: 189 additions & 0 deletions
189
src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/TextEncodingFuzzer.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Buffers; | ||
using System.Diagnostics; | ||
using System.Text; | ||
|
||
namespace DotnetFuzzing.Fuzzers; | ||
|
||
// The fuzzing infrastructure currently does not support fuzzing .NET Framework. | ||
// However, this test class, while running under .NET Core, was used to forward the fuzzing | ||
// input to a .NET Framework console app. That app had the same test semantics as the tests | ||
// here, although used slightly different supporting APIs since not all supporting library | ||
// and language features are present in .NET Framework. | ||
// This forwarding approach and .NET Framework test code is preserved in the original Pull | ||
// Request for this file. The approach used Base64 encoding to convert the incoming | ||
// ReadOnlySpan<byte> to a string which was then passed to the Main() method of the .NET | ||
// Framework app which was then converted back to bytes before being passed to the .NET | ||
// Framework fuzzing tests. | ||
/// <summary>
/// Fuzzes the System.Text encodings (Latin1, ASCII, UTF-16, UTF-32, UTF-7 and UTF-8) by decoding the
/// fuzzer input into chars, encoding those chars back into bytes, and asserting that the data round-trips.
/// </summary>
internal sealed class TextEncodingFuzzer : IFuzzer
{
    string[] IFuzzer.TargetAssemblies => [];
    string[] IFuzzer.TargetCoreLibPrefixes { get; } = ["System.Text"];

    /// <summary>Runs every per-encoding test against the same fuzzer input.</summary>
    /// <param name="bytes">The raw input supplied by the fuzzing engine.</param>
    void IFuzzer.FuzzTarget(ReadOnlySpan<byte> bytes)
    {
        // Rent a copy of the input with PoisonPagePlacement.After; presumably an out-of-bounds read
        // past the end of the buffer then faults instead of silently succeeding.
        using PooledBoundedMemory<byte> poisonAfter = PooledBoundedMemory<byte>.Rent(bytes, PoisonPagePlacement.After);

        TestLatin1(poisonAfter.Span);
        TestASCII(poisonAfter.Span);
        TestUnicode(poisonAfter.Span);
        TestUtf32(poisonAfter.Span);
        TestUtf7(poisonAfter.Span);
        TestUtf8(poisonAfter.Span);
    }

    // We use individual methods for each encoding, so if there's an exception then
    // it's clear which encoding failed based on the call stack.

    /// <summary>Exercises the ISO-8859-1 (Latin1) encoding.</summary>
    private static void TestLatin1(ReadOnlySpan<byte> input)
    {
        TestWithSubstitution(input, Encoding.GetEncoding("ISO-8859-1"));
        TestWithConvert(input, Encoding.GetEncoding("ISO-8859-1"));
    }

    /// <summary>Exercises <see cref="ASCIIEncoding"/>.</summary>
    private static void TestASCII(ReadOnlySpan<byte> input)
    {
        TestWithSubstitution(input, new ASCIIEncoding());
        TestWithConvert(input, new ASCIIEncoding());
    }

    /// <summary>Exercises <see cref="UnicodeEncoding"/>, both with default settings and with throwOnInvalidBytes enabled.</summary>
    private static void TestUnicode(ReadOnlySpan<byte> input)
    {
        TestWithSubstitution(input, new UnicodeEncoding());
        TestWithExceptions(input, new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true));
        TestWithConvert(input, new UnicodeEncoding());
    }

    /// <summary>Exercises <see cref="UTF32Encoding"/>, both with default settings and with throwOnInvalidCharacters enabled.</summary>
    private static void TestUtf32(ReadOnlySpan<byte> input)
    {
        TestWithSubstitution(input, new UTF32Encoding());
        TestWithExceptions(input, new UTF32Encoding(bigEndian: false, byteOrderMark: false, throwOnInvalidCharacters: true));
        TestWithConvert(input, new UTF32Encoding());
    }

    /// <summary>Exercises the obsolete <see cref="UTF7Encoding"/> (round-trip test only).</summary>
    private static void TestUtf7(ReadOnlySpan<byte> input)
    {
#pragma warning disable SYSLIB0001 // Type or member is obsolete
        TestWithSubstitution(input, new UTF7Encoding());
#pragma warning restore SYSLIB0001
    }

    /// <summary>Exercises <see cref="UTF8Encoding"/>, both with default settings and with throwOnInvalidBytes enabled.</summary>
    private static void TestUtf8(ReadOnlySpan<byte> input)
    {
        TestWithSubstitution(input, new UTF8Encoding());
        TestWithExceptions(input, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true));
        TestWithConvert(input, new UTF8Encoding());
    }

    /// <summary>
    /// Decodes <paramref name="input"/> to chars, encodes those chars back to bytes, decodes the encoded
    /// bytes again, and asserts that both decoded char sequences are identical.
    /// </summary>
    /// <param name="input">The bytes to decode.</param>
    /// <param name="encoding">The encoding that supplies the decoder and encoder under test.</param>
    private static void TestWithSubstitution(ReadOnlySpan<byte> input, Encoding encoding)
    {
        Decoder decoder = encoding.GetDecoder();
        int charCount = decoder.GetCharCount(input, flush: true);

        using PooledBoundedMemory<char> chars = PooledBoundedMemory<char>.Rent(charCount, PoisonPagePlacement.After);
        using PooledBoundedMemory<char> chars2 = PooledBoundedMemory<char>.Rent(charCount, PoisonPagePlacement.After);

        // *4 for worst case scenario (*2 for char->byte + *2 for encoding)
        // +2 is for possible Base64 padding with UTF7Encoding.
        using PooledBoundedMemory<byte> bytes = PooledBoundedMemory<byte>.Rent(charCount * 4 + 2, PoisonPagePlacement.After);

        // GetCharCount was called with flush: true; reset so GetChars starts from a clean state.
        decoder.Reset();
        int written = decoder.GetChars(input, chars.Span, flush: true);
        Assert.Equal(charCount, written);

        Encoder encoder = encoding.GetEncoder();
        // We use flush:true here for UTF7Encoding which may do Base64 padding at the end.
        int bytesWritten = encoder.GetBytes(chars.Span, bytes.Span, flush: true);

        // Decode the encoded values. Any substitutions will be comparable now.
        decoder.Reset();
        written = decoder.GetChars(bytes.Span.Slice(0, bytesWritten), chars2.Span, flush: true);
        Assert.Equal(charCount, written);

        // Verify that we round-tripped the values.
        Assert.SequenceEqual<char>(chars.Span, chars2.Span);
    }

    // If there are substitutions, these cases will fail with DecoderFallbackException early on,
    // otherwise there should be no DecoderFallbackExceptions.
    /// <summary>
    /// Runs the round-trip test with an encoding configured to throw on invalid data, skipping inputs
    /// that the encoding cannot decode without a fallback.
    /// </summary>
    /// <param name="input">The bytes to decode.</param>
    /// <param name="encoding">An encoding whose decoder and encoder fallbacks are the exception fallbacks.</param>
    private static void TestWithExceptions(ReadOnlySpan<byte> input, Encoding encoding)
    {
        Assert.Equal(typeof(DecoderExceptionFallback), encoding.DecoderFallback.GetType());
        Assert.Equal(typeof(EncoderExceptionFallback), encoding.EncoderFallback.GetType());

        Decoder decoder = encoding.GetDecoder();

        try
        {
            // Only the validation side effect matters here: the count itself is not needed,
            // but the call throws when the input contains invalid data.
            _ = decoder.GetCharCount(input, flush: true);
        }
        catch (DecoderFallbackException)
        {
            // The input is not valid without fallbacks.
            return;
        }

        TestWithSubstitution(input, encoding);
    }

    /// <summary>Runs the chunked Convert() test over a handful of block sizes.</summary>
    /// <param name="input">The bytes to decode.</param>
    /// <param name="encoding">The encoding that supplies the decoder under test.</param>
    private static void TestWithConvert(ReadOnlySpan<byte> input, Encoding encoding)
    {
        // Use a few boundary cases.
        TestWithConvert(input, encoding, 1);
        TestWithConvert(input, encoding, 2);
        TestWithConvert(input, encoding, 3);
        TestWithConvert(input, encoding, 4);
        TestWithConvert(input, encoding, input.Length);

        if (input.Length >= 6)
        {
            TestWithConvert(input, encoding, input.Length - 1);

            if (input.Length >= 12)
            {
                TestWithConvert(input, encoding, input.Length / 2);
            }
        }
    }

    // Verify that obtaining data using several Convert() calls matches the result from a single GetChars() call.
    /// <param name="input">The bytes to decode.</param>
    /// <param name="encoding">The encoding that supplies the decoder under test.</param>
    /// <param name="blockSize">The maximum number of input bytes handed to each Convert() call.</param>
    private static void TestWithConvert(ReadOnlySpan<byte> input, Encoding encoding, int blockSize)
    {
        Decoder decoder = encoding.GetDecoder();
        Encoder encoder = encoding.GetEncoder();

        int charCount = decoder.GetCharCount(input, flush: true);

        using PooledBoundedMemory<char> chars = PooledBoundedMemory<char>.Rent(charCount, PoisonPagePlacement.After);
        using PooledBoundedMemory<char> chars2 = PooledBoundedMemory<char>.Rent(charCount, PoisonPagePlacement.After);

        decoder.Reset();
        int charsUsedTotal = 0;
        int i = 0;

        while (i < input.Length)
        {
            // The final block takes whatever is left and is the only one that flushes the decoder.
            bool lastIteration = i + blockSize >= input.Length;
            int bytesToRead = lastIteration ? input.Length - i : blockSize;

            decoder.Convert(
                input.Slice(i, bytesToRead),
                chars.Span.Slice(charsUsedTotal, charCount - charsUsedTotal),
                flush: lastIteration,
                out int bytesUsed,
                out int charsUsed,
                out bool _);

            // Advance by what was actually consumed, which may be less than what was offered.
            i += bytesUsed;
            charsUsedTotal += charsUsed;
        }

        // Expected value first, matching the other Assert.Equal calls in this class.
        Assert.Equal(charCount, charsUsedTotal);
        decoder.Reset();
        decoder.GetChars(input, chars2.Span, flush: true);
        Assert.SequenceEqual<char>(chars.Span, chars2.Span);
    }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How is this working? I am not seeing any forwarding to a console app here. Is it done somewhere else when you do
#define FORWARD_TO_NETFRAMEWORK
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code to do this was removed from this PR for a few reasons, but exists in the commit history. See 94010d8#diff-1249360cc8e4b2057cb1111a705788d87911917d770743eae1779847159f9790R66-R67