diff --git a/src/Compilers/Core/Portable/GlobalSuppressions.cs b/src/Compilers/Core/Portable/GlobalSuppressions.cs index 81a04ac25c91d..e05dbf22d3d2a 100644 --- a/src/Compilers/Core/Portable/GlobalSuppressions.cs +++ b/src/Compilers/Core/Portable/GlobalSuppressions.cs @@ -11,5 +11,6 @@ // required paramter type is different for all overloads and so there is no ambiguity. [assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("ApiDesign", "RS0026:Do not add multiple public overloads with optional parameters", Justification = "", Scope = "member", Target = "~M:Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.Stream,System.Text.Encoding,Microsoft.CodeAnalysis.Text.SourceHashAlgorithm,System.Boolean,System.Boolean)~Microsoft.CodeAnalysis.Text.SourceText")] [assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("ApiDesign", "RS0026:Do not add multiple public overloads with optional parameters", Justification = "", Scope = "member", Target = "~M:Microsoft.CodeAnalysis.Text.SourceText.From(System.Byte[],System.Int32,System.Text.Encoding,Microsoft.CodeAnalysis.Text.SourceHashAlgorithm,System.Boolean,System.Boolean)~Microsoft.CodeAnalysis.Text.SourceText")] +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("ApiDesign", "RS0026:Do not add multiple public overloads with optional parameters", Justification = "", Scope = "member", Target = "~M:Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.TextReader,System.Int32,System.Text.Encoding,Microsoft.CodeAnalysis.Text.SourceHashAlgorithm)~Microsoft.CodeAnalysis.Text.SourceText")] [assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("ApiDesign", "RS0027:Public API with optional parameter(s) should have the most parameters amongst its public overloads.", Justification = "", Scope = "member", Target = "~M:Microsoft.CodeAnalysis.Text.SourceText.From(System.String,System.Text.Encoding,Microsoft.CodeAnalysis.Text.SourceHashAlgorithm)~Microsoft.CodeAnalysis.Text.SourceText")] diff --git a/src/Compilers/Core/Portable/InternalUtilities/EncodingExtensions.cs b/src/Compilers/Core/Portable/InternalUtilities/EncodingExtensions.cs index b039b6806074e..9d9c7a7bb3426 100644 --- a/src/Compilers/Core/Portable/InternalUtilities/EncodingExtensions.cs +++ b/src/Compilers/Core/Portable/InternalUtilities/EncodingExtensions.cs @@ -19,11 +19,29 @@ internal static int GetMaxCharCountOrThrowIfHuge(this Encoding encoding, Stream Debug.Assert(stream.CanSeek); long length = stream.Length; + int maxCharCount; + if (encoding.TryGetMaxCharCount(length, out maxCharCount)) + { + return maxCharCount; + } + +#if WORKSPACE_DESKTOP + throw new IOException(WorkspacesResources.Stream_is_too_long); +#else + throw new IOException(CodeAnalysisResources.StreamIsTooLong); +#endif + } + + internal static bool TryGetMaxCharCount(this Encoding encoding, long length, out int maxCharCount) + { + maxCharCount = 0; + if (length <= int.MaxValue) { try { - return encoding.GetMaxCharCount((int)length); + maxCharCount = encoding.GetMaxCharCount((int)length); + return true; } catch (ArgumentOutOfRangeException) { @@ -33,11 +51,7 @@ internal static int GetMaxCharCountOrThrowIfHuge(this Encoding encoding, Stream } } -#if WORKSPACE_DESKTOP - throw new IOException(WorkspacesResources.Stream_is_too_long); -#else - throw new IOException(CodeAnalysisResources.StreamIsTooLong); -#endif + return false; } } } diff --git a/src/Compilers/Core/Portable/PublicAPI.Unshipped.txt b/src/Compilers/Core/Portable/PublicAPI.Unshipped.txt index 25056ff072876..46be8ace2abdb 100644 --- a/src/Compilers/Core/Portable/PublicAPI.Unshipped.txt +++ b/src/Compilers/Core/Portable/PublicAPI.Unshipped.txt @@ -813,6 +813,7 @@ static Microsoft.CodeAnalysis.SeparatedSyntaxList.implicit operator Micro static Microsoft.CodeAnalysis.SeparatedSyntaxList.implicit operator Microsoft.CodeAnalysis.SeparatedSyntaxList(Microsoft.CodeAnalysis.SeparatedSyntaxList nodes) -> Microsoft.CodeAnalysis.SeparatedSyntaxList static Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.Stream stream, System.Text.Encoding encoding = null, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm = Microsoft.CodeAnalysis.Text.SourceHashAlgorithm.Sha1, bool throwIfBinaryDetected = false, bool canBeEmbedded = false) -> Microsoft.CodeAnalysis.Text.SourceText static Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.Stream stream, System.Text.Encoding encoding, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected) -> Microsoft.CodeAnalysis.Text.SourceText +static Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.TextReader reader, int length, System.Text.Encoding encoding = null, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm = Microsoft.CodeAnalysis.Text.SourceHashAlgorithm.Sha1) -> Microsoft.CodeAnalysis.Text.SourceText static Microsoft.CodeAnalysis.Text.SourceText.From(byte[] buffer, int length, System.Text.Encoding encoding = null, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm = Microsoft.CodeAnalysis.Text.SourceHashAlgorithm.Sha1, bool throwIfBinaryDetected = false, bool canBeEmbedded = false) -> Microsoft.CodeAnalysis.Text.SourceText static Microsoft.CodeAnalysis.Text.SourceText.From(byte[] buffer, int length, System.Text.Encoding encoding, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected) -> Microsoft.CodeAnalysis.Text.SourceText virtual Microsoft.CodeAnalysis.Diagnostics.AnalysisContext.RegisterOperationAction(System.Action action, System.Collections.Immutable.ImmutableArray operationKinds) -> void diff --git a/src/Compilers/Core/Portable/Text/LargeText.cs b/src/Compilers/Core/Portable/Text/LargeText.cs index 11941bc67f3fc..7d910b73eb67b 100644 --- a/src/Compilers/Core/Portable/Text/LargeText.cs +++ b/src/Compilers/Core/Portable/Text/LargeText.cs @@ -32,6 +32,7 @@ internal LargeText(ImmutableArray chunks, Encoding encoding, ImmutableAr _chunks = chunks; _encoding = encoding; _chunkStartOffsets = new int[chunks.Length]; + int offset = 0; for (int i = 0; i < chunks.Length; i++) { @@ -42,6 +43,11 @@ internal LargeText(ImmutableArray chunks, Encoding encoding, ImmutableAr _length = offset; } + internal LargeText(ImmutableArray chunks, Encoding encoding, SourceHashAlgorithm checksumAlgorithm) + : this(chunks, encoding, default(ImmutableArray), checksumAlgorithm, default(ImmutableArray)) + { + } + internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded) { stream.Seek(0, SeekOrigin.Begin); @@ -58,48 +64,69 @@ internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAl using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true)) { - ArrayBuilder chunks = ArrayBuilder.GetInstance(1 + maxCharRemainingGuess / ChunkSize); - while (!reader.EndOfStream) - { - var nextChunkSize = ChunkSize; - if (maxCharRemainingGuess < ChunkSize) - { - // maxCharRemainingGuess typically overestimates a little - // so we will first fill a slightly smaller (maxCharRemainingGuess - 64) chunk - // and then use 64 char tail, which is likley to be resized. - nextChunkSize = Math.Max(maxCharRemainingGuess - 64, 64); - } + var chunks = ReadFromTextReader(reader, maxCharRemainingGuess, throwIfBinaryDetected); - char[] chunk = new char[nextChunkSize]; + // We must compute the checksum and embedded text blob now while we still have the original bytes in hand. + // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip. + var checksum = CalculateChecksum(stream, checksumAlgorithm); + var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray); + return new LargeText(chunks, reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob); + } + } - int charsRead = reader.ReadBlock(chunk, 0, chunk.Length); - if (charsRead == 0) - { - break; - } + internal static SourceText Decode(TextReader reader, int length, Encoding encoding, SourceHashAlgorithm checksumAlgorithm) + { + if (length == 0) + { + return SourceText.From(string.Empty, encoding, checksumAlgorithm); + } - maxCharRemainingGuess -= charsRead; + // throwIfBinaryDetected == false since we are given text reader from the beginning + var chunks = ReadFromTextReader(reader, length, throwIfBinaryDetected: false); - if (charsRead < chunk.Length) - { - Array.Resize(ref chunk, charsRead); - } + return new LargeText(chunks, encoding, checksumAlgorithm); + } - // Check for binary files - if (throwIfBinaryDetected && IsBinary(chunk)) - { - throw new InvalidDataException(); - } + private static ImmutableArray ReadFromTextReader(TextReader reader, int maxCharRemainingGuess, bool throwIfBinaryDetected) + { + var chunks = ArrayBuilder.GetInstance(1 + maxCharRemainingGuess / ChunkSize); - chunks.Add(chunk); + while (reader.Peek() != -1) + { + var nextChunkSize = ChunkSize; + if (maxCharRemainingGuess < ChunkSize) + { + // maxCharRemainingGuess typically overestimates a little + // so we will first fill a slightly smaller (maxCharRemainingGuess - 64) chunk + // and then use 64 char tail, which is likley to be resized. + nextChunkSize = Math.Max(maxCharRemainingGuess - 64, 64); } - // We must compute the checksum and embedded text blob now while we still have the original bytes in hand. - // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip. - var checksum = CalculateChecksum(stream, checksumAlgorithm); - var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray); - return new LargeText(chunks.ToImmutableAndFree(), reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob); + char[] chunk = new char[nextChunkSize]; + + int charsRead = reader.ReadBlock(chunk, 0, chunk.Length); + if (charsRead == 0) + { + break; + } + + maxCharRemainingGuess -= charsRead; + + if (charsRead < chunk.Length) + { + Array.Resize(ref chunk, charsRead); + } + + // Check for binary files + if (throwIfBinaryDetected && IsBinary(chunk)) + { + throw new InvalidDataException(); + } + + chunks.Add(chunk); } + + return chunks.ToImmutableAndFree(); } /// @@ -272,7 +299,7 @@ private int[] ParseLineStarts() case '\u0085': case '\u2028': case '\u2029': - line_break: + line_break: arrayBuilder.Add(position); position = index; break; diff --git a/src/Compilers/Core/Portable/Text/SourceText.cs b/src/Compilers/Core/Portable/Text/SourceText.cs index 1ddbdfe67b926..047c3a835e3e5 100644 --- a/src/Compilers/Core/Portable/Text/SourceText.cs +++ b/src/Compilers/Core/Portable/Text/SourceText.cs @@ -29,7 +29,7 @@ public abstract class SourceText private TextLineCollection _lazyLineInfo; private ImmutableArray _lazyChecksum; private ImmutableArray _precomputedEmbeddedTextBlob; - + private static readonly Encoding s_utf8EncodingWithNoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false); protected SourceText(ImmutableArray checksum = default(ImmutableArray), SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, SourceTextContainer container = null) @@ -97,6 +97,47 @@ public static SourceText From(string text, Encoding encoding = null, SourceHashA return new StringText(text, encoding, checksumAlgorithm: checksumAlgorithm); } + /// + /// Constructs a from text in a string. + /// + /// TextReader + /// length of content from + /// + /// Encoding of the file that the was read from or is going to be saved to. + /// null if the encoding is unspecified. + /// If the encoding is not specified the resulting isn't debuggable. + /// If an encoding-less is written to a file a shall be used as a default. + /// + /// + /// Hash algorithm to use to calculate checksum of the text that's saved to PDB. + /// + /// is null. + /// is not supported. + public static SourceText From( + TextReader reader, + int length, + Encoding encoding = null, + SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1) + { + if (reader == null) + { + throw new ArgumentNullException(nameof(reader)); + } + + ValidateChecksumAlgorithm(checksumAlgorithm); + + encoding = encoding ?? s_utf8EncodingWithNoBOM; + + // If the resulting string would end up on the large object heap, then use LargeEncodedText. + if (length >= LargeObjectHeapLimitInChars) + { + return LargeText.Decode(reader, length, encoding, checksumAlgorithm); + } + + string text = reader.ReadToEnd(); + return From(text, encoding, checksumAlgorithm); + } + // 1.0 BACKCOMPAT OVERLOAD - DO NOT TOUCH [EditorBrowsable(EditorBrowsableState.Never)] public static SourceText From(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected) @@ -192,9 +233,9 @@ public static SourceText From(byte[] buffer, int length, Encoding encoding, Sour /// If the given encoding is set to use a throwing decoder as a fallback /// Two consecutive NUL characters were detected in the decoded text and was true. public static SourceText From( - byte[] buffer, - int length, - Encoding encoding = null, + byte[] buffer, + int length, + Encoding encoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, bool throwIfBinaryDetected = false, bool canBeEmbedded = false) diff --git a/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs b/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs index 641ba9d20ad3a..aba8ff314efd6 100644 --- a/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs +++ b/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs @@ -170,12 +170,13 @@ private unsafe TextReader CreateTextReaderFromTemporaryStorage(ISupportDirectMem return new DirectMemoryAccessStreamReader(src + 1, streamLength / sizeof(char) - 1); } - private unsafe class DirectMemoryAccessStreamReader : TextReader + private unsafe class DirectMemoryAccessStreamReader : TemporaryStorageTextReader { private char* _position; private readonly char* _end; - public DirectMemoryAccessStreamReader(char* src, int length) + public DirectMemoryAccessStreamReader(char* src, int length) : + base(length) { Debug.Assert(src != null); Debug.Assert(length >= 0); @@ -184,6 +185,16 @@ public DirectMemoryAccessStreamReader(char* src, int length) _end = _position + length; } + public override int Peek() + { + if (_position >= _end) + { + return -1; + } + + return *_position; + } + public override int Read() { if (_position >= _end) @@ -337,6 +348,16 @@ private async Task WriteStreamMaybeAsync(Stream stream, bool useAsync, Cancellat } } } + + internal class TemporaryStorageTextReader : TextReader + { + public TemporaryStorageTextReader(int length) + { + Length = length; + } + + public int Length { get; } + } } } diff --git a/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs b/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs index ad81a858cd7e7..9a35189227b26 100644 --- a/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs +++ b/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs @@ -21,6 +21,13 @@ internal class DesktopTextFactoryService : ITextFactoryService public SourceText CreateText(TextReader reader, Encoding encoding, CancellationToken cancellationToken = default(CancellationToken)) { cancellationToken.ThrowIfCancellationRequested(); + + var temporaryStorageReader = reader as TemporaryStorageServiceFactory.TemporaryStorageTextReader; + if (temporaryStorageReader != null) + { + return SourceText.From(temporaryStorageReader, temporaryStorageReader.Length, encoding); + } + return SourceText.From(reader.ReadToEnd(), encoding); } }