Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the impl of SyntaxNode.GetText more efficient #73512

Merged
merged 4 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/Compilers/Core/Portable/Syntax/SyntaxNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -325,9 +325,10 @@ public virtual void WriteTo(TextWriter writer)
/// <exception cref="ArgumentException"><paramref name="checksumAlgorithm"/> is not supported.</exception>
public SourceText GetText(Encoding? encoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
var builder = new StringBuilder(this.Green.FullWidth);
this.WriteTo(new StringWriter(builder));
return new StringBuilderText(builder, encoding, checksumAlgorithm);
var fullWidth = this.Green.FullWidth;
var writer = SourceTextWriter.Create(encoding, checksumAlgorithm, fullWidth);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this helper gets an appropriate writer depending on the length of the final source text. For <85k (the loh limit) you get a simple string writer. For >85k you get a chunking writer.

this.WriteTo(writer);
return writer.ToSourceText();
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ public override SourceText GetText(CancellationToken cancellationToken)
{
if (_lazyText == null)
{
var sourceText = SourceTextExtensions.CreateSourceText(GetRoot(cancellationToken), Encoding, _checksumAlgorithm, cancellationToken);
Interlocked.CompareExchange(ref _lazyText, sourceText, null);
Interlocked.CompareExchange(ref _lazyText, GetRoot(cancellationToken).GetText(Encoding, _checksumAlgorithm), null);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can just delegate to compiler, now that it is not inefficient.

}

return _lazyText;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@

using System;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using Microsoft.CodeAnalysis.Host;
Expand Down Expand Up @@ -238,87 +236,6 @@ public static SourceText ReadFrom(ITextFactoryService textService, ObjectReader
return textService.CreateText(textReader, encoding, checksumAlgorithm, cancellationToken);
}

public static SourceText CreateSourceText(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IDE impl that can be entirely removed now that the compiler side is just smarter.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also means we avoid the IDE making an entire copy of the node into our chunks, which then the compiler makes a copy of when making its LargeText instance itself.

SyntaxNode node, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm, CancellationToken cancellationToken)
{
// If this node is small enough to not go into the LOH, we can just fast path directly to creating a SourceText from it.
var totalLength = node.FullWidth();
if (totalLength <= SourceTextLengthThreshold)
return SourceText.From(node.ToFullString(), encoding, checksumAlgorithm);

// Allocate the space to write the node into. Explicitly chunked so that nothing goes into the LOH.
var chunks = CreateChunks(totalLength);

// Write the node into that temp space.
using var chunkWriter = new CharArrayChunkTextWriter(totalLength, chunks, encoding!, cancellationToken);
node.WriteTo(chunkWriter);
Contract.ThrowIfTrue(totalLength != chunkWriter.Position);

// Call into the text service to make us a SourceText from the chunks. Disposal of this reader will free all
// the intermediary chunks we allocated.

Contract.ThrowIfTrue(chunks.Any(static (c, s) => c.Length != s, CharArrayLength));

using var chunkReader = new CharArrayChunkTextReader(chunks, totalLength);
var result = SourceText.From(chunkReader, totalLength, encoding, checksumAlgorithm);
Contract.ThrowIfTrue(totalLength != chunkReader.Position);

return result;

static ImmutableArray<char[]> CreateChunks(int totalLength)
{
var numberOfChunks = 1 + (totalLength / CharArrayLength);
var buffer = new FixedSizeArrayBuilder<char[]>(numberOfChunks);
for (var i = 0; i < numberOfChunks; i++)
buffer.Add(s_charArrayPool.Allocate());

return buffer.MoveToImmutable();
}
}

private static int GetIndexFromPosition(int position) => position / CharArrayLength;
private static int GetColumnFromPosition(int position) => position % CharArrayLength;

private sealed class CharArrayChunkTextWriter(
int totalLength, ImmutableArray<char[]> chunks, Encoding encoding, CancellationToken cancellationToken) : TextWriter
{
private readonly int _totalLength = totalLength;
private readonly ImmutableArray<char[]> _chunks = chunks;
private readonly CancellationToken _cancellationToken = cancellationToken;

/// <summary>
/// Public so that caller can assert that writing out the text actually wrote out the full text of the node.
/// </summary>
public int Position { get; private set; }

public override Encoding Encoding { get; } = encoding;

public override void Write(string? value)
{
Contract.ThrowIfNull(value);

var valueSpan = value.AsSpan();
while (valueSpan.Length > 0)
{
_cancellationToken.ThrowIfCancellationRequested();

var chunk = _chunks[GetIndexFromPosition(Position)];
Contract.ThrowIfTrue(chunk.Length != CharArrayLength);

var chunkIndex = GetColumnFromPosition(Position);
Contract.ThrowIfTrue(chunkIndex >= CharArrayLength);

var count = Math.Min(valueSpan.Length, CharArrayLength - chunkIndex);
valueSpan[..count].CopyTo(chunk.AsSpan().Slice(chunkIndex, count));

Position += count;
valueSpan = valueSpan[count..];
}

Contract.ThrowIfTrue(Position > _totalLength);
}
}

private sealed class CharArrayChunkTextReader(ImmutableArray<char[]> chunks, int length) : TextReaderWithLength(length)
{
private readonly ImmutableArray<char[]> _chunks = chunks;
Expand All @@ -329,6 +246,9 @@ private sealed class CharArrayChunkTextReader(ImmutableArray<char[]> chunks, int
/// </summary>
public int Position { get; private set; }

private static int GetIndexFromPosition(int position) => position / CharArrayLength;
private static int GetColumnFromPosition(int position) => position % CharArrayLength;

public static TextReader CreateFromObjectReader(ObjectReader reader)
{
var length = reader.ReadInt32();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ Namespace Microsoft.CodeAnalysis.VisualBasic

Public Overrides Function GetText(Optional cancellationToken As CancellationToken = Nothing) As SourceText
If _lazyText Is Nothing Then
Dim text = SourceTextExtensions.CreateSourceText(GetRoot(cancellationToken), Encoding, _checksumAlgorithm, cancellationToken)
Interlocked.CompareExchange(_lazyText, text, Nothing)
Interlocked.CompareExchange(_lazyText, GetRoot(cancellationToken).GetText(Encoding, _checksumAlgorithm), Nothing)
End If

Return _lazyText
Expand Down
Loading