Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.
/ corefx Public archive

Adding Buffer Pooling to System.IO #5742

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/System.IO/src/System.IO.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
<AssemblyName>System.IO</AssemblyName>
<ProjectGuid>{07390899-C8F6-4e83-A3A9-6867B8CB46A0}</ProjectGuid>
<AssemblyVersion>4.1.0.0</AssemblyVersion>
<IsPartialFacadeAssembly>true</IsPartialFacadeAssembly>
<IsPartialFacadeAssembly Condition="'$(TargetGroup)' != 'netcore50aot'">true</IsPartialFacadeAssembly>
<PackageTargetFramework Condition="'$(PackageTargetFramework)' == ''">dotnet5.4</PackageTargetFramework>
<!-- We don't want System.Buffer's transitive dependencies since those are
contract based, instead we'll provide the facades via project references
to ensure we all agree on core assembly -->
<OmitTransitiveCompileReferences Condition="'$(TargetGroup)' == ''">true</OmitTransitiveCompileReferences>
</PropertyGroup>

<!-- Default configurations to help VS understand the options -->
Expand Down Expand Up @@ -34,6 +38,7 @@
<Name>System.Diagnostics.Debug</Name>
<OSGroup>Windows_NT</OSGroup>
</ProjectReference>
<ProjectReference Include="..\..\System.Runtime\src\System.Runtime.csproj"/>
</ItemGroup>

<ItemGroup Condition="'$(TargetGroup)' != 'net46'">
Expand Down
32 changes: 23 additions & 9 deletions src/System.IO/src/System/IO/BinaryReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Text;
using System.Buffers;
using System.Diagnostics;
using System.Text;

namespace System.IO
{
Expand Down Expand Up @@ -56,7 +57,7 @@ public BinaryReader(Stream input, Encoding encoding, bool leaveOpen)
minBufferSize = 16;
}

_buffer = new byte[minBufferSize];
_buffer = ArrayPool<byte>.Shared.Rent(minBufferSize);
// _charBuffer and _charBytes will be left null.

// For Encodings that always use 2 bytes per char (or more),
Expand Down Expand Up @@ -89,12 +90,25 @@ protected virtual void Dispose(bool disposing)
copyOfStream.Dispose();
}
}

_stream = null;
_buffer = null;
_decoder = null;
_charBytes = null;
_singleChar = null;
_charBuffer = null;
if (_buffer != null)
{
ArrayPool<byte>.Shared.Return(_buffer);
_buffer = null;
}
if (_charBytes != null)
{
ArrayPool<byte>.Shared.Return(_charBytes);
_charBytes = null;
}
if (_charBuffer != null)
{
ArrayPool<char>.Shared.Return(_charBuffer);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to return the buffer before calling dispose on the stream? This would ensure the buffers are returned even if stream.Dispose throws.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this end up returning these buffers twice if it's Dispose'd twice?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the code should guard against double return either by checking _isDisposed, or setting _buffer to null after the first clear. Also, this should be thread-safe.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

_charBuffer = null;
}
}

public void Dispose()
Expand Down Expand Up @@ -294,12 +308,12 @@ public virtual string ReadString()

if (_charBytes == null)
{
_charBytes = new byte[MaxCharBytesSize];
_charBytes = ArrayPool<byte>.Shared.Rent(MaxCharBytesSize);
}

if (_charBuffer == null)
{
_charBuffer = new char[_maxCharsSize];
_charBuffer = ArrayPool<char>.Shared.Rent(_maxCharsSize);
}

StringBuilder sb = null;
Expand Down Expand Up @@ -370,7 +384,7 @@ private int InternalReadChars(char[] buffer, int index, int count)

if (_charBytes == null)
{
_charBytes = new byte[MaxCharBytesSize];
_charBytes = ArrayPool<byte>.Shared.Rent(MaxCharBytesSize);
}

while (charsRemaining > 0)
Expand Down Expand Up @@ -451,7 +465,7 @@ private int InternalReadOneChar()

if (_charBytes == null)
{
_charBytes = new byte[MaxCharBytesSize]; //REVIEW: We need atmost 2 bytes/char here?
_charBytes = ArrayPool<byte>.Shared.Rent(MaxCharBytesSize);
}
if (_singleChar == null)
{
Expand Down
21 changes: 17 additions & 4 deletions src/System.IO/src/System/IO/BinaryWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Text;
using System.Buffers;
using System.Diagnostics;
using System.Text;

namespace System.IO
{
Expand Down Expand Up @@ -33,7 +34,7 @@ public class BinaryWriter : IDisposable
protected BinaryWriter()
{
OutStream = Stream.Null;
_buffer = new byte[16];
_buffer = ArrayPool<byte>.Shared.Rent(16);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it still make sense to rent in the ctor? Why isn't it lazy?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There would be a lot of duplicate code since lots of different functions use _buffer, so I thought it made it simpler to initialize it in the ctor, especially since it is a trivially small array.

_encoding = new UTF8Encoding(false, true);
_encoder = _encoding.GetEncoder();
}
Expand Down Expand Up @@ -62,7 +63,7 @@ public BinaryWriter(Stream output, Encoding encoding, bool leaveOpen)
}

OutStream = output;
_buffer = new byte[16];
_buffer = ArrayPool<byte>.Shared.Rent(16);
_encoding = encoding;
_encoder = _encoding.GetEncoder();
_leaveOpen = leaveOpen;
Expand All @@ -81,6 +82,18 @@ protected virtual void Dispose(bool disposing)
OutStream.Dispose();
}
}

if (_buffer != null)
{
ArrayPool<byte>.Shared.Return(_buffer);
_buffer = null;
}

if (_largeByteBuffer != null)
{
ArrayPool<byte>.Shared.Return(_largeByteBuffer);
_largeByteBuffer = null;
}
}

public void Dispose()
Expand Down Expand Up @@ -371,7 +384,7 @@ public unsafe virtual void Write(string value)

if (_largeByteBuffer == null)
{
_largeByteBuffer = new byte[LargeByteBufferSize];
_largeByteBuffer = ArrayPool<byte>.Shared.Rent(LargeByteBufferSize);
_maxChars = LargeByteBufferSize / _encoding.GetMaxByteCount(1);
}

Expand Down
16 changes: 11 additions & 5 deletions src/System.IO/src/System/IO/BufferedStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Runtime.InteropServices;
using System.Buffers;
using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using System.Diagnostics.Contracts;

namespace System.IO
{
Expand Down Expand Up @@ -133,8 +134,9 @@ private void EnsureShadowBufferAllocated()
if (_buffer.Length != _bufferSize || _bufferSize >= MaxShadowBufferSize)
return;

byte[] shadowBuffer = new byte[Math.Min(_bufferSize + _bufferSize, MaxShadowBufferSize)];
byte[] shadowBuffer = ArrayPool<byte>.Shared.Rent(Math.Min(_bufferSize + _bufferSize, MaxShadowBufferSize));
Array.Copy(_buffer, 0, shadowBuffer, 0, _writePos);
ArrayPool<byte>.Shared.Return(_buffer);
_buffer = shadowBuffer;
}

Expand All @@ -144,7 +146,7 @@ private void EnsureBufferAllocated()

// BufferedStream is not intended for multi-threaded use, so no worries about the get/set race on _buffer.
if (_buffer == null)
_buffer = new byte[_bufferSize];
_buffer = ArrayPool<byte>.Shared.Rent(_bufferSize);
}

public override bool CanRead
Expand Down Expand Up @@ -233,7 +235,11 @@ protected override void Dispose(bool disposing)
finally
{
_stream = null;
_buffer = null;
if (_buffer != null)
{
ArrayPool<byte>.Shared.Return(_buffer);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noted this offline, but I think that:
a) We should really change the default for clearArray to be true rather than false, so that by default code gets the more secure behavior and opts-in to trading that off for better perf, rather than the other way around.
b) And regardless of (a), we should always be clearing when returning buffers used in our libraries and when those buffers were storing any user-provided data.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't users hold onto buffers in the pool anyways (after they've been "returned"), and read from them regardless? Or do we protect from that in the implementation somehow?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this isn't about complete security; after all, this all also in the same process, in the same app domain, code could easily read from whatever memory it wanted to, etc. This is more about minimizing unexpected consequences.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand what you're saying, but won't people need to expect to get already-used buffers when using the .Shared pool? It seems like it's part of the contract that rented buffers might not be cleared.

What unexpected consequences are you referring to?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify, I think we should probably just clear the buffers here to take the conservative route, but I'm also just trying to figure out how we are thinking about this buffer pooling stuff going forward, and what our best practices are going to be, what we should be looking out for, etc.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What unexpected consequences are you referring to?

e.g. you load a .zip file using System.IO.Compression, and it contains some passwords. Those passwords are now in the buffers. Those buffers get returned to the pool, no one else uses the buffers from the pool, and they remain there for the remainder of the lifetime of the process. Previously they would have been much more likely to be collected and overwritten on the heap, but now they remain fresh.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll open an issue for this

_buffer = null;
}

// Call base.Dispose(bool) to cleanup async IO resources
base.Dispose(disposing);
Expand Down
31 changes: 23 additions & 8 deletions src/System.IO/src/System/IO/Stream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Buffers;
using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.Threading;
Expand Down Expand Up @@ -139,11 +140,18 @@ private async Task CopyToAsyncInternal(Stream destination, int bufferSize, Cance
Debug.Assert(CanRead);
Debug.Assert(destination.CanWrite);

byte[] buffer = new byte[bufferSize];
int bytesRead;
while ((bytesRead = await ReadAsync(buffer, 0, buffer.Length, cancellationToken).ConfigureAwait(false)) != 0)
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize);
try
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: the bytesRead local can be declared inside the scope of the try

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

{
await destination.WriteAsync(buffer, 0, bytesRead, cancellationToken).ConfigureAwait(false);
int bytesRead;
while ((bytesRead = await ReadAsync(buffer, 0, buffer.Length, cancellationToken).ConfigureAwait(false)) != 0)
{
await destination.WriteAsync(buffer, 0, bytesRead, cancellationToken).ConfigureAwait(false);
}
}
finally
{
ArrayPool<byte>.Shared.Return(buffer);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that many of the files being edited in this change are used for CoreRT but not for CoreCLR, e.g. when compiled for coreclr, System.IO.dll type forwards Stream to the one in mscorlib. That means that these optimizations will accrue to CoreRT but not to CoreCLR.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's unfortunate; is there a way around this since I believe mscorlib can't take a dependency like System.Buffers?

}
}

Expand Down Expand Up @@ -213,11 +221,18 @@ private void InternalCopyTo(Stream destination, int bufferSize)
Debug.Assert(destination.CanWrite);
Debug.Assert(bufferSize > 0);

byte[] buffer = new byte[bufferSize];
int read;
while ((read = Read(buffer, 0, buffer.Length)) != 0)
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize);
try
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: read can be moved to inside the try

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

{
int read;
while ((read = Read(buffer, 0, buffer.Length)) != 0)
{
destination.Write(buffer, 0, read);
}
}
finally
{
destination.Write(buffer, 0, read);
ArrayPool<byte>.Shared.Return(buffer);
}
}

Expand Down
4 changes: 3 additions & 1 deletion src/System.IO/src/project.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
"frameworks": {
"dnxcore50": {
"dependencies": {
"Microsoft.TargetingPack.Private.CoreCLR": "1.0.0-rc3-23808"
"Microsoft.TargetingPack.Private.CoreCLR": "1.0.0-rc3-23808",
"System.Buffers" : "4.0.0-rc3-23808"
}
},
"netcore50": {
"dependencies": {
"System.Buffers" : "4.0.0-rc3-23808",
"System.Diagnostics.Contracts": "4.0.0",
"System.Diagnostics.Debug": "4.0.10",
"System.Diagnostics.Tools": "4.0.0",
Expand Down