From 77fc88ad405b4d7db3d0c4cdcf6694678c9883c1 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Thu, 23 Jan 2025 00:11:36 +0000 Subject: [PATCH] Remove BinaryReader and BinaryWriter references from ZipArchive (#103153) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Removed BinaryReader, BinaryWriter from ZipArchive and ZipBlocks We now read the data into a (sometimes stack-allocated) byte array and slice it up with BinaryPrimitives. This reduces the number of reads and writes required to read and write a ZipArchive. It also makes future work to enable async APIs easier, since BinaryReader and BinaryWriter lack this support. Also changed approach to reading central file directory headers. Rather than performing X reads per header, we read 4KB of data at a time and look for all applicable headers in that data. This should improve performance when dealing with many small files. * Removed BinaryReader from ZipArchiveEntry This allowed the removal of the ArchiveReader property from ZipArchive. * Removed BinaryWriter from ZipArchiveEntry * Re-adding missing assertion * Reduced memory usage with array pooling Now pooling the file IO buffers and the temporary buffers for extra fields of the CD file header (which would otherwise be allocated and deallocated in a loop.) * Corrected duplicate comment * Added test for changed central directory read method This handles 64 entries with 19-character filenames (and thus, 65-byte file headers.) As a result, it straddles two 4KB read buffers. Also corrected the seek logic while reading the central directory header * Test changes: formatting, converted Theory to Fact * Resolving test failures The buffer returned from the ArrayPool contained older data (including a ZIP header.) When reading the last chunk of the file (i.e. a chunk which was less than BackwardsSeekingBufferSize) the buffer's Span wasn't resized to account for this.
SeekBackwardsToSignature would thus find the older data, try to seek beyond the end of the stream and fail to read the file. * Responded to code review Reads and writes are now performed using a new set of field lengths and locations. * Code review response Formatting change; added one comment to Zip64EndOfCentralDirectoryLocator.SignatureConstantBytes; clarified comment on ZipHelper.SeekBackwardsAndRead * Additional code review comments Added comment to SeekBackwardsToStream. Lingering references to SignatureConstantBytes.Length. Added two asserts to CanReadLargeCentralDirectoryHeader test, verifying that the archive entry metadata is in a sensible state. --------- Co-authored-by: Carlos Sánchez López <1175054+carlossanlop@users.noreply.github.com> --- .../src/System.IO.Compression.csproj | 2 + .../src/System/IO/Compression/ZipArchive.cs | 104 ++- .../System/IO/Compression/ZipArchiveEntry.cs | 195 +++-- .../IO/Compression/ZipBlocks.FieldLengths.cs | 131 ++++ .../Compression/ZipBlocks.FieldLocations.cs | 137 ++++ .../src/System/IO/Compression/ZipBlocks.cs | 713 ++++++++++++------ .../src/System/IO/Compression/ZipHelper.cs | 116 +-- .../tests/ZipArchive/zip_ReadTests.cs | 34 + 8 files changed, 1031 insertions(+), 401 deletions(-) create mode 100644 src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.FieldLengths.cs create mode 100644 src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.FieldLocations.cs diff --git a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj index 9292ec1f46d838..a744c58082b4c2 100644 --- a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj +++ b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj @@ -18,6 +18,8 @@ + + diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs 
b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 1fc5ce279f76cf..f092759abcaa72 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -8,6 +8,7 @@ using System.Collections.ObjectModel; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Text; namespace System.IO.Compression @@ -16,16 +17,15 @@ public class ZipArchive : IDisposable { private readonly Stream _archiveStream; private ZipArchiveEntry? _archiveStreamOwner; - private readonly BinaryReader? _archiveReader; private readonly ZipArchiveMode _mode; private readonly List _entries; private readonly ReadOnlyCollection _entriesCollection; private readonly Dictionary _entriesDictionary; private bool _readEntries; private readonly bool _leaveOpen; - private long _centralDirectoryStart; //only valid after ReadCentralDirectory + private long _centralDirectoryStart; // only valid after ReadCentralDirectory private bool _isDisposed; - private uint _numberOfThisDisk; //only valid after ReadCentralDirectory + private uint _numberOfThisDisk; // only valid after ReadCentralDirectory private long _expectedNumberOfEntries; private readonly Stream? _backingStream; private byte[] _archiveComment; @@ -161,10 +161,6 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? else _archiveStream = stream; _archiveStreamOwner = null; - if (mode == ZipArchiveMode.Create) - _archiveReader = null; - else - _archiveReader = new BinaryReader(_archiveStream, Encoding.UTF8, leaveOpen: true); _entries = new List(); _entriesCollection = new ReadOnlyCollection(_entries); _entriesDictionary = new Dictionary(); @@ -351,8 +347,6 @@ public void Dispose() return result; } - internal BinaryReader? 
ArchiveReader => _archiveReader; - internal Stream ArchiveStream => _archiveStream; internal uint NumberOfThisDisk => _numberOfThisDisk; @@ -460,7 +454,6 @@ private void CloseStreams() { _archiveStream.Dispose(); _backingStream?.Dispose(); - _archiveReader?.Dispose(); } else { @@ -483,6 +476,11 @@ private void EnsureCentralDirectoryRead() private void ReadCentralDirectory() { + const int ReadBufferSize = 4096; + + byte[] fileBuffer = System.Buffers.ArrayPool.Shared.Rent(ReadBufferSize); + Span fileBufferSpan = fileBuffer.AsSpan(0, ReadBufferSize); + try { // assume ReadEndOfCentralDirectory has been called and has populated _centralDirectoryStart @@ -490,25 +488,78 @@ private void ReadCentralDirectory() _archiveStream.Seek(_centralDirectoryStart, SeekOrigin.Begin); long numberOfEntries = 0; - - Debug.Assert(_archiveReader != null); - //read the central directory - ZipCentralDirectoryFileHeader currentHeader; bool saveExtraFieldsAndComments = Mode == ZipArchiveMode.Update; - while (ZipCentralDirectoryFileHeader.TryReadBlock(_archiveReader, - saveExtraFieldsAndComments, out currentHeader)) + + bool continueReadingCentralDirectory = true; + // total bytes read from central directory + int bytesRead = 0; + // current position in the current buffer + int currPosition = 0; + // total bytes read from all file headers starting in the current buffer + int bytesConsumed = 0; + + _entries.Clear(); + _entriesDictionary.Clear(); + + // read the central directory + while (continueReadingCentralDirectory) { - AddEntry(new ZipArchiveEntry(this, currentHeader)); - numberOfEntries++; + int currBytesRead = _archiveStream.Read(fileBufferSpan); + ReadOnlySpan sizedFileBuffer = fileBufferSpan.Slice(0, currBytesRead); + + // the buffer read must always be large enough to fit the constant section size of at least one header + continueReadingCentralDirectory = continueReadingCentralDirectory + && sizedFileBuffer.Length >= ZipCentralDirectoryFileHeader.BlockConstantSectionSize; + + while 
(continueReadingCentralDirectory + && currPosition + ZipCentralDirectoryFileHeader.BlockConstantSectionSize < sizedFileBuffer.Length) + { + ZipCentralDirectoryFileHeader currentHeader = default; + + continueReadingCentralDirectory = continueReadingCentralDirectory && + ZipCentralDirectoryFileHeader.TryReadBlock(sizedFileBuffer.Slice(currPosition), _archiveStream, + saveExtraFieldsAndComments, out bytesConsumed, out currentHeader); + + if (!continueReadingCentralDirectory) + { + break; + } + + AddEntry(new ZipArchiveEntry(this, currentHeader)); + numberOfEntries++; + if (numberOfEntries > _expectedNumberOfEntries) + { + throw new InvalidDataException(SR.NumEntriesWrong); + } + + currPosition += bytesConsumed; + bytesRead += bytesConsumed; + } + + // We've run out of possible space in the entry - seek backwards by the number of bytes remaining in + // this buffer (so that the next buffer overlaps with this one) and retry. + if (currPosition < sizedFileBuffer.Length) + { + _archiveStream.Seek(-(sizedFileBuffer.Length - currPosition), SeekOrigin.Current); + } + currPosition = 0; } if (numberOfEntries != _expectedNumberOfEntries) + { throw new InvalidDataException(SR.NumEntriesWrong); + } + + _archiveStream.Seek(_centralDirectoryStart + bytesRead, SeekOrigin.Begin); } catch (EndOfStreamException ex) { throw new InvalidDataException(SR.Format(SR.CentralDirectoryInvalid, ex)); } + finally + { + System.Buffers.ArrayPool.Shared.Return(fileBuffer); + } } // This function reads all the EOCD stuff it needs to find the offset to the start of the central directory @@ -526,16 +577,15 @@ private void ReadEndOfCentralDirectory() // If the EOCD has the minimum possible size (no zip file comment), then exactly the previous 4 bytes will contain the signature // But if the EOCD has max possible size, the signature should be found somewhere in the previous 64K + 4 bytes if (!ZipHelper.SeekBackwardsToSignature(_archiveStream, - ZipEndOfCentralDirectoryBlock.SignatureConstant, - 
ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength + ZipEndOfCentralDirectoryBlock.SignatureSize)) + ZipEndOfCentralDirectoryBlock.SignatureConstantBytes, + ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength + ZipEndOfCentralDirectoryBlock.FieldLengths.Signature)) throw new InvalidDataException(SR.EOCDNotFound); long eocdStart = _archiveStream.Position; - Debug.Assert(_archiveReader != null); // read the EOCD ZipEndOfCentralDirectoryBlock eocd; - bool eocdProper = ZipEndOfCentralDirectoryBlock.TryReadBlock(_archiveReader, out eocd); + bool eocdProper = ZipEndOfCentralDirectoryBlock.TryReadBlock(_archiveStream, out eocd); Debug.Assert(eocdProper); // we just found this using the signature finder, so it should be okay if (eocd.NumberOfThisDisk != eocd.NumberOfTheDiskWithTheStartOfTheCentralDirectory) @@ -587,14 +637,12 @@ private void TryReadZip64EndOfCentralDirectory(ZipEndOfCentralDirectoryBlock eoc // Exactly the previous 4 bytes should contain the Zip64-EOCDL signature // if we don't find it, assume it doesn't exist and use data from normal EOCD if (ZipHelper.SeekBackwardsToSignature(_archiveStream, - Zip64EndOfCentralDirectoryLocator.SignatureConstant, - Zip64EndOfCentralDirectoryLocator.SignatureSize)) + Zip64EndOfCentralDirectoryLocator.SignatureConstantBytes, + Zip64EndOfCentralDirectoryLocator.FieldLengths.Signature)) { - Debug.Assert(_archiveReader != null); - // use locator to get to Zip64-EOCD Zip64EndOfCentralDirectoryLocator locator; - bool zip64eocdLocatorProper = Zip64EndOfCentralDirectoryLocator.TryReadBlock(_archiveReader, out locator); + bool zip64eocdLocatorProper = Zip64EndOfCentralDirectoryLocator.TryReadBlock(_archiveStream, out locator); Debug.Assert(zip64eocdLocatorProper); // we just found this using the signature finder, so it should be okay if (locator.OffsetOfZip64EOCD > long.MaxValue) @@ -607,7 +655,7 @@ private void TryReadZip64EndOfCentralDirectory(ZipEndOfCentralDirectoryBlock eoc // Read Zip64 End of Central Directory Record 
Zip64EndOfCentralDirectoryRecord record; - if (!Zip64EndOfCentralDirectoryRecord.TryReadBlock(_archiveReader, out record)) + if (!Zip64EndOfCentralDirectoryRecord.TryReadBlock(_archiveStream, out record)) throw new InvalidDataException(SR.Zip64EOCDNotWhereExpected); _numberOfThisDisk = record.NumberOfThisDisk; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index 57d07e27294324..8f5fa76b4e00aa 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Binary; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; @@ -377,11 +378,10 @@ private long OffsetOfCompressedData { if (_storedOffsetOfCompressedData == null) { - Debug.Assert(_archive.ArchiveReader != null); _archive.ArchiveStream.Seek(_offsetOfLocalHeader, SeekOrigin.Begin); // by calling this, we are using local header _storedEntryNameBytes.Length and extraFieldLength // to find start of data, but still using central directory size information - if (!ZipLocalFileHeader.TrySkipBlock(_archive.ArchiveReader)) + if (!ZipLocalFileHeader.TrySkipBlock(_archive.ArchiveStream)) throw new InvalidDataException(SR.LocalFileHeaderCorrupt); _storedOffsetOfCompressedData = _archive.ArchiveStream.Position; } @@ -469,11 +469,13 @@ internal void WriteAndFinishLocalEntry() internal void WriteCentralDirectoryFileHeader() { // This part is simple, because we should definitely know the sizes by this time - BinaryWriter writer = new BinaryWriter(_archive.ArchiveStream); - // _entryname only gets set when we read in or call moveTo. 
MoveTo does a check, and + // _storedEntryNameBytes only gets set when we read in or call moveTo. MoveTo does a check, and // reading in should not be able to produce an entryname longer than ushort.MaxValue + // _fileComment only gets set when we read in or set the FileComment property. This performs its own + // length check. Debug.Assert(_storedEntryNameBytes.Length <= ushort.MaxValue); + Debug.Assert(_fileComment.Length <= ushort.MaxValue); // decide if we need the Zip64 extra field: Zip64ExtraField zip64ExtraField = default; @@ -536,28 +538,46 @@ internal void WriteCentralDirectoryFileHeader() extraFieldLength = (ushort)bigExtraFieldLength; } - writer.Write(ZipCentralDirectoryFileHeader.SignatureConstant); // Central directory file header signature (4 bytes) - writer.Write((byte)_versionMadeBySpecification); // Version made by Specification (version) (1 byte) - writer.Write((byte)CurrentZipPlatform); // Version made by Compatibility (type) (1 byte) - writer.Write((ushort)_versionToExtract); // Minimum version needed to extract (2 bytes) - writer.Write((ushort)_generalPurposeBitFlag); // General Purpose bit flag (2 bytes) - writer.Write((ushort)CompressionMethod); // The Compression method (2 bytes) - writer.Write(ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); // File last modification time and date (4 bytes) - writer.Write(_crc32); // CRC-32 (4 bytes) - writer.Write(compressedSizeTruncated); // Compressed Size (4 bytes) - writer.Write(uncompressedSizeTruncated); // Uncompressed Size (4 bytes) - writer.Write((ushort)_storedEntryNameBytes.Length); // File Name Length (2 bytes) - writer.Write(extraFieldLength); // Extra Field Length (2 bytes) - - Debug.Assert(_fileComment.Length <= ushort.MaxValue); - - writer.Write((ushort)_fileComment.Length); - writer.Write((ushort)0); // disk number start - writer.Write((ushort)0); // internal file attributes - writer.Write(_externalFileAttr); // external file attributes - writer.Write(offsetOfLocalHeaderTruncated); // 
offset of local header - - writer.Write(_storedEntryNameBytes); + // The central directory file header begins with the below constant-length structure: + // Central directory file header signature (4 bytes) + // Version made by Specification (version) (1 byte) + // Version made by Compatibility (type) (1 byte) + // Minimum version needed to extract (2 bytes) + // General Purpose bit flag (2 bytes) + // The Compression method (2 bytes) + // File last modification time and date (4 bytes) + // CRC-32 (4 bytes) + // Compressed Size (4 bytes) + // Uncompressed Size (4 bytes) + // File Name Length (2 bytes) + // Extra Field Length (2 bytes) + // File Comment Length (2 bytes) + // Start Disk Number (2 bytes) + // Internal File Attributes (2 bytes) + // External File Attributes (4 bytes) + // Offset Of Local Header (4 bytes) + Span cdStaticHeader = stackalloc byte[ZipCentralDirectoryFileHeader.BlockConstantSectionSize]; + + ZipCentralDirectoryFileHeader.SignatureConstantBytes.CopyTo(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.Signature..]); + cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionMadeBySpecification] = (byte)_versionMadeBySpecification; + cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionMadeByCompatibility] = (byte)CurrentZipPlatform; + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionNeededToExtract..], (ushort)_versionToExtract); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.GeneralPurposeBitFlags..], (ushort)_generalPurposeBitFlag); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.CompressionMethod..], (ushort)CompressionMethod); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.LastModified..], ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); + 
BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.Crc32..], _crc32); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.CompressedSize..], compressedSizeTruncated); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.UncompressedSize..], uncompressedSizeTruncated); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.FilenameLength..], (ushort)_storedEntryNameBytes.Length); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.ExtraFieldLength..], extraFieldLength); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.FileCommentLength..], (ushort)_fileComment.Length); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.DiskNumberStart..], 0); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.InternalFileAttributes..], 0); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.ExternalFileAttributes..], _externalFileAttr); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.RelativeOffsetOfLocalHeader..], offsetOfLocalHeaderTruncated); + + _archive.ArchiveStream.Write(cdStaticHeader); + _archive.ArchiveStream.Write(_storedEntryNameBytes); // write extra fields if (zip64Needed) @@ -566,7 +586,7 @@ internal void WriteCentralDirectoryFileHeader() ZipGenericExtraField.WriteAllBlocks(_cdUnknownExtraFields, _archive.ArchiveStream); if (_fileComment.Length > 0) - writer.Write(_fileComment); + _archive.ArchiveStream.Write(_fileComment); } // returns false if fails, will get called on every entry before closing in update mode @@ -580,9 +600,7 @@ internal bool 
LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded() if (_originallyInArchive) { _archive.ArchiveStream.Seek(_offsetOfLocalHeader, SeekOrigin.Begin); - - Debug.Assert(_archive.ArchiveReader != null); - _lhUnknownExtraFields = ZipLocalFileHeader.GetExtraFields(_archive.ArchiveReader); + _lhUnknownExtraFields = ZipLocalFileHeader.GetExtraFields(_archive.ArchiveStream); } if (!_everOpenedForWrite && _originallyInArchive) @@ -776,9 +794,8 @@ private bool IsOpenable(bool needToUncompress, bool needToLoadIntoMemory, out st message = SR.LocalFileHeaderCorrupt; return false; } - Debug.Assert(_archive.ArchiveReader != null); _archive.ArchiveStream.Seek(_offsetOfLocalHeader, SeekOrigin.Begin); - if (!ZipLocalFileHeader.TrySkipBlock(_archive.ArchiveReader)) + if (!ZipLocalFileHeader.TrySkipBlock(_archive.ArchiveStream)) { message = SR.LocalFileHeaderCorrupt; return false; @@ -858,7 +875,7 @@ private static BitFlagValues MapDeflateCompressionOption(BitFlagValues generalPu // return value is true if we allocated an extra field for 64 bit headers, un/compressed size private bool WriteLocalFileHeader(bool isEmptyFile) { - BinaryWriter writer = new BinaryWriter(_archive.ArchiveStream); + Span lfStaticHeader = stackalloc byte[ZipLocalFileHeader.SizeOfLocalHeader]; // _entryname only gets set when we read in or call moveTo. 
MoveTo does a check, and // reading in should not be able to produce an entryname longer than ushort.MaxValue @@ -870,7 +887,7 @@ private bool WriteLocalFileHeader(bool isEmptyFile) uint compressedSizeTruncated, uncompressedSizeTruncated; // save offset - _offsetOfLocalHeader = writer.BaseStream.Position; + _offsetOfLocalHeader = _archive.ArchiveStream.Position; // if we already know that we have an empty file don't worry about anything, just do a straight shot of the header if (isEmptyFile) @@ -924,6 +941,9 @@ private bool WriteLocalFileHeader(bool isEmptyFile) } } + // save offset + _offsetOfLocalHeader = _archive.ArchiveStream.Position; + // calculate extra field. if zip64 stuff + original extraField aren't going to fit, dump the original extraField, because this is more important int bigExtraFieldLength = (zip64Used ? zip64ExtraField.TotalSize : 0) + (_lhUnknownExtraFields != null ? ZipGenericExtraField.TotalSize(_lhUnknownExtraFields) : 0); @@ -938,19 +958,21 @@ private bool WriteLocalFileHeader(bool isEmptyFile) extraFieldLength = (ushort)bigExtraFieldLength; } + ZipLocalFileHeader.SignatureConstantBytes.CopyTo(lfStaticHeader[ZipLocalFileHeader.FieldLocations.Signature..]); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.VersionNeededToExtract..], (ushort)_versionToExtract); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags..], (ushort)_generalPurposeBitFlag); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.CompressionMethod..], (ushort)CompressionMethod); + BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.LastModified..], ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); + BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.Crc32..], _crc32); + 
BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.CompressedSize..], compressedSizeTruncated); + BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.UncompressedSize..], uncompressedSizeTruncated); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.FilenameLength..], (ushort)_storedEntryNameBytes.Length); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.ExtraFieldLength..], extraFieldLength); + // write header - writer.Write(ZipLocalFileHeader.SignatureConstant); - writer.Write((ushort)_versionToExtract); - writer.Write((ushort)_generalPurposeBitFlag); - writer.Write((ushort)CompressionMethod); - writer.Write(ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); // uint - writer.Write(_crc32); // uint - writer.Write(compressedSizeTruncated); // uint - writer.Write(uncompressedSizeTruncated); // uint - writer.Write((ushort)_storedEntryNameBytes.Length); - writer.Write(extraFieldLength); // ushort - - writer.Write(_storedEntryNameBytes); + _archive.ArchiveStream.Write(lfStaticHeader); + + _archive.ArchiveStream.Write(_storedEntryNameBytes); if (zip64Used) zip64ExtraField.WriteBlock(_archive.ArchiveStream); @@ -1017,8 +1039,15 @@ private void WriteLocalFileHeaderAndDataIfNeeded() // Assumes that the stream is currently at the end of the data private void WriteCrcAndSizesInLocalHeader(bool zip64HeaderUsed) { + const int MetadataBufferLength = ZipLocalFileHeader.FieldLengths.VersionNeededToExtract + ZipLocalFileHeader.FieldLengths.GeneralPurposeBitFlags; + const int CrcAndSizesBufferLength = ZipLocalFileHeader.FieldLengths.Crc32 + ZipLocalFileHeader.FieldLengths.CompressedSize + ZipLocalFileHeader.FieldLengths.UncompressedSize; + const int Zip64SizesBufferLength = Zip64ExtraField.FieldLengths.UncompressedSize + Zip64ExtraField.FieldLengths.CompressedSize; + const int Zip64DataDescriptorCrcAndSizesBufferLength = 
ZipLocalFileHeader.Zip64DataDescriptor.FieldLengths.Crc32 + + ZipLocalFileHeader.Zip64DataDescriptor.FieldLengths.CompressedSize + ZipLocalFileHeader.Zip64DataDescriptor.FieldLengths.UncompressedSize; + long finalPosition = _archive.ArchiveStream.Position; - BinaryWriter writer = new BinaryWriter(_archive.ArchiveStream); + // Buffer has been sized to the largest data payload required: the 64-bit data descriptor. + Span writeBuffer = stackalloc byte[Zip64DataDescriptorCrcAndSizesBufferLength]; bool zip64Needed = ShouldUseZIP64 #if DEBUG_FORCE_ZIP64 @@ -1037,31 +1066,39 @@ private void WriteCrcAndSizesInLocalHeader(bool zip64HeaderUsed) // and setting the version to Zip64 to indicate that descriptor contains 64-bit values if (pretendStreaming) { + int relativeVersionToExtractLocation = ZipLocalFileHeader.FieldLocations.VersionNeededToExtract - ZipLocalFileHeader.FieldLocations.VersionNeededToExtract; + int relativeGeneralPurposeBitFlagsLocation = ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags - ZipLocalFileHeader.FieldLocations.VersionNeededToExtract; + VersionToExtractAtLeast(ZipVersionNeededValues.Zip64); _generalPurposeBitFlag |= BitFlagValues.DataDescriptor; - _archive.ArchiveStream.Seek(_offsetOfLocalHeader + ZipLocalFileHeader.OffsetToVersionFromHeaderStart, + _archive.ArchiveStream.Seek(_offsetOfLocalHeader + ZipLocalFileHeader.FieldLocations.VersionNeededToExtract, SeekOrigin.Begin); - writer.Write((ushort)_versionToExtract); - writer.Write((ushort)_generalPurposeBitFlag); + BinaryPrimitives.WriteUInt16LittleEndian(writeBuffer[relativeVersionToExtractLocation..], (ushort)_versionToExtract); + BinaryPrimitives.WriteUInt16LittleEndian(writeBuffer[relativeGeneralPurposeBitFlagsLocation..], (ushort)_generalPurposeBitFlag); + + _archive.ArchiveStream.Write(writeBuffer[..MetadataBufferLength]); } // next step is fill out the 32-bit size values in the normal header. we can't assume that // they are correct. 
we also write the CRC - _archive.ArchiveStream.Seek(_offsetOfLocalHeader + ZipLocalFileHeader.OffsetToCrcFromHeaderStart, + _archive.ArchiveStream.Seek(_offsetOfLocalHeader + ZipLocalFileHeader.FieldLocations.Crc32, SeekOrigin.Begin); if (!pretendStreaming) { - writer.Write(_crc32); - writer.Write(compressedSizeTruncated); - writer.Write(uncompressedSizeTruncated); + int relativeCrc32Location = ZipLocalFileHeader.FieldLocations.Crc32 - ZipLocalFileHeader.FieldLocations.Crc32; + int relativeCompressedSizeLocation = ZipLocalFileHeader.FieldLocations.CompressedSize - ZipLocalFileHeader.FieldLocations.Crc32; + int relativeUncompressedSizeLocation = ZipLocalFileHeader.FieldLocations.UncompressedSize - ZipLocalFileHeader.FieldLocations.Crc32; + + BinaryPrimitives.WriteUInt32LittleEndian(writeBuffer[relativeCrc32Location..], _crc32); + BinaryPrimitives.WriteUInt32LittleEndian(writeBuffer[relativeCompressedSizeLocation..], compressedSizeTruncated); + BinaryPrimitives.WriteUInt32LittleEndian(writeBuffer[relativeUncompressedSizeLocation..], uncompressedSizeTruncated); } else // but if we are pretending to stream, we want to fill in with zeroes { - writer.Write((uint)0); - writer.Write((uint)0); - writer.Write((uint)0); + writeBuffer[..CrcAndSizesBufferLength].Clear(); } + _archive.ArchiveStream.Write(writeBuffer[..CrcAndSizesBufferLength]); // next step: if we wrote the 64 bit header initially, a different implementation might // try to read it, even if the 32-bit size values aren't masked. 
thus, we should always put the @@ -1071,11 +1108,16 @@ private void WriteCrcAndSizesInLocalHeader(bool zip64HeaderUsed) // is always the first extra field that is written if (zip64HeaderUsed) { + int relativeUncompressedSizeLocation = Zip64ExtraField.FieldLocations.UncompressedSize - Zip64ExtraField.FieldLocations.UncompressedSize; + int relativeCompressedSizeLocation = Zip64ExtraField.FieldLocations.CompressedSize - Zip64ExtraField.FieldLocations.UncompressedSize; + _archive.ArchiveStream.Seek(_offsetOfLocalHeader + ZipLocalFileHeader.SizeOfLocalHeader + _storedEntryNameBytes.Length + Zip64ExtraField.OffsetToFirstField, SeekOrigin.Begin); - writer.Write(_uncompressedSize); - writer.Write(_compressedSize); + BinaryPrimitives.WriteInt64LittleEndian(writeBuffer[relativeUncompressedSizeLocation..], _uncompressedSize); + BinaryPrimitives.WriteInt64LittleEndian(writeBuffer[relativeCompressedSizeLocation..], _compressedSize); + + _archive.ArchiveStream.Write(writeBuffer[..Zip64SizesBufferLength]); } // now go to the where we were. 
assume that this is the end of the data @@ -1086,9 +1128,15 @@ private void WriteCrcAndSizesInLocalHeader(bool zip64HeaderUsed) // 64-bit sizes if (pretendStreaming) { - writer.Write(_crc32); - writer.Write(_compressedSize); - writer.Write(_uncompressedSize); + int relativeCrc32Location = ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.Crc32 - ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.Crc32; + int relativeCompressedSizeLocation = ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.CompressedSize - ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.Crc32; + int relativeUncompressedSizeLocation = ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.UncompressedSize - ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.Crc32; + + BinaryPrimitives.WriteUInt32LittleEndian(writeBuffer[relativeCrc32Location..], _crc32); + BinaryPrimitives.WriteInt64LittleEndian(writeBuffer[relativeCompressedSizeLocation..], _compressedSize); + BinaryPrimitives.WriteInt64LittleEndian(writeBuffer[relativeUncompressedSizeLocation..], _uncompressedSize); + + _archive.ArchiveStream.Write(writeBuffer[..Zip64DataDescriptorCrcAndSizesBufferLength]); } } @@ -1099,21 +1147,30 @@ private void WriteDataDescriptor() // data descriptor can be 32-bit or 64-bit sizes. 
32-bit is more compatible, so use that if possible // signature is optional but recommended by the spec + const int MaxSizeOfDataDescriptor = 24; + + Span dataDescriptor = stackalloc byte[MaxSizeOfDataDescriptor]; + int bytesToWrite; - BinaryWriter writer = new BinaryWriter(_archive.ArchiveStream); + ZipLocalFileHeader.DataDescriptorSignatureConstantBytes.CopyTo(dataDescriptor[ZipLocalFileHeader.ZipDataDescriptor.FieldLocations.Signature..]); + BinaryPrimitives.WriteUInt32LittleEndian(dataDescriptor[ZipLocalFileHeader.ZipDataDescriptor.FieldLocations.Crc32..], _crc32); - writer.Write(ZipLocalFileHeader.DataDescriptorSignature); - writer.Write(_crc32); if (AreSizesTooLarge) { - writer.Write(_compressedSize); - writer.Write(_uncompressedSize); + BinaryPrimitives.WriteInt64LittleEndian(dataDescriptor[ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.CompressedSize..], _compressedSize); + BinaryPrimitives.WriteInt64LittleEndian(dataDescriptor[ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.UncompressedSize..], _uncompressedSize); + + bytesToWrite = ZipLocalFileHeader.Zip64DataDescriptor.FieldLocations.CompressedSize + ZipLocalFileHeader.Zip64DataDescriptor.FieldLengths.UncompressedSize; } else { - writer.Write((uint)_compressedSize); - writer.Write((uint)_uncompressedSize); + BinaryPrimitives.WriteUInt32LittleEndian(dataDescriptor[ZipLocalFileHeader.ZipDataDescriptor.FieldLocations.CompressedSize..], (uint)_compressedSize); + BinaryPrimitives.WriteUInt32LittleEndian(dataDescriptor[ZipLocalFileHeader.ZipDataDescriptor.FieldLocations.UncompressedSize..], (uint)_uncompressedSize); + + bytesToWrite = ZipLocalFileHeader.ZipDataDescriptor.FieldLocations.CompressedSize + ZipLocalFileHeader.ZipDataDescriptor.FieldLengths.UncompressedSize; } + + _archive.ArchiveStream.Write(dataDescriptor[..bytesToWrite]); } private void UnloadStreams() diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.FieldLengths.cs 
b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.FieldLengths.cs new file mode 100644 index 00000000000000..6ccf16c82522fe --- /dev/null +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.FieldLengths.cs @@ -0,0 +1,131 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace System.IO.Compression +{ + internal partial struct ZipGenericExtraField + { + private static class FieldLengths + { + public const int Tag = sizeof(ushort); + public const int Size = sizeof(ushort); + } + } + + internal partial struct Zip64ExtraField + { + internal static class FieldLengths + { + public const int UncompressedSize = sizeof(long); + public const int CompressedSize = sizeof(long); + public const int LocalHeaderOffset = sizeof(long); + public const int StartDiskNumber = sizeof(uint); + } + } + + internal partial struct Zip64EndOfCentralDirectoryLocator + { + internal static class FieldLengths + { + public static readonly int Signature = SignatureConstantBytes.Length; + public const int NumberOfDiskWithZip64EOCD = sizeof(uint); + public const int OffsetOfZip64EOCD = sizeof(ulong); + public const int TotalNumberOfDisks = sizeof(uint); + } + } + + internal partial struct Zip64EndOfCentralDirectoryRecord + { + private static class FieldLengths + { + public static readonly int Signature = SignatureConstantBytes.Length; + public const int SizeOfThisRecord = sizeof(ulong); + public const int VersionMadeBy = sizeof(ushort); + public const int VersionNeededToExtract = sizeof(ushort); + public const int NumberOfThisDisk = sizeof(uint); + public const int NumberOfDiskWithStartOfCD = sizeof(uint); + public const int NumberOfEntriesOnThisDisk = sizeof(ulong); + public const int NumberOfEntriesTotal = sizeof(ulong); + public const int SizeOfCentralDirectory = sizeof(ulong); + public const int OffsetOfCentralDirectory = sizeof(ulong); + } + } + + 
internal readonly partial struct ZipLocalFileHeader + { + internal static class FieldLengths + { + public static readonly int Signature = SignatureConstantBytes.Length; + public const int VersionNeededToExtract = sizeof(ushort); + public const int GeneralPurposeBitFlags = sizeof(ushort); + public const int CompressionMethod = sizeof(ushort); + public const int LastModified = sizeof(ushort) + sizeof(ushort); + public const int Crc32 = sizeof(uint); + public const int CompressedSize = sizeof(uint); + public const int UncompressedSize = sizeof(uint); + public const int FilenameLength = sizeof(ushort); + public const int ExtraFieldLength = sizeof(ushort); + } + + internal readonly partial struct ZipDataDescriptor + { + internal static class FieldLengths + { + public static readonly int Signature = DataDescriptorSignatureConstantBytes.Length; + public const int Crc32 = sizeof(uint); + public const int CompressedSize = sizeof(uint); + public const int UncompressedSize = sizeof(uint); + } + } + + internal readonly partial struct Zip64DataDescriptor + { + internal static class FieldLengths + { + public static readonly int Signature = DataDescriptorSignatureConstantBytes.Length; + public const int Crc32 = sizeof(uint); + public const int CompressedSize = sizeof(long); + public const int UncompressedSize = sizeof(long); + } + } + } + + internal partial struct ZipCentralDirectoryFileHeader + { + internal static class FieldLengths + { + public static readonly int Signature = SignatureConstantBytes.Length; + public const int VersionMadeBySpecification = sizeof(byte); + public const int VersionMadeByCompatibility = sizeof(byte); + public const int VersionNeededToExtract = sizeof(ushort); + public const int GeneralPurposeBitFlags = sizeof(ushort); + public const int CompressionMethod = sizeof(ushort); + public const int LastModified = sizeof(ushort) + sizeof(ushort); + public const int Crc32 = sizeof(uint); + public const int CompressedSize = sizeof(uint); + public const int 
UncompressedSize = sizeof(uint); + public const int FilenameLength = sizeof(ushort); + public const int ExtraFieldLength = sizeof(ushort); + public const int FileCommentLength = sizeof(ushort); + public const int DiskNumberStart = sizeof(ushort); + public const int InternalFileAttributes = sizeof(ushort); + public const int ExternalFileAttributes = sizeof(uint); + public const int RelativeOffsetOfLocalHeader = sizeof(uint); + } + } + + internal partial struct ZipEndOfCentralDirectoryBlock + { + internal static class FieldLengths + { + public static readonly int Signature = SignatureConstantBytes.Length; + public const int NumberOfThisDisk = sizeof(ushort); + public const int NumberOfTheDiskWithTheStartOfTheCentralDirectory = sizeof(ushort); + public const int NumberOfEntriesInTheCentralDirectoryOnThisDisk = sizeof(ushort); + public const int NumberOfEntriesInTheCentralDirectory = sizeof(ushort); + public const int SizeOfCentralDirectory = sizeof(uint); + public const int OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber = sizeof(uint); + public const int ArchiveCommentLength = sizeof(ushort); + } + } +} diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.FieldLocations.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.FieldLocations.cs new file mode 100644 index 00000000000000..d2cc3376f30e3a --- /dev/null +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.FieldLocations.cs @@ -0,0 +1,137 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +namespace System.IO.Compression +{ + internal partial struct ZipGenericExtraField + { + internal static class FieldLocations + { + public const int Tag = 0; + public const int Size = Tag + FieldLengths.Tag; + public const int DynamicData = Size + FieldLengths.Size; + } + } + + internal partial struct Zip64ExtraField + { + internal static class FieldLocations + { + public const int Tag = ZipGenericExtraField.FieldLocations.Tag; + public const int Size = ZipGenericExtraField.FieldLocations.Size; + public const int UncompressedSize = ZipGenericExtraField.FieldLocations.DynamicData; + public const int CompressedSize = UncompressedSize + FieldLengths.UncompressedSize; + public const int LocalHeaderOffset = CompressedSize + FieldLengths.CompressedSize; + public const int StartDiskNumber = LocalHeaderOffset + FieldLengths.LocalHeaderOffset; + } + } + + internal partial struct Zip64EndOfCentralDirectoryLocator + { + private static class FieldLocations + { + public const int Signature = 0; + public static readonly int NumberOfDiskWithZip64EOCD = Signature + FieldLengths.Signature; + public static readonly int OffsetOfZip64EOCD = NumberOfDiskWithZip64EOCD + FieldLengths.NumberOfDiskWithZip64EOCD; + public static readonly int TotalNumberOfDisks = OffsetOfZip64EOCD + FieldLengths.OffsetOfZip64EOCD; + } + } + + internal partial struct Zip64EndOfCentralDirectoryRecord + { + private static class FieldLocations + { + public const int Signature = 0; + public static readonly int SizeOfThisRecord = Signature + FieldLengths.Signature; + public static readonly int VersionMadeBy = SizeOfThisRecord + FieldLengths.SizeOfThisRecord; + public static readonly int VersionNeededToExtract = VersionMadeBy + FieldLengths.VersionMadeBy; + public static readonly int NumberOfThisDisk = VersionNeededToExtract + FieldLengths.VersionNeededToExtract; + public static readonly int NumberOfDiskWithStartOfCD = NumberOfThisDisk + FieldLengths.NumberOfThisDisk; + public static readonly int 
NumberOfEntriesOnThisDisk = NumberOfDiskWithStartOfCD + FieldLengths.NumberOfDiskWithStartOfCD; + public static readonly int NumberOfEntriesTotal = NumberOfEntriesOnThisDisk + FieldLengths.NumberOfEntriesOnThisDisk; + public static readonly int SizeOfCentralDirectory = NumberOfEntriesTotal + FieldLengths.NumberOfEntriesTotal; + public static readonly int OffsetOfCentralDirectory = SizeOfCentralDirectory + FieldLengths.SizeOfCentralDirectory; + } + } + + internal readonly partial struct ZipLocalFileHeader + { + internal static class FieldLocations + { + public const int Signature = 0; + public static readonly int VersionNeededToExtract = Signature + FieldLengths.Signature; + public static readonly int GeneralPurposeBitFlags = VersionNeededToExtract + FieldLengths.VersionNeededToExtract; + public static readonly int CompressionMethod = GeneralPurposeBitFlags + FieldLengths.GeneralPurposeBitFlags; + public static readonly int LastModified = CompressionMethod + FieldLengths.CompressionMethod; + public static readonly int Crc32 = LastModified + FieldLengths.LastModified; + public static readonly int CompressedSize = Crc32 + FieldLengths.Crc32; + public static readonly int UncompressedSize = CompressedSize + FieldLengths.CompressedSize; + public static readonly int FilenameLength = UncompressedSize + FieldLengths.UncompressedSize; + public static readonly int ExtraFieldLength = FilenameLength + FieldLengths.FilenameLength; + public static readonly int DynamicData = ExtraFieldLength + FieldLengths.ExtraFieldLength; + } + + internal readonly partial struct ZipDataDescriptor + { + internal static class FieldLocations + { + public const int Signature = 0; + public static readonly int Crc32 = Signature + FieldLengths.Signature; + public static readonly int CompressedSize = Crc32 + FieldLengths.Crc32; + public static readonly int UncompressedSize = CompressedSize + FieldLengths.CompressedSize; + } + } + + internal readonly partial struct Zip64DataDescriptor + { + internal 
static class FieldLocations + { + public const int Signature = 0; + public static readonly int Crc32 = Signature + FieldLengths.Signature; + public static readonly int CompressedSize = Crc32 + FieldLengths.Crc32; + public static readonly int UncompressedSize = CompressedSize + FieldLengths.CompressedSize; + } + } + } + + internal partial struct ZipCentralDirectoryFileHeader + { + internal static class FieldLocations + { + public const int Signature = 0; + public static readonly int VersionMadeBySpecification = Signature + FieldLengths.Signature; + public static readonly int VersionMadeByCompatibility = VersionMadeBySpecification + FieldLengths.VersionMadeBySpecification; + public static readonly int VersionNeededToExtract = VersionMadeByCompatibility + FieldLengths.VersionMadeByCompatibility; + public static readonly int GeneralPurposeBitFlags = VersionNeededToExtract + FieldLengths.VersionNeededToExtract; + public static readonly int CompressionMethod = GeneralPurposeBitFlags + FieldLengths.GeneralPurposeBitFlags; + public static readonly int LastModified = CompressionMethod + FieldLengths.CompressionMethod; + public static readonly int Crc32 = LastModified + FieldLengths.LastModified; + public static readonly int CompressedSize = Crc32 + FieldLengths.Crc32; + public static readonly int UncompressedSize = CompressedSize + FieldLengths.CompressedSize; + public static readonly int FilenameLength = UncompressedSize + FieldLengths.UncompressedSize; + public static readonly int ExtraFieldLength = FilenameLength + FieldLengths.FilenameLength; + public static readonly int FileCommentLength = ExtraFieldLength + FieldLengths.ExtraFieldLength; + public static readonly int DiskNumberStart = FileCommentLength + FieldLengths.FileCommentLength; + public static readonly int InternalFileAttributes = DiskNumberStart + FieldLengths.DiskNumberStart; + public static readonly int ExternalFileAttributes = InternalFileAttributes + FieldLengths.InternalFileAttributes; + public static 
readonly int RelativeOffsetOfLocalHeader = ExternalFileAttributes + FieldLengths.ExternalFileAttributes; + public static readonly int DynamicData = RelativeOffsetOfLocalHeader + FieldLengths.RelativeOffsetOfLocalHeader; + } + } + + internal partial struct ZipEndOfCentralDirectoryBlock + { + private static class FieldLocations + { + public const int Signature = 0; + public static readonly int NumberOfThisDisk = Signature + FieldLengths.Signature; + public static readonly int NumberOfTheDiskWithTheStartOfTheCentralDirectory = NumberOfThisDisk + FieldLengths.NumberOfThisDisk; + public static readonly int NumberOfEntriesInTheCentralDirectoryOnThisDisk = NumberOfTheDiskWithTheStartOfTheCentralDirectory + FieldLengths.NumberOfTheDiskWithTheStartOfTheCentralDirectory; + public static readonly int NumberOfEntriesInTheCentralDirectory = NumberOfEntriesInTheCentralDirectoryOnThisDisk + FieldLengths.NumberOfEntriesInTheCentralDirectoryOnThisDisk; + public static readonly int SizeOfCentralDirectory = NumberOfEntriesInTheCentralDirectory + FieldLengths.NumberOfEntriesInTheCentralDirectory; + public static readonly int OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber = SizeOfCentralDirectory + FieldLengths.SizeOfCentralDirectory; + public static readonly int ArchiveCommentLength = OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber + FieldLengths.OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber; + public static readonly int DynamicData = ArchiveCommentLength + FieldLengths.ArchiveCommentLength; + } + } +} diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs index 96b19871cd734f..1df77357b581d2 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or 
more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Binary; using System.Collections.Generic; using System.Diagnostics; @@ -9,9 +10,9 @@ namespace System.IO.Compression // All blocks.TryReadBlock do a check to see if signature is correct. Generic extra field is slightly different // all of the TryReadBlocks will throw if there are not enough bytes in the stream - internal struct ZipGenericExtraField + internal partial struct ZipGenericExtraField { - private const int SizeOfHeader = 4; + private const int SizeOfHeader = FieldLengths.Tag + FieldLengths.Size; private ushort _tag; private ushort _size; @@ -24,45 +25,51 @@ internal struct ZipGenericExtraField public void WriteBlock(Stream stream) { - BinaryWriter writer = new BinaryWriter(stream); - writer.Write(Tag); - writer.Write(Size); - writer.Write(Data); + Span extraFieldHeader = stackalloc byte[SizeOfHeader]; + + BinaryPrimitives.WriteUInt16LittleEndian(extraFieldHeader[FieldLocations.Tag..], _tag); + BinaryPrimitives.WriteUInt16LittleEndian(extraFieldHeader[FieldLocations.Size..], _size); + + stream.Write(extraFieldHeader); + stream.Write(Data); } - // shouldn't ever read the byte at position endExtraField - // assumes we are positioned at the beginning of an extra field subfield - public static bool TryReadBlock(BinaryReader reader, long endExtraField, out ZipGenericExtraField field) + // assumes that bytes starts at the beginning of an extra field subfield + public static bool TryReadBlock(ReadOnlySpan bytes, out int bytesConsumed, out ZipGenericExtraField field) { field = default; + bytesConsumed = 0; // not enough bytes to read tag + size - if (endExtraField - reader.BaseStream.Position < 4) + if (bytes.Length < SizeOfHeader) + { return false; + } - field._tag = reader.ReadUInt16(); - field._size = reader.ReadUInt16(); + field._tag = BinaryPrimitives.ReadUInt16LittleEndian(bytes[FieldLocations.Tag..]); + field._size = 
BinaryPrimitives.ReadUInt16LittleEndian(bytes[FieldLocations.Size..]); + bytesConsumed += SizeOfHeader; - // not enough bytes to read the data - if (endExtraField - reader.BaseStream.Position < field._size) + // not enough byte to read the data + if ((bytes.Length - SizeOfHeader) < field._size) + { return false; + } - field._data = reader.ReadBytes(field._size); + field._data = bytes.Slice(FieldLocations.DynamicData, field._size).ToArray(); + bytesConsumed += field._size; return true; } - // shouldn't ever read the byte at position endExtraField - public static List ParseExtraField(Stream extraFieldData) + public static List ParseExtraField(ReadOnlySpan extraFieldData) { List extraFields = new List(); + int totalBytesConsumed = 0; - using (BinaryReader reader = new BinaryReader(extraFieldData)) + while (TryReadBlock(extraFieldData[totalBytesConsumed..], out int currBytesConsumed, out ZipGenericExtraField field)) { - ZipGenericExtraField field; - while (TryReadBlock(reader, extraFieldData.Length, out field)) - { - extraFields.Add(field); - } + totalBytesConsumed += currBytesConsumed; + extraFields.Add(field); } return extraFields; @@ -72,24 +79,28 @@ public static int TotalSize(List fields) { int size = 0; foreach (ZipGenericExtraField field in fields) + { size += field.Size + SizeOfHeader; //size is only size of data + } return size; } public static void WriteAllBlocks(List fields, Stream stream) { foreach (ZipGenericExtraField field in fields) + { field.WriteBlock(stream); + } } } - internal struct Zip64ExtraField + internal partial struct Zip64ExtraField { // Size is size of the record not including the tag or size fields // If the extra field is going in the local header, it cannot include only // one of uncompressed/compressed size - public const int OffsetToFirstField = 4; + public const int OffsetToFirstField = ZipGenericExtraField.FieldLocations.DynamicData; private const ushort TagConstant = 1; private ushort _size; @@ -120,10 +131,22 @@ public long? 
LocalHeaderOffset private void UpdateSize() { _size = 0; - if (_uncompressedSize != null) _size += 8; - if (_compressedSize != null) _size += 8; - if (_localHeaderOffset != null) _size += 8; - if (_startDiskNumber != null) _size += 4; + if (_uncompressedSize != null) + { + _size += FieldLengths.UncompressedSize; + } + if (_compressedSize != null) + { + _size += FieldLengths.CompressedSize; + } + if (_localHeaderOffset != null) + { + _size += FieldLengths.LocalHeaderOffset; + } + if (_startDiskNumber != null) + { + _size += FieldLengths.StartDiskNumber; + } } // There is a small chance that something very weird could happen here. The code calling into this function @@ -139,21 +162,21 @@ private void UpdateSize() // // If there are more than one Zip64 extra fields, we take the first one that has the expected size // - public static Zip64ExtraField GetJustZip64Block(Stream extraFieldStream, + public static Zip64ExtraField GetJustZip64Block(ReadOnlySpan extraFieldData, bool readUncompressedSize, bool readCompressedSize, bool readLocalHeaderOffset, bool readStartDiskNumber) { Zip64ExtraField zip64Field; - using (BinaryReader reader = new BinaryReader(extraFieldStream)) + int totalBytesConsumed = 0; + + while (ZipGenericExtraField.TryReadBlock(extraFieldData.Slice(totalBytesConsumed), out int currBytesConsumed, out ZipGenericExtraField currentExtraField)) { - ZipGenericExtraField currentExtraField; - while (ZipGenericExtraField.TryReadBlock(reader, extraFieldStream.Length, out currentExtraField)) + totalBytesConsumed += currBytesConsumed; + + if (TryGetZip64BlockFromGenericExtraField(currentExtraField, readUncompressedSize, + readCompressedSize, readLocalHeaderOffset, readStartDiskNumber, out zip64Field)) { - if (TryGetZip64BlockFromGenericExtraField(currentExtraField, readUncompressedSize, - readCompressedSize, readLocalHeaderOffset, readStartDiskNumber, out zip64Field)) - { - return zip64Field; - } + return zip64Field; } } @@ -172,6 +195,7 @@ private static bool 
TryGetZip64BlockFromGenericExtraField(ZipGenericExtraField e bool readLocalHeaderOffset, bool readStartDiskNumber, out Zip64ExtraField zip64Block) { + const int MaximumExtraFieldLength = FieldLengths.UncompressedSize + FieldLengths.CompressedSize + FieldLengths.LocalHeaderOffset + FieldLengths.StartDiskNumber; zip64Block = default; zip64Block._compressedSize = null; @@ -180,82 +204,98 @@ private static bool TryGetZip64BlockFromGenericExtraField(ZipGenericExtraField e zip64Block._startDiskNumber = null; if (extraField.Tag != TagConstant) + { return false; + } zip64Block._size = extraField.Size; - using (MemoryStream ms = new MemoryStream(extraField.Data)) - using (BinaryReader reader = new BinaryReader(ms)) - { - // The spec section 4.5.3: - // The order of the fields in the zip64 extended - // information record is fixed, but the fields MUST - // only appear if the corresponding Local or Central - // directory record field is set to 0xFFFF or 0xFFFFFFFF. - // However tools commonly write the fields anyway; the prevailing convention - // is to respect the size, but only actually use the values if their 32 bit - // values were all 0xFF. + ReadOnlySpan data = extraField.Data; - if (extraField.Size < sizeof(long)) - return true; + // The spec section 4.5.3: + // The order of the fields in the zip64 extended + // information record is fixed, but the fields MUST + // only appear if the corresponding Local or Central + // directory record field is set to 0xFFFF or 0xFFFFFFFF. + // However tools commonly write the fields anyway; the prevailing convention + // is to respect the size, but only actually use the values if their 32 bit + // values were all 0xFF. - // Advancing the stream (by reading from it) is possible only when: - // 1. There is an explicit ask to do that (valid files, corresponding boolean flag(s) set to true). - // 2. When the size indicates that all the information is available ("slightly invalid files"). 
- bool readAllFields = extraField.Size >= sizeof(long) + sizeof(long) + sizeof(long) + sizeof(int); + if (data.Length < FieldLengths.UncompressedSize) + { + return true; + } - if (readUncompressedSize) - { - zip64Block._uncompressedSize = reader.ReadInt64(); - } - else if (readAllFields) - { - _ = reader.ReadInt64(); - } + // Advancing the stream (by reading from it) is possible only when: + // 1. There is an explicit ask to do that (valid files, corresponding boolean flag(s) set to true). + // 2. When the size indicates that all the information is available ("slightly invalid files"). + bool readAllFields = extraField.Size >= MaximumExtraFieldLength; - if (ms.Position > extraField.Size - sizeof(long)) - return true; + if (readUncompressedSize) + { + zip64Block._uncompressedSize = BinaryPrimitives.ReadInt64LittleEndian(data); + data = data.Slice(FieldLengths.UncompressedSize); + } + else if (readAllFields) + { + data = data.Slice(FieldLengths.UncompressedSize); + } - if (readCompressedSize) - { - zip64Block._compressedSize = reader.ReadInt64(); - } - else if (readAllFields) - { - _ = reader.ReadInt64(); - } + if (data.Length < FieldLengths.CompressedSize) + { + return true; + } - if (ms.Position > extraField.Size - sizeof(long)) - return true; + if (readCompressedSize) + { + zip64Block._compressedSize = BinaryPrimitives.ReadInt64LittleEndian(data); + data = data.Slice(FieldLengths.CompressedSize); + } + else if (readAllFields) + { + data = data.Slice(FieldLengths.CompressedSize); + } - if (readLocalHeaderOffset) - { - zip64Block._localHeaderOffset = reader.ReadInt64(); - } - else if (readAllFields) - { - _ = reader.ReadInt64(); - } + if (data.Length < FieldLengths.LocalHeaderOffset) + { + return true; + } - if (ms.Position > extraField.Size - sizeof(int)) - return true; + if (readLocalHeaderOffset) + { + zip64Block._localHeaderOffset = BinaryPrimitives.ReadInt64LittleEndian(data); + data = data.Slice(FieldLengths.LocalHeaderOffset); + } + else if (readAllFields) + 
{ + data = data.Slice(FieldLengths.LocalHeaderOffset); + } - if (readStartDiskNumber) - { - zip64Block._startDiskNumber = reader.ReadUInt32(); - } - else if (readAllFields) - { - _ = reader.ReadInt32(); - } + if (data.Length < FieldLengths.StartDiskNumber) + { + return true; + } - // original values are unsigned, so implies value is too big to fit in signed integer - if (zip64Block._uncompressedSize < 0) throw new InvalidDataException(SR.FieldTooBigUncompressedSize); - if (zip64Block._compressedSize < 0) throw new InvalidDataException(SR.FieldTooBigCompressedSize); - if (zip64Block._localHeaderOffset < 0) throw new InvalidDataException(SR.FieldTooBigLocalHeaderOffset); + if (readStartDiskNumber) + { + zip64Block._startDiskNumber = BinaryPrimitives.ReadUInt32LittleEndian(data); + } - return true; + // original values are unsigned, so implies value is too big to fit in signed integer + if (zip64Block._uncompressedSize < 0) + { + throw new InvalidDataException(SR.FieldTooBigUncompressedSize); + } + if (zip64Block._compressedSize < 0) + { + throw new InvalidDataException(SR.FieldTooBigCompressedSize); + } + if (zip64Block._localHeaderOffset < 0) + { + throw new InvalidDataException(SR.FieldTooBigLocalHeaderOffset); } + + return true; } public static Zip64ExtraField GetAndRemoveZip64Block(List extraFields, @@ -299,53 +339,95 @@ public static void RemoveZip64Blocks(List extraFields) public void WriteBlock(Stream stream) { - BinaryWriter writer = new BinaryWriter(stream); - writer.Write(TagConstant); - writer.Write(_size); - if (_uncompressedSize != null) writer.Write(_uncompressedSize.Value); - if (_compressedSize != null) writer.Write(_compressedSize.Value); - if (_localHeaderOffset != null) writer.Write(_localHeaderOffset.Value); - if (_startDiskNumber != null) writer.Write(_startDiskNumber.Value); + Span extraFieldData = stackalloc byte[TotalSize]; + + BinaryPrimitives.WriteUInt16LittleEndian(extraFieldData[FieldLocations.Tag..], TagConstant); + 
BinaryPrimitives.WriteUInt16LittleEndian(extraFieldData[FieldLocations.Size..], _size); + + if (_uncompressedSize != null) + { + BinaryPrimitives.WriteInt64LittleEndian(extraFieldData[FieldLocations.UncompressedSize..], _uncompressedSize.Value); + } + + if (_compressedSize != null) + { + BinaryPrimitives.WriteInt64LittleEndian(extraFieldData[FieldLocations.CompressedSize..], _compressedSize.Value); + } + + if (_localHeaderOffset != null) + { + BinaryPrimitives.WriteInt64LittleEndian(extraFieldData[FieldLocations.LocalHeaderOffset..], _localHeaderOffset.Value); + } + + if (_startDiskNumber != null) + { + BinaryPrimitives.WriteUInt32LittleEndian(extraFieldData[FieldLocations.StartDiskNumber..], _startDiskNumber.Value); + } + + stream.Write(extraFieldData); } } - internal struct Zip64EndOfCentralDirectoryLocator + internal partial struct Zip64EndOfCentralDirectoryLocator { - public const uint SignatureConstant = 0x07064B50; - public const int SignatureSize = sizeof(uint); + // The Zip File Format Specification references 0x07064B50, this is a big endian representation. + // ZIP files store values in little endian, so this is reversed. 
+ public static ReadOnlySpan SignatureConstantBytes => [0x50, 0x4B, 0x06, 0x07]; + private const int BlockConstantSectionSize = 20; public const int SizeOfBlockWithoutSignature = 16; public uint NumberOfDiskWithZip64EOCD; public ulong OffsetOfZip64EOCD; public uint TotalNumberOfDisks; - public static bool TryReadBlock(BinaryReader reader, out Zip64EndOfCentralDirectoryLocator zip64EOCDLocator) + public static bool TryReadBlock(Stream stream, out Zip64EndOfCentralDirectoryLocator zip64EOCDLocator) { + Span blockContents = stackalloc byte[BlockConstantSectionSize]; + int bytesRead; + zip64EOCDLocator = default; + bytesRead = stream.Read(blockContents); + + if (bytesRead < BlockConstantSectionSize) + { + return false; + } - if (reader.ReadUInt32() != SignatureConstant) + if (!blockContents.StartsWith(SignatureConstantBytes)) + { return false; + } + + zip64EOCDLocator.NumberOfDiskWithZip64EOCD = BinaryPrimitives.ReadUInt32LittleEndian(blockContents[FieldLocations.NumberOfDiskWithZip64EOCD..]); + zip64EOCDLocator.OffsetOfZip64EOCD = BinaryPrimitives.ReadUInt64LittleEndian(blockContents[FieldLocations.OffsetOfZip64EOCD..]); + zip64EOCDLocator.TotalNumberOfDisks = BinaryPrimitives.ReadUInt32LittleEndian(blockContents[FieldLocations.TotalNumberOfDisks..]); - zip64EOCDLocator.NumberOfDiskWithZip64EOCD = reader.ReadUInt32(); - zip64EOCDLocator.OffsetOfZip64EOCD = reader.ReadUInt64(); - zip64EOCDLocator.TotalNumberOfDisks = reader.ReadUInt32(); return true; } public static void WriteBlock(Stream stream, long zip64EOCDRecordStart) { - BinaryWriter writer = new BinaryWriter(stream); - writer.Write(SignatureConstant); - writer.Write((uint)0); // number of disk with start of zip64 eocd - writer.Write(zip64EOCDRecordStart); - writer.Write((uint)1); // total number of disks + Span blockContents = stackalloc byte[BlockConstantSectionSize]; + + SignatureConstantBytes.CopyTo(blockContents[FieldLocations.Signature..]); + // number of disk with start of zip64 eocd + 
BinaryPrimitives.WriteUInt32LittleEndian(blockContents[FieldLocations.NumberOfDiskWithZip64EOCD..], 0); + BinaryPrimitives.WriteInt64LittleEndian(blockContents[FieldLocations.OffsetOfZip64EOCD..], zip64EOCDRecordStart); + // total number of disks + BinaryPrimitives.WriteUInt32LittleEndian(blockContents[FieldLocations.TotalNumberOfDisks..], 1); + + stream.Write(blockContents); } } - internal struct Zip64EndOfCentralDirectoryRecord + internal partial struct Zip64EndOfCentralDirectoryRecord { - private const uint SignatureConstant = 0x06064B50; + // The Zip File Format Specification references 0x06064B50, this is a big endian representation. + // ZIP files store values in little endian, so this is reversed. + public static ReadOnlySpan SignatureConstantBytes => [0x50, 0x4B, 0x06, 0x06]; + + private const int BlockConstantSectionSize = 56; private const ulong NormalSize = 0x2C; // the size of the data excluding the size/signature fields if no extra data included public ulong SizeOfThisRecord; @@ -358,107 +440,160 @@ internal struct Zip64EndOfCentralDirectoryRecord public ulong SizeOfCentralDirectory; public ulong OffsetOfCentralDirectory; - public static bool TryReadBlock(BinaryReader reader, out Zip64EndOfCentralDirectoryRecord zip64EOCDRecord) + public static bool TryReadBlock(Stream stream, out Zip64EndOfCentralDirectoryRecord zip64EOCDRecord) { + Span blockContents = stackalloc byte[BlockConstantSectionSize]; + int bytesRead; + zip64EOCDRecord = default; + bytesRead = stream.Read(blockContents); + + if (bytesRead < BlockConstantSectionSize) + { + return false; + } - if (reader.ReadUInt32() != SignatureConstant) + if (!blockContents.StartsWith(SignatureConstantBytes)) + { return false; + } - zip64EOCDRecord.SizeOfThisRecord = reader.ReadUInt64(); - zip64EOCDRecord.VersionMadeBy = reader.ReadUInt16(); - zip64EOCDRecord.VersionNeededToExtract = reader.ReadUInt16(); - zip64EOCDRecord.NumberOfThisDisk = reader.ReadUInt32(); - zip64EOCDRecord.NumberOfDiskWithStartOfCD = 
reader.ReadUInt32(); - zip64EOCDRecord.NumberOfEntriesOnThisDisk = reader.ReadUInt64(); - zip64EOCDRecord.NumberOfEntriesTotal = reader.ReadUInt64(); - zip64EOCDRecord.SizeOfCentralDirectory = reader.ReadUInt64(); - zip64EOCDRecord.OffsetOfCentralDirectory = reader.ReadUInt64(); + zip64EOCDRecord.SizeOfThisRecord = BinaryPrimitives.ReadUInt64LittleEndian(blockContents[FieldLocations.SizeOfThisRecord..]); + zip64EOCDRecord.VersionMadeBy = BinaryPrimitives.ReadUInt16LittleEndian(blockContents[FieldLocations.VersionMadeBy..]); + zip64EOCDRecord.VersionNeededToExtract = BinaryPrimitives.ReadUInt16LittleEndian(blockContents[FieldLocations.VersionNeededToExtract..]); + zip64EOCDRecord.NumberOfThisDisk = BinaryPrimitives.ReadUInt32LittleEndian(blockContents[FieldLocations.NumberOfThisDisk..]); + zip64EOCDRecord.NumberOfDiskWithStartOfCD = BinaryPrimitives.ReadUInt32LittleEndian(blockContents[FieldLocations.NumberOfDiskWithStartOfCD..]); + zip64EOCDRecord.NumberOfEntriesOnThisDisk = BinaryPrimitives.ReadUInt64LittleEndian(blockContents[FieldLocations.NumberOfEntriesOnThisDisk..]); + zip64EOCDRecord.NumberOfEntriesTotal = BinaryPrimitives.ReadUInt64LittleEndian(blockContents[FieldLocations.NumberOfEntriesTotal..]); + zip64EOCDRecord.SizeOfCentralDirectory = BinaryPrimitives.ReadUInt64LittleEndian(blockContents[FieldLocations.SizeOfCentralDirectory..]); + zip64EOCDRecord.OffsetOfCentralDirectory = BinaryPrimitives.ReadUInt64LittleEndian(blockContents[FieldLocations.OffsetOfCentralDirectory..]); return true; } public static void WriteBlock(Stream stream, long numberOfEntries, long startOfCentralDirectory, long sizeOfCentralDirectory) { - BinaryWriter writer = new BinaryWriter(stream); + Span blockContents = stackalloc byte[BlockConstantSectionSize]; + + SignatureConstantBytes.CopyTo(blockContents[FieldLocations.Signature..]); + BinaryPrimitives.WriteUInt64LittleEndian(blockContents[FieldLocations.SizeOfThisRecord..], NormalSize); + // version made by: high byte is 0 for MS 
DOS, low byte is version needed + BinaryPrimitives.WriteUInt16LittleEndian(blockContents[FieldLocations.VersionMadeBy..], (ushort)ZipVersionNeededValues.Zip64); + // version needed is 45 for zip 64 support + BinaryPrimitives.WriteUInt16LittleEndian(blockContents[FieldLocations.VersionNeededToExtract..], (ushort)ZipVersionNeededValues.Zip64); + // number of this disk is 0 + BinaryPrimitives.WriteUInt32LittleEndian(blockContents[FieldLocations.NumberOfThisDisk..], 0); + // number of disk with start of central directory is 0 + BinaryPrimitives.WriteUInt32LittleEndian(blockContents[FieldLocations.NumberOfDiskWithStartOfCD..], 0); + // number of entries on this disk + BinaryPrimitives.WriteInt64LittleEndian(blockContents[FieldLocations.NumberOfEntriesOnThisDisk..], numberOfEntries); + // number of entries total + BinaryPrimitives.WriteInt64LittleEndian(blockContents[FieldLocations.NumberOfEntriesTotal..], numberOfEntries); + BinaryPrimitives.WriteInt64LittleEndian(blockContents[FieldLocations.SizeOfCentralDirectory..], sizeOfCentralDirectory); + BinaryPrimitives.WriteInt64LittleEndian(blockContents[FieldLocations.OffsetOfCentralDirectory..], startOfCentralDirectory); // write Zip 64 EOCD record - writer.Write(SignatureConstant); - writer.Write(NormalSize); - writer.Write((ushort)ZipVersionNeededValues.Zip64); // version needed is 45 for zip 64 support - writer.Write((ushort)ZipVersionNeededValues.Zip64); // version made by: high byte is 0 for MS DOS, low byte is version needed - writer.Write((uint)0); // number of this disk is 0 - writer.Write((uint)0); // number of disk with start of central directory is 0 - writer.Write(numberOfEntries); // number of entries on this disk - writer.Write(numberOfEntries); // number of entries total - writer.Write(sizeOfCentralDirectory); - writer.Write(startOfCentralDirectory); + stream.Write(blockContents); } } - internal readonly struct ZipLocalFileHeader + internal readonly partial struct ZipLocalFileHeader { - public const uint 
DataDescriptorSignature = 0x08074B50; - public const uint SignatureConstant = 0x04034B50; - public const int OffsetToCrcFromHeaderStart = 14; - public const int OffsetToVersionFromHeaderStart = 4; - public const int OffsetToBitFlagFromHeaderStart = 6; + // The Zip File Format Specification references 0x08074B50 and 0x04034B50, these are big endian representations. + // ZIP files store values in little endian, so these are reversed. + public static ReadOnlySpan DataDescriptorSignatureConstantBytes => [0x50, 0x4B, 0x07, 0x08]; + public static ReadOnlySpan SignatureConstantBytes => [0x50, 0x4B, 0x03, 0x04]; public const int SizeOfLocalHeader = 30; - public static List GetExtraFields(BinaryReader reader) + public static List GetExtraFields(Stream stream) { // assumes that TrySkipBlock has already been called, so we don't have to validate twice - List result; + const int StackAllocationThreshold = 512; - const int OffsetToFilenameLength = 26; // from the point before the signature + List result; + int relativeFilenameLengthLocation = FieldLocations.FilenameLength - FieldLocations.FilenameLength; + int relativeExtraFieldLengthLocation = FieldLocations.ExtraFieldLength - FieldLocations.FilenameLength; + Span fixedHeaderBuffer = stackalloc byte[FieldLengths.FilenameLength + FieldLengths.ExtraFieldLength]; - reader.BaseStream.Seek(OffsetToFilenameLength, SeekOrigin.Current); + stream.Seek(FieldLocations.FilenameLength, SeekOrigin.Current); + stream.ReadExactly(fixedHeaderBuffer); - ushort filenameLength = reader.ReadUInt16(); - ushort extraFieldLength = reader.ReadUInt16(); + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(fixedHeaderBuffer[relativeFilenameLengthLocation..]); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(fixedHeaderBuffer[relativeExtraFieldLengthLocation..]); + byte[]? arrayPoolBuffer = extraFieldLength > StackAllocationThreshold ? 
System.Buffers.ArrayPool.Shared.Rent(extraFieldLength) : null; + Span extraFieldBuffer = extraFieldLength <= StackAllocationThreshold ? stackalloc byte[StackAllocationThreshold].Slice(0, extraFieldLength) : arrayPoolBuffer.AsSpan(0, extraFieldLength); - reader.BaseStream.Seek(filenameLength, SeekOrigin.Current); + try + { + stream.Seek(filenameLength, SeekOrigin.Current); + stream.ReadExactly(extraFieldBuffer); + result = ZipGenericExtraField.ParseExtraField(extraFieldBuffer); + Zip64ExtraField.RemoveZip64Blocks(result); - using (Stream str = new SubReadStream(reader.BaseStream, reader.BaseStream.Position, extraFieldLength)) + return result; + } + finally { - result = ZipGenericExtraField.ParseExtraField(str); + if (arrayPoolBuffer != null) + { + System.Buffers.ArrayPool.Shared.Return(arrayPoolBuffer); + } } - Zip64ExtraField.RemoveZip64Blocks(result); - - return result; } // will not throw end of stream exception - public static bool TrySkipBlock(BinaryReader reader) + public static bool TrySkipBlock(Stream stream) { - const int OffsetToFilenameLength = 22; // from the point after the signature + Span blockBytes = stackalloc byte[4]; + long currPosition = stream.Position; + int bytesRead = stream.Read(blockBytes); - if (reader.ReadUInt32() != SignatureConstant) + if (bytesRead != FieldLengths.Signature || !blockBytes.SequenceEqual(SignatureConstantBytes)) + { return false; + } - - if (reader.BaseStream.Length < reader.BaseStream.Position + OffsetToFilenameLength) + if (stream.Length < currPosition + FieldLocations.FilenameLength) + { return false; + } + + // Already read the signature, so make the filename length field location relative to that + stream.Seek(FieldLocations.FilenameLength - FieldLengths.Signature, SeekOrigin.Current); - reader.BaseStream.Seek(OffsetToFilenameLength, SeekOrigin.Current); + bytesRead = stream.Read(blockBytes); + if (bytesRead != FieldLengths.FilenameLength + FieldLengths.ExtraFieldLength) + { + return false; + } - ushort 
filenameLength = reader.ReadUInt16(); - ushort extraFieldLength = reader.ReadUInt16(); + int relativeFilenameLengthLocation = FieldLocations.FilenameLength - FieldLocations.FilenameLength; + int relativeExtraFieldLengthLocation = FieldLocations.ExtraFieldLength - FieldLocations.FilenameLength; + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(blockBytes[relativeFilenameLengthLocation..]); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(blockBytes[relativeExtraFieldLengthLocation..]); - if (reader.BaseStream.Length < reader.BaseStream.Position + filenameLength + extraFieldLength) + if (stream.Length < stream.Position + filenameLength + extraFieldLength) + { return false; + } - reader.BaseStream.Seek(filenameLength + extraFieldLength, SeekOrigin.Current); + stream.Seek(filenameLength + extraFieldLength, SeekOrigin.Current); return true; } } - internal struct ZipCentralDirectoryFileHeader + internal partial struct ZipCentralDirectoryFileHeader { - public const uint SignatureConstant = 0x02014B50; + // The Zip File Format Specification references 0x02014B50, this is a big endian representation. + // ZIP files store values in little endian, so this is reversed. 
+ public static ReadOnlySpan SignatureConstantBytes => [0x50, 0x4B, 0x01, 0x02]; + + // These are the minimum possible size, assuming the zip file comments variable section is empty + public const int BlockConstantSectionSize = 46; + public byte VersionMadeByCompatibility; public byte VersionMadeBySpecification; public ushort VersionNeededToExtract; @@ -482,86 +617,134 @@ internal struct ZipCentralDirectoryFileHeader // if saveExtraFieldsAndComments is false, FileComment and ExtraFields will be null // in either case, the zip64 extra field info will be incorporated into other fields - public static bool TryReadBlock(BinaryReader reader, bool saveExtraFieldsAndComments, out ZipCentralDirectoryFileHeader header) + public static bool TryReadBlock(ReadOnlySpan buffer, Stream furtherReads, bool saveExtraFieldsAndComments, out int bytesRead, out ZipCentralDirectoryFileHeader header) { + const int StackAllocationThreshold = 512; + header = default; + bytesRead = 0; + + // the buffer will always be large enough for at least the constant section to be verified + Debug.Assert(buffer.Length >= BlockConstantSectionSize); - if (reader.ReadUInt32() != SignatureConstant) + if (!buffer.StartsWith(SignatureConstantBytes)) + { return false; - header.VersionMadeBySpecification = reader.ReadByte(); - header.VersionMadeByCompatibility = reader.ReadByte(); - header.VersionNeededToExtract = reader.ReadUInt16(); - header.GeneralPurposeBitFlag = reader.ReadUInt16(); - header.CompressionMethod = reader.ReadUInt16(); - header.LastModified = reader.ReadUInt32(); - header.Crc32 = reader.ReadUInt32(); - uint compressedSizeSmall = reader.ReadUInt32(); - uint uncompressedSizeSmall = reader.ReadUInt32(); - header.FilenameLength = reader.ReadUInt16(); - header.ExtraFieldLength = reader.ReadUInt16(); - header.FileCommentLength = reader.ReadUInt16(); - ushort diskNumberStartSmall = reader.ReadUInt16(); - header.InternalFileAttributes = reader.ReadUInt16(); - header.ExternalFileAttributes = 
reader.ReadUInt32(); - uint relativeOffsetOfLocalHeaderSmall = reader.ReadUInt32(); - - header.Filename = reader.ReadBytes(header.FilenameLength); - - bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; - bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; - bool relativeOffsetInZip64 = relativeOffsetOfLocalHeaderSmall == ZipHelper.Mask32Bit; - bool diskNumberStartInZip64 = diskNumberStartSmall == ZipHelper.Mask16Bit; + } + + header.VersionMadeBySpecification = buffer[FieldLocations.VersionMadeBySpecification]; + header.VersionMadeByCompatibility = buffer[FieldLocations.VersionMadeByCompatibility]; + header.VersionNeededToExtract = BinaryPrimitives.ReadUInt16LittleEndian(buffer[FieldLocations.VersionNeededToExtract..]); + header.GeneralPurposeBitFlag = BinaryPrimitives.ReadUInt16LittleEndian(buffer[FieldLocations.GeneralPurposeBitFlags..]); + header.CompressionMethod = BinaryPrimitives.ReadUInt16LittleEndian(buffer[FieldLocations.CompressionMethod..]); + header.LastModified = BinaryPrimitives.ReadUInt32LittleEndian(buffer[FieldLocations.LastModified..]); + header.Crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer[FieldLocations.Crc32..]); + + uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(buffer[FieldLocations.CompressedSize..]); + uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(buffer[FieldLocations.UncompressedSize..]); + + header.FilenameLength = BinaryPrimitives.ReadUInt16LittleEndian(buffer[FieldLocations.FilenameLength..]); + header.ExtraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(buffer[FieldLocations.ExtraFieldLength..]); + header.FileCommentLength = BinaryPrimitives.ReadUInt16LittleEndian(buffer[FieldLocations.FileCommentLength..]); + + ushort diskNumberStartSmall = BinaryPrimitives.ReadUInt16LittleEndian(buffer[FieldLocations.DiskNumberStart..]); + + header.InternalFileAttributes = 
BinaryPrimitives.ReadUInt16LittleEndian(buffer[FieldLocations.InternalFileAttributes..]); + header.ExternalFileAttributes = BinaryPrimitives.ReadUInt32LittleEndian(buffer[FieldLocations.ExternalFileAttributes..]); + + uint relativeOffsetOfLocalHeaderSmall = BinaryPrimitives.ReadUInt32LittleEndian(buffer[FieldLocations.RelativeOffsetOfLocalHeader..]); + + // Assemble the dynamic header in a separate buffer. We can't guarantee that it's all in the input buffer, + // some additional data might need to come from the stream. + int dynamicHeaderSize = header.FilenameLength + header.ExtraFieldLength + header.FileCommentLength; + int remainingBufferLength = buffer.Length - FieldLocations.DynamicData; + int bytesToRead = dynamicHeaderSize - remainingBufferLength; + scoped ReadOnlySpan dynamicHeader; + byte[]? arrayPoolBuffer = null; Zip64ExtraField zip64; - long endExtraFields = reader.BaseStream.Position + header.ExtraFieldLength; - using (Stream str = new SubReadStream(reader.BaseStream, reader.BaseStream.Position, header.ExtraFieldLength)) + try { + // No need to read extra data from the stream, no need to allocate a new buffer. + if (bytesToRead <= 0) + { + dynamicHeader = buffer[FieldLocations.DynamicData..]; + } + // Data needs to come from two sources, and we must thus copy data into a single address space. + else + { + if (dynamicHeaderSize > StackAllocationThreshold) + { + arrayPoolBuffer = System.Buffers.ArrayPool.Shared.Rent(dynamicHeaderSize); + } + + Span collatedHeader = dynamicHeaderSize <= StackAllocationThreshold ? 
stackalloc byte[StackAllocationThreshold].Slice(0, dynamicHeaderSize) : arrayPoolBuffer.AsSpan(0, dynamicHeaderSize); + + buffer[FieldLocations.DynamicData..].CopyTo(collatedHeader); + int realBytesRead = furtherReads.Read(collatedHeader[remainingBufferLength..]); + + if (realBytesRead != bytesToRead) + { + return false; + } + dynamicHeader = collatedHeader; + } + + header.Filename = dynamicHeader[..header.FilenameLength].ToArray(); + + bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; + bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; + bool relativeOffsetInZip64 = relativeOffsetOfLocalHeaderSmall == ZipHelper.Mask32Bit; + bool diskNumberStartInZip64 = diskNumberStartSmall == ZipHelper.Mask16Bit; + + ReadOnlySpan zipExtraFields = dynamicHeader.Slice(header.FilenameLength, header.ExtraFieldLength); + + zip64 = default; if (saveExtraFieldsAndComments) { - header.ExtraFields = ZipGenericExtraField.ParseExtraField(str); + header.ExtraFields = ZipGenericExtraField.ParseExtraField(zipExtraFields); zip64 = Zip64ExtraField.GetAndRemoveZip64Block(header.ExtraFields, - uncompressedSizeInZip64, compressedSizeInZip64, - relativeOffsetInZip64, diskNumberStartInZip64); + uncompressedSizeInZip64, compressedSizeInZip64, + relativeOffsetInZip64, diskNumberStartInZip64); } else { header.ExtraFields = null; - zip64 = Zip64ExtraField.GetJustZip64Block(str, - uncompressedSizeInZip64, compressedSizeInZip64, - relativeOffsetInZip64, diskNumberStartInZip64); + zip64 = Zip64ExtraField.GetJustZip64Block(zipExtraFields, + uncompressedSizeInZip64, compressedSizeInZip64, + relativeOffsetInZip64, diskNumberStartInZip64); } - } - // There are zip files that have malformed ExtraField blocks in which GetJustZip64Block() silently bails out without reading all the way to the end - // of the ExtraField block. Thus we must force the stream's position to the proper place. 
- reader.BaseStream.AdvanceToPosition(endExtraFields); + header.FileComment = dynamicHeader.Slice(header.FilenameLength + header.ExtraFieldLength, header.FileCommentLength).ToArray(); + } + finally + { + if (arrayPoolBuffer != null) + { + System.Buffers.ArrayPool.Shared.Return(arrayPoolBuffer); + } + } - header.FileComment = reader.ReadBytes(header.FileCommentLength); + bytesRead = FieldLocations.DynamicData + dynamicHeaderSize; - header.UncompressedSize = zip64.UncompressedSize == null - ? uncompressedSizeSmall - : zip64.UncompressedSize.Value; - header.CompressedSize = zip64.CompressedSize == null - ? compressedSizeSmall - : zip64.CompressedSize.Value; - header.RelativeOffsetOfLocalHeader = zip64.LocalHeaderOffset == null - ? relativeOffsetOfLocalHeaderSmall - : zip64.LocalHeaderOffset.Value; - header.DiskNumberStart = zip64.StartDiskNumber == null - ? diskNumberStartSmall - : zip64.StartDiskNumber.Value; + header.UncompressedSize = zip64.UncompressedSize ?? uncompressedSizeSmall; + header.CompressedSize = zip64.CompressedSize ?? compressedSizeSmall; + header.RelativeOffsetOfLocalHeader = zip64.LocalHeaderOffset ?? relativeOffsetOfLocalHeaderSmall; + header.DiskNumberStart = zip64.StartDiskNumber ?? diskNumberStartSmall; return true; } } - internal struct ZipEndOfCentralDirectoryBlock + internal partial struct ZipEndOfCentralDirectoryBlock { - public const uint SignatureConstant = 0x06054B50; - public const int SignatureSize = sizeof(uint); + // The Zip File Format Specification references 0x06054B50, this is a big endian representation. + // ZIP files store values in little endian, so this is reversed. 
+ public static ReadOnlySpan SignatureConstantBytes => [0x50, 0x4B, 0x05, 0x06]; - // This is the minimum possible size, assuming the zip file comments variable section is empty + // These are the minimum possible size, assuming the zip file comments variable section is empty + private const int BlockConstantSectionSize = 22; public const int SizeOfBlockWithoutSignature = 18; // The end of central directory can have a variable size zip file comment at the end, but its max length can be 64K @@ -580,7 +763,7 @@ internal struct ZipEndOfCentralDirectoryBlock public static void WriteBlock(Stream stream, long numberOfEntries, long startOfCentralDirectory, long sizeOfCentralDirectory, byte[] archiveComment) { - BinaryWriter writer = new BinaryWriter(stream); + Span blockContents = stackalloc byte[BlockConstantSectionSize]; ushort numberOfEntriesTruncated = numberOfEntries > ushort.MaxValue ? ZipHelper.Mask16Bit : (ushort)numberOfEntries; @@ -589,38 +772,74 @@ public static void WriteBlock(Stream stream, long numberOfEntries, long startOfC uint sizeOfCentralDirectoryTruncated = sizeOfCentralDirectory > uint.MaxValue ? 
ZipHelper.Mask32Bit : (uint)sizeOfCentralDirectory; - writer.Write(SignatureConstant); - writer.Write((ushort)0); // number of this disk - writer.Write((ushort)0); // number of disk with start of CD - writer.Write(numberOfEntriesTruncated); // number of entries on this disk's cd - writer.Write(numberOfEntriesTruncated); // number of entries in entire CD - writer.Write(sizeOfCentralDirectoryTruncated); - writer.Write(startOfCentralDirectoryTruncated); + SignatureConstantBytes.CopyTo(blockContents[FieldLocations.Signature..]); + // number of this disk + BinaryPrimitives.WriteUInt16LittleEndian(blockContents[FieldLocations.NumberOfThisDisk..], 0); + // number of disk with start of CD + BinaryPrimitives.WriteUInt16LittleEndian(blockContents[FieldLocations.NumberOfTheDiskWithTheStartOfTheCentralDirectory..], 0); + // number of entries on this disk's cd + BinaryPrimitives.WriteUInt16LittleEndian(blockContents[FieldLocations.NumberOfEntriesInTheCentralDirectoryOnThisDisk..], numberOfEntriesTruncated); + // number of entries in entire cd + BinaryPrimitives.WriteUInt16LittleEndian(blockContents[FieldLocations.NumberOfEntriesInTheCentralDirectory..], numberOfEntriesTruncated); + BinaryPrimitives.WriteUInt32LittleEndian(blockContents[FieldLocations.SizeOfCentralDirectory..], sizeOfCentralDirectoryTruncated); + BinaryPrimitives.WriteUInt32LittleEndian(blockContents[FieldLocations.OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber..], startOfCentralDirectoryTruncated); // Should be valid because of how we read archiveComment in TryReadBlock: Debug.Assert(archiveComment.Length <= ZipFileCommentMaxLength); - writer.Write((ushort)archiveComment.Length); // zip file comment length + // zip file comment length + BinaryPrimitives.WriteUInt16LittleEndian(blockContents[FieldLocations.ArchiveCommentLength..], (ushort)archiveComment.Length); + + stream.Write(blockContents); if (archiveComment.Length > 0) - writer.Write(archiveComment); + { + stream.Write(archiveComment); + 
} } - public static bool TryReadBlock(BinaryReader reader, out ZipEndOfCentralDirectoryBlock eocdBlock) + public static bool TryReadBlock(Stream stream, out ZipEndOfCentralDirectoryBlock eocdBlock) { + Span blockContents = stackalloc byte[BlockConstantSectionSize]; + int bytesRead; + eocdBlock = default; - if (reader.ReadUInt32() != SignatureConstant) + bytesRead = stream.Read(blockContents); + + if (bytesRead < BlockConstantSectionSize) + { return false; + } - eocdBlock.Signature = SignatureConstant; - eocdBlock.NumberOfThisDisk = reader.ReadUInt16(); - eocdBlock.NumberOfTheDiskWithTheStartOfTheCentralDirectory = reader.ReadUInt16(); - eocdBlock.NumberOfEntriesInTheCentralDirectoryOnThisDisk = reader.ReadUInt16(); - eocdBlock.NumberOfEntriesInTheCentralDirectory = reader.ReadUInt16(); - eocdBlock.SizeOfCentralDirectory = reader.ReadUInt32(); - eocdBlock.OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber = reader.ReadUInt32(); + if (!blockContents.StartsWith(SignatureConstantBytes)) + { + return false; + } + + eocdBlock.Signature = BinaryPrimitives.ReadUInt32LittleEndian(blockContents[FieldLocations.Signature..]); + eocdBlock.NumberOfThisDisk = BinaryPrimitives.ReadUInt16LittleEndian(blockContents[FieldLocations.NumberOfThisDisk..]); + eocdBlock.NumberOfTheDiskWithTheStartOfTheCentralDirectory = BinaryPrimitives.ReadUInt16LittleEndian(blockContents[FieldLocations.NumberOfTheDiskWithTheStartOfTheCentralDirectory..]); + eocdBlock.NumberOfEntriesInTheCentralDirectoryOnThisDisk = BinaryPrimitives.ReadUInt16LittleEndian(blockContents[FieldLocations.NumberOfEntriesInTheCentralDirectoryOnThisDisk..]); + eocdBlock.NumberOfEntriesInTheCentralDirectory = BinaryPrimitives.ReadUInt16LittleEndian(blockContents[FieldLocations.NumberOfEntriesInTheCentralDirectory..]); + eocdBlock.SizeOfCentralDirectory = BinaryPrimitives.ReadUInt32LittleEndian(blockContents[FieldLocations.SizeOfCentralDirectory..]); + 
eocdBlock.OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber = + BinaryPrimitives.ReadUInt32LittleEndian(blockContents[FieldLocations.OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber..]); + + ushort commentLength = BinaryPrimitives.ReadUInt16LittleEndian(blockContents[FieldLocations.ArchiveCommentLength..]); - ushort commentLength = reader.ReadUInt16(); - eocdBlock.ArchiveComment = reader.ReadBytes(commentLength); + if (stream.Position + commentLength > stream.Length) + { + return false; + } + + if (commentLength == 0) + { + eocdBlock.ArchiveComment = Array.Empty(); + } + else + { + eocdBlock.ArchiveComment = new byte[commentLength]; + stream.ReadExactly(eocdBlock.ArchiveComment); + } return true; } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipHelper.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipHelper.cs index 00c611e90f6e58..4cc4b4feef21d8 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipHelper.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipHelper.cs @@ -3,7 +3,6 @@ using System.Buffers; using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; using System.Text; namespace System.IO.Compression @@ -13,7 +12,7 @@ internal static class ZipHelper internal const uint Mask32Bit = 0xFFFFFFFF; internal const ushort Mask16Bit = 0xFFFF; - private const int BackwardsSeekingBufferSize = 32; + private const int BackwardsSeekingBufferSize = 4096; internal const int ValidZipDate_YearMin = 1980; internal const int ValidZipDate_YearMax = 2107; @@ -36,13 +35,14 @@ internal static Encoding GetEncoding(string text) /// /// Reads exactly bytesToRead out of stream, unless it is out of bytes /// - internal static void ReadBytes(Stream stream, byte[] buffer, int bytesToRead) + internal static int ReadBytes(Stream stream, Span buffer, int bytesToRead) { - int bytesRead = stream.ReadAtLeast(buffer.AsSpan(0, bytesToRead), bytesToRead, 
throwOnEndOfStream: false); + int bytesRead = stream.ReadAtLeast(buffer, bytesToRead, throwOnEndOfStream: false); if (bytesRead < bytesToRead) { throw new IOException(SR.UnexpectedEndOfStream); } + return bytesRead; } // will silently return InvalidDateIndicator if the uint is not a valid Dos DateTime @@ -103,91 +103,93 @@ internal static uint DateTimeToDosTime(DateTime dateTime) // assumes maxBytesToRead is positive, ensures to not read beyond the provided max number of bytes, // if the signature is found then returns true and positions stream at first byte of signature // if the signature is not found, returns false - internal static bool SeekBackwardsToSignature(Stream stream, uint signatureToFind, int maxBytesToRead) + internal static bool SeekBackwardsToSignature(Stream stream, ReadOnlySpan signatureToFind, int maxBytesToRead) { - Debug.Assert(signatureToFind != 0); + Debug.Assert(signatureToFind.Length != 0); Debug.Assert(maxBytesToRead > 0); + // This method reads blocks of BackwardsSeekingBufferSize bytes, searching each block for signatureToFind. + // A simple LastIndexOf(signatureToFind) doesn't account for cases where signatureToFind is split, starting in + // one block and ending in another. + // To account for this, we read blocks of BackwardsSeekingBufferSize bytes, but seek backwards by + // [BackwardsSeekingBufferSize - signatureToFind.Length] bytes. This guarantees that signatureToFind will not be + // split between two consecutive blocks, at the cost of reading [signatureToFind.Length] duplicate bytes in each iteration. 
int bufferPointer = 0; - uint currentSignature = 0; - byte[] buffer = new byte[BackwardsSeekingBufferSize]; + byte[] buffer = ArrayPool.Shared.Rent(BackwardsSeekingBufferSize); + Span bufferSpan = buffer.AsSpan(0, BackwardsSeekingBufferSize); - bool outOfBytes = false; - bool signatureFound = false; - - int bytesRead = 0; - while (!signatureFound && !outOfBytes && bytesRead <= maxBytesToRead) + try { - outOfBytes = SeekBackwardsAndRead(stream, buffer, out bufferPointer); + bool outOfBytes = false; + bool signatureFound = false; - Debug.Assert(bufferPointer < buffer.Length); + int totalBytesRead = 0; + int duplicateBytesRead = 0; - while (bufferPointer >= 0 && !signatureFound) + while (!signatureFound && !outOfBytes && totalBytesRead <= maxBytesToRead) { - currentSignature = (currentSignature << 8) | ((uint)buffer[bufferPointer]); - if (currentSignature == signatureToFind) + int bytesRead = SeekBackwardsAndRead(stream, bufferSpan, signatureToFind.Length); + + outOfBytes = bytesRead < bufferSpan.Length; + if (bytesRead < bufferSpan.Length) { - signatureFound = true; + bufferSpan = bufferSpan.Slice(0, bytesRead); } - else + + bufferPointer = bufferSpan.LastIndexOf(signatureToFind); + Debug.Assert(bufferPointer < bufferSpan.Length); + + totalBytesRead += (bufferSpan.Length - duplicateBytesRead); + + if (bufferPointer != -1) { - bufferPointer--; + signatureFound = true; + break; } - } - bytesRead += buffer.Length; - } + duplicateBytesRead = signatureToFind.Length; + } - if (!signatureFound) - { - return false; + if (!signatureFound) + { + return false; + } + else + { + stream.Seek(bufferPointer, SeekOrigin.Current); + return true; + } } - else + finally { - stream.Seek(bufferPointer, SeekOrigin.Current); - return true; + ArrayPool.Shared.Return(buffer); } } - // Skip to a further position downstream (without relying on the stream being seekable) - internal static void AdvanceToPosition(this Stream stream, long position) + // Returns the number of bytes actually read. 
+ // Allows successive buffers to overlap by a number of bytes. This handles cases where + // the value being searched for straddles buffers (i.e. where the first buffer ends with the + // first X bytes being searched for, and the second buffer begins with the remaining bytes.) + private static int SeekBackwardsAndRead(Stream stream, Span buffer, int overlap) { - long numBytesLeft = position - stream.Position; - Debug.Assert(numBytesLeft >= 0); - if (numBytesLeft > 0) - { - byte[] buffer = new byte[64]; - do - { - int numBytesToSkip = (int)Math.Min(numBytesLeft, buffer.Length); - int numBytesActuallySkipped = stream.Read(buffer, 0, numBytesToSkip); - if (numBytesActuallySkipped == 0) - throw new IOException(SR.UnexpectedEndOfStream); - numBytesLeft -= numBytesActuallySkipped; - } while (numBytesLeft > 0); - } - } + int bytesRead; - // Returns true if we are out of bytes - private static bool SeekBackwardsAndRead(Stream stream, byte[] buffer, out int bufferPointer) - { if (stream.Position >= buffer.Length) { + Debug.Assert(overlap <= buffer.Length); + stream.Seek(-(buffer.Length - overlap), SeekOrigin.Current); + bytesRead = ReadBytes(stream, buffer, buffer.Length); stream.Seek(-buffer.Length, SeekOrigin.Current); - ReadBytes(stream, buffer, buffer.Length); - stream.Seek(-buffer.Length, SeekOrigin.Current); - bufferPointer = buffer.Length - 1; - return false; } else { int bytesToRead = (int)stream.Position; stream.Seek(0, SeekOrigin.Begin); - ReadBytes(stream, buffer, bytesToRead); + bytesRead = ReadBytes(stream, buffer, bytesToRead); stream.Seek(0, SeekOrigin.Begin); - bufferPointer = bytesToRead - 1; - return true; } + + return bytesRead; } // Converts the specified string into bytes using the optional specified encoding. 
diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs index a8019e0f6f0e62..a3130aac5a65ff 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs @@ -273,6 +273,40 @@ public static async Task EnsureDisposeIsCalledAsExpectedOnTheUnderlyingStream(bo Assert.Equal(expectedDisposeCalls, disposeCallCountingStream.NumberOfDisposeCalls); } + [Fact] + public static void CanReadLargeCentralDirectoryHeader() + { + // A 19-character filename will result in a 65-byte central directory header. 64 of these will make the central directory + // read process stretch into two 4KB buffers. + int count = 64; + string entryNameFormat = "example/file-{0:00}.dat"; + + using (MemoryStream archiveStream = new MemoryStream()) + { + using (ZipArchive creationArchive = new ZipArchive(archiveStream, ZipArchiveMode.Create, true)) + { + for (int i = 0; i < count; i++) + { + creationArchive.CreateEntry(string.Format(entryNameFormat, i)); + } + } + + archiveStream.Seek(0, SeekOrigin.Begin); + + using (ZipArchive readArchive = new ZipArchive(archiveStream, ZipArchiveMode.Read)) + { + Assert.Equal(count, readArchive.Entries.Count); + + for (int i = 0; i < count; i++) + { + Assert.Equal(string.Format(entryNameFormat, i), readArchive.Entries[i].FullName); + Assert.Equal(0, readArchive.Entries[i].CompressedLength); + Assert.Equal(0, readArchive.Entries[i].Length); + } + } + } + } + private class DisposeCallCountingStream : MemoryStream { public int NumberOfDisposeCalls { get; private set; }