From 0cf78d9ae575ce8e08607069f135da0317a84711 Mon Sep 17 00:00:00 2001
From: Gan Keyu
Date: Sun, 19 Nov 2023 14:04:34 +0800
Subject: [PATCH 1/5] Check LH offset when writing LH. Fix #94899

ZipArchiveEntry didn't set ZIP64 in local headers for small files whose offsets are >4GB.
---
 .../src/System/IO/Compression/ZipArchiveEntry.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
index 3d415f8e3690f..6c9b40d45d54d 100644
--- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
+++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
@@ -964,7 +964,7 @@ private void WriteCrcAndSizesInLocalHeader(bool zip64HeaderUsed)
             long finalPosition = _archive.ArchiveStream.Position;
             BinaryWriter writer = new BinaryWriter(_archive.ArchiveStream);
 
-            bool zip64Needed = SizesTooLarge()
+            bool zip64Needed = SizesTooLarge() || (_offsetOfLocalHeader > uint.MaxValue)
 #if DEBUG_FORCE_ZIP64
                 || _archive._forceZip64
 #endif
From b007ab6a7373fc5618d6d10c760f33e825356ba6 Mon Sep 17 00:00:00 2001
From: Gan Keyu
Date: Sun, 19 Nov 2023 15:19:39 +0800
Subject: [PATCH 2/5] added test

---
 .../tests/ZipArchive/zip_LargeFiles.cs | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
index d240a176b2b7a..212c4c182177a 100644
--- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
+++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
@@ -3,6 +3,7 @@
 
 using System.Collections.Generic;
 using System.Linq;
+using System.Reflection;
 using Xunit;
 
 namespace System.IO.Compression.Tests
@@ -44,5 +45,81 @@ public static void UnzipOver4GBZipFile()
                 tempDir.Delete(recursive: true);
             }
         }
+
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsSpeedOptimized), nameof(PlatformDetection.Is64BitProcess))] // don't run it on slower runtimes
+        [OuterLoop]
+        public static void CheckZIP64VersionIsSet_ForSmallFilesAfterBigFiles()
+        {
+            // issue #94899
+
+            byte[] largeBuffer = GC.AllocateUninitializedArray<byte>(1_000_000_000); // 1 GB
+            byte[] smallBuffer = GC.AllocateUninitializedArray<byte>(1000);
+
+            string zipArchivePath = Path.Combine(Path.GetTempPath(), "over4GB.zip");
+            DirectoryInfo tempDir = Directory.CreateDirectory(Path.Combine(Path.GetTempPath(), "over4GB"));
+
+            try
+            {
+                using FileStream fs = File.Open(zipArchivePath, FileMode.Create, FileAccess.ReadWrite);
+                const string LargeFileName = "largefile";
+                const string SmallFileName = "smallfile";
+                const uint ZipLocalFileHeader_OffsetToVersionFromHeaderStart = 4;
+                const ushort Zip64Version = 45;
+
+                {
+                    // Create
+
+                    using var archive = new ZipArchive(fs, ZipArchiveMode.Create, true);
+                    ZipArchiveEntry file = archive.CreateEntry(LargeFileName, CompressionLevel.NoCompression);
+
+                    using (Stream stream = file.Open())
+                    {
+                        // Write 5GB of data
+
+                        stream.Write(largeBuffer);
+                        stream.Write(largeBuffer);
+                        stream.Write(largeBuffer);
+                        stream.Write(largeBuffer);
+                        stream.Write(largeBuffer);
+                    }
+
+                    file = archive.CreateEntry(SmallFileName, CompressionLevel.NoCompression);
+
+                    using (Stream stream = file.Open())
+                    {
+                        stream.Write(smallBuffer);
+                    }
+                }
+
+                fs.Position = 0;
+
+                {
+                    // Validate
+
+                    using var reader = new BinaryReader(fs);
+                    using var archive = new ZipArchive(fs, ZipArchiveMode.Read);
+                    FieldInfo offsetOfLHField = typeof(ZipArchiveEntry).GetField("_offsetOfLocalHeader", BindingFlags.NonPublic | BindingFlags.Instance);
+
+                    if (offsetOfLHField is null || offsetOfLHField.FieldType != typeof(long))
+                    {
+                        Assert.Fail("Cannot find the private field _offsetOfLocalHeader in ZipArchiveEntry, or its type is not long. The code may have changed after this test was written.");
+                    }
+
+                    foreach (ZipArchiveEntry entry in archive.Entries)
+                    {
+                        fs.Position = (long)offsetOfLHField.GetValue(entry) + ZipLocalFileHeader_OffsetToVersionFromHeaderStart;
+                        ushort versionNeeded = reader.ReadUInt16();
+
+                        Assert.True(versionNeeded >= Zip64Version, "ZIP64 version is not set for files with LH at >4GB offset.");
+                    }
+                }
+            }
+            finally
+            {
+                File.Delete(zipArchivePath);
+
+                tempDir.Delete(recursive: true);
+            }
+        }
     }
 }

From 91f9de6f4f47e6e84500a6d65ba7b5c26c625c25 Mon Sep 17 00:00:00 2001
From: Gan Keyu
Date: Sun, 19 Nov 2023 15:56:36 +0800
Subject: [PATCH 3/5] added comment

---
 .../System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
index 212c4c182177a..7990e8d92dc1e 100644
--- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
+++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
@@ -47,7 +47,7 @@ public static void UnzipOver4GBZipFile()
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsSpeedOptimized), nameof(PlatformDetection.Is64BitProcess))] // don't run it on slower runtimes
-        [OuterLoop]
+        [OuterLoop("It requires 5 GB of free disk space")]
         public static void CheckZIP64VersionIsSet_ForSmallFilesAfterBigFiles()
         {
             // issue #94899
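The magic numbers in the test above come from the ZIP specification rather than from the runtime: a local file header starts with a 4-byte signature, the 2-byte "version needed to extract" field follows at offset 4, and the ZIP64 format requires that field to be at least 45 (version 4.5). That is why the test seeks to the entry's local header offset plus 4 and asserts on the value 45. A small reference sketch of those fields follows; the type and constant names are invented here for illustration and are not part of System.IO.Compression.

    // Reference sketch of the local file header fields the test reads
    // (per the ZIP APPNOTE "Local file header" layout); names are illustrative only.
    internal static class LocalFileHeaderLayout
    {
        public const uint Signature = 0x04034b50;              // "PK\x03\x04", bytes 0-3
        public const int VersionNeededToExtractOffset = 4;     // 2-byte field right after the signature
        public const ushort Zip64VersionNeededToExtract = 45;  // ZIP64 requires version 4.5 or later
    }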
From 430f92c81a4d35def3b8bb4268bd6c811fa189cc Mon Sep 17 00:00:00 2001
From: Gan Keyu
Date: Tue, 28 Nov 2023 22:40:40 +0800
Subject: [PATCH 4/5] move conditions into methods

---
 .../src/System/IO/Compression/ZipArchiveEntry.cs | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
index 6c9b40d45d54d..bc005bcb4573d 100644
--- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
+++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
@@ -490,7 +490,7 @@ internal void WriteCentralDirectoryFileHeader()
             }
 
-            if (_offsetOfLocalHeader > uint.MaxValue
+            if (OffsetTooLarge()
 #if DEBUG_FORCE_ZIP64
                 || _archive._forceZip64
 #endif
                 )
@@ -799,6 +799,10 @@ private bool IsOpenable(bool needToUncompress, bool needToLoadIntoMemory, out st
 
         private bool SizesTooLarge() => _compressedSize > uint.MaxValue || _uncompressedSize > uint.MaxValue;
 
+        private bool OffsetTooLarge() => _offsetOfLocalHeader > uint.MaxValue;
+
+        private bool ShouldUseZIP64() => SizesTooLarge() || OffsetTooLarge();
+
         // return value is true if we allocated an extra field for 64 bit headers, un/compressed size
         private bool WriteLocalFileHeader(bool isEmptyFile)
         {
@@ -813,6 +817,9 @@ private bool WriteLocalFileHeader(bool isEmptyFile)
             bool zip64Used = false;
             uint compressedSizeTruncated, uncompressedSizeTruncated;
 
+            // save offset
+            _offsetOfLocalHeader = writer.BaseStream.Position;
+
             // if we already know that we have an empty file don't worry about anything, just do a straight shot of the header
             if (isEmptyFile)
             {
@@ -840,7 +847,7 @@ private bool WriteLocalFileHeader(bool isEmptyFile)
             {
                 // We are in seekable mode so we will not need to write a data descriptor
                 _generalPurposeBitFlag &= ~BitFlagValues.DataDescriptor;
-                if (SizesTooLarge()
+                if (ShouldUseZIP64()
 #if DEBUG_FORCE_ZIP64
                     || (_archive._forceZip64 && _archive.Mode == ZipArchiveMode.Update)
 #endif
@@ -865,9 +872,6 @@ private bool WriteLocalFileHeader(bool isEmptyFile)
                 }
             }
 
-            // save offset
-            _offsetOfLocalHeader = writer.BaseStream.Position;
-
             // calculate extra field. if zip64 stuff + original extraField aren't going to fit, dump the original extraField, because this is more important
             int bigExtraFieldLength = (zip64Used ? zip64ExtraField.TotalSize : 0) + (_lhUnknownExtraFields != null ? ZipGenericExtraField.TotalSize(_lhUnknownExtraFields) : 0);
@@ -964,7 +968,7 @@ private void WriteCrcAndSizesInLocalHeader(bool zip64HeaderUsed)
             long finalPosition = _archive.ArchiveStream.Position;
             BinaryWriter writer = new BinaryWriter(_archive.ArchiveStream);
 
-            bool zip64Needed = SizesTooLarge() || (_offsetOfLocalHeader > uint.MaxValue)
+            bool zip64Needed = ShouldUseZIP64()
 #if DEBUG_FORCE_ZIP64
                 || _archive._forceZip64
 #endif
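Restated outside the diff, the rule that the two helpers above encode is simply "use ZIP64 whenever a value no longer fits in the legacy 32-bit header fields". A minimal standalone sketch of that decision follows; the method and parameter names are invented for illustration and are not the runtime's internals.

    // Sketch of the ZIP64 decision consolidated by this patch: an entry needs the
    // ZIP64 format when its compressed size, uncompressed size, or local header
    // offset exceeds what a 32-bit header field can hold. The offset condition is
    // the case issue #94899 was about.
    static bool ShouldUseZip64(long compressedSize, long uncompressedSize, long offsetOfLocalHeader)
    {
        bool sizesTooLarge = compressedSize > uint.MaxValue || uncompressedSize > uint.MaxValue;
        bool offsetTooLarge = offsetOfLocalHeader > uint.MaxValue;
        return sizesTooLarge || offsetTooLarge;
    }

This is also why the patch moves the "_offsetOfLocalHeader = writer.BaseStream.Position;" assignment to the top of WriteLocalFileHeader: the offset has to be captured before the local header's ZIP64 decision is made.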
From 53a4fc69764493f8f2aa4b2f4009b180b46cb1e6 Mon Sep 17 00:00:00 2001
From: Gan Keyu
Date: Tue, 16 Apr 2024 14:08:41 +0800
Subject: [PATCH 5/5] improved large zip test

---
 .../tests/ZipArchive/zip_LargeFiles.cs | 42 ++++++++++++-------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
index 7990e8d92dc1e..8401e172bf7c1 100644
--- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
+++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_LargeFiles.cs
@@ -46,17 +46,23 @@ public static void UnzipOver4GBZipFile()
         }
     }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsSpeedOptimized), nameof(PlatformDetection.Is64BitProcess))] // don't run it on slower runtimes
-        [OuterLoop("It requires 5 GB of free disk space")]
-        public static void CheckZIP64VersionIsSet_ForSmallFilesAfterBigFiles()
+        private static void FillWithHardToCompressData(byte[] buffer)
+        {
+            Random.Shared.NextBytes(buffer);
+        }
+
+        [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsSpeedOptimized), nameof(PlatformDetection.Is64BitProcess))] // don't run it on slower runtimes
+        [OuterLoop("It requires 5~6 GB of free disk space and a lot of CPU time for compressed tests")]
+        [InlineData(false)]
+        [InlineData(true)]
+        public static void CheckZIP64VersionIsSet_ForSmallFilesAfterBigFiles(bool isCompressed)
         {
             // issue #94899
 
-            byte[] largeBuffer = GC.AllocateUninitializedArray<byte>(1_000_000_000); // 1 GB
             byte[] smallBuffer = GC.AllocateUninitializedArray<byte>(1000);
+            byte[] largeBuffer = GC.AllocateUninitializedArray<byte>(1_000_000_000); // ~1 GB
 
             string zipArchivePath = Path.Combine(Path.GetTempPath(), "over4GB.zip");
-            DirectoryInfo tempDir = Directory.CreateDirectory(Path.Combine(Path.GetTempPath(), "over4GB"));
 
             try
             {
@@ -69,21 +75,29 @@ public static void CheckZIP64VersionIsSet_ForSmallFilesAfterBigFiles()
                 {
                     // Create
 
+                    var compressLevel = isCompressed ? CompressionLevel.Optimal : CompressionLevel.NoCompression;
+
                     using var archive = new ZipArchive(fs, ZipArchiveMode.Create, true);
-                    ZipArchiveEntry file = archive.CreateEntry(LargeFileName, CompressionLevel.NoCompression);
+                    ZipArchiveEntry file = archive.CreateEntry(LargeFileName, compressLevel);
 
                     using (Stream stream = file.Open())
                     {
                         // Write 5GB of data
 
-                        stream.Write(largeBuffer);
-                        stream.Write(largeBuffer);
-                        stream.Write(largeBuffer);
-                        stream.Write(largeBuffer);
-                        stream.Write(largeBuffer);
+                        const int HOW_MANY_GB_TO_WRITE = 5;
+
+                        for (var i = 0; i < HOW_MANY_GB_TO_WRITE; i++)
+                        {
+                            if (isCompressed)
+                            {
+                                FillWithHardToCompressData(largeBuffer);
+                            }
+
+                            stream.Write(largeBuffer);
+                        }
                     }
 
-                    file = archive.CreateEntry(SmallFileName, CompressionLevel.NoCompression);
+                    file = archive.CreateEntry(SmallFileName, compressLevel);
 
                     using (Stream stream = file.Open())
                     {
@@ -110,15 +124,13 @@ public static void CheckZIP64VersionIsSet_ForSmallFilesAfterBigFiles()
                         fs.Position = (long)offsetOfLHField.GetValue(entry) + ZipLocalFileHeader_OffsetToVersionFromHeaderStart;
                         ushort versionNeeded = reader.ReadUInt16();
 
-                        Assert.True(versionNeeded >= Zip64Version, "ZIP64 version is not set for files with LH at >4GB offset.");
+                        Assert.True(versionNeeded == Zip64Version, "Version is not ZIP64 for files with Local Header at >4GB offset.");
                     }
                 }
             }
             finally
             {
                 File.Delete(zipArchivePath);
-
-                tempDir.Delete(recursive: true);
             }
         }
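A closing note on FillWithHardToCompressData: the compressed variant of the test only drives the small entry's local header past the 4 GB offset if the large entry stays large after Deflate, which a buffer of repeated bytes would not. The short self-contained sketch below, independent of the test and with invented class and method names, shows the effect that motivates filling the buffer with random data.

    using System;
    using System.IO;
    using System.IO.Compression;

    class DeflateRatioDemo
    {
        // Deflate-compress a buffer into memory and report the compressed size.
        static long DeflatedLength(byte[] data)
        {
            using var output = new MemoryStream();
            using (var deflate = new DeflateStream(output, CompressionLevel.Optimal, leaveOpen: true))
            {
                deflate.Write(data, 0, data.Length);
            }
            return output.Length;
        }

        static void Main()
        {
            byte[] zeros = new byte[1_000_000];   // highly compressible
            byte[] random = new byte[1_000_000];
            Random.Shared.NextBytes(random);      // effectively incompressible

            Console.WriteLine($"zeros  -> {DeflatedLength(zeros):N0} bytes after Deflate");
            Console.WriteLine($"random -> {DeflatedLength(random):N0} bytes after Deflate");
            // Expected: the zero buffer collapses to roughly a kilobyte, while the random
            // buffer stays near its original size, so only hard-to-compress data keeps the
            // compressed archive past the 4 GB local-header offset the test needs.
        }
    }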