From 2f5e4af4a1b8a00182ed56488dfdf7518d918778 Mon Sep 17 00:00:00 2001 From: Josh Wolfe <thejoshwolfe@gmail.com> Date: Fri, 8 Nov 2024 08:43:23 -0500 Subject: [PATCH 1/5] add support for Info-ZIP timestamp extra field --- README.md | 25 ++++++++++++++++++++----- index.js | 45 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 054f7bb..129ec52 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ After UTF-8 encoding, `metadataPath` must be at most `0xffff` bytes in length. compress: true, compressionLevel: 6, forceZip64Format: false, + forceDosTimestamp: false, fileComment: "", // or a UTF-8 Buffer } ``` @@ -86,6 +87,12 @@ If `forceZip64Format` is `true`, yazl will use ZIP64 format in this entry's Data and Central Directory Record even if not needed (this may be useful for testing.). Otherwise, yazl will use ZIP64 format where necessary. +By default, yazl includes the Info-ZIP "universal timestamp" extended field (`0x5455` aka `"UT"`) to encode the `mtime`. +The Info-ZIP timestamp is a more modern encoding for the mtime and is generally recommended. +Set `forceDosTimestamp` to `true` to revert to the pre-3.3.0 yazl behvior, disabling this extended field. +The DOS encoding is always included regardless of this option, because it is required in the fixed-size metadata of every archive entry. +The benefits of the Info-ZIP encoding include: timezone is specified as always UTC, which is better for cloud environments and any teams working in multiple timezones; capable of encoding "time 0", the unix epoch in 1970, which is better for some package managers; the precision is 1-second accurate rather than rounded to the nearest even second. The disadvantages of including this field are: it requires an extra 9 bytes of metadata per entry added to the archive. + If `fileComment` is a `string`, it will be encoded with UTF-8. 
If `fileComment` is a `Buffer`, it should be a UTF-8 encoded string. In UTF-8, `fileComment` must be at most `0xffff` bytes in length. @@ -126,12 +133,13 @@ See `addFile()` for the meaning of the `metadataPath` parameter. compress: true, compressionLevel: 6, forceZip64Format: false, + forceDosTimestamp: false, fileComment: "", // or a UTF-8 Buffer size: 12345, // example value } ``` -See `addFile()` for the meaning of `mtime`, `mode`, `compress`, `compressionLevel`, `forceZip64Format`, and `fileComment`. +See `addFile()` for the meaning of `mtime`, `mode`, `compress`, `compressionLevel`, `forceZip64Format`, `forceDosTimestamp`, and `fileComment`. If `size` is given, it will be checked against the actual number of bytes in the `readStream`, and an error will be emitted if there is a mismatch. See the documentation on `calculatedTotalSizeCallback` for why the `size` option exists. @@ -162,11 +170,12 @@ See `addFile()` for info about the `metadataPath` parameter. compress: true, compressionLevel: 6, forceZip64Format: false, + forceDosTimestamp: false, fileComment: "", // or a UTF-8 Buffer } ``` -See `addFile()` for the meaning of `mtime`, `mode`, `compress`, `compressionLevel`, `forceZip64Format`, and `fileComment`. +See `addFile()` for the meaning of `mtime`, `mode`, `compress`, `compressionLevel`, `forceZip64Format`, `forceDosTimestamp`, and `fileComment`. This method has the unique property that General Purpose Bit `3` will not be used in the Local File Header. This doesn't matter for unzip implementations that conform to the Zip File Spec. @@ -210,10 +219,11 @@ If `metadataPath` does not end with a `"/"`, a `"/"` will be appended. { mtime: new Date(), mode: 040775, + forceDosTimestamp: false, } ``` -See `addFile()` for the meaning of `mtime` and `mode`. +See `addFile()` for the meaning of `mtime`, `mode`, and `forceDosTimestamp`. 
#### end([options], [calculatedTotalSizeCallback]) @@ -285,8 +295,13 @@ In certain versions of node, you cannot use both `.on('data')` and `.pipe()` suc ### dateToDosDateTime(jsDate) -`jsDate` is a `Date` instance. -Returns `{date: date, time: time}`, where `date` and `time` are unsigned 16-bit integers. +*Deprecated* since yazl 3.3.0. + +This function only remains exported in order to maintain compatibility with older versions of yazl. +It will be removed in yazl 4.0.0 unless someone asks for it to remain supported. +If you ever have a use case for calling this function directly please +[open an issue against yazl](https://github.com/thejoshwolfe/yazl/issues/new) +requesting that this function be properly supported again. ## Regarding ZIP64 Support diff --git a/index.js b/index.js index 9c9320e..386eb7c 100644 --- a/index.js +++ b/index.js @@ -279,6 +279,9 @@ function calculateTotalSize(self) { } centralDirectorySize += CENTRAL_DIRECTORY_RECORD_FIXED_SIZE + entry.utf8FileName.length + entry.fileComment.length; + if (!entry.forceDosTimestamp) { + centralDirectorySize += INFO_ZIP_UNIVERSAL_TIMESTAMP_EXTRA_FIELD_SIZE; + } if (useZip64Format) { centralDirectorySize += ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD_SIZE; } @@ -427,6 +430,7 @@ function Entry(metadataPath, isDirectory, options) { this.isDirectory = isDirectory; this.state = Entry.WAITING_FOR_METADATA; this.setLastModDate(options.mtime != null ? 
options.mtime : new Date()); + this.forceDosTimestamp = !!options.forceDosTimestamp; if (options.mode != null) { this.setFileAttributesMode(options.mode); } else { @@ -469,6 +473,7 @@ Entry.READY_TO_PUMP_FILE_DATA = 1; Entry.FILE_DATA_IN_PROGRESS = 2; Entry.FILE_DATA_DONE = 3; Entry.prototype.setLastModDate = function(date) { + this.mtime = date; var dosDateTime = dateToDosDateTime(date); this.lastModFileTime = dosDateTime.time; this.lastModFileDate = dosDateTime.date; @@ -575,17 +580,39 @@ Entry.prototype.getDataDescriptor = function() { } }; var CENTRAL_DIRECTORY_RECORD_FIXED_SIZE = 46; +var INFO_ZIP_UNIVERSAL_TIMESTAMP_EXTRA_FIELD_SIZE = 9; var ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD_SIZE = 28; Entry.prototype.getCentralDirectoryRecord = function() { var fixedSizeStuff = bufferAlloc(CENTRAL_DIRECTORY_RECORD_FIXED_SIZE); var generalPurposeBitFlag = FILE_NAME_IS_UTF8; if (!this.crcAndFileSizeKnown) generalPurposeBitFlag |= UNKNOWN_CRC32_AND_FILE_SIZES; + var izutefBuffer = EMPTY_BUFFER; + if (!this.forceDosTimestamp) { + // Here is one specification for this: https://commons.apache.org/proper/commons-compress/apidocs/org/apache/commons/compress/archivers/zip/X5455_ExtendedTimestamp.html + // See also the Info-ZIP source code unix/unix.c:set_extra_field() and zipfile.c:ef_scan_ut_time(). + izutefBuffer = bufferAlloc(INFO_ZIP_UNIVERSAL_TIMESTAMP_EXTRA_FIELD_SIZE); + // 0x5455 Short tag for this extra block type ("UT") + izutefBuffer.writeUInt16LE(0x5455, 0); + // TSize Short total data size for this block + izutefBuffer.writeUInt16LE(INFO_ZIP_UNIVERSAL_TIMESTAMP_EXTRA_FIELD_SIZE - 4, 2); + // See Info-ZIP source code zip.h for these constant values: + var EB_UT_FL_MTIME = (1 << 0); + var EB_UT_FL_ATIME = (1 << 1); + // Note that we set the atime flag despite not providing the atime field. + // The central directory version of this extra field is specified to never contain the atime field even when the flag is set. 
+ // We set it to match the Info-ZIP behavior in order to minimize incompatibility with other file readers that may have rigid input expectations. + // Flags Byte info bits + izutefBuffer.writeUInt8(EB_UT_FL_MTIME | EB_UT_FL_ATIME, 4); + // (ModTime) Long time of last modification (UTC/GMT) + izutefBuffer.writeUInt32LE(Math.floor(this.mtime.getTime() / 1000), 5); + } + var normalCompressedSize = this.compressedSize; var normalUncompressedSize = this.uncompressedSize; var normalRelativeOffsetOfLocalHeader = this.relativeOffsetOfLocalHeader; - var versionNeededToExtract; - var zeiefBuffer; + var versionNeededToExtract = VERSION_NEEDED_TO_EXTRACT_UTF8; + var zeiefBuffer = EMPTY_BUFFER; if (this.useZip64Format()) { normalCompressedSize = 0xffffffff; normalUncompressedSize = 0xffffffff; @@ -606,9 +633,6 @@ Entry.prototype.getCentralDirectoryRecord = function() { writeUInt64LE(zeiefBuffer, this.relativeOffsetOfLocalHeader, 20); // Disk Start Number 4 bytes Number of the disk on which this file starts // (omit) - } else { - versionNeededToExtract = VERSION_NEEDED_TO_EXTRACT_UTF8; - zeiefBuffer = EMPTY_BUFFER; } // central file header signature 4 bytes (0x02014b50) @@ -634,7 +658,7 @@ Entry.prototype.getCentralDirectoryRecord = function() { // file name length 2 bytes fixedSizeStuff.writeUInt16LE(this.utf8FileName.length, 28); // extra field length 2 bytes - fixedSizeStuff.writeUInt16LE(zeiefBuffer.length, 30); + fixedSizeStuff.writeUInt16LE(izutefBuffer.length + zeiefBuffer.length, 30); // file comment length 2 bytes fixedSizeStuff.writeUInt16LE(this.fileComment.length, 32); // disk number start 2 bytes @@ -651,6 +675,7 @@ Entry.prototype.getCentralDirectoryRecord = function() { // file name (variable size) this.utf8FileName, // extra field (variable size) + izutefBuffer, zeiefBuffer, // file comment (variable size) this.fileComment, @@ -662,7 +687,15 @@ Entry.prototype.getCompressionMethod = function() { return this.compressionLevel === 0 ? 
NO_COMPRESSION : DEFLATE_COMPRESSION; }; +// These are intentionally computed in the current system timezone +// to match how the DOS encoding operates in this library. +var minDosDate = new Date(1980, 0, 1); +var maxDosDate = new Date(2107, 11, 31, 23, 59, 58); function dateToDosDateTime(jsDate) { + // Clamp out of bounds timestamps. + if (jsDate < minDosDate) jsDate = minDosDate; + else if (jsDate > maxDosDate) jsDate = maxDosDate; + var date = 0; date |= jsDate.getDate() & 0x1f; // 1-31 date |= ((jsDate.getMonth() + 1) & 0xf) << 5; // 0-11, 1-12 From 12e44f43beb614ac6b2c2843d2e984324fa73f17 Mon Sep 17 00:00:00 2001 From: Josh Wolfe <thejoshwolfe@gmail.com> Date: Fri, 8 Nov 2024 09:14:29 -0500 Subject: [PATCH 2/5] tests --- package-lock.json | 8 ++++---- package.json | 2 +- test/test.js | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index cc3c989..e45178d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,7 @@ "buffer-crc32": "^1.0.0" }, "devDependencies": { - "yauzl": "^3.1.3" + "yauzl": "^3.2.0" } }, "node_modules/buffer-crc32": { @@ -30,9 +30,9 @@ "dev": true }, "node_modules/yauzl": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-3.1.3.tgz", - "integrity": "sha512-JCCdmlJJWv7L0q/KylOekyRaUrdEoUxWkWVcgorosTROCFWiS9p2NNPE9Yb91ak7b1N5SxAZEliWpspbZccivw==", + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-3.2.0.tgz", + "integrity": "sha512-Ow9nuGZE+qp1u4JIPvg+uCiUr7xGQWdff7JQSk5VGYTAZMDe2q8lxJ10ygv10qmSj031Ty/6FNJpLO4o1Sgc+w==", "dev": true, "dependencies": { "buffer-crc32": "~0.2.3", diff --git a/package.json b/package.json index 2f5ffba..4fe77fd 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,7 @@ "buffer-crc32": "^1.0.0" }, "devDependencies": { - "yauzl": "^3.1.3" + "yauzl": "^3.2.0" }, "files": [ "index.js" diff --git a/test/test.js b/test/test.js index 662f88f..2b54aa5 
100644 --- a/test/test.js +++ b/test/test.js @@ -87,6 +87,46 @@ var BufferList = require("./bl-minimal.js"); }); })(); +// Test: +// * specifying mtime outside the bounds of dos formta +// * forceDosTimestamp +(function() { + var options = { + mtime: new Date(0), // unix epoch + mode: 0o100664, + compress: false, + }; + var zipfile = new yazl.ZipFile(); + zipfile.addFile(__filename, "modern.txt", options); + options.forceDosTimestamp = true; + zipfile.addFile(__filename, "dos.txt", options); + zipfile.end(function(calculatedTotalSize) { + if (calculatedTotalSize === -1) throw new Error("calculatedTotalSize should be known"); + zipfile.outputStream.pipe(new BufferList(function(err, data) { + if (err) throw err; + if (data.length !== calculatedTotalSize) throw new Error("calculatedTotalSize prediction is wrong. " + calculatedTotalSize + " !== " + data.length); + yauzl.fromBuffer(data, function(err, zipfile) { + if (err) throw err; + zipfile.on("entry", function(entry) { + switch (entry.fileName) { + case "modern.txt": + if (entry.getLastModDate().getTime() !== 0) throw new Error("expected unix epoch to be encodable. found: " + entry.getLastModDate()); + break; + case "dos.txt": + var year = entry.getLastModDate().getFullYear(); + if (!(1979 <= year && year <= 1981)) throw new Error("expected dos format year to be clamped to 1980ish. found: " + entry.getLastModDate()); + break; + default: throw new Error(entry.fileName); + } + }); + zipfile.on("end", function() { + console.log("timestamp encodings: pass"); + }); + }); + })); + }); +})(); + // Test: // * forceZip64Format for various subsets of entries. // * specifying size for addReadStream. 
From cc021074d99cde4ec6baee3b5c26c2409af59a78 Mon Sep 17 00:00:00 2001 From: Josh Wolfe <thejoshwolfe@gmail.com> Date: Fri, 8 Nov 2024 09:16:26 -0500 Subject: [PATCH 3/5] readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 129ec52..e90e5a1 100644 --- a/README.md +++ b/README.md @@ -87,9 +87,9 @@ If `forceZip64Format` is `true`, yazl will use ZIP64 format in this entry's Data and Central Directory Record even if not needed (this may be useful for testing.). Otherwise, yazl will use ZIP64 format where necessary. -By default, yazl includes the Info-ZIP "universal timestamp" extended field (`0x5455` aka `"UT"`) to encode the `mtime`. +Since yazl version 3.3.0, yazl includes the Info-ZIP "universal timestamp" extended field (`0x5455` aka `"UT"`) to encode the `mtime`. The Info-ZIP timestamp is a more modern encoding for the mtime and is generally recommended. -Set `forceDosTimestamp` to `true` to revert to the pre-3.3.0 yazl behvior, disabling this extended field. +Set `forceDosTimestamp` to `true` to revert to the pre-3.3.0 behavior, disabling this extended field. The DOS encoding is always included regardless of this option, because it is required in the fixed-size metadata of every archive entry. The benefits of the Info-ZIP encoding include: timezone is specified as always UTC, which is better for cloud environments and any teams working in multiple timezones; capable of encoding "time 0", the unix epoch in 1970, which is better for some package managers; the precision is 1-second accurate rather than rounded to the nearest even second. The disadvantages of including this field are: it requires an extra 9 bytes of metadata per entry added to the archive. 
From 2adfd86787bf2673fc93110ec6f01d5ca0b40f6c Mon Sep 17 00:00:00 2001 From: Josh Wolfe <thejoshwolfe@gmail.com> Date: Fri, 8 Nov 2024 09:27:36 -0500 Subject: [PATCH 4/5] clamp unix timestamps too --- README.md | 2 ++ index.js | 5 ++++- test/test.js | 19 +++++++++++++------ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e90e5a1..ce4209b 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,8 @@ Set `forceDosTimestamp` to `true` to revert to the pre-3.3.0 behvior, disabling The DOS encoding is always included regardless of this option, because it is required in the fixed-size metadata of every archive entry. The benefits of the Info-ZIP encoding include: timezone is specified as always UTC, which is better for cloud environments and any teams working in multiple timezones; capable of encoding "time 0", the unix epoch in 1970, which is better for some package managers; the precision is 1-second accurate rather than rounded to the nearest even second. The disadvantages of including this field are: it requires an extra 9 bytes of metadata per entry added to the archive. +When attempting to encode an `mtime` outside the supported range for either format, such as the year 1970 in the DOS format or the year 2039 for the modern format, the time will be clamped to the closest supported time. + If `fileComment` is a `string`, it will be encoded with UTF-8. If `fileComment` is a `Buffer`, it should be a UTF-8 encoded string. In UTF-8, `fileComment` must be at most `0xffff` bytes in length. 
diff --git a/index.js b/index.js index 386eb7c..d6d9268 100644 --- a/index.js +++ b/index.js @@ -605,7 +605,10 @@ Entry.prototype.getCentralDirectoryRecord = function() { // Flags Byte info bits izutefBuffer.writeUInt8(EB_UT_FL_MTIME | EB_UT_FL_ATIME, 4); // (ModTime) Long time of last modification (UTC/GMT) - izutefBuffer.writeUInt32LE(Math.floor(this.mtime.getTime() / 1000), 5); + var timestamp = Math.floor(this.mtime.getTime() / 1000); + if (timestamp < -0x80000000) timestamp = -0x80000000; // 1901-12-13T20:45:52.000Z + if (timestamp > 0x7fffffff) timestamp = 0x7fffffff; // 2038-01-19T03:14:07.000Z + izutefBuffer.writeInt32LE(timestamp, 5); // signed write: pre-1970 mtimes are negative and writeUInt32LE would throw } var normalCompressedSize = this.compressedSize; diff --git a/test/test.js b/test/test.js index 2b54aa5..5595c7a 100644 --- a/test/test.js +++ b/test/test.js @@ -88,8 +88,9 @@ var BufferList = require("./bl-minimal.js"); })(); // Test: -// * specifying mtime outside the bounds of dos formta -// * forceDosTimestamp +// * specifying mtime outside the bounds of dos format but in bounds for unix format. +// * forceDosTimestamp, and verifying the lower clamping for dos format. +// * specifying mtime after 2038, and verifying the clamping for unix format. (function() { var options = { mtime: new Date(0), // unix epoch @@ -97,9 +98,12 @@ var BufferList = require("./bl-minimal.js"); compress: false, }; var zipfile = new yazl.ZipFile(); - zipfile.addFile(__filename, "modern.txt", options); + zipfile.addFile(__filename, "modern-1970.txt", options); options.forceDosTimestamp = true; - zipfile.addFile(__filename, "dos.txt", options); + zipfile.addFile(__filename, "dos-1970.txt", options); + options.forceDosTimestamp = false; + options.mtime = new Date(2080, 1, 1); // year 2080 is beyond the unix range. 
+ zipfile.addFile(__filename, "2080.txt", options); zipfile.end(function(calculatedTotalSize) { if (calculatedTotalSize === -1) throw new Error("calculatedTotalSize should be known"); zipfile.outputStream.pipe(new BufferList(function(err, data) { @@ -109,13 +113,16 @@ var BufferList = require("./bl-minimal.js"); if (err) throw err; zipfile.on("entry", function(entry) { switch (entry.fileName) { - case "modern.txt": + case "modern-1970.txt": if (entry.getLastModDate().getTime() !== 0) throw new Error("expected unix epoch to be encodable. found: " + entry.getLastModDate()); break; - case "dos.txt": + case "dos-1970.txt": var year = entry.getLastModDate().getFullYear(); if (!(1979 <= year && year <= 1981)) throw new Error("expected dos format year to be clamped to 1980ish. found: " + entry.getLastModDate()); break; + case "2080.txt": + if (entry.getLastModDate().getUTCFullYear() !== 2038) throw new Error("expected timestamp clamped down to year 2038. found: " + entry.getLastModDate()); + break; default: throw new Error(entry.fileName); } }); From 900966792cf401065fd907e0142c2b7f13ce23a5 Mon Sep 17 00:00:00 2001 From: Josh Wolfe <thejoshwolfe@gmail.com> Date: Fri, 8 Nov 2024 09:29:19 -0500 Subject: [PATCH 5/5] typo --- index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.js b/index.js index d6d9268..e3a68fd 100644 --- a/index.js +++ b/index.js @@ -601,7 +601,7 @@ Entry.prototype.getCentralDirectoryRecord = function() { var EB_UT_FL_ATIME = (1 << 1); // Note that we set the atime flag despite not providing the atime field. // The central directory version of this extra field is specified to never contain the atime field even when the flag is set. - // We set it to match the Info-ZIP behavior in order to minimize incompatibility with other file readers that may have rigid input expectations. + // We set it to match the Info-ZIP behavior in order to minimize incompatibility with zip file readers that may have rigid input expectations. 
// Flags Byte info bits izutefBuffer.writeUInt8(EB_UT_FL_MTIME | EB_UT_FL_ATIME, 4); // (ModTime) Long time of last modification (UTC/GMT)