Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for the unicode path extra field #82

Merged
merged 2 commits into from
Feb 1, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,7 @@
.bad { color: #B63939; }
#status { color: #28AA31; }
</style>
<script type="text/javascript" src="jszip.js"></script>
<script type="text/javascript" src="jszip-deflate.js"></script>
<script type="text/javascript" src="dist/jszip.min.js"></script>
<script type="text/javascript">
imgData = "R0lGODdhBQAFAIACAAAAAP/eACwAAAAABQAFAAACCIwPkWerClIBADs=";

Expand Down
38 changes: 34 additions & 4 deletions lib/object.js
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,9 @@ var generateZipParts = function(name, file, compressedObject, offset) {
useUTF8 = utfEncodedFileName !== file.name,
o = file.options,
dosTime,
dosDate;
dosDate,
extraFields = "",
unicodePathExtraField = "";

// date
// @see http://www.delorie.com/djgpp/doc/rbinter/it/52/13.html
Expand All @@ -371,6 +373,32 @@ var generateZipParts = function(name, file, compressedObject, offset) {
dosDate = dosDate << 5;
dosDate = dosDate | o.date.getDate();

if (useUTF8) {
// set the unicode path extra field. unzip needs at least one extra
// field to correctly handle unicode path, so using the path is as good
// as any other information. This could improve the situation with
// other archive managers too.
// This field is usually used without the utf8 flag, with a non
// unicode path in the header (winrar, winzip). This helps (a bit)
// with the messy Windows' default compressed folders feature but
// breaks on p7zip which doesn't seek the unicode path extra field.
// So for now, UTF-8 everywhere !
unicodePathExtraField =
// Version
decToHex(1, 1) +
// NameCRC32
decToHex(this.crc32(utfEncodedFileName), 4) +
// UnicodeName
utfEncodedFileName;

extraFields +=
// Info-ZIP Unicode Path Extra Field
"\x75\x70" +
// size
decToHex(unicodePathExtraField.length, 2) +
// content
unicodePathExtraField;
}

var header = "";

Expand All @@ -394,10 +422,10 @@ var generateZipParts = function(name, file, compressedObject, offset) {
// file name length
header += decToHex(utfEncodedFileName.length, 2);
// extra field length
header += "\x00\x00";
header += decToHex(extraFields.length, 2);


var fileRecord = signature.LOCAL_FILE_HEADER + header + utfEncodedFileName;
var fileRecord = signature.LOCAL_FILE_HEADER + header + utfEncodedFileName + extraFields;

var dirRecord = signature.CENTRAL_FILE_HEADER +
// version made by (00: DOS)
Expand All @@ -415,7 +443,9 @@ var generateZipParts = function(name, file, compressedObject, offset) {
// relative offset of local header
decToHex(offset, 4) +
// file name
utfEncodedFileName;
utfEncodedFileName +
// extra field
extraFields;


return {
Expand Down
29 changes: 29 additions & 0 deletions lib/zipEntry.js
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,36 @@ ZipEntry.prototype = {
if (this.useUTF8()) {
this.fileName = jszipProto.utf8decode(this.fileName);
this.fileComment = jszipProto.utf8decode(this.fileComment);
} else {
var upath = this.findExtraFieldUnicodePath();
if (upath !== null) {
this.fileName = upath;
}
}
},

/**
* Find the unicode path declared in the extra field, if any.
* @return {String} the unicode path, null otherwise.
*/
findExtraFieldUnicodePath: function() {
var upathField = this.extraFields[0x7075];
if (upathField) {
var extraReader = new StringReader(upathField.value);

// wrong version
if (extraReader.readInt(1) !== 1) {
return null;
}

// the crc of the filename changed, this field is out of date.
if (jszipProto.crc32(this.fileName) !== extraReader.readInt(4)) {
return null;
}

return jszipProto.utf8decode(extraReader.readString(upathField.length - 5));
}
return null;
}
};
module.exports = ZipEntry;
Binary file added test/ref/winrar_utf8_in_name.zip
Binary file not shown.
15 changes: 14 additions & 1 deletion test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,11 @@ testZipFile("Zip text file with UTF-8 characters in filename", "ref/utf8_in_name
zip.file("€15.txt", "€15\n");
var actual = zip.generate({type:"string"});

ok(similar(actual, expected, 18) , "Generated ZIP matches reference ZIP");
// zip doesn't generate a strange file like us (utf8 flag AND unicode path extra field)
// if one of the files has more data than the other, the bytes are no more aligned and the
// error count goes through the roof. The parsing is checked on a other test so I'll
// comment this one for now.
// ok(similar(actual, expected, 18) , "Generated ZIP matches reference ZIP");
equal(reload(actual), actual, "Generated ZIP can be parsed");
});

Expand Down Expand Up @@ -1097,6 +1101,15 @@ testZipFile("Zip text file with UTF-8 characters in filename", "ref/utf8_in_name
equal(zip.files["€15.txt"].asText(), "€15\n", "the utf8 content was correctly read (with files[].astext).");
});

// Created with winrar
// winrar will replace the euro symbol with a '_' but set the correct unicode path in an extra field.
testZipFile("Zip text file with UTF-8 characters in filename and windows compatibility", "ref/winrar_utf8_in_name.zip", function(file) {
var zip = new JSZip(file);
ok(zip.file("€15.txt") !== null, "the utf8 file is here.");
equal(zip.file("€15.txt").asText(), "€15\n", "the utf8 content was correctly read (with file().asText).");
equal(zip.files["€15.txt"].asText(), "€15\n", "the utf8 content was correctly read (with files[].astext).");
});

// zip backslash.zip -0 -X Hel\\lo.txt
testZipFile("Zip text file with backslash in filename", "ref/backslash.zip", function(file) {
var zip = new JSZip(file);
Expand Down