Skip to content
This repository has been archived by the owner on Nov 19, 2024. It is now read-only.

zip: support non-utf8 filenames #149

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/arc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ var (
selectiveCompression bool
implicitTopLevelFolder bool
continueOnError bool
filenameEncoding string
)

func init() {
Expand All @@ -30,6 +31,7 @@ func init() {
flag.BoolVar(&selectiveCompression, "smart", true, "Only compress files which are not already compressed (zip only)")
flag.BoolVar(&implicitTopLevelFolder, "folder-safe", true, "If an archive does not have a single top-level folder, create one implicitly")
flag.BoolVar(&continueOnError, "allow-errors", true, "Log errors and continue processing")
flag.StringVar(&filenameEncoding, "filename-encoding", "", "Specify encoding if filename was not utf8 encoded")
}

func main() {
Expand Down Expand Up @@ -229,6 +231,7 @@ func getFormat(subcommand string) (interface{}, error) {
v.SelectiveCompression = selectiveCompression
v.ImplicitTopLevelFolder = implicitTopLevelFolder
v.ContinueOnError = continueOnError
v.FilenameEncoding = filenameEncoding
case *archiver.Gz:
v.CompressionLevel = compressionLevel
case *archiver.Bz2:
Expand Down
67 changes: 67 additions & 0 deletions encoding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package archiver

import (
"errors"
"strings"

"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
)

// encodings maps a charset name to the encoding used to decode
// file names stored in that charset.
//
// Every key must be lowercase: GetEncoding lowercases the requested
// name before looking it up here, so a key containing an uppercase
// letter could never be matched.
var encodings = map[string]encoding.Encoding{
	"ibm866":            charmap.CodePage866,
	"iso8859_2":         charmap.ISO8859_2,
	"iso8859_3":         charmap.ISO8859_3,
	"iso8859_4":         charmap.ISO8859_4,
	"iso8859_5":         charmap.ISO8859_5,
	"iso8859_6":         charmap.ISO8859_6,
	"iso8859_7":         charmap.ISO8859_7,
	"iso8859_8":         charmap.ISO8859_8,
	"iso8859_8i":        charmap.ISO8859_8I,
	"iso8859_10":        charmap.ISO8859_10,
	"iso8859_13":        charmap.ISO8859_13,
	"iso8859_14":        charmap.ISO8859_14,
	"iso8859_15":        charmap.ISO8859_15,
	"iso8859_16":        charmap.ISO8859_16,
	"koi8r":             charmap.KOI8R,
	"koi8u":             charmap.KOI8U,
	"macintosh":         charmap.Macintosh,
	"windows874":        charmap.Windows874,
	"windows1250":       charmap.Windows1250,
	"windows1251":       charmap.Windows1251,
	"windows1252":       charmap.Windows1252,
	"windows1253":       charmap.Windows1253,
	"windows1254":       charmap.Windows1254,
	"windows1255":       charmap.Windows1255,
	"windows1256":       charmap.Windows1256,
	"windows1257":       charmap.Windows1257,
	"windows1258":       charmap.Windows1258,
	"macintoshcyrillic": charmap.MacintoshCyrillic,
	"gbk":               simplifiedchinese.GBK,
	"gb18030":           simplifiedchinese.GB18030,
	"big5":              traditionalchinese.Big5,
	"eucjp":             japanese.EUCJP,
	"iso2022jp":         japanese.ISO2022JP,
	"shiftjis":          japanese.ShiftJIS,
	"euckr":             korean.EUCKR,
	"utf16be":           unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
	"utf16le":           unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
}

// GetEncoding looks up charset in the package's encodings table.
// The name is lowercased before the lookup. The boolean result
// reports whether an entry was found; when it is false the returned
// encoding is nil.
func GetEncoding(charset string) (encoding.Encoding, bool) {
	name := strings.ToLower(charset)
	if found, present := encodings[name]; present {
		return found, true
	}
	return nil, false
}

// Decode converts in from the named charset to UTF-8.
//
// charset is resolved with GetEncoding. When it resolves, the result
// is whatever the encoding's decoder returns from Bytes: the converted
// bytes and any decoding error. When it does not resolve, Decode
// returns nil and an error that names the requested charset.
func Decode(in []byte, charset string) ([]byte, error) {
	enc, ok := GetEncoding(charset)
	if !ok {
		// Name the charset so the caller can tell which one was rejected.
		return nil, errors.New("charset not found: " + charset)
	}
	return enc.NewDecoder().Bytes(in)
}
20 changes: 19 additions & 1 deletion zip.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ type Zip struct {
// the operation will continue on remaining files.
ContinueOnError bool

// FilenameEncoding names the charset used to decode a file name
// to UTF-8 when zip.FileHeader.NonUTF8 is true; empty disables decoding.
FilenameEncoding string

zw *zip.Writer
zr *zip.Reader
ridx int
Expand Down Expand Up @@ -191,7 +195,8 @@ func (z *Zip) extractFile(f File, to string) error {
return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header)
}

to = filepath.Join(to, header.Name)
filename := z.DecodeFileName(header)
to = filepath.Join(to, filename)

// if a directory, no content; simply make the directory and return
if f.IsDir() {
Expand Down Expand Up @@ -444,6 +449,8 @@ func (z *Zip) Walk(archive string, walkFn WalkFunc) error {
return fmt.Errorf("opening %s: %v", zf.Name, err)
}

zf.FileHeader.Name = z.DecodeFileName(zf.FileHeader)

err = walkFn(File{
FileInfo: zf.FileInfo(),
Header: zf.FileHeader,
Expand Down Expand Up @@ -483,6 +490,8 @@ func (z *Zip) Extract(source, target, destination string) error {
return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header)
}

zfh.Name = z.DecodeFileName(zfh)

// importantly, cleaning the path strips the trailing slash,
// which must be appended to folders within the archive
name := path.Clean(zfh.Name)
Expand Down Expand Up @@ -539,6 +548,15 @@ func (*Zip) Match(file io.ReadSeeker) (bool, error) {
return bytes.Equal(buf, []byte("PK\x03\x04")), nil
}

// DecodeFileName returns the name stored in header, converted with
// z.FilenameEncoding when the header is flagged NonUTF8 and an
// encoding has been configured. In every other case, including a
// failed conversion, the name is returned exactly as stored.
func (z *Zip) DecodeFileName(header zip.FileHeader) string {
	if !header.NonUTF8 || z.FilenameEncoding == "" {
		return header.Name
	}
	decoded, err := Decode([]byte(header.Name), z.FilenameEncoding)
	if err != nil {
		// Best effort: keep the raw name rather than fail the operation.
		return header.Name
	}
	return string(decoded)
}

// String returns the constant "zip".
func (*Zip) String() string {
	return "zip"
}

// NewZip returns a new, default instance ready to be customized and used.
Expand Down