Skip to content

Commit

Permalink
Basic support for reading 7z archives
Browse files Browse the repository at this point in the history
Close mholt#53
  • Loading branch information
mholt committed Nov 18, 2022
1 parent 21f40ff commit c207f20
Show file tree
Hide file tree
Showing 5 changed files with 381 additions and 6 deletions.
119 changes: 119 additions & 0 deletions 7z.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
package archiver

import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
"log"
"path"
"strings"

"github.com/bodgit/sevenzip"
)

func init() {
RegisterFormat(SevenZip{})

// looks like the sevenzip package registers a lot of decompressors for us automatically:
// https://github.com/bodgit/sevenzip/blob/46c5197162c784318b98b9a3f80289a9aa1ca51a/register.go#L38-L61
}

// SevenZip is the 7z archive format. Within this file it supports
// matching and extraction (reading) only; its fields configure how
// Extract behaves.
type SevenZip struct {
// If true, an error returned by the file handler for a file
// within an archive will be logged and extraction will
// continue with the remaining files instead of aborting.
ContinueOnError bool

// The password, if dealing with an encrypted archive.
// It is handed to the sevenzip reader when the archive is opened.
Password string
}

// Name returns ".7z", the string that Match looks for in file names.
func (z SevenZip) Name() string {
	const extension = ".7z"
	return extension
}

// Match reports whether the input looks like a 7z archive, judged two ways:
// by name (the lowercased filename contains z.Name()) and by stream (the
// leading bytes of stream equal sevenZipHeader). If reading from stream
// fails, the error is returned together with the by-name result gathered
// so far.
func (z SevenZip) Match(filename string, stream io.Reader) (MatchResult, error) {
	// filename check: case-insensitive search for the format's extension
	result := MatchResult{
		ByName: strings.Contains(strings.ToLower(filename), z.Name()),
	}

	// stream check: compare the leading bytes against the 7z signature
	header, err := readAtMost(stream, len(sevenZipHeader))
	if err != nil {
		return result, err
	}
	result.ByStream = bytes.Equal(header, sevenZipHeader)

	return result, nil
}

// Extract extracts files from z, implementing the Extractor interface. Uniquely, however,
// sourceArchive must be an io.ReaderAt and io.Seeker, which are oddly disjoint interfaces
// from io.Reader which is what the method signature requires. We chose this signature for
// the interface because we figure you can Read() from anything you can ReadAt() or Seek()
// with. Due to the nature of the 7z archive format, if sourceArchive is not an io.Seeker
// and io.ReaderAt, an error is returned.
//
// The archive is opened with z.Password. Entries are visited in the order the
// archive lists them; ctx is checked before each entry and its error is returned
// if it has been cancelled. An entry is passed to handleFile only if
// fileIsIncluded accepts it for pathsInArchive and it does not fall under a
// directory that was previously skipped.
//
// If handleFile returns fs.SkipDir, the entry's directory (or the entry itself,
// if it is a directory) is added to the skip list. Any other error aborts the
// extraction, unless z.ContinueOnError is set, in which case the error is
// logged and extraction continues with the next entry.
func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error {
	sra, ok := sourceArchive.(seekReaderAt)
	if !ok {
		return errors.New("input type must be an io.ReaderAt and io.Seeker because of 7z format constraints")
	}

	size, err := streamSizeBySeeking(sra)
	if err != nil {
		return fmt.Errorf("determining stream size: %w", err)
	}

	zr, err := sevenzip.NewReaderWithPassword(sra, size, z.Password)
	if err != nil {
		return err
	}

	// important to initialize to non-nil, empty value due to how fileIsIncluded works
	skipDirs := skipList{}

	for i, f := range zr.File {
		// Per-iteration copy for the Open closure below: before Go 1.22 the
		// range variable is shared by all iterations, so a File retained by
		// the handler would otherwise open whichever entry the loop reached last.
		f := f

		if err := ctx.Err(); err != nil {
			return err // honor context cancellation
		}

		if !fileIsIncluded(pathsInArchive, f.Name) {
			continue
		}
		if fileIsIncluded(skipDirs, f.Name) {
			continue
		}

		file := File{
			FileInfo:      f.FileInfo(),
			Header:        f.FileHeader,
			NameInArchive: f.Name,
			Open:          func() (io.ReadCloser, error) { return f.Open() },
		}

		err := handleFile(ctx, file)
		if errors.Is(err, fs.SkipDir) {
			// if a directory, skip this path; if a file, skip the folder path
			dirPath := f.Name
			if !file.IsDir() {
				dirPath = path.Dir(f.Name) + "/"
			}
			skipDirs.add(dirPath)
		} else if err != nil {
			if z.ContinueOnError {
				log.Printf("[ERROR] %s: %v", f.Name, err)
				continue
			}
			return fmt.Errorf("handling file %d: %s: %w", i, f.Name, err)
		}
	}

	return nil
}

// sevenZipHeader is the 6-byte signature that Match expects at the very
// start of a 7z stream: the ASCII characters "7z" followed by four fixed bytes.
// https://py7zr.readthedocs.io/en/latest/archive_format.html#signature
var sevenZipHeader = []byte{'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Introducing **Archiver 4.0** - a cross-platform, multi-format archive utility an
- Walk or traverse into archive files
- Extract only specific files from archives
- Insert (append) into .tar files
- Read from password-protected 7-Zip files
- Numerous archive and compression formats supported
- Extensible (add more formats just by registering them)
- Cross-platform, static binary
Expand All @@ -45,6 +46,7 @@ Introducing **Archiver 4.0** - a cross-platform, multi-format archive utility an
- .zip
- .tar (including any compressed variants like .tar.gz)
- .rar (read-only)
- .7z (read-only)

Tar files can optionally be compressed using any compression format.

Expand Down
14 changes: 12 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,25 @@ go 1.18
require (
github.com/andybalholm/brotli v1.0.4
github.com/dsnet/compress v0.0.1
github.com/klauspost/compress v1.15.5
github.com/klauspost/compress v1.15.9
github.com/klauspost/pgzip v1.2.5
github.com/nwaples/rardecode/v2 v2.0.0-beta.2
github.com/therootcompany/xz v1.0.1
github.com/ulikunitz/xz v0.5.10
)

require (
github.com/bodgit/sevenzip v1.3.0
github.com/golang/snappy v0.0.4
github.com/pierrec/lz4/v4 v4.1.14
github.com/pierrec/lz4/v4 v4.1.15
golang.org/x/text v0.3.7
)

require (
github.com/bodgit/plumbing v1.2.0 // indirect
github.com/bodgit/windows v1.0.0 // indirect
github.com/connesc/cipherio v0.2.1 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
)
Loading

0 comments on commit c207f20

Please sign in to comment.