Skip to content

Commit

Permalink
Finishing touches
Browse files Browse the repository at this point in the history
Added FastOptions as an alternative to DefaultOptions
A few performance improvements
A few bug fixes
  • Loading branch information
CalebQ42 committed Dec 28, 2023
1 parent d9132ab commit 17d45ee
Show file tree
Hide file tree
Showing 9 changed files with 310 additions and 63 deletions.
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,27 @@ Currently has support for reading squashfs files and extracting files and folder
Special thanks to <https://dr-emann.github.io/squashfs/> for some VERY important information in an easy to understand format.
Thanks also to [distri's squashfs library](https://github.com/distr1/distri/tree/master/internal/squashfs) as I referenced it to figure some things out (and double check others).

## FUSE

As of `v1.0`, FUSE capabilities have been moved to [a separate library](https://github.com/CalebQ42/squashfuse).

## Limitations

* No Xattr parsing. This is simply because I haven't done any research on it and how to apply these in a pure go way.
* No Xattr parsing.
* Socket files are not extracted.
* From my research, it seems like a socket file would be useless if it could be created. They are still exposed when fuse mounted.
* From my research, it seems like a socket file would be useless if it could be created.
* Fifo files are ignored on `darwin`

## Issues

* Significantly slower than `unsquashfs` when extracting folders (about 5 ~ 7 times slower on a ~100MB archive using zstd compression)
* Significantly slower than `unsquashfs` when extracting folders
* This seems to be related to the above along with the general optimization of `unsquashfs` and its compression libraries.
* The larger the file's tree, the slower the extraction will be. Arch Linux's Live USB's airootfs.sfs takes ~35x longer for a full extraction.
* Times seem to be largely dependent on file tree size and compression type.
* My main testing image (~100MB) using Zstd takes about 6x longer.
* An Arch Linux airootfs image (~780MB) using XZ compression with LZMA filters takes about 32x longer.
* A Tensorflow docker image (~3.3GB) using Zstd takes about 12x longer.

Note: These numbers are using `FastOptions()`. `DefaultOptions()` takes about 2x longer.

## Recommendations on Usage

Expand Down
33 changes: 26 additions & 7 deletions extraction_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,47 @@ package squashfs
import (
"io"
"io/fs"
"os"
"runtime"

"github.com/CalebQ42/squashfs/internal/routinemanager"
)

type ExtractionOptions struct {
manager *routinemanager.Manager
LogOutput io.Writer //Where the verbose log should write. Defaults to os.Stdout.
LogOutput io.Writer //Where the verbose log should write.
DereferenceSymlink bool //Replace symlinks with the target file.
UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink.
Verbose bool //Prints extra info to log on an error.
IgnorePerm bool //Ignore file's permissions and instead use Perm.
Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777.
SimultaneousFiles uint16 //Number of files to process in parallel. Defaults to 10.
ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Defaults to 10.
SimultaneousFiles uint16 //Number of files to process in parallel. Default set based on runtime.NumCPU().
ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Default set based on runtime.NumCPU().
}

// DefaultOptions returns the default extraction options.
//
// Parallelism is derived from half of runtime.NumCPU(). With four or fewer
// such cores, one file is processed at a time using that many extraction
// goroutines. With more, four goroutines are used per file and the remaining
// cores are used to process files in parallel. Both SimultaneousFiles and
// ExtractionRoutines are always at least 1, even on a single-CPU machine.
// Perm is set to 0777.
func DefaultOptions() *ExtractionOptions {
	cores := uint16(runtime.NumCPU() / 2)
	if cores == 0 {
		// NumCPU() == 1 would otherwise yield zero extraction goroutines.
		cores = 1
	}
	files, routines := uint16(1), cores
	if cores > 4 {
		files = cores - 4
		routines = 4
	}
	return &ExtractionOptions{
		Perm:               0777,
		SimultaneousFiles:  files,
		ExtractionRoutines: routines,
	}
}

// Less limited default options. Can run up to 2x faster than DefaultOptions.
// Tends to use all available CPU resources.
func FastOptions() *ExtractionOptions {
return &ExtractionOptions{
LogOutput: os.Stdout,
Perm: 0777,
SimultaneousFiles: 10,
ExtractionRoutines: 10,
SimultaneousFiles: uint16(runtime.NumCPU()),
ExtractionRoutines: uint16(runtime.NumCPU()),
}
}
81 changes: 50 additions & 31 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ type File struct {
dirsRead int
}

// FileFromBase wraps the given *squashfs.Base in a new *File that is bound to
// this Reader and records parent as its parent FS.
func (r *Reader) FileFromBase(b *squashfs.Base, parent *FS) *File {
	out := new(File)
	out.b = b
	out.parent = parent
	out.r = r
	return out
}

func (f *File) FS() (*FS, error) {
if !f.IsDir() {
return nil, errors.New("not a directory")
Expand Down Expand Up @@ -179,6 +188,9 @@ func (f *File) deviceDevices() (maj uint32, min uint32) {
}

// path returns the file's name joined onto its parent's path. A File without
// a parent returns just its own name.
func (f *File) path() string {
	name := f.b.Name
	if f.parent != nil {
		return filepath.Join(f.parent.path(), name)
	}
	return name
}

Expand All @@ -193,7 +205,16 @@ func (f *File) Extract(folder string) error {
func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
if op.manager == nil {
op.manager = routinemanager.NewManager(op.SimultaneousFiles)
log.SetOutput(op.LogOutput)
if op.LogOutput != nil {
log.SetOutput(op.LogOutput)
}
err := os.MkdirAll(path, 0777)
if err != nil {
if op.Verbose {
log.Println("Failed to create initial directory", path)
}
return err
}
}
switch f.b.Inode.Type {
case inode.Dir, inode.EDir:
Expand All @@ -205,7 +226,6 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
return errors.Join(errors.New("failed to create squashfs.Directory: "+path), err)
}
errChan := make(chan error, len(d.Entries))
files := len(d.Entries)
for i := range d.Entries {
b, err := f.r.r.BaseFromEntry(d.Entries[i])
if err != nil {
Expand All @@ -214,37 +234,39 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
}
return errors.Join(errors.New("failed to get base from entry: "+path), err)
}
if b.IsDir() {
files--
extDir := filepath.Join(path, b.Name)
err = os.Mkdir(extDir, 0777)
if err != nil {
if op.Verbose {
log.Println("Failed to create directory", path)
go func(b *squashfs.Base, path string) {
i := op.manager.Lock()
if b.IsDir() {
extDir := filepath.Join(path, b.Name)
err = os.Mkdir(extDir, 0777)
op.manager.Unlock(i)
if err != nil {
if op.Verbose {
log.Println("Failed to create directory", path)
}
errChan <- errors.Join(errors.New("failed to create directory: "+path), err)
return
}
return errors.Join(errors.New("failed to create directory: "+path), err)
}
err = f.ExtractWithOptions(extDir, op)
if err != nil {
if op.Verbose {
log.Println("Failed to extract directory", path)
err = f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)).ExtractWithOptions(extDir, op)
if err != nil {
if op.Verbose {
log.Println("Failed to extract directory", path)
}
errChan <- errors.Join(errors.New("failed to extract directory: "+path), err)
return
}
return errors.Join(errors.New("failed to extract directory: "+path), err)
errChan <- nil
} else {
fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent))
err = fil.ExtractWithOptions(path, op)
op.manager.Unlock(i)
fil.Close()
errChan <- err
}
} else {
fil := &File{
b: b,
r: f.r,
}
go func(fil *File, folder string) {
i := op.manager.Lock()
defer op.manager.Unlock(i)
errChan <- fil.ExtractWithOptions(folder, op)
}(fil, path)
}
}(b, path)
}
var errCache []error
for i := 0; i < files; i++ {
for i := 0; i < len(d.Entries); i++ {
err := <-errChan
if err != nil {
errCache = append(errCache, err)
Expand Down Expand Up @@ -278,9 +300,6 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
}
return errors.Join(errors.New("failed to write file: "+path), err)
}
if op.Verbose {
log.Println(f.path(), "extracted to", path)
}
case inode.Sym, inode.ESym:
symPath := f.SymlinkPath()
if op.DereferenceSymlink {
Expand Down
19 changes: 12 additions & 7 deletions fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ type FS struct {
parent *FS
}

// FSFromDirectory wraps the given *squashfs.Directory in a new *FS that is
// bound to this Reader and records parent as its parent FS.
func (r *Reader) FSFromDirectory(d *squashfs.Directory, parent *FS) *FS {
	out := new(FS)
	out.d = d
	out.r = r
	out.parent = parent
	return out
}

// Glob returns the name of the files at the given pattern.
// All paths are relative to the FS.
// Uses filepath.Match to compare names.
Expand Down Expand Up @@ -101,9 +110,9 @@ func (f *FS) Open(name string) (fs.File, error) {
Path: name,
Err: fs.ErrNotExist,
}
} else {
return f.parent.Open(strings.Join(split[1:], "/"))
}
} else {
return f.parent.Open(strings.Join(split[1:], "/"))
}
i, found := slices.BinarySearchFunc(f.d.Entries, split[0], func(e directory.Entry, name string) int {
return strings.Compare(e.Name, name)
Expand Down Expand Up @@ -137,11 +146,7 @@ func (f *FS) Open(name string) (fs.File, error) {
if err != nil {
return nil, err
}
return (&FS{
d: d,
r: f.r,
parent: f,
}).Open(strings.Join(split[1:], "/"))
return f.r.FSFromDirectory(d, f).Open(strings.Join(split[1:], "/"))
}

// Returns all DirEntry's for the directory at name.
Expand Down
12 changes: 7 additions & 5 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ func NewReader(r io.ReaderAt) (*Reader, error) {
if err != nil {
return nil, err
}
return &Reader{
out := &Reader{
r: rdr,
FS: &FS{
d: rdr.Root,
},
}, nil
}
out.FS = &FS{
d: rdr.Root,
r: out,
}
return out, nil
}

func (r *Reader) ModTime() time.Time {
Expand Down
3 changes: 2 additions & 1 deletion squashfs/data/fullreader.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"io"
"math"
"runtime"
"sync"

"github.com/CalebQ42/squashfs/internal/decompress"
Expand All @@ -31,7 +32,7 @@ func NewFullReader(r io.ReaderAt, initialOffset int64, d decompress.Decompressor
d: d,
sizes: sizes,
initialOffset: initialOffset,
goroutineLimit: 10,
goroutineLimit: uint16(runtime.NumCPU()),
finalBlockSize: finalBlockSize,
blockSize: blockSize,
retPool: &sync.Pool{
Expand Down
2 changes: 0 additions & 2 deletions squashfs/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package squashfs

import (
"errors"
"fmt"
"io/fs"
"path/filepath"
"slices"
Expand All @@ -22,7 +21,6 @@ type Directory struct {
func (r *Reader) directoryFromRef(ref uint64, name string) (*Directory, error) {
i, err := r.InodeFromRef(ref)
if err != nil {
fmt.Println("yo")
return nil, err
}
var blockStart uint32
Expand Down
13 changes: 7 additions & 6 deletions squashfs/reader_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package squashfs
package squashfs_test

import (
"fmt"
Expand All @@ -8,6 +8,8 @@ import (
"os/exec"
"path/filepath"
"testing"

"github.com/CalebQ42/squashfs/squashfs"
)

const (
Expand Down Expand Up @@ -55,7 +57,7 @@ func TestReader(t *testing.T) {
t.Fatal(err)
}
defer fil.Close()
rdr, err := NewReader(fil)
rdr, err := squashfs.NewReader(fil)
if err != nil {
t.Fatal(err)
}
Expand All @@ -75,7 +77,7 @@ func TestSingleFile(t *testing.T) {
t.Fatal(err)
}
defer fil.Close()
rdr, err := NewReader(fil)
rdr, err := squashfs.NewReader(fil)
if err != nil {
t.Fatal(err)
}
Expand All @@ -90,7 +92,7 @@ func TestSingleFile(t *testing.T) {
t.Fatal(err)
}

func extractToDir(rdr *Reader, b *Base, folder string) error {
func extractToDir(rdr *squashfs.Reader, b *squashfs.Base, folder string) error {
path := filepath.Join(folder, b.Name)
if b.IsDir() {
d, err := b.ToDir(rdr)
Expand All @@ -101,7 +103,7 @@ func extractToDir(rdr *Reader, b *Base, folder string) error {
if err != nil {
return err
}
var nestBast *Base
var nestBast *squashfs.Base
for _, e := range d.Entries {
nestBast, err = rdr.BaseFromEntry(e)
if err != nil {
Expand All @@ -115,7 +117,6 @@ func extractToDir(rdr *Reader, b *Base, folder string) error {
} else if b.IsRegular() {
_, full, err := b.GetRegFileReaders(rdr)
if err != nil {
fmt.Println("yo", path)
return err
}
fil, err := os.Create(path)
Expand Down
Loading

0 comments on commit 17d45ee

Please sign in to comment.