Skip to content

Commit

Permalink
Separate snapshotting of parent dirs from files
Browse files Browse the repository at this point in the history
To make the logic a bit more clear, when snapshotting files, the
parent dirs are now snapshotted in a different loop from the files we
are actually trying to snapshot. Unfortunately this loop is nearly
duplicated but I did managed to group some fo the related logic
together:
- A function to check if the file should be snapshotted (e.g. isn't
whitelisted, etc.)
- Created a `Tar` type to handle some of the logic around tar-ing, e.g.
tracking hardlinks and stat-ing files before adding them

One side effect of this is that now when snapshoting the file system,
files will be stat-ed twice.
  • Loading branch information
bobcatfish committed Aug 24, 2018
1 parent 2fe93f2 commit 530c008
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 65 deletions.
106 changes: 60 additions & 46 deletions pkg/snapshot/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ limitations under the License.
package snapshot

import (
"archive/tar"
"bytes"
"fmt"
"io"
Expand All @@ -35,7 +34,6 @@ import (
type Snapshotter struct {
l *LayeredMap
directory string
hardlinks map[uint64]string
}

// NewSnapshotter creates a new snapshotter rooted at d
Expand Down Expand Up @@ -83,70 +81,87 @@ func (s *Snapshotter) TakeSnapshotFS() ([]byte, error) {
return contents, err
}

func shouldSnapshot(file string, snapshottedFiles map[string]bool) (bool, error) {
if val, ok := snapshottedFiles[file]; ok && val {
return false, nil
}
whitelisted, err := util.CheckWhitelist(file)
if err != nil {
return false, fmt.Errorf("Error checking for %s in whitelist: %s", file, err)
}
if whitelisted && !isBuildFile(file) {
logrus.Infof("Not adding %s to layer, as it's whitelisted", file)
return false, nil
}
return true, nil
}

// snapshotFiles creates a snapshot (tar) and adds the specified files.
// It will not add files which are whitelisted.
func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) {
s.hardlinks = map[uint64]string{}
s.l.Snapshot()
if len(files) == 0 {
logrus.Info("No files changed in this command, skipping snapshotting.")
return false, nil
}
logrus.Infof("Taking snapshot of files %v...", files)
snapshottedFiles := make(map[string]bool)
n := len(files)
for _, file := range files {
parentDirs := util.ParentDirectories(file)
// If we do end up snapshotting the dirs, we need to snapshot them before
// we snapshot their contents
files = append(parentDirs, files...)
}
filesAdded := false
w := tar.NewWriter(f)
defer w.Close()

// Now create the tar.
for i, file := range files {
t := util.NewTar(f)
defer t.Close()

// First add to the tar any parent directories that haven't been added
parentDirs := []string{}
for _, file := range files {
parents := util.ParentDirectories(file)
parentDirs = append(parentDirs, parents...)
}
for _, file := range parentDirs {
file = filepath.Clean(file)
if val, ok := snapshottedFiles[file]; ok && val {
continue
}
whitelisted, err := util.CheckWhitelist(file)
shouldSnapshot, err := shouldSnapshot(file, snapshottedFiles)
if err != nil {
return false, err
return false, fmt.Errorf("Error checking if parent dir %s can be snapshotted: %s", file, err)
}
if whitelisted && !isBuildFile(file) {
logrus.Infof("Not adding %s to layer, as it's whitelisted", file)
if !shouldSnapshot {
continue
}
snapshottedFiles[file] = true
info, err := os.Lstat(file)

fileAdded, err := s.l.MaybeAdd(file)
if err != nil {
return false, err
return false, fmt.Errorf("Unable to add parent dir %s to layered map: %s", file, err)
}

var fileAdded bool
lastParentFileIndex := len(files) - n
isParentDir := i < lastParentFileIndex

// If this is parent dir of the file we're snapshotting, only snapshot it
// if it changed
if isParentDir {
fileAdded, err = s.l.MaybeAdd(file)
} else {
// If this is one of the files we are snapshotting, definitely snapshot it
err = s.l.Add(file)
fileAdded = true
if fileAdded {
err = t.AddFileToTar(file)
if err != nil {
return false, fmt.Errorf("Error adding parent dir %s to tar: %s", file, err)
}
filesAdded = true
}
}
// Next add the files themselves to the tar
for _, file := range files {
file = filepath.Clean(file)
shouldSnapshot, err := shouldSnapshot(file, snapshottedFiles)
if err != nil {
return false, fmt.Errorf("Error checking if file %s can be snapshotted: %s", file, err)
}
if !shouldSnapshot {
continue
}
snapshottedFiles[file] = true

err = s.l.Add(file)
if err != nil {
return false, fmt.Errorf("Unable to add file %s to layered map: %s", file, err)
}
if fileAdded {
filesAdded = true
if err := util.AddToTar(file, info, s.hardlinks, w); err != nil {
return false, err
}
err = t.AddFileToTar(file)
if err != nil {
return false, fmt.Errorf("Error adding file %s to tar: %s", file, err)
}
filesAdded = true
}
return filesAdded, nil
}
Expand All @@ -171,12 +186,11 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) {
// to be flushed or the disk does its own caching/buffering.
syscall.Sync()

s.hardlinks = map[uint64]string{}
s.l.Snapshot()
existingPaths := s.l.GetFlattenedPathsForWhiteOut()
filesAdded := false
w := tar.NewWriter(f)
defer w.Close()
t := util.NewTar(f)
defer t.Close()

// Save the fs state in a map to iterate over later.
memFs := map[string]os.FileInfo{}
Expand All @@ -200,15 +214,15 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) {
if addWhiteout {
logrus.Infof("Adding whiteout for %s", path)
filesAdded = true
if err := util.Whiteout(path, w); err != nil {
if err := t.Whiteout(path); err != nil {
return false, err
}
}
}
}

// Now create the tar.
for path, info := range memFs {
for path := range memFs {
whitelisted, err := util.CheckWhitelist(path)
if err != nil {
return false, err
Expand All @@ -226,7 +240,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) {
if maybeAdd {
logrus.Debugf("Adding %s to layer, because it was changed.", path)
filesAdded = true
if err := util.AddToTar(path, info, s.hardlinks, w); err != nil {
if err := t.AddFileToTar(path); err != nil {
return false, err
}
}
Expand Down
45 changes: 35 additions & 10 deletions pkg/util/tar_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"archive/tar"
"compress/bzip2"
"compress/gzip"
"fmt"
"io"
"io/ioutil"
"os"
Expand All @@ -31,8 +32,32 @@ import (
"github.com/sirupsen/logrus"
)

// AddToTar adds the file i to tar w at path p
func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Writer) error {
// Tar knows how to write files to a tar file.
type Tar struct {
hardlinks map[uint64]string
w *tar.Writer
}

// NewTar will create an instance of Tar that can write files to the writer at f.
func NewTar(f io.Writer) Tar {
w := tar.NewWriter(f)
return Tar{
w: w,
hardlinks: map[uint64]string{},
}
}

// Close will close any open streams used by Tar.
func (t *Tar) Close() {
t.w.Close()
}

// AddFileToTar adds the file at path p to the tar
func (t *Tar) AddFileToTar(p string) error {
i, err := os.Lstat(p)
if err != nil {
return fmt.Errorf("Failed to get file info for %s: %s", p, err)
}
linkDst := ""
if i.Mode()&os.ModeSymlink != 0 {
var err error
Expand All @@ -51,13 +76,13 @@ func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Write
}
hdr.Name = p

hardlink, linkDst := checkHardlink(p, hardlinks, i)
hardlink, linkDst := t.checkHardlink(p, i)
if hardlink {
hdr.Linkname = linkDst
hdr.Typeflag = tar.TypeLink
hdr.Size = 0
}
if err := w.WriteHeader(hdr); err != nil {
if err := t.w.WriteHeader(hdr); err != nil {
return err
}
if !(i.Mode().IsRegular()) || hardlink {
Expand All @@ -68,42 +93,42 @@ func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Write
return err
}
defer r.Close()
if _, err := io.Copy(w, r); err != nil {
if _, err := io.Copy(t.w, r); err != nil {
return err
}
return nil
}

func Whiteout(p string, w *tar.Writer) error {
func (t *Tar) Whiteout(p string) error {
dir := filepath.Dir(p)
name := ".wh." + filepath.Base(p)

th := &tar.Header{
Name: filepath.Join(dir, name),
Size: 0,
}
if err := w.WriteHeader(th); err != nil {
if err := t.w.WriteHeader(th); err != nil {
return err
}

return nil
}

// Returns true if path is hardlink, and the link destination
func checkHardlink(p string, hardlinks map[uint64]string, i os.FileInfo) (bool, string) {
func (t *Tar) checkHardlink(p string, i os.FileInfo) (bool, string) {
hardlink := false
linkDst := ""
if sys := i.Sys(); sys != nil {
if stat, ok := sys.(*syscall.Stat_t); ok {
nlinks := stat.Nlink
if nlinks > 1 {
inode := stat.Ino
if original, exists := hardlinks[inode]; exists && original != p {
if original, exists := t.hardlinks[inode]; exists && original != p {
hardlink = true
logrus.Debugf("%s inode exists in hardlinks map, linking to %s", p, original)
linkDst = original
} else {
hardlinks[inode] = p
t.hardlinks[inode] = p
}
}
}
Expand Down
12 changes: 3 additions & 9 deletions pkg/util/tar_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ limitations under the License.
package util

import (
"archive/tar"
"compress/gzip"
"io"
"io/ioutil"
Expand Down Expand Up @@ -92,16 +91,11 @@ func setUpFilesAndTars(testDir string) error {
}

func createTar(testdir string, writer io.Writer) error {

w := tar.NewWriter(writer)
defer w.Close()
t := NewTar(writer)
defer t.Close()
for _, regFile := range regularFiles {
filePath := filepath.Join(testdir, regFile)
fi, err := os.Stat(filePath)
if err != nil {
return err
}
if err := AddToTar(filePath, fi, map[uint64]string{}, w); err != nil {
if err := t.AddFileToTar(filePath); err != nil {
return err
}
}
Expand Down

0 comments on commit 530c008

Please sign in to comment.