From 607af5f7a6429c70bf57489f6caef438f40c7ef3 Mon Sep 17 00:00:00 2001 From: Christie Wilson Date: Tue, 14 Aug 2018 15:48:59 -0700 Subject: [PATCH 1/4] Always snapshot files in COPY and RUN commands Kaniko uses mtime (as well as file contents and other attributes) to determine if files have changed. COPY and ADD commands should _always_ update the mtime, because they actually overwrite the files. However it turns out that the mtime can lag, so kaniko would sometimes add a new layer when using COPY or ADD on a file, and sometimes would not. This leads to a non-deterministic number of layers. To fix this, we have updated the kaniko commands to be more authoritative in declaring when they have changed a file (e.g. WORKDIR will now only create the directory when it doesn't exist) and we will trust those files and _always_ add them, instead of only adding them if they haven't changed. It is possible for RUN commands to also change the filesystem, in which case kaniko has no choice but to look at the filesystem to determine what has changed. For this case we have added a call to `sync` however we still cannot guarantee that sometimes the mtime will not lag, causing the number of layers to be non-deterministic. However when I tried to cause this behaviour with the RUN command, I couldn't. This changes the snapshotting logic a bit; before this change, the last command of the last stage in a Dockerfile would always scan the whole file system and ignore the files returned by the kaniko command. Instead we will now trust those files and assume that the snapshotting performed by previous commands will be adequate. Docker itself seems to rely on the storage driver to determine when files have changed and so doesn't have to deal with these problems directly. An alternative implementation would use `inotify` to track which files have changed. However that would mean watching every file in the filesystem, and adding new watches as files are added. Not only is there a limit on the number of files that can be watched, but according to the man pages a) this can take a significant amount of time b) there is complication around when events arrive (e.g. by the time they arrive, the files may have changed) and lastly c) events can be lost, which would mean we'd run into this non-deterministic behaviour again anyway. Fixes #251 --- README.md | 20 +++++- .../Dockerfile_test_copy_same_file_many_times | 49 +++++++++++++++ integration/integration_test.go | 7 +-- pkg/commands/volume.go | 14 +++-- pkg/commands/workdir.go | 10 ++- pkg/executor/build.go | 40 ++++++++---- pkg/snapshot/layered_map.go | 15 +++++ pkg/snapshot/snapshot.go | 63 +++++++++++++++---- pkg/snapshot/snapshot_test.go | 12 ++-- pkg/util/util.go | 8 ++- 10 files changed, 192 insertions(+), 46 deletions(-) create mode 100644 integration/dockerfiles/Dockerfile_test_copy_same_file_many_times diff --git a/README.md b/README.md index 6b20567a9e..8a4051fdb2 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ We do **not** recommend running the kaniko executor binary in another image, as - [Security](#security) - [Comparison with Other Tools](#comparison-with-other-tools) - [Community](#community) +- [Limitations](#limitations) _If you are interested in contributing to kaniko, see [DEVELOPMENT.md](DEVELOPMENT.md) and [CONTRIBUTING.md](CONTRIBUTING.md)._ @@ -256,7 +257,8 @@ To configure credentials, you will need to do the following: #### --snapshotMode You can set the `--snapshotMode=` flag to set how kaniko will snapshot the filesystem. -If `--snapshotMode=time` is set, only file mtime will be considered when snapshotting. +If `--snapshotMode=time` is set, only file mtime will be considered when snapshotting (see +[limitations related to mtime](#mtime-and-snapshotting)). #### --build-arg @@ -356,3 +358,19 @@ provides. [kaniko-users](https://groups.google.com/forum/#!forum/kaniko-users) Google group To Contribute to kaniko, see [DEVELOPMENT.md](DEVELOPMENT.md) and [CONTRIBUTING.md](CONTRIBUTING.md). + +## Limitations + +### mtime and snapshotting + +When taking a snapshot, kaniko's hashing algorithms include (or in the case of +[`--snapshotMode=time`](#--snapshotmode), only use) a file's +[`mtime`](https://en.wikipedia.org/wiki/Inode#POSIX_inode_description) to determine +if the file has changed. Unfortunately there is a delay between when changes to a +file are made and when the `mtime` is updated. This means: + +* With the default snapshot mode (`--snapshotMode=full`), whether or not kaniko will + add a layer in the case where a `RUN` command modifies a file but the contents do + not change is non-deterministic. +* With the time-only snapshot mode (`--snapshotMode=time`), kaniko may miss changes + introduced by `RUN` commands entirely. \ No newline at end of file diff --git a/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times b/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times new file mode 100644 index 0000000000..6acea2a4b5 --- /dev/null +++ b/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times @@ -0,0 +1,49 @@ +FROM alpine@sha256:5ce5f501c457015c4b91f91a15ac69157d9b06f1a75cf9107bf2b62e0843983a +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo diff --git a/integration/integration_test.go b/integration/integration_test.go index f9e35d05fe..c16ff5f64c 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -135,11 +135,10 @@ func TestMain(m *testing.M) { defer DeleteFromBucket(fileInBucket) fmt.Println("Building kaniko image") - buildKaniko := exec.Command("docker", "build", "-t", ExecutorImage, "-f", "../deploy/Dockerfile", "..") - err = buildKaniko.Run() + cmd := exec.Command("docker", "build", "-t", ExecutorImage, "-f", "../deploy/Dockerfile", "..") + _, err = RunCommandWithoutTest(cmd) if err != nil { - fmt.Print(err) - fmt.Print("Building kaniko failed.") + fmt.Printf("Building kaniko failed: %s", err) os.Exit(1) } diff --git a/pkg/commands/volume.go b/pkg/commands/volume.go index 5e798e1280..0fdad0c0ea 100644 --- a/pkg/commands/volume.go +++ b/pkg/commands/volume.go @@ -17,6 +17,7 @@ limitations under the License. package commands import ( + "fmt" "os" "strings" @@ -53,13 +54,14 @@ func (v *VolumeCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile. return err } - logrus.Infof("Creating directory %s", volume) - if err := os.MkdirAll(volume, 0755); err != nil { - return err + // Only create and snapshot the dir if it didn't exist already + if _, err := os.Stat(volume); os.IsNotExist(err) { + logrus.Infof("Creating directory %s", volume) + v.snapshotFiles = []string{volume} + if err := os.MkdirAll(volume, 0755); err != nil { + return fmt.Errorf("Could not create directory for volume %s: %s", volume, err) + } } - - //Check if directory already exists? - v.snapshotFiles = append(v.snapshotFiles, volume) } config.Volumes = existingVolumes diff --git a/pkg/commands/workdir.go b/pkg/commands/workdir.go index 00487eaee1..67186f73fe 100644 --- a/pkg/commands/workdir.go +++ b/pkg/commands/workdir.go @@ -47,8 +47,14 @@ func (w *WorkdirCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile config.WorkingDir = filepath.Join(config.WorkingDir, resolvedWorkingDir) } logrus.Infof("Changed working directory to %s", config.WorkingDir) - w.snapshotFiles = []string{config.WorkingDir} - return os.MkdirAll(config.WorkingDir, 0755) + + // Only create and snapshot the dir if it didn't exist already + if _, err := os.Stat(config.WorkingDir); os.IsNotExist(err) { + logrus.Infof("Creating directory %s", config.WorkingDir) + w.snapshotFiles = []string{config.WorkingDir} + return os.MkdirAll(config.WorkingDir, 0755) + } + return nil } // FilesToSnapshot returns the workingdir, which should have been created if it didn't already exist diff --git a/pkg/executor/build.go b/pkg/executor/build.go index a3f958e62e..cb04737bc6 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -87,23 +87,41 @@ func DoBuild(opts *options.KanikoOptions) (v1.Image, error) { if err := dockerCommand.ExecuteCommand(&imageConfig.Config, buildArgs); err != nil { return nil, err } - // Don't snapshot if it's not the final stage and not the final command - // Also don't snapshot if it's the final stage, not the final command, and single snapshot is set - if (!finalStage && !finalCmd) || (finalStage && !finalCmd && opts.SingleSnapshot) { - continue - } - // Now, we get the files to snapshot from this command and take the snapshot snapshotFiles := dockerCommand.FilesToSnapshot() - if finalCmd { - snapshotFiles = nil + var contents []byte + + // If this is an intermediate stage, we only snapshot for the last command and we + // want to snapshot the entire filesystem since we aren't tracking what was changed + // by previous commands. + if !finalStage { + if finalCmd { + contents, err = snapshotter.TakeSnapshotFS() + } + } else { + // If we are in single snapshot mode, we only take a snapshot once, after all + // commands have completed. + if opts.SingleSnapshot { + if finalCmd { + contents, err = snapshotter.TakeSnapshotFS() + } + } else { + // Otherwise, in the final stage we take a snapshot at each command. If we know + // the files that were changed, we'll snapshot those explicitly, otherwise we'll + // check if anything in the filesystem changed. + if snapshotFiles != nil { + contents, err = snapshotter.TakeSnapshot(snapshotFiles) + } else { + contents, err = snapshotter.TakeSnapshotFS() + } + } } - contents, err := snapshotter.TakeSnapshot(snapshotFiles) if err != nil { - return nil, err + return nil, fmt.Errorf("Error taking snapshot of files for command %s: %s", dockerCommand, err) } + util.MoveVolumeWhitelistToWhitelist() if contents == nil { - logrus.Info("No files were changed, appending empty layer to config.") + logrus.Info("No files were changed, appending empty layer to config. No layer added to image.") continue } // Append the layer to the image diff --git a/pkg/snapshot/layered_map.go b/pkg/snapshot/layered_map.go index 6fd0ca4ac6..0d382d766d 100644 --- a/pkg/snapshot/layered_map.go +++ b/pkg/snapshot/layered_map.go @@ -17,6 +17,7 @@ limitations under the License. package snapshot import ( + "fmt" "path/filepath" "strings" ) @@ -82,6 +83,20 @@ func (l *LayeredMap) MaybeAddWhiteout(s string) (bool, error) { return true, nil } +// Add will add the specified file s to the layered map. +func (l *LayeredMap) Add(s string) error { + newV, err := l.hasher(s) + if err != nil { + return fmt.Errorf("Error creating hash for %s: %s", s, err) + } + l.layers[len(l.layers)-1][s] = newV + return nil +} + +// MaybeAdd will add the specified file s to the layered map if +// the layered map's hashing function determines it has changed. If +// it has not changed, it will not be added. Returns true if the file +// was added. func (l *LayeredMap) MaybeAdd(s string) (bool, error) { oldV, ok := l.Get(s) newV, err := l.hasher(s) diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index f0a88cbe16..7385a8d2d0 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -19,10 +19,12 @@ package snapshot import ( "archive/tar" "bytes" + "fmt" "io" "io/ioutil" "os" "path/filepath" + "syscall" "github.com/GoogleContainerTools/kaniko/pkg/constants" "github.com/GoogleContainerTools/kaniko/pkg/util" @@ -49,17 +51,28 @@ func (s *Snapshotter) Init() error { return nil } -// TakeSnapshot takes a snapshot of the filesystem, avoiding directories in the whitelist, and creates +// TakeSnapshot takes a snapshot of the specified files, avoiding directories in the whitelist, and creates // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { buf := bytes.NewBuffer([]byte{}) var filesAdded bool - var err error - if files == nil { - filesAdded, err = s.snapShotFS(buf) - } else { - filesAdded, err = s.snapshotFiles(buf, files) + filesAdded, err := s.snapshotFiles(buf, files) + if err != nil { + return nil, err + } + contents := buf.Bytes() + if !filesAdded { + return nil, nil } + return contents, err +} + +// TakeSnapshotFS takes a snapshot of the filesystem, avoiding directories in the whitelist, and creates +// a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed +func (s *Snapshotter) TakeSnapshotFS() ([]byte, error) { + buf := bytes.NewBuffer([]byte{}) + var filesAdded bool + filesAdded, err := s.snapShotFS(buf) if err != nil { return nil, err } @@ -70,8 +83,8 @@ func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { return contents, err } -// snapshotFiles takes a snapshot of specific files -// Used for ADD/COPY commands, when we know which files have changed +// snapshotFiles creates a snapshot (tar) and adds the specified files. +// It will not add files which are whitelisted. func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { s.hardlinks = map[uint64]string{} s.l.Snapshot() @@ -81,8 +94,11 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { } logrus.Infof("Taking snapshot of files %v...", files) snapshottedFiles := make(map[string]bool) + n := len(files) for _, file := range files { parentDirs := util.ParentDirectories(file) + // If we do end up snapshotting the dirs, we need to snapshot them before + // we snapshot their contents files = append(parentDirs, files...) } filesAdded := false @@ -90,7 +106,7 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { defer w.Close() // Now create the tar. - for _, file := range files { + for i, file := range files { file = filepath.Clean(file) if val, ok := snapshottedFiles[file]; ok && val { continue @@ -108,12 +124,24 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { if err != nil { return false, err } - // Only add to the tar if we add it to the layeredmap. - addFile, err := s.l.MaybeAdd(file) + + var fileAdded bool + lastParentFileIndex := len(files) - n + isParentDir := i < lastParentFileIndex + + // If this is parent dir of the file we're snapshotting, only snapshot it + // if it changed + if isParentDir { + fileAdded, err = s.l.MaybeAdd(file) + } else { + // If this is one of the files we are snapshotting, definitely snapshot it + err = s.l.Add(file) + fileAdded = true + } if err != nil { - return false, err + return false, fmt.Errorf("Unable to add file %s to layered map: %s", file, err) } - if addFile { + if fileAdded { filesAdded = true if err := util.AddToTar(file, info, s.hardlinks, w); err != nil { return false, err @@ -132,8 +160,17 @@ func isBuildFile(file string) bool { return false } +// shapShotFS creates a snapshot (tar) of all files in the system which are not +// whitelisted and which have changed. func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { logrus.Info("Taking snapshot of full filesystem...") + + // Some of the operations that follow (e.g. hashing) depend on the file system being synced, + // for example the hashing function that determines if files are equal uses the mtime of the files, + // which can lag if sync is not called. Unfortunately there can still be lag if too much data needs + // to be flushed or the disk does its own caching/buffering. + syscall.Sync() + s.hardlinks = map[uint64]string{} s.l.Snapshot() existingPaths := s.l.GetFlattenedPathsForWhiteOut() diff --git a/pkg/snapshot/snapshot_test.go b/pkg/snapshot/snapshot_test.go index 934e862d94..c0dc1afdbf 100644 --- a/pkg/snapshot/snapshot_test.go +++ b/pkg/snapshot/snapshot_test.go @@ -29,7 +29,7 @@ import ( "github.com/pkg/errors" ) -func TestSnapshotFileChange(t *testing.T) { +func TestSnapshotFSFileChange(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) @@ -45,7 +45,7 @@ func TestSnapshotFileChange(t *testing.T) { t.Fatalf("Error setting up fs: %s", err) } // Take another snapshot - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } @@ -81,7 +81,7 @@ func TestSnapshotFileChange(t *testing.T) { } } -func TestSnapshotChangePermissions(t *testing.T) { +func TestSnapshotFSChangePermissions(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) if err != nil { @@ -93,7 +93,7 @@ func TestSnapshotChangePermissions(t *testing.T) { t.Fatalf("Error changing permissions on %s: %v", batPath, err) } // Take another snapshot - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } @@ -166,14 +166,14 @@ func TestSnapshotFiles(t *testing.T) { testutil.CheckErrorAndDeepEqual(t, false, nil, expectedFiles, actualFiles) } -func TestEmptySnapshot(t *testing.T) { +func TestEmptySnapshotFS(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) if err != nil { t.Fatal(err) } // Take snapshot with no changes - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } diff --git a/pkg/util/util.go b/pkg/util/util.go index c95935ec9e..73d3f70e01 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -19,12 +19,13 @@ package util import ( "crypto/md5" "encoding/hex" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" "io" "os" "strconv" "syscall" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // SetLogLevel sets the logrus logging level @@ -68,7 +69,8 @@ func Hasher() func(string) (string, error) { return hasher } -// MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed +// MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed. +// Note that the mtime can lag, so it's possible that a file will have changed but the mtime may look the same. func MtimeHasher() func(string) (string, error) { hasher := func(p string) (string, error) { h := md5.New() From 6dccd4ec4aaea9e042b1a581f273b48e5fa1b105 Mon Sep 17 00:00:00 2001 From: Christie Wilson Date: Thu, 16 Aug 2018 10:52:30 -0700 Subject: [PATCH 2/4] Make it more clear the mtime issue is theoretical Although we were able to reproduce this with the previous behaviour of the COPY and ADD commands, we have fixed that issue and our attempts to cause the issue to occur with RUN did not succeed, so it may be that in practice this will never happen. --- README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8a4051fdb2..e8e91be46c 100644 --- a/README.md +++ b/README.md @@ -369,8 +369,12 @@ When taking a snapshot, kaniko's hashing algorithms include (or in the case of if the file has changed. Unfortunately there is a delay between when changes to a file are made and when the `mtime` is updated. This means: -* With the default snapshot mode (`--snapshotMode=full`), whether or not kaniko will - add a layer in the case where a `RUN` command modifies a file but the contents do - not change is non-deterministic. * With the time-only snapshot mode (`--snapshotMode=time`), kaniko may miss changes - introduced by `RUN` commands entirely. \ No newline at end of file + introduced by `RUN` commands entirely. +* With the default snapshot mode (`--snapshotMode=full`), whether or not kaniko will + add a layer in the case where a `RUN` command modifies a file **but the contents do + not** change is theoretically non-deterministic. This _does not affect the contents_ + which will still be correct, but it does affect the number of layers. + +_Note that these issues are currently theoretical only. If you see this issue occur, please +[open an issue](https://github.com/GoogleContainerTools/kaniko/issues)._ \ No newline at end of file From 2fe93f2911846915694f57004473835ac9016c13 Mon Sep 17 00:00:00 2001 From: Christie Wilson Date: Thu, 23 Aug 2018 18:02:15 -0700 Subject: [PATCH 3/4] No longer try to verify kaniko dir isn't snapshotted This test had previously (before #231) been making a change to a file in the kaniko dir, then checking that it isn't being snapshotted. This was to test the whitelisting logic, which makes sure that changes to /kaniko aren't included in images. However the test creates a temporary dir, so the kaniko dir is actually in /tmp//kaniko, and in #231 the logic was simplified to no longer have a special case for tests. The test continued to pass because `MaybeAdd` noticed that the kaniko file wasn't changing, and didn't add it. After changing this to always add the files, it revealed that this was left behind by accident. I also opened #307 to add integration test coverage for this logic. I also marked `CheckErrorAndDeepEqual` as a helper function so that when it fails, the line number reported is where that was called. --- pkg/snapshot/snapshot_test.go | 1 - testutil/util.go | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/snapshot/snapshot_test.go b/pkg/snapshot/snapshot_test.go index c0dc1afdbf..72b6a750ac 100644 --- a/pkg/snapshot/snapshot_test.go +++ b/pkg/snapshot/snapshot_test.go @@ -141,7 +141,6 @@ func TestSnapshotFiles(t *testing.T) { } filesToSnapshot := []string{ filepath.Join(testDir, "foo"), - filepath.Join(testDir, "kaniko/file"), } contents, err := snapshotter.TakeSnapshot(filesToSnapshot) if err != nil { diff --git a/testutil/util.go b/testutil/util.go index 64819c3efb..5101ba415d 100644 --- a/testutil/util.go +++ b/testutil/util.go @@ -40,6 +40,7 @@ func SetupFiles(path string, files map[string]string) error { } func CheckErrorAndDeepEqual(t *testing.T, shouldErr bool, err error, expected, actual interface{}) { + t.Helper() if err := checkErr(shouldErr, err); err != nil { t.Error(err) return From 7f64037a8ce9f140181b53d3d66ee6ef01b370ec Mon Sep 17 00:00:00 2001 From: Christie Wilson Date: Fri, 24 Aug 2018 15:50:58 -0700 Subject: [PATCH 4/4] Separate snapshotting of parent dirs from files To make the logic a bit more clear, when snapshotting files, the parent dirs are now snapshotted in a different loop from the files we are actually trying to snapshot. Unfortunately this loop is nearly duplicated but I did managed to group some fo the related logic together: - A function to check if the file should be snapshotted (e.g. isn't whitelisted, etc.) - Created a `Tar` type to handle some of the logic around tar-ing, e.g. tracking hardlinks and stat-ing files before adding them One side effect of this is that now when snapshoting the file system, files will be stat-ed twice. --- integration/integration_test.go | 3 +- pkg/snapshot/snapshot.go | 106 +++++++++++++++++--------------- pkg/util/tar_util.go | 45 +++++++++++--- pkg/util/tar_util_test.go | 12 +--- 4 files changed, 97 insertions(+), 69 deletions(-) diff --git a/integration/integration_test.go b/integration/integration_test.go index c16ff5f64c..372922a434 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -136,8 +136,7 @@ func TestMain(m *testing.M) { fmt.Println("Building kaniko image") cmd := exec.Command("docker", "build", "-t", ExecutorImage, "-f", "../deploy/Dockerfile", "..") - _, err = RunCommandWithoutTest(cmd) - if err != nil { + if _, err = RunCommandWithoutTest(cmd); err != nil { fmt.Printf("Building kaniko failed: %s", err) os.Exit(1) } diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index 7385a8d2d0..4f01441dc3 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -17,7 +17,6 @@ limitations under the License. package snapshot import ( - "archive/tar" "bytes" "fmt" "io" @@ -35,7 +34,6 @@ import ( type Snapshotter struct { l *LayeredMap directory string - hardlinks map[uint64]string } // NewSnapshotter creates a new snapshotter rooted at d @@ -55,7 +53,6 @@ func (s *Snapshotter) Init() error { // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { buf := bytes.NewBuffer([]byte{}) - var filesAdded bool filesAdded, err := s.snapshotFiles(buf, files) if err != nil { return nil, err @@ -71,7 +68,6 @@ func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed func (s *Snapshotter) TakeSnapshotFS() ([]byte, error) { buf := bytes.NewBuffer([]byte{}) - var filesAdded bool filesAdded, err := s.snapShotFS(buf) if err != nil { return nil, err @@ -83,10 +79,24 @@ func (s *Snapshotter) TakeSnapshotFS() ([]byte, error) { return contents, err } +func shouldSnapshot(file string, snapshottedFiles map[string]bool) (bool, error) { + if val, ok := snapshottedFiles[file]; ok && val { + return false, nil + } + whitelisted, err := util.CheckWhitelist(file) + if err != nil { + return false, fmt.Errorf("Error checking for %s in whitelist: %s", file, err) + } + if whitelisted && !isBuildFile(file) { + logrus.Infof("Not adding %s to layer, as it's whitelisted", file) + return false, nil + } + return true, nil +} + // snapshotFiles creates a snapshot (tar) and adds the specified files. // It will not add files which are whitelisted. func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { - s.hardlinks = map[uint64]string{} s.l.Snapshot() if len(files) == 0 { logrus.Info("No files changed in this command, skipping snapshotting.") @@ -94,59 +104,60 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { } logrus.Infof("Taking snapshot of files %v...", files) snapshottedFiles := make(map[string]bool) - n := len(files) - for _, file := range files { - parentDirs := util.ParentDirectories(file) - // If we do end up snapshotting the dirs, we need to snapshot them before - // we snapshot their contents - files = append(parentDirs, files...) - } filesAdded := false - w := tar.NewWriter(f) - defer w.Close() - // Now create the tar. - for i, file := range files { + t := util.NewTar(f) + defer t.Close() + + // First add to the tar any parent directories that haven't been added + parentDirs := []string{} + for _, file := range files { + parents := util.ParentDirectories(file) + parentDirs = append(parentDirs, parents...) + } + for _, file := range parentDirs { file = filepath.Clean(file) - if val, ok := snapshottedFiles[file]; ok && val { - continue - } - whitelisted, err := util.CheckWhitelist(file) + shouldSnapshot, err := shouldSnapshot(file, snapshottedFiles) if err != nil { - return false, err + return false, fmt.Errorf("Error checking if parent dir %s can be snapshotted: %s", file, err) } - if whitelisted && !isBuildFile(file) { - logrus.Infof("Not adding %s to layer, as it's whitelisted", file) + if !shouldSnapshot { continue } snapshottedFiles[file] = true - info, err := os.Lstat(file) + + fileAdded, err := s.l.MaybeAdd(file) if err != nil { - return false, err + return false, fmt.Errorf("Unable to add parent dir %s to layered map: %s", file, err) } - var fileAdded bool - lastParentFileIndex := len(files) - n - isParentDir := i < lastParentFileIndex - - // If this is parent dir of the file we're snapshotting, only snapshot it - // if it changed - if isParentDir { - fileAdded, err = s.l.MaybeAdd(file) - } else { - // If this is one of the files we are snapshotting, definitely snapshot it - err = s.l.Add(file) - fileAdded = true + if fileAdded { + err = t.AddFileToTar(file) + if err != nil { + return false, fmt.Errorf("Error adding parent dir %s to tar: %s", file, err) + } + filesAdded = true } + } + // Next add the files themselves to the tar + for _, file := range files { + file = filepath.Clean(file) + shouldSnapshot, err := shouldSnapshot(file, snapshottedFiles) if err != nil { + return false, fmt.Errorf("Error checking if file %s can be snapshotted: %s", file, err) + } + if !shouldSnapshot { + continue + } + snapshottedFiles[file] = true + + if err = s.l.Add(file); err != nil { return false, fmt.Errorf("Unable to add file %s to layered map: %s", file, err) } - if fileAdded { - filesAdded = true - if err := util.AddToTar(file, info, s.hardlinks, w); err != nil { - return false, err - } + if err = t.AddFileToTar(file); err != nil { + return false, fmt.Errorf("Error adding file %s to tar: %s", file, err) } + filesAdded = true } return filesAdded, nil } @@ -171,12 +182,11 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { // to be flushed or the disk does its own caching/buffering. syscall.Sync() - s.hardlinks = map[uint64]string{} s.l.Snapshot() existingPaths := s.l.GetFlattenedPathsForWhiteOut() filesAdded := false - w := tar.NewWriter(f) - defer w.Close() + t := util.NewTar(f) + defer t.Close() // Save the fs state in a map to iterate over later. memFs := map[string]os.FileInfo{} @@ -200,7 +210,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { if addWhiteout { logrus.Infof("Adding whiteout for %s", path) filesAdded = true - if err := util.Whiteout(path, w); err != nil { + if err := t.Whiteout(path); err != nil { return false, err } } @@ -208,7 +218,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { } // Now create the tar. - for path, info := range memFs { + for path := range memFs { whitelisted, err := util.CheckWhitelist(path) if err != nil { return false, err @@ -226,7 +236,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { if maybeAdd { logrus.Debugf("Adding %s to layer, because it was changed.", path) filesAdded = true - if err := util.AddToTar(path, info, s.hardlinks, w); err != nil { + if err := t.AddFileToTar(path); err != nil { return false, err } } diff --git a/pkg/util/tar_util.go b/pkg/util/tar_util.go index a66eea6677..f95574deb5 100644 --- a/pkg/util/tar_util.go +++ b/pkg/util/tar_util.go @@ -20,6 +20,7 @@ import ( "archive/tar" "compress/bzip2" "compress/gzip" + "fmt" "io" "io/ioutil" "os" @@ -31,8 +32,32 @@ import ( "github.com/sirupsen/logrus" ) -// AddToTar adds the file i to tar w at path p -func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Writer) error { +// Tar knows how to write files to a tar file. +type Tar struct { + hardlinks map[uint64]string + w *tar.Writer +} + +// NewTar will create an instance of Tar that can write files to the writer at f. +func NewTar(f io.Writer) Tar { + w := tar.NewWriter(f) + return Tar{ + w: w, + hardlinks: map[uint64]string{}, + } +} + +// Close will close any open streams used by Tar. +func (t *Tar) Close() { + t.w.Close() +} + +// AddFileToTar adds the file at path p to the tar +func (t *Tar) AddFileToTar(p string) error { + i, err := os.Lstat(p) + if err != nil { + return fmt.Errorf("Failed to get file info for %s: %s", p, err) + } linkDst := "" if i.Mode()&os.ModeSymlink != 0 { var err error @@ -51,13 +76,13 @@ func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Write } hdr.Name = p - hardlink, linkDst := checkHardlink(p, hardlinks, i) + hardlink, linkDst := t.checkHardlink(p, i) if hardlink { hdr.Linkname = linkDst hdr.Typeflag = tar.TypeLink hdr.Size = 0 } - if err := w.WriteHeader(hdr); err != nil { + if err := t.w.WriteHeader(hdr); err != nil { return err } if !(i.Mode().IsRegular()) || hardlink { @@ -68,13 +93,13 @@ func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Write return err } defer r.Close() - if _, err := io.Copy(w, r); err != nil { + if _, err := io.Copy(t.w, r); err != nil { return err } return nil } -func Whiteout(p string, w *tar.Writer) error { +func (t *Tar) Whiteout(p string) error { dir := filepath.Dir(p) name := ".wh." + filepath.Base(p) @@ -82,7 +107,7 @@ func Whiteout(p string, w *tar.Writer) error { Name: filepath.Join(dir, name), Size: 0, } - if err := w.WriteHeader(th); err != nil { + if err := t.w.WriteHeader(th); err != nil { return err } @@ -90,7 +115,7 @@ func Whiteout(p string, w *tar.Writer) error { } // Returns true if path is hardlink, and the link destination -func checkHardlink(p string, hardlinks map[uint64]string, i os.FileInfo) (bool, string) { +func (t *Tar) checkHardlink(p string, i os.FileInfo) (bool, string) { hardlink := false linkDst := "" if sys := i.Sys(); sys != nil { @@ -98,12 +123,12 @@ func checkHardlink(p string, hardlinks map[uint64]string, i os.FileInfo) (bool, nlinks := stat.Nlink if nlinks > 1 { inode := stat.Ino - if original, exists := hardlinks[inode]; exists && original != p { + if original, exists := t.hardlinks[inode]; exists && original != p { hardlink = true logrus.Debugf("%s inode exists in hardlinks map, linking to %s", p, original) linkDst = original } else { - hardlinks[inode] = p + t.hardlinks[inode] = p } } } diff --git a/pkg/util/tar_util_test.go b/pkg/util/tar_util_test.go index 1762ea6f29..e0c1becac6 100644 --- a/pkg/util/tar_util_test.go +++ b/pkg/util/tar_util_test.go @@ -17,7 +17,6 @@ limitations under the License. package util import ( - "archive/tar" "compress/gzip" "io" "io/ioutil" @@ -92,16 +91,11 @@ func setUpFilesAndTars(testDir string) error { } func createTar(testdir string, writer io.Writer) error { - - w := tar.NewWriter(writer) - defer w.Close() + t := NewTar(writer) + defer t.Close() for _, regFile := range regularFiles { filePath := filepath.Join(testdir, regFile) - fi, err := os.Stat(filePath) - if err != nil { - return err - } - if err := AddToTar(filePath, fi, map[uint64]string{}, w); err != nil { + if err := t.AddFileToTar(filePath); err != nil { return err } }