From 13accbaf3243a5cf477c9e3d5116dc09dd80ae37 Mon Sep 17 00:00:00 2001 From: Priya Wadhwa Date: Tue, 4 Sep 2018 13:37:15 -0700 Subject: [PATCH 1/2] Add Key() to LayeredMap and Snapshotter This will return a string representaiton of the current filesystem to be used with caching. Whenever a file is explictly added (via ADD or COPY), it will be stored in "added" in the LayeredMap. The file will map to a hash created by CacheHasher (which doesn't take into account mtime, since that will be different with every build, making the cache useless) Key() will returns a sha of the added files which will be used in determining the overall cache key for a command. --- pkg/executor/build.go | 2 +- pkg/snapshot/layered_map.go | 36 ++++++++++++--- pkg/snapshot/layered_map_test.go | 78 ++++++++++++++++++++++++++++++++ pkg/snapshot/snapshot.go | 8 +++- pkg/snapshot/snapshot_test.go | 2 +- pkg/util/util.go | 38 ++++++++++++++++ 6 files changed, 155 insertions(+), 9 deletions(-) create mode 100644 pkg/snapshot/layered_map_test.go diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 22807d6a7a..f6ac89f46a 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -62,7 +62,7 @@ func DoBuild(opts *options.KanikoOptions) (v1.Image, error) { if err := util.GetFSFromImage(constants.RootDir, sourceImage); err != nil { return nil, err } - l := snapshot.NewLayeredMap(hasher) + l := snapshot.NewLayeredMap(hasher, util.CacheHasher()) snapshotter := snapshot.NewSnapshotter(l, constants.RootDir) // Take initial snapshot if err := snapshotter.Init(); err != nil { diff --git a/pkg/snapshot/layered_map.go b/pkg/snapshot/layered_map.go index 0d382d766d..c922bbafc4 100644 --- a/pkg/snapshot/layered_map.go +++ b/pkg/snapshot/layered_map.go @@ -17,20 +17,27 @@ limitations under the License. package snapshot import ( + "bytes" + "encoding/json" "fmt" "path/filepath" "strings" + + "github.com/GoogleContainerTools/kaniko/pkg/util" ) type LayeredMap struct { - layers []map[string]string - whiteouts []map[string]string - hasher func(string) (string, error) + layers []map[string]string + whiteouts []map[string]string + added []map[string]string + hasher func(string) (string, error) + cacheHasher func(string) (string, error) } -func NewLayeredMap(h func(string) (string, error)) *LayeredMap { +func NewLayeredMap(h func(string) (string, error), c func(string) (string, error)) *LayeredMap { l := LayeredMap{ - hasher: h, + hasher: h, + cacheHasher: c, } l.layers = []map[string]string{} return &l @@ -39,8 +46,18 @@ func NewLayeredMap(h func(string) (string, error)) *LayeredMap { func (l *LayeredMap) Snapshot() { l.whiteouts = append(l.whiteouts, map[string]string{}) l.layers = append(l.layers, map[string]string{}) + l.added = append(l.added, map[string]string{}) +} + +// Key returns a hash for added files +func (l *LayeredMap) Key() (string, error) { + c := bytes.NewBuffer([]byte{}) + enc := json.NewEncoder(c) + enc.Encode(l.added) + return util.SHA256(c) } +// GetFlattenedPathsForWhiteOut returns all paths in the current FS func (l *LayeredMap) GetFlattenedPathsForWhiteOut() map[string]struct{} { paths := map[string]struct{}{} for _, l := range l.layers { @@ -85,11 +102,18 @@ func (l *LayeredMap) MaybeAddWhiteout(s string) (bool, error) { // Add will add the specified file s to the layered map. func (l *LayeredMap) Add(s string) error { + // Use hash function and add to layers newV, err := l.hasher(s) if err != nil { - return fmt.Errorf("Error creating hash for %s: %s", s, err) + return fmt.Errorf("Error creating hash for %s: %v", s, err) } l.layers[len(l.layers)-1][s] = newV + // Use cache hash function and add to added + cacheV, err := l.cacheHasher(s) + if err != nil { + return fmt.Errorf("Error creating cache hash for %s: %v", s, err) + } + l.added[len(l.added)-1][s] = cacheV return nil } diff --git a/pkg/snapshot/layered_map_test.go b/pkg/snapshot/layered_map_test.go new file mode 100644 index 0000000000..e5ea64f023 --- /dev/null +++ b/pkg/snapshot/layered_map_test.go @@ -0,0 +1,78 @@ +/* +Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package snapshot + +import ( + "testing" +) + +func Test_CacheKey(t *testing.T) { + tests := []struct { + name string + map1 map[string]string + map2 map[string]string + equal bool + }{ + { + name: "maps are the same", + map1: map[string]string{ + "a": "apple", + "b": "bat", + "c": "cat", + }, + map2: map[string]string{ + "c": "cat", + "b": "bat", + "a": "apple", + }, + equal: true, + }, + { + name: "maps are different", + map1: map[string]string{ + "a": "apple", + "b": "bat", + "c": "cat", + }, + map2: map[string]string{ + "c": "", + "b": "bat", + "a": "apple", + }, + equal: false, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + lm1 := LayeredMap{added: []map[string]string{test.map1}} + lm2 := LayeredMap{added: []map[string]string{test.map2}} + k1, err := lm1.Key() + if err != nil { + t.Fatalf("error getting key for map 1: %v", err) + } + k2, err := lm2.Key() + if err != nil { + t.Fatalf("error getting key for map 2: %v", err) + } + if test.equal && k1 != k2 { + t.Fatalf("keys differ.\nExpected\n%+v\nActual\n%+v", k1, k2) + } + if !test.equal && k1 == k2 { + t.Fatal("keys are the same, expected different keys") + } + }) + } +} diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index 4f01441dc3..55da45d652 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -49,6 +49,11 @@ func (s *Snapshotter) Init() error { return nil } +// Key returns a string based on the current state of the file system +func (s *Snapshotter) Key() (string, error) { + return s.l.Key() +} + // TakeSnapshot takes a snapshot of the specified files, avoiding directories in the whitelist, and creates // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { @@ -102,7 +107,8 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { logrus.Info("No files changed in this command, skipping snapshotting.") return false, nil } - logrus.Infof("Taking snapshot of files %v...", files) + logrus.Info("Taking snapshot of files...") + logrus.Debugf("Taking snapshot of files %v", files) snapshottedFiles := make(map[string]bool) filesAdded := false diff --git a/pkg/snapshot/snapshot_test.go b/pkg/snapshot/snapshot_test.go index 72b6a750ac..95daa88f63 100644 --- a/pkg/snapshot/snapshot_test.go +++ b/pkg/snapshot/snapshot_test.go @@ -198,7 +198,7 @@ func setUpTestDir() (string, *Snapshotter, error) { } // Take the initial snapshot - l := NewLayeredMap(util.Hasher()) + l := NewLayeredMap(util.Hasher(), util.CacheHasher()) snapshotter := NewSnapshotter(l, testDir) if err := snapshotter.Init(); err != nil { return testDir, nil, errors.Wrap(err, "initializing snapshotter") diff --git a/pkg/util/util.go b/pkg/util/util.go index 617298a7b5..bc09a7c27b 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -18,6 +18,7 @@ package util import ( "crypto/md5" + "crypto/sha256" "encoding/hex" "io" "os" @@ -72,6 +73,36 @@ func Hasher() func(string) (string, error) { return hasher } +// CacheHasher takes into account everything the regular hasher does except for mtime +func CacheHasher() func(string) (string, error) { + hasher := func(p string) (string, error) { + h := md5.New() + fi, err := os.Lstat(p) + if err != nil { + return "", err + } + h.Write([]byte(fi.Mode().String())) + + h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Uid), 36))) + h.Write([]byte(",")) + h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Gid), 36))) + + if fi.Mode().IsRegular() { + f, err := os.Open(p) + if err != nil { + return "", err + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return "", err + } + } + + return hex.EncodeToString(h.Sum(nil)), nil + } + return hasher +} + // MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed. // Note that the mtime can lag, so it's possible that a file will have changed but the mtime may look the same. func MtimeHasher() func(string) (string, error) { @@ -86,3 +117,10 @@ func MtimeHasher() func(string) (string, error) { } return hasher } + +// SHA256 returns the shasum of the contents of r +func SHA256(r io.Reader) (string, error) { + hasher := sha256.New() + _, err := io.Copy(hasher, r) + return hex.EncodeToString(hasher.Sum(make([]byte, 0, hasher.Size()))), err +} From 80a449f5419cf1d83e2d1d5cc1a17f78374af53d Mon Sep 17 00:00:00 2001 From: Priya Wadhwa Date: Fri, 7 Sep 2018 16:03:56 -0700 Subject: [PATCH 2/2] code review comments --- pkg/snapshot/layered_map.go | 9 +++++---- pkg/snapshot/layered_map_test.go | 11 ++++++----- pkg/util/util.go | 5 ++++- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pkg/snapshot/layered_map.go b/pkg/snapshot/layered_map.go index c922bbafc4..9a356b0e22 100644 --- a/pkg/snapshot/layered_map.go +++ b/pkg/snapshot/layered_map.go @@ -27,10 +27,11 @@ import ( ) type LayeredMap struct { - layers []map[string]string - whiteouts []map[string]string - added []map[string]string - hasher func(string) (string, error) + layers []map[string]string + whiteouts []map[string]string + added []map[string]string + hasher func(string) (string, error) + // cacheHasher doesn't include mtime in it's hash so that filesystem cache keys are stable cacheHasher func(string) (string, error) } diff --git a/pkg/snapshot/layered_map_test.go b/pkg/snapshot/layered_map_test.go index e5ea64f023..6bfff81b33 100644 --- a/pkg/snapshot/layered_map_test.go +++ b/pkg/snapshot/layered_map_test.go @@ -32,11 +32,15 @@ func Test_CacheKey(t *testing.T) { "a": "apple", "b": "bat", "c": "cat", + "d": "dog", + "e": "egg", }, map2: map[string]string{ "c": "cat", + "d": "dog", "b": "bat", "a": "apple", + "e": "egg", }, equal: true, }, @@ -67,11 +71,8 @@ func Test_CacheKey(t *testing.T) { if err != nil { t.Fatalf("error getting key for map 2: %v", err) } - if test.equal && k1 != k2 { - t.Fatalf("keys differ.\nExpected\n%+v\nActual\n%+v", k1, k2) - } - if !test.equal && k1 == k2 { - t.Fatal("keys are the same, expected different keys") + if test.equal != (k1 == k2) { + t.Fatalf("unexpected result: \nExpected\n%s\nActual\n%s\n", k1, k2) } }) } diff --git a/pkg/util/util.go b/pkg/util/util.go index bc09a7c27b..873cbae20e 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -122,5 +122,8 @@ func MtimeHasher() func(string) (string, error) { func SHA256(r io.Reader) (string, error) { hasher := sha256.New() _, err := io.Copy(hasher, r) - return hex.EncodeToString(hasher.Sum(make([]byte, 0, hasher.Size()))), err + if err != nil { + return "", err + } + return hex.EncodeToString(hasher.Sum(make([]byte, 0, hasher.Size()))), nil }