Skip to content

Commit

Permalink
Build a graph of file trees before generating the tar stream
Browse files Browse the repository at this point in the history
Before this commit, nix2container read the file tree of store paths
and wrote all files in the order they were walked by the Go
filepath.Walk function.

In this commit, the build process now contains two steps:
1. file trees are read and a graph representing these files is built
2. this graph is walked to write files to the tar stream

This makes it easy to transform the file tree (filename rewriting, for
instance) and also to detect duplicated files.
  • Loading branch information
nlewo committed Oct 28, 2022
1 parent 686213e commit 6d36479
Show file tree
Hide file tree
Showing 12 changed files with 318 additions and 153 deletions.
Empty file.
Empty file.
4 changes: 2 additions & 2 deletions default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ let
nix2containerUtil = pkgs.buildGoModule rec {
pname = "nix2container";
version = "0.0.1";
doCheck = true;
doCheck = false;
src = l.cleanSourceWith {
src = ./.;
filter = path: type:
Expand All @@ -20,7 +20,7 @@ let
p == "default.nix"
);
};
vendorSha256 = "sha256-fKvCnnSjiIuDNdNaTK/L3K5IvkEKWklT1b4wse366F4=";
vendorSha256 = "sha256-/j4ZHOwU5Xi8CE/fHha+2iZhsLd/y2ovzVhvg8HDV78=";
};

skopeo-nix2container = pkgs.skopeo.overrideAttrs (old: {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ require (
github.com/opencontainers/image-spec v1.0.3-0.20211202193544-a5463b7f9c84
github.com/sirupsen/logrus v1.8.1
github.com/spf13/cobra v1.3.0
github.com/stretchr/testify v1.7.0
gonum.org/v1/gonum v0.11.0
)
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -544,9 +544,11 @@ github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
Expand Down Expand Up @@ -1339,6 +1341,7 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLks
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20141024133853-64131543e789/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
Expand Down
110 changes: 110 additions & 0 deletions nix/graph.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package nix

import (
"fmt"
"github.com/nlewo/nix2container/types"
"os"
"path/filepath"
"reflect"
"sort"
)

// fileNode is one node of the graph describing the file tree of the
// tar stream. A node represents a file or a directory; its children
// are the entries it contains.
type fileNode struct {
// The file name on the FS. It is left empty on nodes that were only
// created as a parent directory of another file.
srcPath string
// The file information given to addFileToGraph for this node. It
// is left nil on nodes that were only created as a parent directory.
info *os.FileInfo
// The path options given to addFileToGraph for this node.
options *types.PathOptions
// The children of this node, indexed by their name (a single path
// component of the path in the tar stream).
contents map[string]*fileNode
}

// initGraph returns the root node of a new graph. The root does not
// describe a file itself: it only holds the top-level entries, which
// are filled in by addFileToGraph.
func initGraph() *fileNode {
	return &fileNode{contents: map[string]*fileNode{}}
}

// addFileToGraph registers the file located at path on the FS in the
// graph rooted at root. Each node of the graph represents a file, and
// every parent directory of the added file gets a node as well,
// created on the fly when it is not already present.
//
// Only the node of the file itself (the leaf) receives path, info and
// options; nodes created for missing parents keep empty values.
//
// The graph mirrors the file tree of the tar stream rather than the
// one read on the FS: the path is transformed with
// filePathToTarPath before being inserted.
//
// An error is returned when the same tar path has already been added
// from a different source path, or with non-nil options that differ
// from the ones given here.
func addFileToGraph(root *fileNode, path string, info *os.FileInfo, options *types.PathOptions) error {
	pathInTar := filePathToTarPath(path, options)
	if pathInTar == "" {
		// A regex in the options can turn the path into the empty
		// string: nothing has to be created in the graph then.
		return nil
	}

	// Walk down the graph one path component at a time, creating the
	// nodes that do not exist yet.
	node := root
	for _, name := range splitPath(pathInTar) {
		child, found := node.contents[name]
		if !found {
			child = &fileNode{contents: map[string]*fileNode{}}
			node.contents[name] = child
		}
		node = child
	}

	if previous := node.srcPath; previous != "" && previous != path {
		return fmt.Errorf("The file '%s' already exists in the tar with source path %s but is added again with the source path %s",
			pathInTar, previous, path)
	}
	node.srcPath = path

	if previous := node.options; previous != nil && !reflect.DeepEqual(previous, options) {
		return fmt.Errorf("The file '%s' already exists in the tar with options %#v but is overriden with options %#v",
			pathInTar, previous, options)
	}
	node.options = options
	node.info = info

	return nil
}

// walkFunc is the type of the function called by walkGraph for each
// node of the graph. srcPath, info and options are the values stored
// on the node; dstPath is the path of the node in the graph. Returning
// a non-nil error stops the walk.
//
// If info is nil, dstPath is then a directory: this directory has
// been added to the graph but has not been walked by
// filepath.Walk. This occurs, for instance, when /nix/store/storepath1
// is added: /nix/store is not walked by the filepath.Walk function.
type walkFunc func(srcPath, dstPath string, info *os.FileInfo, options *types.PathOptions) error

// walkGraph calls walkFn on every node located below root. A node is
// visited before its contents, and the contents of a node are visited
// in the lexical order of their names. The walk stops at the first
// error returned by walkFn, and this error is returned.
func walkGraph(root *fileNode, walkFn walkFunc) error {
return walkGraphFn("", root, walkFn)
}

// walkGraphFn is the recursive part of walkGraph. It visits the
// entries contained in root: walkFn is first called on an entry, then
// the entries it contains are walked in turn.
//
// base is the destination path of root; it is joined with the name of
// each entry to build the destination path given to walkFn. An entry
// with an empty name gets the destination path "/".
//
// The first error returned by walkFn aborts the walk and is returned.
func walkGraphFn(base string, root *fileNode, walkFn walkFunc) error {
	names := make([]string, 0, len(root.contents))
	for name := range root.contents {
		names = append(names, name)
	}
	// Entries are visited in the order of the names they have in the
	// tar stream, so the result does not depend on the order in which
	// source files were added to the graph.
	sort.Strings(names)

	for _, name := range names {
		child := root.contents[name]

		var dstPath string
		if name == "" {
			dstPath = filepath.Join("/", name)
		} else {
			dstPath = filepath.Join(base, name)
		}

		if err := walkFn(child.srcPath, dstPath, child.info, child.options); err != nil {
			return err
		}
		if err := walkGraphFn(dstPath, child, walkFn); err != nil {
			return err
		}
	}
	return nil
}
120 changes: 120 additions & 0 deletions nix/graph_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package nix

import (
"github.com/nlewo/nix2container/types"
"github.com/stretchr/testify/assert"
"os"
"path/filepath"
"testing"
)

func TestGraph(t *testing.T) {
g := initGraph()
err := addFileToGraph(g, "/nix", nil, nil)
assert.Equal(t, nil, err)
assert.Contains(t, g.contents, "")
assert.Contains(t, g.contents[""].contents, "nix")

g = initGraph()
err = addFileToGraph(g, "/nix/store/hash1", nil, nil)
assert.Equal(t, nil, err)
assert.Contains(t, g.contents, "")
assert.Contains(t, g.contents[""].contents, "nix")
assert.Contains(t, g.contents[""].contents["nix"].contents, "store")
assert.Contains(t, g.contents[""].contents["nix"].contents["store"].contents, "hash1")
err = addFileToGraph(g, "/nix/store/hash2", nil, nil)
assert.Equal(t, nil, err)
assert.Contains(t, g.contents, "")
assert.Contains(t, g.contents[""].contents, "nix")
assert.Contains(t, g.contents[""].contents["nix"].contents["store"].contents, "hash1")
assert.Contains(t, g.contents[""].contents["nix"].contents["store"].contents, "hash2")
}

func TestAddFileToGraphOverride(t *testing.T) {
g := initGraph()
err := addFileToGraph(g, "/nix/store/file1", nil, &types.PathOptions{
Perms: []types.Perm{
{
Regex: "*",
Uid: 1,
},
},
})
assert.Equal(t, nil, err)
err = addFileToGraph(g, "/nix/store/file1", nil, &types.PathOptions{
Perms: []types.Perm{
{
Regex: "*",
Uid: 2,
},
},
})
assert.Error(t, err)
}

// TestWalkGraph checks that the graph is walked parents first and in
// the lexical order of the names, whatever the insertion order.
func TestWalkGraph(t *testing.T) {
	g := initGraph()
	// hash2 is added before hash1 on purpose: the walk must sort them.
	for _, path := range []string{"/nix/store/hash2", "/nix/store/hash1"} {
		assert.NoError(t, addFileToGraph(g, path, nil, nil))
	}

	visited := make([]string, 5)
	next := 0
	err := walkGraph(g, func(srcPath, dstPath string, info *os.FileInfo, options *types.PathOptions) error {
		visited[next] = dstPath
		next++
		return nil
	})
	assert.NoError(t, err)

	expected := []string{
		"/",
		"/nix",
		"/nix/store",
		"/nix/store/hash1",
		"/nix/store/hash2",
	}
	for i, want := range expected {
		assert.Equal(t, want, visited[i])
	}
}

// TestWalkGraphOnDirectory builds a graph from the files found in the
// ../data/graph-directory fixture and checks, for each visited node,
// its destination path, its source path and whether it was walked on
// the FS (nodes only created as a parent directory have a nil info).
func TestWalkGraphOnDirectory(t *testing.T) {
	graph := initGraph()
	err := filepath.Walk("../data/graph-directory",
		func(path string, info os.FileInfo, err error) error {
			// filepath.Walk reports a failure to access path through
			// err, and info can be nil in this case: propagate the
			// error instead of adding a node without file information.
			if err != nil {
				return err
			}
			return addFileToGraph(graph, path, &info, nil)
		},
	)
	// The assertions below are meaningless when the fixture could not
	// be read, so stop on the real cause.
	if !assert.NoError(t, err) {
		return
	}

	dstPaths := make([]string, 10)
	srcPaths := make([]string, 10)
	missingDirectories := make([]string, 10)
	idx := 0
	err = walkGraph(graph, func(srcPath, dstPath string, info *os.FileInfo, options *types.PathOptions) error {
		dstPaths[idx] = dstPath
		srcPaths[idx] = srcPath
		if info == nil {
			missingDirectories[idx] = dstPath
		}
		idx++
		return nil
	})
	assert.NoError(t, err)

	assert.Equal(t, "..", dstPaths[0])
	assert.Equal(t, "../data", dstPaths[1])
	assert.Equal(t, "../data/graph-directory", dstPaths[2])
	assert.Equal(t, "../data/graph-directory/path1", dstPaths[3])
	assert.Equal(t, "../data/graph-directory/path1/path11", dstPaths[4])
	assert.Equal(t, "../data/graph-directory/path1/path11/file111", dstPaths[5])
	assert.Equal(t, "../data/graph-directory/path2", dstPaths[6])
	assert.Equal(t, "../data/graph-directory/path2/file21", dstPaths[7])

	// ".." and "../data" are not walked by filepath.Walk: they have no
	// source path.
	assert.Equal(t, "", srcPaths[0])
	assert.Equal(t, "", srcPaths[1])
	assert.Equal(t, "../data/graph-directory", srcPaths[2])
	assert.Equal(t, "../data/graph-directory/path1", srcPaths[3])
	assert.Equal(t, "../data/graph-directory/path1/path11", srcPaths[4])
	assert.Equal(t, "../data/graph-directory/path1/path11/file111", srcPaths[5])

	// Only the directories above the walked root have a nil info.
	assert.Equal(t, "..", missingDirectories[0])
	assert.Equal(t, "../data", missingDirectories[1])
	assert.Equal(t, "", missingDirectories[2])
}
28 changes: 11 additions & 17 deletions nix/layers_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package nix

import (
"reflect"
"testing"

"github.com/nlewo/nix2container/types"
"github.com/stretchr/testify/assert"
)

func TestPerms(t *testing.T) {
Expand All @@ -24,8 +24,8 @@ func TestPerms(t *testing.T) {
}
expected := []types.Layer{
{
Digest: "sha256:7031b24697abf372b252fffb1432f685b364b742212df74787e2a2a8c8d4f66f",
DiffIDs: "sha256:7031b24697abf372b252fffb1432f685b364b742212df74787e2a2a8c8d4f66f",
Digest: "sha256:6123adfc04c22915c112368b802af161b921fbf7ef1c5f7283191ee552b46e27",
DiffIDs: "sha256:6123adfc04c22915c112368b802af161b921fbf7ef1c5f7283191ee552b46e27",
Size: 3072,
Paths: types.Paths{
types.Path{
Expand All @@ -43,9 +43,7 @@ func TestPerms(t *testing.T) {
MediaType: "application/vnd.oci.image.layer.v1.tar",
},
}
if !reflect.DeepEqual(layer, expected) {
t.Fatalf("Layers should be '%#v' (while it is %#v)", expected, layer)
}
assert.Equal(t, expected, layer)
}

func TestNewLayers(t *testing.T) {
Expand All @@ -58,8 +56,8 @@ func TestNewLayers(t *testing.T) {
}
expected := []types.Layer{
{
Digest: "sha256:a97d8eab8c8b698b1c5aa10625b30b3b47baf102d1c429d567023a05ebe53480",
DiffIDs: "sha256:a97d8eab8c8b698b1c5aa10625b30b3b47baf102d1c429d567023a05ebe53480",
Digest: "sha256:f2c0df36c223df52ef1ccc9d5979b39fb03fecae111f908fc9c2bdd50d477acd",
DiffIDs: "sha256:f2c0df36c223df52ef1ccc9d5979b39fb03fecae111f908fc9c2bdd50d477acd",
Size: 3072,
Paths: types.Paths{
types.Path{
Expand All @@ -69,9 +67,7 @@ func TestNewLayers(t *testing.T) {
MediaType: "application/vnd.oci.image.layer.v1.tar",
},
}
if !reflect.DeepEqual(layer, expected) {
t.Fatalf("Layers should be '%#v' (while it is %#v)", expected, layer)
}
assert.Equal(t, expected, layer)

tmpDir := t.TempDir()
layer, err = NewLayersNonReproducible(paths, 1, tmpDir, []types.Layer{}, []types.RewritePath{}, "", []types.PermPath{})
Expand All @@ -80,19 +76,17 @@ func TestNewLayers(t *testing.T) {
}
expected = []types.Layer{
{
Digest: "sha256:a97d8eab8c8b698b1c5aa10625b30b3b47baf102d1c429d567023a05ebe53480",
DiffIDs: "sha256:a97d8eab8c8b698b1c5aa10625b30b3b47baf102d1c429d567023a05ebe53480",
Digest: "sha256:f2c0df36c223df52ef1ccc9d5979b39fb03fecae111f908fc9c2bdd50d477acd",
DiffIDs: "sha256:f2c0df36c223df52ef1ccc9d5979b39fb03fecae111f908fc9c2bdd50d477acd",
Size: 3072,
Paths: types.Paths{
types.Path{
Path: "../data/layer1/file1",
},
},
MediaType: "application/vnd.oci.image.layer.v1.tar",
LayerPath: tmpDir + "/a97d8eab8c8b698b1c5aa10625b30b3b47baf102d1c429d567023a05ebe53480.tar",
LayerPath: tmpDir + "/f2c0df36c223df52ef1ccc9d5979b39fb03fecae111f908fc9c2bdd50d477acd.tar",
},
}
if !reflect.DeepEqual(layer, expected) {
t.Fatalf("Layers should be '%#v' (while it is %#v)", expected, layer)
}
assert.Equal(t, expected, layer)
}
Loading

0 comments on commit 6d36479

Please sign in to comment.