From 31a494ab52854ffd3c2d93173770e7016da6fc42 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Thu, 18 Aug 2022 11:10:59 -0700 Subject: [PATCH] Add network namespace pool support This adds netNSPoolSize pool options which allow setting a target network namespace pool size. buildkitd will create this number of network namespaces at startup (without blocking). When a container execution finishes, the network namespace gets returned to the pool. If the pool goes above the target size, there is a grace period to allow network namespaces to be reused, and if this passes without reuse, the extra namespaces will be released. Signed-off-by: Aaron Lehmann --- cmd/buildkitd/config/config.go | 1 + cmd/buildkitd/main_containerd_worker.go | 9 ++ cmd/buildkitd/main_oci_worker.go | 9 ++ docs/buildkitd.toml.md | 7 + executor/containerdexecutor/executor.go | 2 +- executor/runcexecutor/executor.go | 2 +- util/network/cniprovider/cni.go | 134 ++++++++++++++++++- util/network/cniprovider/createns_linux.go | 23 ++++ util/network/cniprovider/createns_unix.go | 3 + util/network/cniprovider/createns_windows.go | 4 + util/network/host.go | 4 +- util/network/network.go | 3 +- util/network/none.go | 4 +- 13 files changed, 195 insertions(+), 10 deletions(-) diff --git a/cmd/buildkitd/config/config.go b/cmd/buildkitd/config/config.go index 7ee7b577d5829..e497471ff7886 100644 --- a/cmd/buildkitd/config/config.go +++ b/cmd/buildkitd/config/config.go @@ -53,6 +53,7 @@ type NetworkConfig struct { Mode string `toml:"networkMode"` CNIConfigPath string `toml:"cniConfigPath"` CNIBinaryPath string `toml:"cniBinaryPath"` + NetNSPoolSize int `toml:"netNSPoolSize"` } type OCIConfig struct { diff --git a/cmd/buildkitd/main_containerd_worker.go b/cmd/buildkitd/main_containerd_worker.go index a3bb8ac30ab4f..8f90ee423b681 100644 --- a/cmd/buildkitd/main_containerd_worker.go +++ b/cmd/buildkitd/main_containerd_worker.go @@ -90,6 +90,11 @@ func init() { Usage: "path of cni binary files", Value: defaultConf.Workers.Containerd.NetworkConfig.CNIBinaryPath, }, + cli.IntFlag{ + Name: "containerd-netns-pool-size", + Usage: "size of network namespace pool", + Value: defaultConf.Workers.Containerd.NetworkConfig.NetNSPoolSize, + }, cli.StringFlag{ Name: "containerd-worker-snapshotter", Usage: "snapshotter name to use", @@ -208,6 +213,9 @@ func applyContainerdFlags(c *cli.Context, cfg *config.Config) error { if c.GlobalIsSet("containerd-cni-config-path") { cfg.Workers.Containerd.NetworkConfig.CNIConfigPath = c.GlobalString("containerd-cni-config-path") } + if c.GlobalIsSet("containerd-netns-pool-size") { + cfg.Workers.Containerd.NetworkConfig.NetNSPoolSize = c.GlobalInt("containerd-netns-pool-size") + } if c.GlobalIsSet("containerd-cni-binary-dir") { cfg.Workers.Containerd.NetworkConfig.CNIBinaryPath = c.GlobalString("containerd-cni-binary-dir") } @@ -247,6 +255,7 @@ func containerdWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([ Root: common.config.Root, ConfigPath: common.config.Workers.Containerd.CNIConfigPath, BinaryDir: common.config.Workers.Containerd.CNIBinaryPath, + PoolSize: common.config.Workers.Containerd.NetNSPoolSize, }, } diff --git a/cmd/buildkitd/main_oci_worker.go b/cmd/buildkitd/main_oci_worker.go index 49153af5fb485..4389625da65af 100644 --- a/cmd/buildkitd/main_oci_worker.go +++ b/cmd/buildkitd/main_oci_worker.go @@ -106,6 +106,11 @@ func init() { Usage: "name of specified oci worker binary", Value: defaultConf.Workers.OCI.Binary, }, + cli.IntFlag{ + Name: "oci-netns-pool-size", + Usage: "size of network namespace pool", + Value: defaultConf.Workers.OCI.NetworkConfig.NetNSPoolSize, + }, cli.StringFlag{ Name: "oci-worker-apparmor-profile", Usage: "set the name of the apparmor profile applied to containers", @@ -223,6 +228,9 @@ func applyOCIFlags(c *cli.Context, cfg *config.Config) error { if c.GlobalIsSet("oci-cni-binary-dir") { cfg.Workers.OCI.NetworkConfig.CNIBinaryPath = c.GlobalString("oci-cni-binary-dir") } + if c.GlobalIsSet("oci-netns-pool-size") { + cfg.Workers.OCI.NetworkConfig.NetNSPoolSize = c.GlobalInt("oci-netns-pool-size") + } if c.GlobalIsSet("oci-worker-binary") { cfg.Workers.OCI.Binary = c.GlobalString("oci-worker-binary") } @@ -282,6 +290,7 @@ func ociWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([]worker Root: common.config.Root, ConfigPath: common.config.Workers.OCI.CNIConfigPath, BinaryDir: common.config.Workers.OCI.CNIBinaryPath, + PoolSize: common.config.Workers.OCI.NetNSPoolSize, }, } diff --git a/docs/buildkitd.toml.md b/docs/buildkitd.toml.md index d2c60339f199a..73f2d85684aa8 100644 --- a/docs/buildkitd.toml.md +++ b/docs/buildkitd.toml.md @@ -57,6 +57,9 @@ insecure-entitlements = [ "network.host", "security.insecure" ] apparmor-profile = "" # limit the number of parallel build steps that can run at the same time max-parallelism = 4 + # maintain a pool of reusable network namespaces to amortize the overhead + # of allocating and releasing the namespaces + netNSPoolSize = 16 [worker.oci.labels] "foo" = "bar" @@ -77,6 +80,10 @@ insecure-entitlements = [ "network.host", "security.insecure" ] gc = true # gckeepstorage sets storage limit for default gc profile, in MB. gckeepstorage = 9000 + # maintain a pool of reusable network namespaces to amortize the overhead + # of allocating and releasing the namespaces + netNSPoolSize = 16 + [worker.containerd.labels] "foo" = "bar" diff --git a/executor/containerdexecutor/executor.go b/executor/containerdexecutor/executor.go index 4801a84882b6c..3eb511709dd03 100644 --- a/executor/containerdexecutor/executor.go +++ b/executor/containerdexecutor/executor.go @@ -146,7 +146,7 @@ func (w *containerdExecutor) Run(ctx context.Context, id string, root executor.M if !ok { return errors.Errorf("unknown network mode %s", meta.NetMode) } - namespace, err := provider.New(meta.Hostname) + namespace, err := provider.New(ctx, meta.Hostname) if err != nil { return err } diff --git a/executor/runcexecutor/executor.go b/executor/runcexecutor/executor.go index 1755b5ed4c58f..118bdc899c834 100644 --- a/executor/runcexecutor/executor.go +++ b/executor/runcexecutor/executor.go @@ -161,7 +161,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount, if !ok { return errors.Errorf("unknown network mode %s", meta.NetMode) } - namespace, err := provider.New(meta.Hostname) + namespace, err := provider.New(ctx, meta.Hostname) if err != nil { return err } diff --git a/util/network/cniprovider/cni.go b/util/network/cniprovider/cni.go index 215562c5fc967..f9a2b967b0b86 100644 --- a/util/network/cniprovider/cni.go +++ b/util/network/cniprovider/cni.go @@ -5,19 +5,26 @@ import ( "os" "runtime" "strings" + "sync" + "time" cni "github.com/containerd/go-cni" "github.com/gofrs/flock" "github.com/moby/buildkit/identity" + "github.com/moby/buildkit/util/bklog" "github.com/moby/buildkit/util/network" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "go.opentelemetry.io/otel/trace" ) +const aboveTargetGracePeriod = 5 * time.Minute + type Opt struct { Root string ConfigPath string BinaryDir string + PoolSize int } func New(opt Opt) (network.Provider, error) { @@ -48,15 +55,20 @@ func New(opt Opt) (network.Provider, error) { } cp := &cniProvider{CNI: cniHandle, root: opt.Root} + cleanOldNamespaces(cp) + + cp.nsPool = &cniPool{targetSize: opt.PoolSize, provider: cp} if err := cp.initNetwork(); err != nil { return nil, err } + go cp.nsPool.fillPool(context.TODO()) return cp, nil } type cniProvider struct { cni.CNI - root string + root string + nsPool *cniPool } func (c *cniProvider) initNetwork() error { @@ -67,19 +79,114 @@ func (c *cniProvider) initNetwork() error { } defer l.Unlock() } - ns, err := c.New("") + ns, err := c.New(context.TODO(), "") if err != nil { return err } return ns.Close() } -func (c *cniProvider) New(hostname string) (network.Namespace, error) { +type cniPool struct { + provider *cniProvider + mu sync.Mutex + targetSize int + actualSize int + available []*cniNS +} + +func (pool *cniPool) fillPool(ctx context.Context) { + for { + pool.mu.Lock() + actualSize := pool.actualSize + pool.mu.Unlock() + if actualSize >= pool.targetSize { + return + } + if _, err := pool.getNew(ctx); err != nil { + bklog.G(ctx).Errorf("failed to create new network namespace while prefilling pool: %s", err) + return + } + } +} + +func (pool *cniPool) get(ctx context.Context) (*cniNS, error) { + pool.mu.Lock() + if len(pool.available) > 0 { + ns := pool.available[0] + pool.available = pool.available[1:] + pool.mu.Unlock() + trace.SpanFromContext(ctx).AddEvent("returning network namespace from pool") + bklog.G(ctx).Debugf("returning network namespace %s from pool", ns.id) + return ns, nil + } + pool.mu.Unlock() + + return pool.getNew(ctx) +} + +func (pool *cniPool) getNew(ctx context.Context) (*cniNS, error) { + ns, err := pool.provider.newNS(ctx, "") + if err != nil { + return nil, err + } + ns.pool = pool + + pool.mu.Lock() + pool.actualSize++ + pool.mu.Unlock() + return ns, nil +} + +func (pool *cniPool) put(ns *cniNS) { + putTime := time.Now() + ns.lastUsed = putTime + + pool.mu.Lock() + pool.available = append(pool.available, ns) + actualSize := pool.actualSize + pool.mu.Unlock() + + if actualSize > pool.targetSize { + // We have more network namespaces than our target number, so + // release this extra one after some time if it hasn't been + // reused by then. + time.AfterFunc(aboveTargetGracePeriod, func() { + pool.mu.Lock() + if pool.actualSize > pool.targetSize && ns.lastUsed.Equal(putTime) { + for i, poolNS := range pool.available { + if poolNS == ns { + pool.available = append(pool.available[:i], pool.available[i+1:]...) + pool.actualSize-- + defer ns.release() + break + } + } + } + pool.mu.Unlock() + }) + } +} + +func (c *cniProvider) New(ctx context.Context, hostname string) (network.Namespace, error) { + // We can't use the pool for namespaces that need a custom hostname. + // We also avoid using it on windows because we don't have a cleanup + // mechanism for Windows yet. + if hostname == "" || runtime.GOOS == "windows" { + return c.nsPool.get(ctx) + } + return c.newNS(ctx, hostname) +} + +func (c *cniProvider) newNS(ctx context.Context, hostname string) (*cniNS, error) { id := identity.NewID() + trace.SpanFromContext(ctx).AddEvent("creating new network namespace") + bklog.G(ctx).Debugf("creating new network namespace %s", id) nativeID, err := createNetNS(c, id) if err != nil { return nil, err } + trace.SpanFromContext(ctx).AddEvent("finished creating network namespace") + bklog.G(ctx).Debugf("finished creating network namespace %s", id) nsOpts := []cni.NamespaceOpts{} @@ -97,15 +204,24 @@ func (c *cniProvider) New(hostname string) (network.Namespace, error) { deleteNetNS(nativeID) return nil, errors.Wrap(err, "CNI setup error") } - - return &cniNS{nativeID: nativeID, id: id, handle: c.CNI, opts: nsOpts}, nil + trace.SpanFromContext(ctx).AddEvent("finished setting up network namespace") + bklog.G(ctx).Debugf("finished setting up network namespace %s", id) + + return &cniNS{ + nativeID: nativeID, + id: id, + handle: c.CNI, + opts: nsOpts, + }, nil } type cniNS struct { + pool *cniPool handle cni.CNI id string nativeID string opts []cni.NamespaceOpts + lastUsed time.Time } func (ns *cniNS) Set(s *specs.Spec) error { @@ -113,6 +229,14 @@ func (ns *cniNS) Set(s *specs.Spec) error { } func (ns *cniNS) Close() error { + if ns.pool == nil { + return ns.release() + } + ns.pool.put(ns) + return nil +} + +func (ns *cniNS) release() error { err := ns.handle.Remove(context.TODO(), ns.id, ns.nativeID, ns.opts...) if err1 := unmountNetNS(ns.nativeID); err1 != nil && err == nil { err = err1 diff --git a/util/network/cniprovider/createns_linux.go b/util/network/cniprovider/createns_linux.go index f1138a9fd5b3d..a05bc0a441322 100644 --- a/util/network/cniprovider/createns_linux.go +++ b/util/network/cniprovider/createns_linux.go @@ -10,11 +10,34 @@ import ( "unsafe" "github.com/containerd/containerd/oci" + "github.com/moby/buildkit/util/bklog" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "golang.org/x/sys/unix" ) +func cleanOldNamespaces(c *cniProvider) { + nsDir := filepath.Join(c.root, "net/cni") + dirEntries, err := os.ReadDir(nsDir) + if err != nil { + bklog.L.Debugf("could not read %q for cleanup: %s", nsDir, err) + return + } + go func() { + for _, d := range dirEntries { + id := d.Name() + ns := cniNS{ + id: id, + nativeID: filepath.Join(c.root, "net/cni", id), + handle: c.CNI, + } + if err := ns.release(); err != nil { + bklog.L.Warningf("failed to release network namespace %q left over from previous run: %s", id, err) + } + } + }() +} + func createNetNS(c *cniProvider, id string) (string, error) { nsPath := filepath.Join(c.root, "net/cni", id) if err := os.MkdirAll(filepath.Dir(nsPath), 0700); err != nil { diff --git a/util/network/cniprovider/createns_unix.go b/util/network/cniprovider/createns_unix.go index 6aa4e00c56e27..656aaa49be467 100644 --- a/util/network/cniprovider/createns_unix.go +++ b/util/network/cniprovider/createns_unix.go @@ -23,3 +23,6 @@ func unmountNetNS(nativeID string) error { func deleteNetNS(nativeID string) error { return errors.New("deleting netns for cni not supported") } + +func cleanOldNamespaces(_ *cniProvider) { +} diff --git a/util/network/cniprovider/createns_windows.go b/util/network/cniprovider/createns_windows.go index 7a0cc2d272abc..f294d64d49ac9 100644 --- a/util/network/cniprovider/createns_windows.go +++ b/util/network/cniprovider/createns_windows.go @@ -47,3 +47,7 @@ func deleteNetNS(nativeID string) error { return ns.Delete() } + +func cleanOldNamespaces(_ *cniProvider) { + // not implemented on Windows +} diff --git a/util/network/host.go b/util/network/host.go index b390a54cda093..8bc0617d88359 100644 --- a/util/network/host.go +++ b/util/network/host.go @@ -4,6 +4,8 @@ package network import ( + "context" + "github.com/containerd/containerd/oci" specs "github.com/opencontainers/runtime-spec/specs-go" ) @@ -15,7 +17,7 @@ func NewHostProvider() Provider { type host struct { } -func (h *host) New(hostname string) (Namespace, error) { +func (h *host) New(_ context.Context, hostname string) (Namespace, error) { return &hostNS{}, nil } diff --git a/util/network/network.go b/util/network/network.go index e0f5658ebe684..0b43d89e4cde8 100644 --- a/util/network/network.go +++ b/util/network/network.go @@ -1,6 +1,7 @@ package network import ( + "context" "io" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -8,7 +9,7 @@ import ( // Provider interface for Network type Provider interface { - New(hostname string) (Namespace, error) + New(ctx context.Context, hostname string) (Namespace, error) } // Namespace of network for workers diff --git a/util/network/none.go b/util/network/none.go index 81af7b17adf77..d5cc7e419cd55 100644 --- a/util/network/none.go +++ b/util/network/none.go @@ -1,6 +1,8 @@ package network import ( + "context" + specs "github.com/opencontainers/runtime-spec/specs-go" ) @@ -11,7 +13,7 @@ func NewNoneProvider() Provider { type none struct { } -func (h *none) New(hostname string) (Namespace, error) { +func (h *none) New(_ context.Context, hostname string) (Namespace, error) { return &noneNS{}, nil }