Skip to content

Commit

Permalink
cni: use check command when restoring from restart
Browse files Browse the repository at this point in the history
When the Nomad client restarts and restores allocations, the network namespace
for an allocation may exist but no longer be correctly configured. For example,
if the host is rebooted and the task was a Docker task using a pause container,
the network namespace may be recreated by the docker daemon.

When we restore an allocation, use the CNI "check" command to verify that any
existing network namespace matches the expected configuration. This requires CNI
plugins of at least version 1.2.0 to avoid a bug in older plugin versions that
would cause the check to fail.

If the check fails, destroy the network namespace and try to recreate it from
scratch once. If that fails in the second pass, fail the restore so that the
allocation can be recreated (rather than silently having networking fail).

This should fix the gap left #24650 for Docker task drivers and any other
drivers with the `MustInitiateNetwork` capability.

Fixes: #24292
Ref: #24650
  • Loading branch information
tgross committed Jan 7, 2025
1 parent e3ac00f commit fd20ccc
Show file tree
Hide file tree
Showing 10 changed files with 247 additions and 15 deletions.
3 changes: 3 additions & 0 deletions .changelog/24658.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
networking: check network namespaces on Linux during client restarts and fail the allocation if an existing namespace is invalid
```
25 changes: 23 additions & 2 deletions client/allocrunner/network_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package allocrunner

import (
"context"
"errors"
"fmt"

hclog "github.com/hashicorp/go-hclog"
Expand All @@ -28,6 +29,8 @@ const (
dockerNetSpecHostnameKey = "docker_sandbox_hostname"
)

var ErrCNICheckFailed = errors.New("network namespace already exists but was misconfigured")

type networkIsolationSetter interface {
SetNetworkIsolation(*drivers.NetworkIsolationSpec)
}
Expand Down Expand Up @@ -132,6 +135,9 @@ func (h *networkHook) Prerun() error {
Hostname: interpolatedNetworks[0].Hostname,
}

var checkedOnce bool

CREATE:
spec, created, err := h.manager.CreateNetwork(h.alloc.ID, &networkCreateReq)
if err != nil {
return fmt.Errorf("failed to create network for alloc: %v", err)
Expand All @@ -142,11 +148,26 @@ func (h *networkHook) Prerun() error {
h.isolationSetter.SetNetworkIsolation(spec)
}

if created {
status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec)
if spec != nil {
status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec, created)
if err != nil {
// if the netns already existed but is invalid, we get
// ErrCNICheckFailed. We'll try to recover from this one time by
// recreating the netns from scratch before giving up
if errors.Is(err, ErrCNICheckFailed) && !checkedOnce {
checkedOnce = true
destroyErr := h.manager.DestroyNetwork(h.alloc.ID, spec)
if destroyErr != nil {
return fmt.Errorf("%w: destroying network to retry failed: %v", err, destroyErr)
}
goto CREATE
}

return fmt.Errorf("failed to configure networking for alloc: %v", err)
}
if status == nil {
return nil // netns already existed and was correctly configured
}

// If the driver set the sandbox hostname label, then we will use that
// to set the HostsConfig.Hostname. Otherwise, identify the sandbox
Expand Down
146 changes: 145 additions & 1 deletion client/allocrunner/network_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package allocrunner

import (
"fmt"
"testing"

"github.com/hashicorp/nomad/ci"
Expand All @@ -14,6 +15,7 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/hashicorp/nomad/plugins/drivers/testutils"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test"
"github.com/shoenig/test/must"
)
Expand Down Expand Up @@ -65,7 +67,7 @@ func TestNetworkHook_Prerun_Postrun_group(t *testing.T) {
MockNetworkManager: testutils.MockNetworkManager{
CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
test.Eq(t, alloc.ID, allocID)
return spec, false, nil
return spec, true, nil
},

DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
Expand Down Expand Up @@ -137,3 +139,145 @@ func TestNetworkHook_Prerun_Postrun_host(t *testing.T) {
must.NoError(t, hook.Postrun())
must.True(t, destroyCalled)
}

// TestNetworkHook_Prerun_Postrun_ExistingNetNS tests that the prerun and
// postrun hooks call the Setup and Destroy with the expected behaviors when the
// network namespace already exists (typical of agent restarts and host reboots)
func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
ci.Parallel(t)

alloc := mock.Alloc()
alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{
{Mode: "bridge"},
}

spec := &drivers.NetworkIsolationSpec{
Mode: drivers.NetIsolationModeGroup,
Path: "test",
Labels: map[string]string{"abc": "123"},
}
isolationSetter := &mockNetworkIsolationSetter{t: t, expectedSpec: spec}
statusSetter := &mockNetworkStatusSetter{t: t, expectedStatus: nil}

callCounts := testutil.NewCallCounter()

nm := &testutils.MockDriver{
MockNetworkManager: testutils.MockNetworkManager{
CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
test.Eq(t, alloc.ID, allocID)
callCounts.Inc("CreateNetwork")
return spec, false, nil
},

DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
test.Eq(t, alloc.ID, allocID)
test.Eq(t, spec, netSpec)
callCounts.Inc("DestroyNetwork")
return nil
},
},
}

fakePlugin := newMockCNIPlugin()

configurator := &cniNetworkConfigurator{
nodeAttrs: map[string]string{
"plugins.cni.version.bridge": "1.6.1",
},
nodeMeta: map[string]string{},
logger: testlog.HCLogger(t),
cni: fakePlugin,
nsOpts: &nsOpts{},
}

envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region)

testCases := []struct {
name string
cniVersion string
checkErrs []error
setupErrs []string
expectPrerunCreateNetworkCalls int
expectPrerunDestroyNetworkCalls int
expectCheckCalls int
expectSetupCalls int
expectPostrunDestroyNetworkCalls int
expectPrerunError string
}{
{
name: "good check",
cniVersion: "1.6.1",
expectPrerunCreateNetworkCalls: 1,
expectPrerunDestroyNetworkCalls: 0,
expectCheckCalls: 1,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 1,
},
{
name: "initial check fails",
cniVersion: "1.6.1",
checkErrs: []error{fmt.Errorf("whatever")},
expectPrerunCreateNetworkCalls: 2,
expectPrerunDestroyNetworkCalls: 1,
expectCheckCalls: 2,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 2,
},
{
name: "check fails twice",
cniVersion: "1.6.1",
checkErrs: []error{
fmt.Errorf("whatever"),
fmt.Errorf("whatever"),
},
expectPrerunCreateNetworkCalls: 2,
expectPrerunDestroyNetworkCalls: 1,
expectCheckCalls: 2,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 2,
expectPrerunError: "failed to configure networking for alloc: network namespace already exists but was misconfigured: whatever",
},
{
name: "old CNI version skips check",
cniVersion: "1.2.0",
expectPrerunCreateNetworkCalls: 1,
expectPrerunDestroyNetworkCalls: 0,
expectCheckCalls: 0,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 1,
},
}

for _, tc := range testCases {

t.Run(tc.name, func(t *testing.T) {
callCounts.Reset()
fakePlugin.counter.Reset()
fakePlugin.checkErrors = tc.checkErrs
configurator.nodeAttrs["plugins.cni.version.bridge"] = tc.cniVersion
hook := newNetworkHook(testlog.HCLogger(t), isolationSetter,
alloc, nm, configurator, statusSetter, envBuilder.Build())

err := hook.Prerun()
if tc.expectPrerunError == "" {
must.NoError(t, err)
} else {
must.EqError(t, err, tc.expectPrerunError)
}

test.Eq(t, tc.expectPrerunDestroyNetworkCalls,
callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after prerun"))
test.Eq(t, tc.expectPrerunCreateNetworkCalls,
callCounts.Get()["CreateNetwork"], test.Sprint("CreateNetwork calls after prerun"))

test.Eq(t, tc.expectCheckCalls, fakePlugin.counter.Get()["Check"], test.Sprint("Check calls"))
test.Eq(t, tc.expectSetupCalls, fakePlugin.counter.Get()["Setup"], test.Sprint("Setup calls"))

must.NoError(t, hook.Postrun())
test.Eq(t, tc.expectPostrunDestroyNetworkCalls,
callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after postrun"))

})

}
}
8 changes: 4 additions & 4 deletions client/allocrunner/networking.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
// NetworkConfigurator sets up and tears down the interfaces, routes, firewall
// rules, etc for the configured networking mode of the allocation.
type NetworkConfigurator interface {
Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error)
Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec, bool) (*structs.AllocNetworkStatus, error)
Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error
}

Expand All @@ -23,7 +23,7 @@ type NetworkConfigurator interface {
// require further configuration
type hostNetworkConfigurator struct{}

func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec, bool) (*structs.AllocNetworkStatus, error) {
return nil, nil
}
func (h *hostNetworkConfigurator) Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error {
Expand All @@ -41,10 +41,10 @@ type synchronizedNetworkConfigurator struct {
nc NetworkConfigurator
}

func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) {
networkingGlobalMutex.Lock()
defer networkingGlobalMutex.Unlock()
return s.nc.Setup(ctx, allocation, spec)
return s.nc.Setup(ctx, allocation, spec, created)
}

func (s *synchronizedNetworkConfigurator) Teardown(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) error {
Expand Down
4 changes: 2 additions & 2 deletions client/allocrunner/networking_bridge_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,12 @@ func (b *bridgeNetworkConfigurator) ensureForwardingRules() error {
}

// Setup calls the CNI plugins with the add action
func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) {
if err := b.ensureForwardingRules(); err != nil {
return nil, fmt.Errorf("failed to initialize table forwarding rules: %v", err)
}

return b.cni.Setup(ctx, alloc, spec)
return b.cni.Setup(ctx, alloc, spec, created)
}

// Teardown calls the CNI plugins with the delete action
Expand Down
39 changes: 38 additions & 1 deletion client/allocrunner/networking_cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
consulIPTables "github.com/hashicorp/consul/sdk/iptables"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-set/v3"
"github.com/hashicorp/go-version"
"github.com/hashicorp/nomad/client/taskenv"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/envoy"
Expand Down Expand Up @@ -138,8 +139,19 @@ func addNomadWorkloadCNIArgs(logger log.Logger, alloc *structs.Allocation, cniAr
}
}

var supportsCNICheck = mustCNICheckConstraint()

func mustCNICheckConstraint() version.Constraints {
v, err := version.NewConstraint(">= 1.3.0")
if err != nil {
panic(err)
}
return v
}

// Setup calls the CNI plugins with the add action
func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) {

if err := c.ensureCNIInitialized(); err != nil {
return nil, fmt.Errorf("cni not initialized: %w", err)
}
Expand Down Expand Up @@ -171,6 +183,31 @@ func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Alloc
cniArgs[ConsulIPTablesConfigEnvVar] = string(iptablesCfg)
}

if !created {
// The netns will not be created if it already exists, typically on
// agent restart. If the configuration of a prexisting netns is wrong
// (ex. after a host reboot for docker created netns), networking will
// be broken. CNI's ADD command is not idempotent so we can't simply try
// again. Run CHECK to verify the network is still valid. Older plugins
// have a broken CHECK, so we have to allow the buggy behavior in the
// case of a host reboot with docker-created netns there.
cniVersion, err := version.NewSemver(c.nodeAttrs["plugins.cni.version.bridge"])
if err == nil && supportsCNICheck.Check(cniVersion) {
err := c.cni.Check(ctx, alloc.ID, spec.Path,
c.nsOpts.withCapabilityPortMap(portMaps.ports),
c.nsOpts.withArgs(cniArgs),
)
if err != nil {
return nil, fmt.Errorf("%w: %w", ErrCNICheckFailed, err)
}
} else {
c.logger.Debug("network namespace exists but could not check if networking is valid because bridge plugin version was <1.3.0: continuing anyways")
return nil, nil
}
c.logger.Trace("network namespace exists and passed check: skipping setup")
return nil, nil
}

// Depending on the version of bridge cni plugin used, a known race could occure
// where two alloc attempt to create the nomad bridge at the same time, resulting
// in one of them to fail. This rety attempts to overcome those erroneous failures.
Expand Down
12 changes: 11 additions & 1 deletion client/allocrunner/networking_cni_mock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@ var _ cni.CNI = &mockCNIPlugin{}
type mockCNIPlugin struct {
counter *testutil.CallCounter
setupErrors []string
checkErrors []error
}

func newMockCNIPlugin() *mockCNIPlugin {

callCounts := testutil.NewCallCounter()
callCounts.Reset()
return &mockCNIPlugin{
counter: callCounts,
counter: callCounts,
setupErrors: []string{},
checkErrors: []error{},
}
}

Expand Down Expand Up @@ -65,6 +68,13 @@ func (f *mockCNIPlugin) Remove(ctx context.Context, id, path string, opts ...cni
}

func (f *mockCNIPlugin) Check(ctx context.Context, id, path string, opts ...cni.NamespaceOpts) error {
if f.counter != nil {
f.counter.Inc("Check")
}
numOfCalls := f.counter.Get()["Check"]
if numOfCalls <= len(f.checkErrors) {
return f.checkErrors[numOfCalls-1]
}
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion client/allocrunner/networking_cni_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ func TestSetup(t *testing.T) {
}

// method under test
result, err := c.Setup(context.Background(), alloc, spec)
result, err := c.Setup(context.Background(), alloc, spec, true)
if tc.expectErr == "" {
must.NoError(t, err)
must.Eq(t, tc.expectResult, result)
Expand Down
13 changes: 13 additions & 0 deletions website/content/docs/upgrade/upgrade-specific.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,19 @@ upgrade. However, specific versions of Nomad may have more details provided for
their upgrades as a result of new features or changed behavior. This page is
used to document those details separately from the standard upgrade flow.

## Nomad 1.9.5

#### CNI plugins

Nomad 1.9.5 includes a bug fix for restoring allocation networking after a
client host reboot. This fix requires recent versions of the CNI reference
plugins (minimum 1.2.0) and will fallback to the existing behavior if the CNI
reference plugins cannot support the fix.

We recommend installing the CNI reference plugins from the [CNI project release
page](https://github.com/containernetworking/plugins/releases) rather than your
Linux distribution's package manager.

## Nomad 1.9.4

#### Security updates to default deny lists
Expand Down
Loading

0 comments on commit fd20ccc

Please sign in to comment.