diff --git a/libcontainer/cgroups/systemd/common.go b/libcontainer/cgroups/systemd/common.go index 91c314e09ea..7dafbd40643 100644 --- a/libcontainer/cgroups/systemd/common.go +++ b/libcontainer/cgroups/systemd/common.go @@ -158,14 +158,27 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) { return "", nil } +// DeviceAllow is the dbus type "a(ss)" which means we need a struct +// to represent it in Go. +type deviceAllowEntry struct { + Path string + Perms string +} + +func allowAllDevices() []systemdDbus.Property { + // Setting mode to auto and removing all DeviceAllow rules + // results in allowing access to all devices. + return []systemdDbus.Property{ + newProp("DevicePolicy", "auto"), + newProp("DeviceAllow", []deviceAllowEntry{}), + } +} + // generateDeviceProperties takes the configured device rules and generates a // corresponding set of systemd properties to configure the devices correctly. -func generateDeviceProperties(rules []*devices.Rule) ([]systemdDbus.Property, error) { - // DeviceAllow is the type "a(ss)" which means we need a temporary struct - // to represent it in Go. - type deviceAllowEntry struct { - Path string - Perms string +func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, error) { + if r.SkipDevices { + return allowAllDevices(), nil } properties := []systemdDbus.Property{ @@ -177,7 +190,7 @@ func generateDeviceProperties(rules []*devices.Rule) ([]systemdDbus.Property, er // Figure out the set of rules. configEmu := &cgroupdevices.Emulator{} - for _, rule := range rules { + for _, rule := range r.Devices { if err := configEmu.Apply(*rule); err != nil { return nil, errors.Wrap(err, "apply rule for systemd") } @@ -189,12 +202,7 @@ func generateDeviceProperties(rules []*devices.Rule) ([]systemdDbus.Property, er if configEmu.IsBlacklist() { // However, if we're dealing with an allow-all rule then we can do it. if configEmu.IsAllowAll() { - return []systemdDbus.Property{ - // Run in white-list mode by setting to "auto" and removing all - // DeviceAllow rules. - newProp("DevicePolicy", "auto"), - newProp("DeviceAllow", []deviceAllowEntry{}), - }, nil + return allowAllDevices(), nil } logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule") return properties, nil diff --git a/libcontainer/cgroups/systemd/systemd_test.go b/libcontainer/cgroups/systemd/systemd_test.go index 0c98a11721c..a32acc19073 100644 --- a/libcontainer/cgroups/systemd/systemd_test.go +++ b/libcontainer/cgroups/systemd/systemd_test.go @@ -1,7 +1,15 @@ package systemd import ( + "bytes" + "os" + "os/exec" + "strings" "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" ) func TestSystemdVersion(t *testing.T) { @@ -30,3 +38,133 @@ func TestSystemdVersion(t *testing.T) { } } } + +func newManager(config *configs.Cgroup) cgroups.Manager { + if cgroups.IsCgroup2UnifiedMode() { + return NewUnifiedManager(config, "", false) + } + return NewLegacyManager(config, nil) +} + +func testSkipDevices(t *testing.T, skipDevices bool, expected []string) { + if !IsRunningSystemd() { + t.Skip("Test requires systemd.") + } + if os.Geteuid() != 0 { + t.Skip("Test requires root.") + } + + podConfig := &configs.Cgroup{ + Parent: "system.slice", + Name: "system-runc_test_pods.slice", + Resources: &configs.Resources{ + SkipDevices: skipDevices, + }, + } + // Create "pods" cgroup (a systemd slice to hold containers). + pm := newManager(podConfig) + defer pm.Destroy() //nolint:errcheck + if err := pm.Apply(-1); err != nil { + t.Fatal(err) + } + if err := pm.Set(podConfig.Resources); err != nil { + t.Fatal(err) + } + + config := &configs.Cgroup{ + Parent: "system-runc_test_pods.slice", + ScopePrefix: "test", + Name: "SkipDevices", + Resources: &configs.Resources{ + Devices: []*devices.Rule{ + // Allow access to /dev/full only. + { + Type: devices.CharDevice, + Major: 1, + Minor: 7, + Permissions: "rwm", + Allow: true, + }, + }, + }, + } + + // Create a "container" within the "pods" cgroup. + // This is not a real container, just a process in the cgroup. + cmd := exec.Command("bash", "-c", "read; echo > /dev/full; cat /dev/null; true") + cmd.Env = append(os.Environ(), "LANG=C") + stdinR, stdinW, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + cmd.Stdin = stdinR + var stderr bytes.Buffer + cmd.Stderr = &stderr + err = cmd.Start() + stdinR.Close() + defer stdinW.Close() + if err != nil { + t.Fatal(err) + } + // Make sure to not leave a zombie. + defer func() { + // These may fail, we don't care. + _, _ = stdinW.WriteString("hey\n") + _ = cmd.Wait() + }() + + // Put the process into a cgroup. + m := newManager(config) + defer m.Destroy() //nolint:errcheck + + if err := m.Apply(cmd.Process.Pid); err != nil { + t.Fatal(err) + } + // Check that we put the "container" into the "pod" cgroup. + if !strings.HasPrefix(m.Path("devices"), pm.Path("devices")) { + t.Fatalf("expected container cgroup path %q to be under pod cgroup path %q", + m.Path("devices"), pm.Path("devices")) + } + if err := m.Set(config.Resources); err != nil { + // failed to write "c 1:7 rwm": write /sys/fs/cgroup/devices/system.slice/system-runc_test_pods.slice/test-SkipDevices.scope/devices.allow: operation not permitted + if skipDevices == false && strings.HasSuffix(err.Error(), "/devices.allow: operation not permitted") { + // Cgroup v1 devices controller gives EPERM on trying + // to enable devices that are not enabled + // (skipDevices=false) in a parent cgroup. + // If this happens, test is passing. + return + } + t.Fatal(err) + } + + // Check that we can access /dev/full but not /dev/zero. + if _, err := stdinW.WriteString("wow\n"); err != nil { + t.Fatal(err) + } + if err := cmd.Wait(); err != nil { + t.Fatal(err) + } + for _, exp := range expected { + if !strings.Contains(stderr.String(), exp) { + t.Errorf("expected %q, got: %s", exp, stderr.String()) + } + } +} + +func TestSkipDevicesTrue(t *testing.T) { + testSkipDevices(t, true, []string{ + "echo: write error: No space left on device", + "cat: /dev/null: Operation not permitted", + }) +} + +func TestSkipDevicesFalse(t *testing.T) { + // If SkipDevices is not set for the parent slice, access to both + // devices should fail. This is done to assess the test correctness. + // For cgroup v1, we check for m.Set returning EPERM. + // For cgroup v2, we check for the errors below. + testSkipDevices(t, false, []string{ + "/dev/full: Operation not permitted", + "cat: /dev/null: Operation not permitted", + }) +} diff --git a/libcontainer/cgroups/systemd/v1.go b/libcontainer/cgroups/systemd/v1.go index 41de6e8b70f..e735156c7b8 100644 --- a/libcontainer/cgroups/systemd/v1.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -61,7 +61,7 @@ var legacySubsystems = []subsystem{ func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { var properties []systemdDbus.Property - deviceProperties, err := generateDeviceProperties(r.Devices) + deviceProperties, err := generateDeviceProperties(r) if err != nil { return nil, err } diff --git a/libcontainer/cgroups/systemd/v2.go b/libcontainer/cgroups/systemd/v2.go index 8abb0feb748..7be3da1d7ed 100644 --- a/libcontainer/cgroups/systemd/v2.go +++ b/libcontainer/cgroups/systemd/v2.go @@ -172,7 +172,7 @@ func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]syst // aren't the end of the world, but it is a bit concerning. However // it's unclear if systemd removes all eBPF programs attached when // doing SetUnitProperties... - deviceProperties, err := generateDeviceProperties(r.Devices) + deviceProperties, err := generateDeviceProperties(r) if err != nil { return nil, err }