From 9129a7bbe46d274c3b98b876bafd9b450bec2cb3 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Wed, 10 Aug 2016 03:57:20 +0800 Subject: [PATCH] libcontainer: add support for Intel RDT/CAT in runc This PR fixes issue #433 https://github.com/opencontainers/runc/issues/433 About Intel RDT/CAT feature: Intel platforms with new Xeon CPUs support Resource Director Technology (RDT). Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3 cache is the only resource that is supported in RDT. This feature provides a way for software to restrict cache allocation to a defined 'subset' of L3 cache, which may overlap with other 'subsets'. The different subsets are identified by class of service (CLOS), and each CLOS has a capacity bitmask (CBM). More information about Intel RDT/CAT can be found in section 17.17 of the Intel Software Developer Manual and in the kernel document: https://lkml.org/lkml/2016/7/12/747 About Intel RDT/CAT kernel interface: In the Linux kernel, the interface is defined and exposed via the "resource control" filesystem, which is a "cgroup-like" interface. Compared with cgroups, it has a similar process management lifecycle and similar interfaces in a container. But unlike the cgroups hierarchy, it has a single-level filesystem layout. Intel RDT "resource control" filesystem hierarchy: mount -t rscctrl rscctrl /sys/fs/rscctrl tree /sys/fs/rscctrl /sys/fs/rscctrl |-- cpus |-- info | |-- info | |-- l3 | |-- domain_to_cache_id | |-- max_cbm_len | |-- max_closid |-- schemas |-- tasks |-- <container_id> |-- cpus |-- schemas |-- tasks The file `tasks` has all task ids belonging to the partition "container_id". The task ids in the file will be added or removed as tasks move among partitions. A task id stays in only one directory at a time. The file `schemas` has allocation bitmasks/values for L3 cache on each socket, which contains L3 cache id and capacity bitmask (CBM). Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." 
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`, which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that can be set is less than the max bit. The max number of bits in the CBM varies among supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem layout, the CBM in a "partition" should be a subset of the CBM in root. The kernel will check whether it is valid when writing. E.g., 0xfffff in root indicates that the max length of the CBM is 20 bits, which maps to the entire L3 cache capacity. Some valid CBM values to set in a "partition": 0xf, 0xf0, 0x3ff, 0x1f00, etc. The file `cpus` has a cpu bitmask that specifies the CPUs that are bound to the schemas. Any tasks scheduled on those cpus will use the schemas. For more information about the Intel RDT/CAT kernel interface: https://lkml.org/lkml/2016/7/12/764 An example for runc: There are two L3 caches in the two-socket machine, the default CBM is 0xfffff and the max CBM length is 20 bits. 
This configuration assigns 4/5 of L3 cache id 0 and the whole L3 cache id 1 for the container: "linux": { "resources": { "intelRdt": { "l3CacheSchema": "L3:0=ffff0;1=fffff", "L3CacheCpus": "00000000,00000000,00000000,00000000,00000000,00000000" } } } Signed-off-by: Xiaochen Shen --- libcontainer/configs/cgroup_unix.go | 7 + libcontainer/container_linux.go | 51 ++++-- libcontainer/factory_linux.go | 28 +++ libcontainer/intelrdt/intelrdt.go | 268 ++++++++++++++++++++++++++++ libcontainer/intelrdt/stats.go | 27 +++ libcontainer/intelrdt/utils.go | 204 +++++++++++++++++++++ libcontainer/process_linux.go | 48 +++-- libcontainer/specconv/spec_linux.go | 8 + libcontainer/state_linux.go | 5 + libcontainer/stats_linux.go | 6 +- utils_linux.go | 5 + 11 files changed, 633 insertions(+), 24 deletions(-) create mode 100644 libcontainer/intelrdt/intelrdt.go create mode 100644 libcontainer/intelrdt/stats.go create mode 100644 libcontainer/intelrdt/utils.go diff --git a/libcontainer/configs/cgroup_unix.go b/libcontainer/configs/cgroup_unix.go index bd6f69b82f4..8735d597c59 100644 --- a/libcontainer/configs/cgroup_unix.go +++ b/libcontainer/configs/cgroup_unix.go @@ -121,4 +121,11 @@ type Resources struct { // Set class identifier for container's network packets NetClsClassid uint32 `json:"net_cls_classid"` + + // Intel RDT: the schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:=;=;..." 
+ IntelRdtL3CacheSchema string `json:"intel_rdt_l3_cache_schema"` + + // Intel RDT: the bitmask of the CPUs that are bound to the schema + IntelRdtL3CacheCpus string `json:"intel_rdt_l3_cache_cpus"` } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 70cbc6359f3..cd0e4dded82 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -22,6 +22,7 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/criurpc" + "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" "github.com/syndtr/gocapability/capability" @@ -35,6 +36,7 @@ type linuxContainer struct { root string config *configs.Config cgroupManager cgroups.Manager + intelRdtManager intelrdt.Manager initPath string initArgs []string initProcess parentProcess @@ -62,6 +64,9 @@ type State struct { // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore ExternalDescriptors []string `json:"external_descriptors,omitempty"` + + // Intel RDT "resource control" filesystem path + IntelRdtPath string `json:"intel_rdt_path"` } // Container is a libcontainer container object. 
@@ -156,6 +161,11 @@ func (c *linuxContainer) Stats() (*Stats, error) { if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newSystemErrorWithCause(err, "getting container stats from cgroups") } + if c.intelRdtManager != nil { + if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil { + return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats") + } + } for _, iface := range c.config.Networks { switch iface.Type { case "veth": @@ -180,7 +190,15 @@ func (c *linuxContainer) Set(config configs.Config) error { return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning) } c.config = &config - return c.cgroupManager.Set(c.config) + if err := c.cgroupManager.Set(c.config); err != nil { + return err + } + if c.intelRdtManager != nil { + if err := c.intelRdtManager.Set(c.config); err != nil { + return err + } + } + return nil } func (c *linuxContainer) Start(process *Process) error { @@ -346,16 +364,17 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c return nil, err } return &initProcess{ - cmd: cmd, - childPipe: childPipe, - parentPipe: parentPipe, - manager: c.cgroupManager, - config: c.newInitConfig(p), - container: c, - process: p, - bootstrapData: data, - sharePidns: sharePidns, - rootDir: rootDir, + cmd: cmd, + childPipe: childPipe, + parentPipe: parentPipe, + manager: c.cgroupManager, + intelRdtManager: c.intelRdtManager, + config: c.newInitConfig(p), + container: c, + process: p, + bootstrapData: data, + sharePidns: sharePidns, + rootDir: rootDir, }, nil } @@ -371,10 +390,15 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, if err != nil { return nil, err } + intelRdtPath := "" + if c.intelRdtManager != nil { + intelRdtPath = c.intelRdtManager.GetPath() + } // TODO: set on container for process management return &setnsProcess{ cmd: cmd, cgroupPaths: c.cgroupManager.GetPaths(), + intelRdtPath: intelRdtPath, 
childPipe: childPipe, parentPipe: parentPipe, config: c.newInitConfig(p), @@ -1190,6 +1214,10 @@ func (c *linuxContainer) currentState() (*State, error) { startTime, _ = c.initProcess.startTime() externalDescriptors = c.initProcess.externalDescriptors() } + IntelRdtPath := "" + if c.intelRdtManager != nil { + IntelRdtPath = c.intelRdtManager.GetPath() + } state := &State{ BaseState: BaseState{ ID: c.ID(), @@ -1201,6 +1229,7 @@ func (c *linuxContainer) currentState() (*State, error) { CgroupPaths: c.cgroupManager.GetPaths(), NamespacePaths: make(map[configs.NamespaceType]string), ExternalDescriptors: externalDescriptors, + IntelRdtPath: IntelRdtPath, } if pid > 0 { for _, ns := range c.config.Namespaces { diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 6cce46e0dd6..8be84e956eb 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -19,6 +19,7 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs/validate" + "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/utils" ) @@ -89,6 +90,21 @@ func Cgroupfs(l *LinuxFactory) error { return nil } +// IntelRdtfs is an options func to configure a LinuxFactory to return +// containers that use the Intel RDT "resource control" filesystem to +// create and manage Intel Xeon platform shared resources (e.g., L3 cache). +func IntelRdtFs(l *LinuxFactory) error { + if intelrdt.IntelRdtIsEnabled() { + l.NewIntelRdtManager = func(config *configs.Cgroup, path string) intelrdt.Manager { + return &intelrdt.IntelRdtManager{ + Cgroups: config, + Path: path, + } + } + } + return nil +} + // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. 
func TmpfsRoot(l *LinuxFactory) error { mounted, err := mount.Mounted(l.Root) @@ -156,6 +172,9 @@ type LinuxFactory struct { // NewCgroupsManager returns an initialized cgroups manager for a single container. NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager + + // NewIntelRdtManager returns an initialized Intel RDT manager for a single container. + NewIntelRdtManager func(config *configs.Cgroup, path string) intelrdt.Manager } func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { @@ -208,6 +227,11 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), } c.state = &stoppedState{c: c} + c.intelRdtManager = nil + if l.NewIntelRdtManager != nil { + c.intelRdtManager = l.NewIntelRdtManager(config.Cgroups, id) + } + return c, nil } @@ -241,6 +265,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) { if err := c.refreshState(); err != nil { return nil, err } + c.intelRdtManager = nil + if l.NewIntelRdtManager != nil { + c.intelRdtManager = l.NewIntelRdtManager(state.Config.Cgroups, state.IntelRdtPath) + } return c, nil } diff --git a/libcontainer/intelrdt/intelrdt.go b/libcontainer/intelrdt/intelrdt.go new file mode 100644 index 00000000000..58848e31511 --- /dev/null +++ b/libcontainer/intelrdt/intelrdt.go @@ -0,0 +1,268 @@ +// +build linux + +package intelrdt + +import ( + "fmt" + "os" + "path/filepath" + "sync" + "syscall" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager interface { + // Applies configuration to the process with the specified pid + Apply(pid int) error + + // Returns the PIDs inside Intel RDT "resource control" filesystem at path + GetPids() ([]int, error) + + // Returns statistics for Intel RDT + GetStats() (*Stats, error) + + // Destroys the Intel RDT "resource control" filesystem + Destroy() error + + // Returns Intel RDT "resource control" filesystem path 
to save in + // a state file and to be able to restore the object later. + GetPath() string + + // Set Intel RDT "resource control" filesystem as configured. + Set(container *configs.Config) error +} + +// This implements interface Manager +type IntelRdtManager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Path string +} + +type intelRdtData struct { + root string + config *configs.Cgroup + pid int +} + +// The absolute path to the root of the Intel RDT "resource control" filesystem. +var intelRdtRootLock sync.Mutex +var intelRdtRoot string + +// Gets the root path of Intel RDT "resource control" filesystem. +func getIntelRdtRoot() (string, error) { + intelRdtRootLock.Lock() + defer intelRdtRootLock.Unlock() + + if intelRdtRoot != "" { + return intelRdtRoot, nil + } + + root, err := findIntelRdtMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + intelRdtRoot = root + return intelRdtRoot, nil +} + +func getIntelRdtData(c *configs.Cgroup, pid int) (*intelRdtData, error) { + rootPath, err := getIntelRdtRoot() + if err != nil { + return nil, err + } + return &intelRdtData{ + root: rootPath, + config: c, + pid: pid, + }, nil +} + +// Applies configuration to the process with the specified pid +func (m *IntelRdtManager) Apply(pid int) (err error) { + d, err := getIntelRdtData(m.Cgroups, pid) + if err != nil { + if !IsNotFound(err) { + return err + } + // We will not return error here if: + // 1. The hw or kernel doesn't support Intel RDT/CAT feature. + // 2. Intel RDT "resource control" filesystem is not mounted. 
+ // We will try to mount again: + // mount -t rscctrl rscctrl /sys/fs/rscctrl + if err := syscall.Mount("rscctrl", "sys/fs/rscctrl", "rscctrl", 0, ""); err != nil { + return err + } + } + + path, err := d.join(m.GetPath()) + if err != nil { + return err + } + + m.Path = path + return nil +} + +// Destroys the Intel RDT "resource control" filesystem +func (m *IntelRdtManager) Destroy() error { + m.mu.Lock() + defer m.mu.Unlock() + if err := os.RemoveAll(m.Path); err != nil { + return err + } + m.Path = "" + return nil +} + +// Returns Intel RDT "resource control" filesystem path to save in +// a state file and to be able to restore the object later. +func (m *IntelRdtManager) GetPath() string { + m.mu.Lock() + path := m.Path + m.mu.Unlock() + return path +} + +// Returns statistics for Intel RDT +func (m *IntelRdtManager) GetStats() (*Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := NewStats() + path := m.GetPath() + rootPath, err := getIntelRdtRoot() + if err != nil { + return nil, err + } + + // The stats in root of Intel RDT "resource control" filesystem + info, err := getIntelRdtParamString(filepath.Join(rootPath, "info"), "info") + if err != nil { + return nil, err + } + domainToCacheId, err := getIntelRdtParamString(filepath.Join(rootPath, "l3"), "domain_to_cache_id") + if err != nil { + return nil, err + } + maxCbmLen, err := getIntelRdtParamUint(filepath.Join(rootPath, "l3"), "max_cbm_len") + if err != nil { + return nil, err + } + maxClosid, err := getIntelRdtParamUint(filepath.Join(rootPath, "l3"), "max_closid") + if err != nil { + return nil, err + } + rootL3CacheSchema, err := getIntelRdtParamString(rootPath, "schemas") + if err != nil { + return nil, err + } + rootL3CacheCpus, err := getIntelRdtParamString(rootPath, "cpus") + if err != nil { + return nil, err + } + stats.IntelRdtRootStats.Info = info + stats.IntelRdtRootStats.DomainToCacheId = domainToCacheId + stats.IntelRdtRootStats.MaxCbmLen = maxCbmLen + 
stats.IntelRdtRootStats.MaxClosid = maxClosid + stats.IntelRdtRootStats.RootL3CacheSchema = rootL3CacheSchema + stats.IntelRdtRootStats.RootL3CacheCpus = rootL3CacheCpus + + // The stats in "container_id" partition + schema, err := getIntelRdtParamString(path, "schemas") + if err != nil { + return nil, err + } + cpus, err := getIntelRdtParamString(path, "cpus") + if err != nil { + return nil, err + } + stats.IntelRdtStats.L3CacheSchema = schema + stats.IntelRdtStats.L3CacheCpus = cpus + + return stats, nil +} + +// Set Intel RDT "resource control" filesystem as configured. +func (m *IntelRdtManager) Set(container *configs.Config) error { + path := m.GetPath() + + // About L3 cache schema file: + // The schema has allocation masks/values for L3 cache on each socket, + // which contains L3 cache id and capacity bitmask (CBM). + // Format: "L3:=;=;..." + // For example, on a two-socket machine, L3's schema line could be: + // L3:0=ff;1=c0 + // Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + // + // About L3 cache CBM validity: + // The valid L3 cache CBM is a *contiguous bits set* and number of + // bits that can be set is less than the max bit. The max bits in the + // CBM is varied among supported Intel Xeon platforms. In Intel RDT + // "resource control" filesystem layout, the CBM in a "partition" + // should be a subset of the CBM in root. Kernel will check if it is + // valid when writing. + // e.g., 0xfffff in root indicates the max bits of CBM is 20 bits, + // which mapping to entire L3 cache capacity. Some valid CBM values + // to set in a "partition": 0xf, 0xf0, 0x3ff, 0x1f00 and etc. 
+ l3CacheSchema := container.Cgroups.Resources.IntelRdtL3CacheSchema + if l3CacheSchema != "" { + if err := writeFile(path, "schemas", l3CacheSchema); err != nil { + return err + } + } + + // The bitmask of the CPUs that are bound to the schema + l3CacheCpus := container.Cgroups.Resources.IntelRdtL3CacheCpus + if l3CacheCpus != "" { + if err := writeFile(path, "cpus", l3CacheCpus); err != nil { + return err + } + } + + return nil +} + +// Returns the PIDs inside Intel RDT "resource control" filesystem at path +func (m *IntelRdtManager) GetPids() ([]int, error) { + return readTasksFile(m.GetPath()) +} + +func (raw *intelRdtData) join(name string) (string, error) { + path := filepath.Join(raw.root, name) + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + + if err := WriteIntelRdtTasks(path, raw.pid); err != nil { + return "", err + } + return path, nil +} + +type NotFoundError struct { + ResourceControl string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl) +} + +func NewNotFoundError(res string) error { + return &NotFoundError{ + ResourceControl: res, + } +} + +func IsNotFound(err error) bool { + if err == nil { + return false + } + _, ok := err.(*NotFoundError) + return ok +} diff --git a/libcontainer/intelrdt/stats.go b/libcontainer/intelrdt/stats.go new file mode 100644 index 00000000000..ec16dbdcd87 --- /dev/null +++ b/libcontainer/intelrdt/stats.go @@ -0,0 +1,27 @@ +// +build linux + +package intelrdt + +// The stats in root of Intel RDT "resource control" filesystem +type IntelRdtRootStats struct { + Info string `json:"info,omitempty"` + DomainToCacheId string `json:"domain_to_cache_id,omitempty"` + MaxCbmLen uint64 `json:"max_cbm_len,omitempty"` + MaxClosid uint64 `json:"max_closid,omitempty"` + RootL3CacheSchema string `json:"root_l3_cache_schema,omitempty"` + RootL3CacheCpus string `json:"root_l3_cache_cpus,omitempty"` +} + +type IntelRdtStats struct { + 
L3CacheSchema string `json:"l3_cache_schema,omitempty"` + L3CacheCpus string `json:"l3_cache_cpus,omitempty"` +} + +type Stats struct { + IntelRdtRootStats IntelRdtRootStats `json:"intel_rdt_root_stats,omitempty"` + IntelRdtStats IntelRdtStats `json:"intel_rdt_stats,omitempty"` +} + +func NewStats() *Stats { + return &Stats{} +} diff --git a/libcontainer/intelrdt/utils.go b/libcontainer/intelrdt/utils.go new file mode 100644 index 00000000000..c83a3602a57 --- /dev/null +++ b/libcontainer/intelrdt/utils.go @@ -0,0 +1,204 @@ +// +build linux + +package intelrdt + +import ( + "bufio" + "errors" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" +) + +const ( + IntelRdtTasks = "tasks" +) + +var ( + ErrNotValidFormat = errors.New("line is not a valid key value format") + ErrIntelRdtNotEnabled = errors.New("intelrdt: config provided but Intel RDT not supported") +) + +func parseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// Parses a param and returns as name, value +func getIntelRdtParamKeyValue(t string) (string, uint64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := parseUint(parts[1], 10, 64) + if err != nil { + return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err) + } + + return parts[0], value, nil + default: + return "", 0, ErrNotValidFormat + } +} + +// Gets a single uint64 value from the specified file. 
+func getIntelRdtParamUint(path, file string) (uint64, error) { + fileName := filepath.Join(path, file) + contents, err := ioutil.ReadFile(fileName) + if err != nil { + return 0, err + } + + res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) + if err != nil { + return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName) + } + return res, nil +} + +// Gets a string value from the specified file +func getIntelRdtParamString(path, file string) (string, error) { + contents, err := ioutil.ReadFile(filepath.Join(path, file)) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(contents)), nil +} + +func writeFile(dir, file, data string) error { + if dir == "" { + return fmt.Errorf("no such directory for %s", file) + } + if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", data, file, err) + } + return nil +} + +func readTasksFile(dir string) ([]int, error) { + f, err := os.Open(filepath.Join(dir, IntelRdtTasks)) + if err != nil { + return nil, err + } + defer f.Close() + + var ( + s = bufio.NewScanner(f) + out = []int{} + ) + + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + return out, nil +} + +// Return the mount point path of Intel RDT "resource control" filesysem +func findIntelRdtMountpointDir() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. 
+ index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for Intel RDT + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "rscctrl" { + // Check that the mount is properly formated. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + return fields[4], nil + } + } + if err := s.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError("intelrdt") +} + +func parseCpuInfoFile(path string) (bool, error) { + f, err := os.Open(path) + if err != nil { + return false, err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return false, err + } + + text := s.Text() + flags := strings.Split(text, " ") + + for _, flag := range flags { + if flag == "rdt" { + return true, nil + } + } + } + return false, nil +} + +// WriteIntelRdtTasks writes the specified pid into the tasks file +func WriteIntelRdtTasks(dir string, pid int) error { + if dir == "" { + return fmt.Errorf("no such directory for %s", IntelRdtTasks) + } + + // Dont attach any pid if -1 is specified as a pid + if pid != -1 { + if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err) + } + } + return nil +} + +// Check if Intel RDT is supported or not +func IntelRdtIsEnabled() bool { + // Read /proc/cpuinfo to check if Intel RDT flag is set + enabled, err := parseCpuInfoFile("/proc/cpuinfo") + if err != nil { + return false + } + return enabled +} diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 33db39239d1..c420222e02a 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -15,6 +15,7 @@ import ( 
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" ) @@ -47,6 +48,7 @@ type setnsProcess struct { parentPipe *os.File childPipe *os.File cgroupPaths map[string]string + intelRdtPath string config *initConfig fds []string process *Process @@ -87,6 +89,15 @@ func (p *setnsProcess) start() (err error) { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } } + if p.intelRdtPath != "" { + // if Intel RDT "resource control" filesystem path exists + _, err := os.Stat(p.intelRdtPath) + if err == nil { + if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil { + return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid()) + } + } + } // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting oom score") @@ -178,17 +189,18 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) { } type initProcess struct { - cmd *exec.Cmd - parentPipe *os.File - childPipe *os.File - config *initConfig - manager cgroups.Manager - container *linuxContainer - fds []string - process *Process - bootstrapData io.Reader - sharePidns bool - rootDir *os.File + cmd *exec.Cmd + parentPipe *os.File + childPipe *os.File + config *initConfig + manager cgroups.Manager + intelRdtManager intelrdt.Manager + container *linuxContainer + fds []string + process *Process + bootstrapData io.Reader + sharePidns bool + rootDir *os.File } func (p *initProcess) pid() int { @@ -257,10 +269,18 @@ func (p *initProcess) start() error { if err := p.manager.Apply(p.pid()); err != nil { return newSystemErrorWithCause(err, "applying cgroup configuration for process") } + if p.intelRdtManager != nil { + if err := 
p.intelRdtManager.Apply(p.pid()); err != nil { + return newSystemErrorWithCause(err, "applying Intel RDT configuration for process") + } + } defer func() { if err != nil { // TODO: should not be the responsibility to call here p.manager.Destroy() + if p.intelRdtManager != nil { + p.intelRdtManager.Destroy() + } } }() if err := p.createNetworkInterfaces(); err != nil { @@ -290,6 +310,12 @@ loop: if err := p.manager.Set(p.config.Config); err != nil { return newSystemErrorWithCause(err, "setting cgroup config for ready process") } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting Intel RDT config for ready process") + } + } + // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting oom score for ready process") diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 856371c693f..2ee721f7a04 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -446,6 +446,14 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (* }) } } + if r.IntelRdt != nil { + if r.IntelRdt.L3CacheSchema != nil { + c.Resources.IntelRdtL3CacheSchema = *r.IntelRdt.L3CacheSchema + } + if r.IntelRdt.L3CacheCpus != nil { + c.Resources.IntelRdtL3CacheCpus = *r.IntelRdt.L3CacheCpus + } + } return c, nil } diff --git a/libcontainer/state_linux.go b/libcontainer/state_linux.go index 266282404c7..9babeef1c7c 100644 --- a/libcontainer/state_linux.go +++ b/libcontainer/state_linux.go @@ -44,6 +44,11 @@ func destroy(c *linuxContainer) error { } } err := c.cgroupManager.Destroy() + if c.intelRdtManager != nil { + if ierr := c.intelRdtManager.Destroy(); err == nil { + err = ierr + } + } if rerr := os.RemoveAll(c.root); err == nil { err = rerr } diff --git a/libcontainer/stats_linux.go b/libcontainer/stats_linux.go index 
c629dc67de9..29fd641e9dd 100644 --- a/libcontainer/stats_linux.go +++ b/libcontainer/stats_linux.go @@ -1,8 +1,10 @@ package libcontainer import "github.com/opencontainers/runc/libcontainer/cgroups" +import "github.com/opencontainers/runc/libcontainer/intelrdt" type Stats struct { - Interfaces []*NetworkInterface - CgroupStats *cgroups.Stats + Interfaces []*NetworkInterface + CgroupStats *cgroups.Stats + IntelRdtStats *intelrdt.Stats } diff --git a/utils_linux.go b/utils_linux.go index 6f877ab0efc..f56d6e0c16b 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -38,6 +38,11 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) { return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available") } } + intelRdtManager := libcontainer.IntelRdtFs + if intelRdtManager != nil { + return libcontainer.New(abs, cgroupManager, intelRdtManager, libcontainer.CriuPath(context.GlobalString("criu"))) + } + return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(context.GlobalString("criu"))) }