Skip to content

Commit

Permalink
Add support for rdma cgroup introduced in Linux Kernel 4.11
Browse files Browse the repository at this point in the history
Signed-off-by: Aditya Rajan <[email protected]>
  • Loading branch information
flouthoc committed Aug 23, 2021
1 parent 2aabb29 commit b3d1448
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 0 deletions.
1 change: 1 addition & 0 deletions libcontainer/cgroups/fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ var (
&NetPrioGroup{},
&PerfEventGroup{},
&FreezerGroup{},
&RdmaGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
HugePageSizes, _ = cgroups.GetHugePageSize()
Expand Down
25 changes: 25 additions & 0 deletions libcontainer/cgroups/fs/rdma.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package fs

import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)

// RdmaGroup is the subsystem handler for the "rdma" cgroup controller.
// Limit handling and statistics are delegated to the shared fscommon helpers.
type RdmaGroup struct{}

// Name returns the controller name, "rdma".
func (g *RdmaGroup) Name() string {
	const controller = "rdma"
	return controller
}

// Apply hands d.pid and the cgroup path to join and returns its result.
func (g *RdmaGroup) Apply(path string, d *cgroupData) error {
	pid := d.pid
	return join(path, pid)
}

// Set applies the RDMA limits found in r by delegating to fscommon.RdmaSet.
func (g *RdmaGroup) Set(path string, r *configs.Resources) error {
	err := fscommon.RdmaSet(path, r)
	return err
}

// GetStats populates the RDMA part of stats by delegating to
// fscommon.RdmaGetStats.
func (g *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
	err := fscommon.RdmaGetStats(path, stats)
	return err
}
8 changes: 8 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ func (m *manager) GetStats() (*cgroups.Stats, error) {
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// rdma (since kernel 4.11)
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
if len(errs) > 0 && !m.rootless {
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
}
Expand Down Expand Up @@ -183,6 +187,10 @@ func (m *manager) Set(r *configs.Resources) error {
if err := setHugeTlb(m.dirPath, r); err != nil {
return err
}
// rdma (since kernel 4.11)
if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
return err
}
// freezer (since kernel 5.2, pseudo-controller)
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
return err
Expand Down
121 changes: 121 additions & 0 deletions libcontainer/cgroups/fscommon/rdma.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package fscommon

import (
"bufio"
"errors"
"math"
"os"
"strconv"
"strings"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)

// parseRdmaKV parses a single "key=value" token taken from an rdma cgroup
// file and stores the value in the matching field of entry.
//
// The value must be either a base-10 number that fits in 32 bits or the
// literal "max", which is stored as math.MaxUint32. The keys "hca_handle" and
// "hca_object" are stored in entry.HcaHandles and entry.HcaObjects; any other
// key is accepted and ignored.
//
// An error is returned when raw does not consist of exactly one key and one
// value separated by "=", or when the value cannot be parsed. In both cases
// entry is left unmodified.
func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
	// Splitting into at most 3 parts lets a token with more than one "="
	// be detected and rejected instead of being silently truncated.
	parts := strings.SplitN(raw, "=", 3)
	if len(parts) != 2 {
		return errors.New("unable to parse RDMA entry " + strconv.Quote(raw))
	}
	k, v := parts[0], parts[1]

	var value uint32
	if v == "max" {
		// "max" has no numeric representation in the file; use the largest
		// value the field can hold.
		value = math.MaxUint32
	} else {
		val64, err := strconv.ParseUint(v, 10, 32)
		if err != nil {
			return err
		}
		value = uint32(val64)
	}

	switch k {
	case "hca_handle":
		entry.HcaHandles = value
	case "hca_object":
		entry.HcaObjects = value
	}

	return nil
}

// readRdmaEntries opens file under dir and converts each well-formed line
// into a cgroups.RdmaEntry.
//
// A well-formed line is a device name followed by exactly two key=value
// tokens, separated by single spaces, for example:
//
//	mlx4_0 hca_handle=2 hca_object=2000
//
// Lines with a different number of fields, or with a token that parseRdmaKV
// rejects, are skipped without reporting an error. The returned slice is
// non-nil (possibly empty) whenever the file could be opened; the error is
// either the open error or the scanner error.
func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
	fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
	if err != nil {
		return nil, err
	}
	defer fd.Close() //nolint:errorlint

	entries := []cgroups.RdmaEntry{}
	lines := bufio.NewScanner(fd)
	for lines.Scan() {
		fields := strings.SplitN(lines.Text(), " ", 4)
		if len(fields) != 3 {
			continue
		}

		entry := cgroups.RdmaEntry{Device: fields[0]}
		// Both tokens must parse; otherwise the whole line is dropped.
		if parseRdmaKV(fields[1], &entry) != nil || parseRdmaKV(fields[2], &entry) != nil {
			continue
		}
		entries = append(entries, entry)
	}
	return entries, lines.Err()
}

// RdmaGetStats reads "rdma.current" and "rdma.max" from the cgroup directory
// at path and stores the parsed entries in stats.RdmaStats.
//
// A missing "rdma.current" file is not an error: the function returns nil and
// leaves stats untouched. Any other read error, including a missing
// "rdma.max", is returned to the caller.
func RdmaGetStats(path string, stats *cgroups.Stats) error {
	current, err := readRdmaEntries(path, "rdma.current")
	switch {
	case err == nil:
		// Usage was read successfully; continue with the limits.
	case errors.Is(err, os.ErrNotExist):
		return nil
	default:
		return err
	}

	limits, err := readRdmaEntries(path, "rdma.max")
	if err != nil {
		return err
	}

	// The two files are read separately, so a device may disappear in
	// between. Only report stats when both files list the same number of
	// entries.
	if len(current) == len(limits) {
		stats.RdmaStats = cgroups.RdmaStats{
			RdmaLimit:   limits,
			RdmaCurrent: current,
		}
	}
	return nil
}

// createCmdString builds the line written to rdma.max for one device: the
// device name followed by " hca_handle=N" and/or " hca_object=N" for each
// limit that is set. A nil limit contributes nothing to the string.
func createCmdString(device string, limits configs.LinuxRdma) string {
	fields := []string{device}
	if handles := limits.HcaHandles; handles != nil {
		fields = append(fields, "hca_handle="+strconv.FormatUint(uint64(*handles), 10))
	}
	if objects := limits.HcaObjects; objects != nil {
		fields = append(fields, "hca_object="+strconv.FormatUint(uint64(*objects), 10))
	}
	return strings.Join(fields, " ")
}

// RdmaSet writes one line to "rdma.max" in the cgroup directory at path for
// every device listed in r.Rdma. It stops at, and returns, the first write
// error; with no devices configured it does nothing and returns nil.
func RdmaSet(path string, r *configs.Resources) error {
	for device, limits := range r.Rdma {
		line := createCmdString(device, limits)
		err := cgroups.WriteFile(path, "rdma.max", line)
		if err != nil {
			return err
		}
	}
	return nil
}
57 changes: 57 additions & 0 deletions libcontainer/cgroups/fscommon/rdma_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package fscommon

import (
"os"
"path/filepath"
"testing"

"github.com/opencontainers/runc/libcontainer/configs"
)

/* Roadmap for future */
// (Low-priority) TODO: Check if it is possible to virtually mimic an actual RDMA device.
// TODO: Think of more edge-cases to add.

// TestRdmaSet performs an end-to-end test of RdmaSet() and readRdmaEntries()
// (and, through it, parseRdmaKV()) using a dummy device and a dummy cgroup
// directory.
// Note: this test does not guarantee that the host supports RDMA, since the
// underlying infrastructure is mocked.
func TestRdmaSet(t *testing.T) {
	testCgroupPath := filepath.Join(t.TempDir(), "rdma")

	// Ensure the full mock cgroup path exists.
	if err := os.Mkdir(testCgroupPath, 0o755); err != nil {
		t.Fatal(err)
	}

	rdmaDevice := "mlx5_1"
	maxHandles := uint32(100)
	maxObjects := uint32(300)

	rdmaStubResource := &configs.Resources{
		Rdma: map[string]configs.LinuxRdma{
			rdmaDevice: {
				HcaHandles: &maxHandles,
				HcaObjects: &maxObjects,
			},
		},
	}

	if err := RdmaSet(testCgroupPath, rdmaStubResource); err != nil {
		t.Fatal(err)
	}

	// The limits written by RdmaSet must be readable back from rdma.max.
	rdmaEntries, err := readRdmaEntries(testCgroupPath, "rdma.max")
	if err != nil {
		t.Fatal(err)
	}
	if len(rdmaEntries) != 1 {
		t.Fatalf("rdma_test: expected 1 entry in rdma.max, got %d", len(rdmaEntries))
	}

	got := rdmaEntries[0]
	// The device name must round-trip as well, not only the numeric limits.
	if got.Device != rdmaDevice {
		t.Fatalf("rdma_test: wrong device: got %q, want %q", got.Device, rdmaDevice)
	}
	if got.HcaHandles != maxHandles {
		t.Fatalf("rdma_test: wrong value for hca_handle: got %d, want %d", got.HcaHandles, maxHandles)
	}
	if got.HcaObjects != maxObjects {
		t.Fatalf("rdma_test: wrong value for hca_object: got %d, want %d", got.HcaObjects, maxObjects)
	}
}
12 changes: 12 additions & 0 deletions libcontainer/cgroups/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,17 @@ type HugetlbStats struct {
Failcnt uint64 `json:"failcnt"`
}

// RdmaEntry holds the values parsed from one line of an rdma cgroup file,
// i.e. the resource counts for a single RDMA device.
type RdmaEntry struct {
// Device is the device name that starts the line, e.g. "mlx4_0".
Device string `json:"device,omitempty"`
// HcaHandles is the value of the "hca_handle" key; the literal "max" is
// represented as math.MaxUint32.
HcaHandles uint32 `json:"hca_handles,omitempty"`
// HcaObjects is the value of the "hca_object" key; the literal "max" is
// represented as math.MaxUint32.
HcaObjects uint32 `json:"hca_objects,omitempty"`
}

// RdmaStats groups the per-device RDMA entries read from a cgroup.
type RdmaStats struct {
// RdmaLimit holds the entries read from the "rdma.max" file.
RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"`
// RdmaCurrent holds the entries read from the "rdma.current" file.
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
}

type Stats struct {
CpuStats CpuStats `json:"cpu_stats,omitempty"`
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
Expand All @@ -154,6 +165,7 @@ type Stats struct {
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
// the map is in the format "size of hugepage: stats of the hugepage"
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
}

func NewStats() *Stats {
Expand Down
1 change: 1 addition & 0 deletions libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ var legacySubsystems = []subsystem{
&fs.NetPrioGroup{},
&fs.NetClsGroup{},
&fs.NameGroup{GroupName: "name=systemd"},
&fs.RdmaGroup{},
}

func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
Expand Down
3 changes: 3 additions & 0 deletions libcontainer/configs/cgroup_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ type Resources struct {
// Set class identifier for container's network packets
NetClsClassid uint32 `json:"net_cls_classid_u"`

// Rdma resource restriction configuration
Rdma map[string]LinuxRdma `json:"rdma"`

// Used on cgroups v2:

// CpuWeight sets a proportional bandwidth limit.
Expand Down
9 changes: 9 additions & 0 deletions libcontainer/configs/rdma.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package configs

// LinuxRdma holds the limits applied to a single RDMA device through the
// Linux "rdma" cgroup controller (available since Linux 4.11).
type LinuxRdma struct {
// HcaHandles is the maximum number of HCA handles that can be opened.
// A nil pointer means no value is configured; the default is "no limit".
HcaHandles *uint32 `json:"hca_handles,omitempty"`
// HcaObjects is the maximum number of HCA objects that can be created.
// A nil pointer means no value is configured; the default is "no limit".
HcaObjects *uint32 `json:"hca_objects,omitempty"`
}
9 changes: 9 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,15 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi
Limit: l.Limit,
})
}
if len(r.Rdma) > 0 {
c.Resources.Rdma = make(map[string]configs.LinuxRdma, len(r.Rdma))
for k, v := range r.Rdma {
c.Resources.Rdma[k] = configs.LinuxRdma{
HcaHandles: v.HcaHandles,
HcaObjects: v.HcaObjects,
}
}
}
if r.Network != nil {
if r.Network.ClassID != nil {
c.Resources.NetClsClassid = *r.Network.ClassID
Expand Down

0 comments on commit b3d1448

Please sign in to comment.