Skip to content

Commit

Permalink
Add support of rootless containers
Browse files Browse the repository at this point in the history
* support podman rootless containers
* support docker rootless containers

Fixes #311

PiperOrigin-RevId: 458573377
  • Loading branch information
avagin authored and gvisor-bot committed Jul 6, 2022
1 parent d101b6b commit 4a165da
Show file tree
Hide file tree
Showing 7 changed files with 242 additions and 21 deletions.
6 changes: 6 additions & 0 deletions .buildkite/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,12 @@ steps:
agents:
cgroup: "v2"
os: "ubuntu"
- <<: *common
label: ":podman: Podman"
command: sudo ./test/podman/run.sh
agents:
cgroup: "v2"
os: "ubuntu"

# Check the website builds.
- <<: *common
Expand Down
48 changes: 48 additions & 0 deletions runsc/cmd/do.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type Do struct {
ip string
quiet bool
overlay bool
uidMap idMapSlice
gidMap idMapSlice
}

// Name implements subcommands.Command.Name.
Expand All @@ -72,13 +74,53 @@ used for testing only.
`
}

// idMapSlice is a list of ID mappings that is filled in from command-line
// flags: its Set method parses one mapping per call and appends it, so a flag
// backed by this type may be given multiple times.
type idMapSlice []specs.LinuxIDMapping

// String implements flag.Value.String.
//
// It renders the receiver with the %#v (Go-syntax) verb. Note that the value
// being formatted is the pointer receiver itself, not the slice it points to.
func (is *idMapSlice) String() string {
return fmt.Sprintf("%#v", is)
}

// Get implements flag.Value.Get.
//
// It returns the receiver itself (a *idMapSlice) wrapped in an empty
// interface; callers must type-assert to reach the mappings.
//
// NOTE(review): in the standard library flag package, Get is declared on
// flag.Getter rather than flag.Value -- confirm which interface the flag
// package imported by this file puts it on.
func (is *idMapSlice) Get() interface{} {
return is
}

// Set implements flag.Value.Set.
//
// s must contain exactly three whitespace-separated decimal fields, in the
// order "ContainerID HostID Size". On success one mapping is appended to the
// slice, so repeating the flag accumulates mappings.
//
// Each field is parsed as an unsigned 32-bit integer. Negative or out-of-range
// values are rejected rather than being silently wrapped by a uint32
// conversion. The returned error wraps the underlying strconv error.
func (is *idMapSlice) Set(s string) error {
	fs := strings.Fields(s)
	if len(fs) != 3 {
		return fmt.Errorf("invalid mapping: %s", s)
	}

	// ids holds ContainerID, HostID and Size, in that order.
	var ids [3]uint32
	for i, f := range fs {
		// bitSize 32 makes ParseUint fail on anything that does not fit
		// in a uint32, so the conversion below cannot truncate.
		v, err := strconv.ParseUint(f, 10, 32)
		if err != nil {
			return fmt.Errorf("invalid mapping: %s: %w", s, err)
		}
		ids[i] = uint32(v)
	}

	*is = append(*is, specs.LinuxIDMapping{
		ContainerID: ids[0],
		HostID:      ids[1],
		Size:        ids[2],
	})
	return nil
}

// SetFlags implements subcommands.Command.SetFlags.
//
// It registers the flags of the "do" command on f, binding each one to the
// corresponding field of c. The uid-map and gid-map flags are backed by
// idMapSlice, whose Set method splits its argument on whitespace, so each
// occurrence must be a single argument of three space-separated numbers. The
// help text spells that out because a bracketed, comma-separated value is
// rejected by the parser.
func (c *Do) SetFlags(f *flag.FlagSet) {
	f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
	f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
	f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
	f.BoolVar(&c.quiet, "quiet", false, "suppress runsc messages to stdout. Application output is still sent to stdout and stderr")
	f.BoolVar(&c.overlay, "force-overlay", true, "use an overlay. WARNING: disabling gives the command write access to the host")
	f.Var(&c.uidMap, "uid-map", `Add a user id mapping, given as three whitespace-separated numbers "ContainerID HostID Size". May be repeated`)
	f.Var(&c.gidMap, "gid-map", `Add a group id mapping, given as three whitespace-separated numbers "ContainerID HostID Size". May be repeated`)
}

// Execute implements subcommands.Command.Execute.
Expand Down Expand Up @@ -129,6 +171,12 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su

cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))

if c.uidMap != nil {
addNamespace(spec, specs.LinuxNamespace{Type: specs.UserNamespace})
spec.Linux.UIDMappings = c.uidMap
spec.Linux.GIDMappings = c.gidMap
}

if conf.Network == config.NetworkNone {
addNamespace(spec, specs.LinuxNamespace{Type: specs.NetworkNamespace})
} else if conf.Rootless {
Expand Down
30 changes: 27 additions & 3 deletions runsc/cmd/gofer.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"runtime/debug"
"strings"

Expand Down Expand Up @@ -63,8 +65,9 @@ type Gofer struct {
applyCaps bool
setUpRoot bool

specFD int
mountsFD int
specFD int
mountsFD int
syncUsernsFD int
}

// Name implements subcommands.Command.
Expand Down Expand Up @@ -92,6 +95,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
f.Var(&g.ioFDs, "io-fds", "list of FDs to connect gofer servers. They must follow this order: root first, then mounts as defined in the spec")
f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).")
f.IntVar(&g.syncUsernsFD, "sync-userns-fd", -1, "file descriptor used to synchronize rootless user namespace initialization.")
}

// Execute implements subcommands.Command.
Expand All @@ -113,6 +117,26 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
util.Fatalf("reading spec: %v", err)
}

if g.syncUsernsFD >= 0 {
f := os.NewFile(uintptr(g.syncUsernsFD), "sync FD")
defer f.Close()
var b [1]byte
if n, err := f.Read(b[:]); n != 0 || err != io.EOF {
util.Fatalf("failed to sync: %v: %v", n, err)
}

f.Close()
// SETUID changes UID on the current system thread, so we have
// to re-execute current binary.
runtime.LockOSThread()
if _, _, errno := unix.RawSyscall(unix.SYS_SETUID, 0, 0, 0); errno != 0 {
util.Fatalf("failed to set UID: %v", errno)
}
if _, _, errno := unix.RawSyscall(unix.SYS_SETGID, 0, 0, 0); errno != 0 {
util.Fatalf("failed to set GID: %v", errno)
}
}

if g.setUpRoot {
if err := setupRootFS(spec, conf); err != nil {
util.Fatalf("Error setting up root FS: %v", err)
Expand All @@ -122,7 +146,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Disable caps when calling myself again.
// Note: minimal argument handling for the default case to keep it simple.
args := os.Args
args = append(args, "--apply-caps=false", "--setup-root=false")
args = append(args, "--apply-caps=false", "--setup-root=false", "--sync-userns-fd=-1")
util.Fatalf("setCapsAndCallSelf(%v, %v): %v", args, goferCaps, setCapsAndCallSelf(args, goferCaps))
panic("unreachable")
}
Expand Down
83 changes: 77 additions & 6 deletions runsc/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -971,15 +971,49 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
{Type: specs.UTSNamespace},
}

rootlessEUID := unix.Getuid() != 0
var syncFile *os.File
// Setup any uid/gid mappings, and create or join the configured user
// namespace so the gofer's view of the filesystem aligns with the
// users in the sandbox.
userNS := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
nss = append(nss, userNS...)
specutils.SetUIDGIDMappings(cmd, spec)
if len(userNS) != 0 {
// We need to set UID and GID to have capabilities in a new user namespace.
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
if !rootlessEUID {
userNS := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
nss = append(nss, userNS...)
specutils.SetUIDGIDMappings(cmd, spec)
if len(userNS) != 0 {
// We need to set UID and GID to have capabilities in a new user namespace.
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
}
} else {
userNS := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
if len(userNS) == 0 {
return nil, nil, fmt.Errorf("unable to run a rootless container without userns")
}
fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
syncFile = os.NewFile(uintptr(fds[0]), "sync FD")
defer syncFile.Close()

f := os.NewFile(uintptr(fds[1]), "sync other FD")
donations.DonateAndClose("sync-userns-fd", f)
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &unix.SysProcAttr{}
}
cmd.SysProcAttr.AmbientCaps = []uintptr{
unix.CAP_CHOWN,
unix.CAP_DAC_OVERRIDE,
unix.CAP_DAC_READ_SEARCH,
unix.CAP_FOWNER,
unix.CAP_FSETID,
unix.CAP_SYS_CHROOT,
unix.CAP_SETUID,
unix.CAP_SETGID,
unix.CAP_SYS_ADMIN,
unix.CAP_SETPCAP,
}
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
}

donations.Transfer(cmd, nextFD)
Expand All @@ -990,6 +1024,43 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
if err := specutils.StartInNS(cmd, nss); err != nil {
return nil, nil, fmt.Errorf("gofer: %v", err)
}

if rootlessEUID {
log.Debugf("Setting user mappings")
args := []string{strconv.Itoa(cmd.Process.Pid)}
for _, idMap := range spec.Linux.UIDMappings {
log.Infof("Mapping host uid %d to container uid %d (size=%d)",
idMap.HostID, idMap.ContainerID, idMap.Size)
args = append(args,
strconv.Itoa(int(idMap.ContainerID)),
strconv.Itoa(int(idMap.HostID)),
strconv.Itoa(int(idMap.Size)),
)
}

out, err := exec.Command("newuidmap", args...).CombinedOutput()
log.Debugf("newuidmap: %#v\n%s", args, out)
if err != nil {
return nil, nil, fmt.Errorf("newuidmap failed: %w", err)
}

args = []string{strconv.Itoa(cmd.Process.Pid)}
for _, idMap := range spec.Linux.GIDMappings {
log.Infof("Mapping host uid %d to container uid %d (size=%d)",
idMap.HostID, idMap.ContainerID, idMap.Size)
args = append(args,
strconv.Itoa(int(idMap.ContainerID)),
strconv.Itoa(int(idMap.HostID)),
strconv.Itoa(int(idMap.Size)),
)
}
out, err = exec.Command("newgidmap", args...).CombinedOutput()
log.Debugf("newgidmap: %#v\n%s", args, out)
if err != nil {
return nil, nil, fmt.Errorf("newgidmap failed: %w", err)
}
}

log.Infof("Gofer started, PID: %d", cmd.Process.Pid)
c.GoferPid = cmd.Process.Pid
c.goferIsChild = true
Expand Down
10 changes: 8 additions & 2 deletions runsc/sandbox/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package sandbox
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"math"
Expand Down Expand Up @@ -536,6 +537,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
donations := donation.Agency{}
defer donations.Close()

rootlessEUID := unix.Getuid() != 0
//
// These flags must come BEFORE the "boot" command in cmd.Args.
//
Expand Down Expand Up @@ -722,13 +724,13 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid())
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
} else if rootlessEUID || specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
cmd.Args = append(cmd.Args, "--setup-root")

const nobody = 65534
if conf.Rootless {
if rootlessEUID || conf.Rootless {
log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
} else {
// Map nobody in the new namespace to nobody in the parent namespace.
Expand Down Expand Up @@ -1419,6 +1421,10 @@ func (s *Sandbox) configureStdios(conf *config.Config, stdios []*os.File) error
for _, file := range stdios {
log.Debugf("Changing %q ownership to %d/%d", file.Name(), s.UID, s.GID)
if err := file.Chown(s.UID, s.GID); err != nil {
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) || errors.Is(err, unix.EROFS) {
log.Warningf("can't change an owner of %s: %s", file.Name(), err)
continue
}
return err
}
}
Expand Down
42 changes: 32 additions & 10 deletions runsc/specutils/namespace.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func setNS(fd, nsType uintptr) error {
// that will restore the namespace to the original value.
//
// Preconditions: Must be called with os thread locked.
func ApplyNS(ns specs.LinuxNamespace) (func(), error) {
func ApplyNS(ns specs.LinuxNamespace) (func() error, error) {
log.Infof("Applying namespace %v at path %q", ns.Type, ns.Path)
newNS, err := os.Open(ns.Path)
if err != nil {
Expand All @@ -140,27 +140,49 @@ func ApplyNS(ns specs.LinuxNamespace) (func(), error) {
oldNS.Close()
return nil, fmt.Errorf("error setting namespace of type %v and path %q: %v", ns.Type, ns.Path, err)
}
return func() {
return func() error {
log.Infof("Restoring namespace %v", ns.Type)
defer oldNS.Close()
if err := setNS(oldNS.Fd(), flag); err != nil {
panic(fmt.Sprintf("error restoring namespace: of type %v: %v", ns.Type, err))
return fmt.Errorf("error restoring namespace: of type %v: %v", ns.Type, err)
}
return nil
}, nil
}

// StartInNS joins or creates the given namespaces and calls cmd.Start before
// restoring the namespaces to the original values.
//
// The actual work is delegated to startInNS, which runs on a separate
// goroutine that locks its OS thread. The error from startInNS is passed back
// over errChan and becomes the return value; namespace restoration happens on
// that goroutine after the error has been handed over.
func StartInNS(cmd *exec.Cmd, nss []specs.LinuxNamespace) error {
// We are about to setup namespaces, which requires the os thread being
// locked so that Go doesn't change the thread out from under us.
// NOTE(review): the thread is locked here and again inside the goroutine
// below -- confirm that both are intended.
runtime.LockOSThread()
defer runtime.UnlockOSThread()
errChan := make(chan error)
go func() {
runtime.LockOSThread()
defer runtime.UnlockOSThread()

rstFuncs, err := startInNS(cmd, nss)
// errChan is unbuffered, so this send completes only once StartInNS
// receives the result; the restore functions run afterwards.
errChan <- err
for _, rstFunc := range rstFuncs {
err := rstFunc()
if err == nil {
continue
}

// One or more namespaces have not been restored, but
// we can't destroy the current system thread, because
// a child process is executed with Pdeathsig.
log.Debugf("Block the current system thread due to: %s", err)
// Nothing ever sends on c, so this receive parks the goroutine
// forever; the deferred UnlockOSThread above therefore never runs.
c := make(chan interface{})
<-c
}
}()
return <-errChan
}

func startInNS(cmd *exec.Cmd, nss []specs.LinuxNamespace) ([]func() error, error) {
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &unix.SysProcAttr{}
}

var deferFuncs []func() error
for _, ns := range nss {
if ns.Path == "" {
// No path. Just set a flag to create a new namespace.
Expand All @@ -171,12 +193,12 @@ func StartInNS(cmd *exec.Cmd, nss []specs.LinuxNamespace) error {
// before exiting.
restoreNS, err := ApplyNS(ns)
if err != nil {
return err
return deferFuncs, err
}
defer restoreNS()
deferFuncs = append(deferFuncs, restoreNS)
}

return cmd.Start()
return deferFuncs, cmd.Start()
}

// SetUIDGIDMappings sets the given uid/gid mappings from the spec on the cmd.
Expand Down
Loading

0 comments on commit 4a165da

Please sign in to comment.