Skip to content

Commit

Permalink
[POC] Allow to run gvisor for a default docker container
Browse files Browse the repository at this point in the history
  • Loading branch information
avagin committed Sep 29, 2020
1 parent 7e4acfc commit 1924e9e
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 139 deletions.
2 changes: 1 addition & 1 deletion runsc/cmd/do.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
conf := args[0].(*config.Config)
waitStatus := args[1].(*syscall.WaitStatus)

if conf.Rootless {
if conf.Rootless && !conf.Unprivileged {
if err := specutils.MaybeRunAsRoot(); err != nil {
return Errorf("Error executing inside namespace: %v", err)
}
Expand Down
37 changes: 20 additions & 17 deletions runsc/cmd/gofer.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,26 +107,29 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("reading spec: %v", err)
}

if g.setUpRoot {
if err := setupRootFS(spec, conf); err != nil {
Fatalf("Error setting up root FS: %v", err)
root := "/"
if !conf.Unprivileged {
if g.setUpRoot {
if err := setupRootFS(spec, conf); err != nil {
Fatalf("Error setting up root FS: %v", err)
}
}
}
if g.applyCaps {
// Disable caps when calling myself again.
// Note: minimal argument handling for the default case to keep it simple.
args := os.Args
args = append(args, "--apply-caps=false", "--setup-root=false")
if err := setCapsAndCallSelf(args, goferCaps); err != nil {
Fatalf("Unable to apply caps: %v", err)
if g.applyCaps {
// Disable caps when calling myself again.
// Note: minimal argument handling for the default case to keep it simple.
args := os.Args
args = append(args, "--apply-caps=false", "--setup-root=false")
if err := setCapsAndCallSelf(args, goferCaps); err != nil {
Fatalf("Unable to apply caps: %v", err)
}
panic("unreachable")
}
panic("unreachable")
}

// Find what path is going to be served by this gofer.
root := spec.Root.Path
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
root = "/root"
// Find what path is going to be served by this gofer.
root = spec.Root.Path
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
root = "/root"
}
}

// Resolve mount points paths, then replace mounts from our spec and send the
Expand Down
2 changes: 2 additions & 0 deletions runsc/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ type Config struct {
// mapped to the caller's user.
Rootless bool `flag:"rootless"`

Unprivileged bool `flag:"unprivileged"`

// AlsoLogToStderr allows to send log messages to stderr.
AlsoLogToStderr bool `flag:"alsologtostderr"`

Expand Down
1 change: 1 addition & 0 deletions runsc/config/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func RegisterFlags() {
flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
flag.Bool("unprivileged", false, "it allows the sandbox to be started with a user that is not root and doesn't have privileges to create a new user namespace. Sandbox and Gofer processes may run with same privileges as current user.")
flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.")
flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.")
Expand Down
37 changes: 20 additions & 17 deletions runsc/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -963,23 +963,26 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu

// Enter new namespaces to isolate from the rest of the system. Don't unshare
// cgroup because gofer is added to a cgroup in the caller's namespace.
nss := []specs.LinuxNamespace{
{Type: specs.IPCNamespace},
{Type: specs.MountNamespace},
{Type: specs.NetworkNamespace},
{Type: specs.PIDNamespace},
{Type: specs.UTSNamespace},
}

// Setup any uid/gid mappings, and create or join the configured user
// namespace so the gofer's view of the filesystem aligns with the
// users in the sandbox.
userNS := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
nss = append(nss, userNS...)
specutils.SetUIDGIDMappings(cmd, spec)
if len(userNS) != 0 {
// We need to set UID and GID to have capabilities in a new user namespace.
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
// nss lists the namespaces the gofer is started in. It stays empty when
// conf.Unprivileged is set, so no new namespaces are requested for the gofer
// in that mode.
nss := []specs.LinuxNamespace{}
if !conf.Unprivileged {
	// Assign with "=" rather than ":=". A short variable declaration here
	// would create a new nss scoped to this if-block, shadowing the one
	// above, and the namespaces configured below would be silently dropped
	// before the gofer is started.
	nss = []specs.LinuxNamespace{
		{Type: specs.IPCNamespace},
		{Type: specs.MountNamespace},
		{Type: specs.NetworkNamespace},
		{Type: specs.PIDNamespace},
		{Type: specs.UTSNamespace},
	}

	// Setup any uid/gid mappings, and create or join the configured user
	// namespace so the gofer's view of the filesystem aligns with the
	// users in the sandbox.
	userNS := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
	nss = append(nss, userNS...)
	specutils.SetUIDGIDMappings(cmd, spec)
	if len(userNS) != 0 {
		// We need to set UID and GID to have capabilities in a new user namespace.
		cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
	}
}

// Start the gofer in the given namespace.
Expand Down
211 changes: 107 additions & 104 deletions runsc/sandbox/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -536,119 +536,122 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
// are virtualized inside the sandbox. Be paranoid and run inside an empty
// namespace for these. Don't unshare cgroup because sandbox is added to a
// cgroup in the caller's namespace.
log.Infof("Sandbox will be started in new mount, IPC and UTS namespaces")
nss := []specs.LinuxNamespace{
{Type: specs.IPCNamespace},
{Type: specs.MountNamespace},
{Type: specs.UTSNamespace},
}
nss := []specs.LinuxNamespace{}
if !conf.Unprivileged {
log.Infof("Sandbox will be started in new mount, IPC and UTS namespaces")
nss = []specs.LinuxNamespace{
{Type: specs.IPCNamespace},
{Type: specs.MountNamespace},
{Type: specs.UTSNamespace},
}

if gPlatform.Requirements().RequiresCurrentPIDNS {
// TODO(b/75837838): Also set a new PID namespace so that we limit
// access to other host processes.
log.Infof("Sandbox will be started in the current PID namespace")
} else {
log.Infof("Sandbox will be started in a new PID namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace})
cmd.Args = append(cmd.Args, "--pidns=true")
}

// Joins the network namespace if network is enabled. the sandbox talks
// directly to the host network, which may have been configured in the
// namespace.
if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != config.NetworkNone {
log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
nss = append(nss, ns)
} else if conf.Network == config.NetworkHost {
log.Infof("Sandbox will be started in the host network namespace")
} else {
log.Infof("Sandbox will be started in new network namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace})
}

// User namespace depends on the network type. Host network requires to run
// inside the user namespace specified in the spec or the current namespace
// if none is configured.
if conf.Network == config.NetworkHost {
if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
nss = append(nss, userns)
specutils.SetUIDGIDMappings(cmd, args.Spec)
if gPlatform.Requirements().RequiresCurrentPIDNS {
// TODO(b/75837838): Also set a new PID namespace so that we limit
// access to other host processes.
log.Infof("Sandbox will be started in the current PID namespace")
} else {
log.Infof("Sandbox will be started in the current user namespace")
log.Infof("Sandbox will be started in a new PID namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace})
cmd.Args = append(cmd.Args, "--pidns=true")
}
// When running in the caller's defined user namespace, apply the same
// capabilities to the sandbox process to ensure it abides to the same
// rules.
cmd.Args = append(cmd.Args, "--apply-caps=true")

// If we have CAP_SYS_ADMIN, we can create an empty chroot and
// bind-mount the executable inside it.
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")

} else if specutils.HasCapabilities(capability.CAP_SYS_ADMIN) {
log.Infof("Sandbox will be started in minimal chroot")
cmd.Args = append(cmd.Args, "--setup-root")

// Joins the network namespace if network is enabled. the sandbox talks
// directly to the host network, which may have been configured in the
// namespace.
if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != config.NetworkNone {
log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
nss = append(nss, ns)
} else if conf.Network == config.NetworkHost {
log.Infof("Sandbox will be started in the host network namespace")
} else {
return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN")
log.Infof("Sandbox will be started in new network namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace})
}
} else {
// If we have CAP_SETUID and CAP_SETGID, then we can also run
// as user nobody.
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid())
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
cmd.Args = append(cmd.Args, "--setup-root")

const nobody = 65534
if conf.Rootless {
log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: os.Getuid(),
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: os.Getgid(),
Size: 1,
},
}

// User namespace depends on the network type. Host network requires to run
// inside the user namespace specified in the spec or the current namespace
// if none is configured.
if conf.Network == config.NetworkHost {
if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
nss = append(nss, userns)
specutils.SetUIDGIDMappings(cmd, args.Spec)
} else {
// Map nobody in the new namespace to nobody in the parent namespace.
//
// A sandbox process will construct an empty
// root for itself, so it has to have
// CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
log.Infof("Sandbox will be started in the current user namespace")
}
// When running in the caller's defined user namespace, apply the same
// capabilities to the sandbox process to ensure it abides to the same
// rules.
cmd.Args = append(cmd.Args, "--apply-caps=true")

// If we have CAP_SYS_ADMIN, we can create an empty chroot and
// bind-mount the executable inside it.
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")

} else if specutils.HasCapabilities(capability.CAP_SYS_ADMIN) {
log.Infof("Sandbox will be started in minimal chroot")
cmd.Args = append(cmd.Args, "--setup-root")
} else {
return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN")
}

// Set credentials to run as user and group nobody.
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
// If we have CAP_SETUID and CAP_SETGID, then we can also run
// as user nobody.
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid())
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
cmd.Args = append(cmd.Args, "--setup-root")

const nobody = 65534
if conf.Rootless {
log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: os.Getuid(),
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: os.Getgid(),
Size: 1,
},
}

} else {
// Map nobody in the new namespace to nobody in the parent namespace.
//
// A sandbox process will construct an empty
// root for itself, so it has to have
// CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
}

// Set credentials to run as user and group nobody.
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
}
}

Expand Down

0 comments on commit 1924e9e

Please sign in to comment.