Skip to content

Commit

Permalink
Open bind mount sources from the host userns
Browse files Browse the repository at this point in the history
The source of the bind mount might not be accessible in a different user
namespace because a component of the source path might not be traversed
under the users and groups mapped inside the user namespace. This caused
errors such as the following:

  # time="2020-06-22T13:48:26Z" level=error msg="container_linux.go:367:
  starting container process caused: process_linux.go:459:
  container init caused: rootfs_linux.go:58:
  mounting \"/tmp/busyboxtest/source-inaccessible/dir\"
  to rootfs at \"/tmp/inaccessible\" caused:
  stat /tmp/busyboxtest/source-inaccessible/dir: permission denied"

To solve this problem, this patch performs the following:

1. in nsexec.c, it opens the source path in the host userns (so we have
   the right permissions to open it) but in the container mntns (so the
   kernel cross mntns mount check let us mount it later:
   https://github.com/torvalds/linux/blob/v5.8/fs/namespace.c#L2312).

2. in nsexec.c, it passes the file descriptors of the source to the
   child process with SCM_RIGHTS.

3. In runc-init in Golang, it finishes the mounts while inside the
   userns even without access to the some components of the source
   paths.

Passing the fds with SCM_RIGHTS is necessary because once the child
process is in the container mntns, it is already in the container userns
so it cannot temporarily join the host mntns.

This patch uses the existing mechanism with _LIBCONTAINER_* environment
variables to pass the file descriptors from runc to runc init.

This patch uses the existing mechanism with the Netlink-style bootstrap
to pass information about the list of source mounts to nsexec.c.

Rootless containers don't use this bind mount sources fdpassing
mechanism because we can't setns() to the target mntns in a rootless
container (we don't have the privileges when we are in the host userns).

This patch takes care of using O_CLOEXEC on mount files, and close them
early.

Fixes: opencontainers#2484.

Signed-off-by: Alban Crequy <[email protected]>
Signed-off-by: Rodrigo Campos <[email protected]>
Co-authored-by: Rodrigo Campos <[email protected]>
  • Loading branch information
alban and rata committed Aug 10, 2021
1 parent 7d4bac6 commit 72a78a9
Show file tree
Hide file tree
Showing 7 changed files with 384 additions and 25 deletions.
69 changes: 65 additions & 4 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,27 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
return cmd
}

// shouldSendMountSources says whether the child process must setup bind mounts with
// the source pre-opened (O_PATH) in the host user namespace.
// See https://github.com/opencontainers/runc/issues/2484
func (c *linuxContainer) shouldSendMountSources() bool {
// Passing the mount sources via SCM_RIGHTS is only necessary when
// both userns and mntns are active.
if len(c.config.Mounts) == 0 {
return false
}
// nsexec.c send_mountsources() requires setns(mntns) capabilities
// CAP_SYS_CHROOT and CAP_SYS_ADMIN
if c.config.RootlessEUID {
return false
}
if !c.config.Namespaces.Contains(configs.NEWUSER) ||
!c.config.Namespaces.Contains(configs.NEWNS) {
return false
}
return true
}

func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
Expand All @@ -523,10 +544,32 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
if err != nil {
return nil, err
}

if c.shouldSendMountSources() {
mountFileFdsList := ""
for _, m := range c.config.Mounts {
if m.Device != "bind" {
// StartInitialization() finds out the Mounts indices by counting ";".
// We take care of adding the right number of ";"
mountFileFdsList += ";"
continue
}
// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
// to allocate a fd so that we know the number to pass in the environment variable. The fd
// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
// lifecycle of that fd is already taken care of.
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
mountFileFdsList += fmt.Sprintf("%d;", stdioFdCount+len(cmd.ExtraFiles)-1)
}
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_MOUNT_FILE_FDS="+mountFileFdsList,
)
}

init := &initProcess{
cmd: cmd,
messageSockPair: messageSockPair,
Expand All @@ -551,7 +594,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
}
// for setns process, we don't have to set cloneflags as the process namespaces
// will only be set via setns syscall
data, err := c.bootstrapData(0, state.NamespacePaths)
data, err := c.bootstrapData(0, state.NamespacePaths, initSetns)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -1178,7 +1221,9 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
case "bind":
// The prepareBindMount() function checks if source
// exists. So it cannot be used for other filesystem types.
if err := prepareBindMount(m, c.config.Rootfs); err != nil {
// TODO: pass something else than nil? Not sure if criu is
// impacted by issue #2484
if err := prepareBindMount(m, c.config.Rootfs, nil); err != nil {
return err
}
default:
Expand Down Expand Up @@ -2008,7 +2053,7 @@ func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
// such as one that uses nsenter package to bootstrap the container's
// init process correctly, i.e. with correct namespaces, uid/gid
// mapping etc.
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) {
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (io.Reader, error) {
// create the netlink message
r := nl.NewNetlinkRequest(int(InitMsg), 0)

Expand Down Expand Up @@ -2090,6 +2135,22 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Value: c.config.RootlessEUID,
})

// bind mount source to open
if it == initStandard && c.shouldSendMountSources() {
var mounts []byte
for _, m := range c.config.Mounts {
if m.Device == "bind" {
mounts = append(mounts, []byte(m.Source)...)
}
mounts = append(mounts, byte(0))
}

r.AddData(&Bytemsg{
Type: MountSourcesAttr,
Value: mounts,
})
}

return bytes.NewReader(r.Serialize()), nil
}

Expand Down
21 changes: 20 additions & 1 deletion libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"regexp"
"runtime/debug"
"strconv"
"strings"

securejoin "github.com/cyphar/filepath-securejoin"
"github.com/moby/sys/mountinfo"
Expand Down Expand Up @@ -380,6 +381,24 @@ func (l *LinuxFactory) StartInitialization() (err error) {
return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
}

// Get mount files (O_PATH)
envMountFileFds := os.Getenv("_LIBCONTAINER_MOUNT_FILE_FDS")
mountFilesArr := strings.Split(envMountFileFds, ";")
mountFiles := make([]*os.File, len(mountFilesArr))
for i, fdStr := range mountFilesArr {
if fdStr == "" {
continue
}
mountFileFd, err := strconv.Atoi(fdStr)
if err != nil {
return fmt.Errorf("unable to parse _LIBCONTAINER_MOUNT_FILE_FDS(%q): %w",
envMountFileFds, err)
}
mountFile := os.NewFile(uintptr(mountFileFd), "mount-file")
defer mountFile.Close()
mountFiles[i] = mountFile
}

// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
Expand All @@ -402,7 +421,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
}
}()

i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd)
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFiles)
if err != nil {
return err
}
Expand Down
11 changes: 10 additions & 1 deletion libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ type initer interface {
Init() error
}

func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int) (initer, error) {
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFiles []*os.File) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
Expand All @@ -88,6 +88,14 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
}
switch t {
case initSetns:
// mountFiles must not be sent in this case, make sure the slice
// are all nil pointers.
for _, m := range mountFiles {
if m != nil {
return nil, fmt.Errorf("mountFiles must be empty, but got: %+v", mountFiles)
}
}

return &linuxSetnsInit{
pipe: pipe,
consoleSocket: consoleSocket,
Expand All @@ -102,6 +110,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
config: config,
fifoFd: fifoFd,
logFd: logFd,
mountFiles: mountFiles,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
Expand Down
1 change: 1 addition & 0 deletions libcontainer/message_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const (
RootlessEUIDAttr uint16 = 27287
UidmapPathAttr uint16 = 27288
GidmapPathAttr uint16 = 27289
MountSourcesAttr uint16 = 27290
)

type Int32msg struct {
Expand Down
Loading

0 comments on commit 72a78a9

Please sign in to comment.