Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix auto-uid-allocation in Docker containers #7692

Merged
merged 6 commits into from
Feb 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 19 additions & 63 deletions src/libstore/build/local-derivation-goal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "json-utils.hh"
#include "cgroup.hh"
#include "personality.hh"
#include "namespaces.hh"

#include <regex>
#include <queue>
Expand Down Expand Up @@ -167,7 +168,8 @@ void LocalDerivationGoal::killSandbox(bool getStats)
}


void LocalDerivationGoal::tryLocalBuild() {
void LocalDerivationGoal::tryLocalBuild()
{
unsigned int curBuilds = worker.getNrLocalBuilds();
if (curBuilds >= settings.maxBuildJobs) {
state = &DerivationGoal::tryToBuild;
Expand Down Expand Up @@ -205,6 +207,17 @@ void LocalDerivationGoal::tryLocalBuild() {
#endif
}

#if __linux__
if (useChroot) {
if (!mountNamespacesSupported() || !pidNamespacesSupported()) {
if (!settings.sandboxFallback)
throw Error("this system does not support the kernel namespaces that are required for sandboxing; use '--no-sandbox' to disable sandboxing");
debug("auto-disabling sandboxing because the prerequisite namespaces are not available");
useChroot = false;
}
}
#endif

if (useBuildUsers()) {
if (!buildUser)
buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot);
Expand Down Expand Up @@ -888,12 +901,7 @@ void LocalDerivationGoal::startBuilder()

userNamespaceSync.create();

Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces";
static bool userNamespacesEnabled =
pathExists(maxUserNamespaces)
&& trim(readFile(maxUserNamespaces)) != "0";

usingUserNamespace = userNamespacesEnabled;
usingUserNamespace = userNamespacesSupported();

Pid helper = startProcess([&]() {

Expand All @@ -920,64 +928,15 @@ void LocalDerivationGoal::startBuilder()
flags |= CLONE_NEWUSER;

pid_t child = clone(childEntry, stack + stackSize, flags, this);
if (child == -1 && errno == EINVAL) {
/* Fallback for Linux < 2.13 where CLONE_NEWPID and
CLONE_PARENT are not allowed together. */
flags &= ~CLONE_NEWPID;
child = clone(childEntry, stack + stackSize, flags, this);
}
if (usingUserNamespace && child == -1 && (errno == EPERM || errno == EINVAL)) {
/* Some distros patch Linux to not allow unprivileged
* user namespaces. If we get EPERM or EINVAL, try
* without CLONE_NEWUSER and see if that works.
* Details: https://salsa.debian.org/kernel-team/linux/-/commit/d98e00eda6bea437e39b9e80444eee84a32438a6
*/
usingUserNamespace = false;
flags &= ~CLONE_NEWUSER;
child = clone(childEntry, stack + stackSize, flags, this);
}
edolstra marked this conversation as resolved.
Show resolved Hide resolved
if (child == -1) {
switch(errno) {
case EPERM:
case EINVAL: {
int errno_ = errno;
if (!userNamespacesEnabled && errno==EPERM)
notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/user/max_user_namespaces");
if (userNamespacesEnabled) {
Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone";
if (pathExists(procSysKernelUnprivilegedUsernsClone)
&& trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") {
notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/kernel/unprivileged_userns_clone");
}
}
Path procSelfNsUser = "/proc/self/ns/user";
if (!pathExists(procSelfNsUser))
notice("/proc/self/ns/user does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing");
/* Otherwise exit with EPERM so we can handle this in the
parent. This is only done when sandbox-fallback is set
to true (the default). */
if (settings.sandboxFallback)
_exit(1);
/* Mention sandbox-fallback in the error message so the user
knows that having it disabled contributed to the
unrecoverability of this failure */
throw SysError(errno_, "creating sandboxed builder process using clone(), without sandbox-fallback");
}
default:
throw SysError("creating sandboxed builder process using clone()");
}
}

if (child == -1)
throw SysError("creating sandboxed builder process using clone()");
writeFull(builderOut.writeSide.get(),
fmt("%d %d\n", usingUserNamespace, child));
_exit(0);
});

int res = helper.wait();
if (res != 0 && settings.sandboxFallback) {
useChroot = false;
initTmpDir();
goto fallback;
} else if (res != 0)
if (helper.wait() != 0)
throw Error("unable to start build process");

userNamespaceSync.readSide = -1;
Expand Down Expand Up @@ -1045,9 +1004,6 @@ void LocalDerivationGoal::startBuilder()
} else
#endif
{
#if __linux__
fallback:
#endif
pid = startProcess([&]() {
runChild();
});
Expand Down
100 changes: 100 additions & 0 deletions src/libutil/namespaces.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#if __linux__

#include "namespaces.hh"
#include "util.hh"
#include "finally.hh"

#include <mntent.h>

namespace nix {

bool userNamespacesSupported()
{
static auto res = [&]() -> bool
{
if (!pathExists("/proc/self/ns/user")) {
debug("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y");
return false;
}

Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces";
if (!pathExists(maxUserNamespaces) ||
trim(readFile(maxUserNamespaces)) == "0")
{
debug("user namespaces appear to be disabled; check '/proc/sys/user/max_user_namespaces'");
return false;
}

Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone";
if (pathExists(procSysKernelUnprivilegedUsernsClone)
&& trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0")
{
debug("user namespaces appear to be disabled; check '/proc/sys/kernel/unprivileged_userns_clone'");
return false;
}

Pid pid = startProcess([&]()
{
auto res = unshare(CLONE_NEWUSER);
_exit(res ? 1 : 0);
});

bool supported = pid.wait() == 0;

if (!supported)
debug("user namespaces do not work on this system");

return supported;
}();
return res;
}

bool mountNamespacesSupported()
{
static auto res = [&]() -> bool
{
bool useUserNamespace = userNamespacesSupported();

Pid pid = startProcess([&]()
{
auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0));
_exit(res ? 1 : 0);
});

bool supported = pid.wait() == 0;

if (!supported)
debug("mount namespaces do not work on this system");

return supported;
}();
return res;
}

bool pidNamespacesSupported()
{
static auto res = [&]() -> bool
{
/* Check whether /proc is fully visible, i.e. there are no
filesystems mounted on top of files inside /proc. If this
is not the case, then we cannot mount a new /proc inside
the sandbox that matches the sandbox's PID namespace.
See https://lore.kernel.org/lkml/[email protected]/T/. */
auto fp = fopen("/proc/mounts", "r");
if (!fp) return false;
Finally delFP = [&]() { fclose(fp); };

while (auto ent = getmntent(fp))
if (hasPrefix(std::string_view(ent->mnt_dir), "/proc/")) {
debug("PID namespaces do not work because /proc is not fully visible; disabling sandboxing");
return false;
}

return true;
}();
return res;
}

}

#endif
15 changes: 15 additions & 0 deletions src/libutil/namespaces.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

namespace nix {

#if __linux__

bool userNamespacesSupported();

bool mountNamespacesSupported();

bool pidNamespacesSupported();

#endif

}