Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] use go 1.22 #4262

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ task:
chown -R rootless.rootless /home/rootless
# set PATH
echo 'export PATH=/usr/local/go/bin:/usr/local/bin:$PATH' >> /root/.bashrc
# set CGO_CFLAGS
# echo 'export CGO_CFLAGS="-DDONT_USE_PR_GET_TID_ADDRESS=1"' >> /root/.bashrc
# Setup ssh localhost for terminal emulation (script -e did not work)
ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -N ""
cat /root/.ssh/id_ed25519.pub >> /root/.ssh/authorized_keys
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-22.04, actuated-arm64-6cpu-8gb]
go-version: [1.20.x, 1.21.x]
go-version: [1.20.x, 1.21.x, 1.22.x]
rootless: ["rootless", ""]
race: ["-race", ""]
criu: ["", "criu-dev"]
dmz: ["", "runc_nodmz"]
tidscan: ["", "-DDONT_USE_PR_GET_TID_ADDRESS=1"]
exclude:
# Disable most of criu-dev jobs, as they are expensive
# (need to compile criu) and don't add much value/coverage.
Expand Down Expand Up @@ -151,6 +152,7 @@ jobs:
- name: build
env:
EXTRA_BUILDTAGS: ${{ matrix.dmz }}
CGO_CFLAGS: ${{ matrix.tidscan }}
run: sudo -E PATH="$PATH" make EXTRA_FLAGS="${{ matrix.race }}" all

- name: install bats
Expand All @@ -162,6 +164,7 @@ jobs:
if: matrix.rootless != 'rootless'
env:
EXTRA_BUILDTAGS: ${{ matrix.dmz }}
CGO_CFLAGS: ${{ matrix.tidscan }}
run: sudo -E PATH="$PATH" -- make TESTFLAGS="${{ matrix.race }}" localunittest

- name: add rootless user
Expand Down Expand Up @@ -219,7 +222,7 @@ jobs:
- name: install go
uses: actions/setup-go@v5
with:
go-version: 1.21.x # TODO: switch to 1.x (latest stable) once Go 1.22 vs glibc issue is fixed.
go-version: 1.x # Latest stable

- name: unit test
env:
Expand Down
12 changes: 12 additions & 0 deletions libcontainer/nsenter/nsenter_go121.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
//go:build go1.21

package nsenter

// Since Go 1.21 <https://github.com/golang/go/commit/c426c87012b5e>, the Go
// runtime will try to call pthread_getattr_np(pthread_self()). This causes
// issues with nsexec and requires some kludges to overwrite the internal
// thread-local glibc cache of the current TID. See find_tls_tid_address in
// nsexec.c for the horrific details.

// #cgo CFLAGS: -DRUNC_TID_KLUDGE=1
import "C"
15 changes: 0 additions & 15 deletions libcontainer/nsenter/nsenter_go122.go

This file was deleted.

208 changes: 198 additions & 10 deletions libcontainer/nsenter/nsexec.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#define _GNU_SOURCE
#include <endian.h>
#include <errno.h>
Expand All @@ -13,8 +12,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <pthread.h> /* only used for pthread_self() -- see clone_parent() */

#include <sys/ioctl.h>
#include <sys/prctl.h>
Expand Down Expand Up @@ -111,17 +113,11 @@ struct nlconfig_t {
#define GIDMAPPATH_ATTR 27289
#define TIMENSOFFSET_ATTR 27290

/*
* Use the raw syscall for versions of glibc which don't include a function for
* it, namely (glibc 2.12).
*/
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
# define _GNU_SOURCE
# include "syscall.h"
/* The setns() libc wrapper was added in glibc 2.14. */
#if !__GLIBC_PREREQ(2, 14)
# if !defined(SYS_setns) && defined(__NR_setns)
# define SYS_setns __NR_setns
# endif

# ifndef SYS_setns
# error "setns(2) syscall not supported by glibc version"
# endif
Expand Down Expand Up @@ -311,6 +307,157 @@ static int child_func(void *arg)
longjmp(*ca->env, ca->jmpval);
}

/*
 * The gettid() libc wrapper was added in glibc 2.30, so a fallback definition
 * is compiled in when building against an older glibc.
 *
 * The fallback returns the result of the raw gettid syscall when a syscall
 * number is available (SYS_gettid, or __NR_gettid aliased to it), and the
 * result of getpid() otherwise.
 *
 * NOTE(review): __GLIBC_PREREQ is presumably only provided by glibc's headers;
 * confirm this directive still preprocesses when building against another libc.
 */
#if !__GLIBC_PREREQ(2, 30)
/* Some headers only expose the __NR_* spelling of the syscall number. */
# if !defined(SYS_gettid) && defined(__NR_gettid)
# define SYS_gettid __NR_gettid
# endif
pid_t gettid(void)
{
# ifdef SYS_gettid
return syscall(SYS_gettid);
# else
/* We are single-threaded here, so just using the pid is okay. */
return getpid();
# endif
}
#endif

/*
 * RUNC_TID_KLUDGE gates all of the TID-cache fixup code below. It defaults to
 * 0 (disabled) unless the build defines it (nsenter_go121.go passes
 * -DRUNC_TID_KLUDGE=1 through cgo CFLAGS).
 */
#if !defined(RUNC_TID_KLUDGE)
# define RUNC_TID_KLUDGE 0
#endif
#if RUNC_TID_KLUDGE
/*
 * When DONT_USE_PR_GET_TID_ADDRESS is non-zero, the prctl(PR_GET_TID_ADDRESS)
 * lookup in find_tls_tid_address() is compiled out and only the scan is used.
 * Defaults to 0 (use prctl first).
 */
# if !defined(DONT_USE_PR_GET_TID_ADDRESS)
# define DONT_USE_PR_GET_TID_ADDRESS 0
# endif
/* Number of consecutive byte offsets from a pthread_t handle that thd_func() probes. */
# define TID_OFFSET_SCAN_MAX 1024
/*
 * Offset of the cached TID relative to a pthread_t handle, as found by
 * thd_func(). 0 means "not found (yet)"; find_tls_tid_address() sets it to -1
 * when its post-join sanity check fails; only values > 0 are used.
 */
static int tid_offset = 0;

/* A thread handle paired with the kernel TID that gettid() reported for it. */
struct pthread_tid {
/* Value returned by pthread_self() in that thread. */
pthread_t handle;
/* Value returned by gettid() in that thread. */
pid_t tid;
};

static void *thd_func(void *arg)
{
int i;
struct pthread_tid *t1 = (struct pthread_tid *)arg;
struct pthread_tid t2 = {
.handle = pthread_self(),
.tid = gettid(),
};
pid_t *tid1, *tid2;

for (i = 0; i < TID_OFFSET_SCAN_MAX; i++) {
tid1 = (pid_t *) (t1->handle + i);
tid2 = (pid_t *) (t2.handle + i);
// After created a thread, only several fields in pthread_t struct will be changed,
// including the field of tid.
if (*tid1 != *tid2 && *tid1 == t1->tid && *tid2 == t2.tid) {
tid_offset = i;
break;
}
}

// If we can't find tid offset, print some useful debug infos.
if (tid_offset <= 0) {
for (i = 0; i < TID_OFFSET_SCAN_MAX; i++) {
tid1 = (pid_t *) (t1->handle + i);
tid2 = (pid_t *) (t2.handle + i);
write_log(WARNING, "tid_offset scan index(%d): tid1 %d, tid2 %d\n", i, *tid1, *tid2);
}
}
return NULL;
}

/*
 * find_tls_tid_address - locate the libc's cached copy of the caller's TID.
 *
 * Returns the address of a pid_t that currently holds the calling thread's
 * TID, or NULL if no such address could be found. Calls bail() if the helper
 * thread used by the fallback scan cannot be created or joined, or if the
 * temporary cross-check against PR_GET_TID_ADDRESS disagrees with the scan.
 */
static pid_t *find_tls_tid_address(void)
{
	/*
	 * glibc sets CLONE_CHILD_CLEARTID to &THREAD_SELF->tid (the thread-local
	 * cache of the thread's tid), which we can retrieve using
	 * PR_GET_TID_ADDRESS on kernels that support it (Linux >= 3.5 and
	 * CONFIG_CHECKPOINT_RESTORE=y). Otherwise we have to do a somewhat-hairy
	 * linear scan for the address based on pthread_self().
	 *
	 * Other libcs (like musl) set up processes differently, meaning this logic
	 * will only work for runc builds using glibc (more precisely, the process
	 * which spawned "runc init" needs to be a glibc-based process using
	 * glibc's fork() primitives -- this is the case for runc when built with
	 * glibc). The linear scan should still technically work for the musl
	 * versions I've checked, but at the moment we only do this for glibc.
	 */

	pid_t *tid_addr = NULL;
	pthread_t thdTemp;
	int err;
	struct pthread_tid main_tid = {
		.handle = pthread_self(),
		.tid = gettid(),
	};

# if !DONT_USE_PR_GET_TID_ADDRESS
	if (!prctl(PR_GET_TID_ADDRESS, &tid_addr)) {
		/*
		 * Make sure the address actually contains the current TID. musl uses a
		 * different pointer with CLONE_CHILD_CLEARTID, so PR_GET_TID_ADDRESS
		 * succeeding doesn't mean the address is the one we want.
		 */
		if (tid_addr && *tid_addr == main_tid.tid) {
			return tid_addr;
		}
	}
	/*
	 * The prctl result (if any) failed validation. Forget it so that a
	 * failed scan below yields NULL rather than an address we have just
	 * decided not to trust.
	 */
	tid_addr = NULL;
# endif
	write_log(DEBUG, "let's try using tid scan to find out the tid address in struct thread");

	/*
	 * If we cannot use PR_GET_TID_ADDRESS to get &PTHREAD_SELF->tid, we
	 * are probably running on a CONFIG_CHECKPOINT_RESTORE=n kernel.
	 * Unfortunately the layout of "struct pthread" is not public, so we
	 * need to get the address by force.
	 *
	 * We spawn a short-lived helper thread (thd_func) which compares its
	 * own thread descriptor against ours, byte offset by byte offset, and
	 * records in tid_offset the first offset at which each descriptor
	 * holds its own thread's TID. The result is cached in tid_offset so
	 * the scan only has to succeed once. Yes, this is *much* worse than
	 * PR_GET_TID_ADDRESS and is pretty terrifying, but we should never
	 * get here on the vast majority of machines.
	 *
	 * (To be honest, maybe it's better to just hope Go doesn't notice any
	 * issues with glibc rather than trying to hack internal glibc
	 * structures to make them "work" with Go. But it seems we need to do
	 * this...)
	 */
	if (tid_offset <= 0) {
		err = pthread_create(&thdTemp, NULL, thd_func, &main_tid);
		if (err != 0)
			bail("failed to create thread");
		err = pthread_join(thdTemp, NULL);
		if (err != 0)
			bail("failed to join thread");

		/* Only sanity-check an offset the helper thread actually found. */
		if (tid_offset > 0) {
			pid_t exited_tid;

			/*
			 * After the thread is exited, the tid field will be reset to -1.
			 *
			 * NOTE(review): this reads the joined thread's descriptor
			 * and so presumably relies on the libc keeping that memory
			 * mapped after pthread_join() -- confirm.
			 */
			memcpy(&exited_tid, (const void *)(thdTemp + tid_offset), sizeof(exited_tid));
			if (exited_tid != -1) {
				write_log(WARNING, "after the pthread exited, tid should be -1, but got %d\n", exited_tid);
				tid_offset = -1;
			}

			/*
			 * Temporary cross-check (originally added for CentOS 7
			 * testing, to be removed later): if the kernel can tell us
			 * the real address, the scanned one must agree with it.
			 */
			pid_t *prctlTidAddr = NULL;
			if (!prctl(PR_GET_TID_ADDRESS, &prctlTidAddr)) {
				write_log(WARNING, "tid_offset: %d, prctlTidAddr: %ld, tid addr: %ld\n", tid_offset, (long int)prctlTidAddr, (long int)(main_tid.handle + tid_offset));
				if (prctlTidAddr != (pid_t *)(main_tid.handle + tid_offset)) {
					bail("invalid tid_offset");
				}
			}
		}
	}
	if (tid_offset > 0) {
		tid_addr = (pid_t *) (main_tid.handle + tid_offset);
	}

	return tid_addr;
}
#endif /* RUNC_TID_KLUDGE */

static int clone_parent(jmp_buf *env, int jmpval) __attribute__((noinline));
static int clone_parent(jmp_buf *env, int jmpval)
{
Expand All @@ -319,7 +466,48 @@ static int clone_parent(jmp_buf *env, int jmpval)
.jmpval = jmpval,
};

return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
/*
* Since glibc 2.25 (see c579f48edba88380635ab98cb612030e3ed8691e),
* glibc no longer updates the TLS state containing the current process
* tid after clone(2). This results in stale TIDs being used when Go
* 1.22 and later call pthread_getattr_np(pthread_self()), resulting
* in crashes on ancient glibcs and errors on newer glibcs.
*
* Luckily, because the address containing pthread's cached TID is also
* used for CLONE_CHILD_CLEARTID, we can poke around in glibc's internal
* cache by getting the address using PR_GET_TID_ADDRESS. For kernels
* without PR_GET_TID_ADDRESS support (Linux < 3.5 or
* CONFIG_CHECKPOINT_RESTORE=n), we have to do some far uglier tricks to
* find the address. We then overwrite the address with the correct TID
* using CLONE_CHILD_SETTID, and set CLONE_CHILD_CLEARTID to match glibc's
* arch_fork() (which also allows descendants to find the address with
* PR_GET_TID_ADDRESS).
*
* Yes, this is pretty horrific, but the core issue here is that we
* need to run Go code ("runc init") in the child after fork(), which
* is not allowed by glibc (see signal-safety(7)). We cannot exec to
* solve the problem because we are in a security critical situation
* here, and doing an exec would allow for container escapes (obvious
* issues include that the shared libraries loaded from a re-exec would
* come from the container, and doing an exec here would reset mm->user_ns
* which would allow for breakouts by userns containers with
* CAP_SYS_PTRACE).
*
* Note that all of this is only guaranteed to work if "runc init" was
* spawned from a *glibc* fork. A fork from another libc might not work, so
* we only do this for glibc.
*/
pid_t *tid_addr = NULL;
#if RUNC_TID_KLUDGE
tid_addr = find_tls_tid_address();
if (tid_addr == NULL) {
bail("can't find the tid address for struct pthread");
}
#endif

return clone(child_func, ca.stack_ptr,
CLONE_PARENT | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, &ca,
NULL /* parent_tid */ , NULL /* tls */ , tid_addr /* child_tid */ );
}

/* Returns the clone(2) flag for a namespace, given the name of a namespace. */
Expand Down
Loading