Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sandbox: support shared process namespace #149

Merged
merged 1 commit into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions vmm/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@ pub const RESOLV_FILENAME: &str = "resolv.conf";
pub const SANDBOX_NS_PATH: &str = "/run/sandbox-ns";
pub const NET_NAMESPACE: &str = "network";
pub const IPC_NAMESPACE: &str = "ipc";
pub const PID_NAMESPACE: &str = "pid";
pub const UTS_NAMESPACE: &str = "uts";
pub const CGROUP_NAMESPACE: &str = "cgroup";
5 changes: 4 additions & 1 deletion vmm/sandbox/src/cloud_hypervisor/factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::{
devices::{console::Console, fs::Fs, pmem::Pmem, rng::Rng, vsock::Vsock},
CloudHypervisorVM,
},
sandbox::has_shared_pid_namespace,
utils::get_netns,
vm::VMFactory,
};
Expand All @@ -46,7 +47,9 @@ impl VMFactory for CloudHypervisorVMFactory {
) -> containerd_sandbox::error::Result<Self::VM> {
let netns = get_netns(&s.sandbox);
let mut vm = CloudHypervisorVM::new(id, &netns, &s.base_dir, &self.vm_config);

if has_shared_pid_namespace(&s.sandbox) {
vm.config.cmdline.push_str(" task.share_pidns")
}
// add image as a disk
if !self.vm_config.common.image_path.is_empty() {
let rootfs_device = Pmem::new("rootfs", &self.vm_config.common.image_path, true);
Expand Down
19 changes: 16 additions & 3 deletions vmm/sandbox/src/container/handler/ns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@ limitations under the License.

use async_trait::async_trait;
use containerd_sandbox::error::Result;
use vmm_common::{CGROUP_NAMESPACE, IPC_NAMESPACE, NET_NAMESPACE, SANDBOX_NS_PATH, UTS_NAMESPACE};
use vmm_common::{
CGROUP_NAMESPACE, IPC_NAMESPACE, NET_NAMESPACE, PID_NAMESPACE, SANDBOX_NS_PATH, UTS_NAMESPACE,
};

use crate::{container::handler::Handler, sandbox::KuasarSandbox, vm::VM};
use crate::{
container::handler::Handler,
sandbox::{has_shared_pid_namespace, KuasarSandbox},
vm::VM,
};

pub struct NamespaceHandler {
container_id: String,
Expand All @@ -38,6 +44,7 @@ where
T: VM + Sync + Send,
{
async fn handle(&self, sandbox: &mut KuasarSandbox<T>) -> Result<()> {
let share_pidns = has_shared_pid_namespace(&sandbox.data);
let container = sandbox.container_mut(&self.container_id)?;
let spec = if let Some(s) = &mut container.data.spec {
s
Expand All @@ -47,8 +54,14 @@ where
if let Some(l) = spec.linux.as_mut() {
l.namespaces
.retain(|n| n.r#type != NET_NAMESPACE && n.r#type != CGROUP_NAMESPACE);

l.namespaces.iter_mut().for_each(|n| {
n.path = if n.r#type == IPC_NAMESPACE || n.r#type == UTS_NAMESPACE {
// The IPC and UTS namespaces are shared by default.
// The PID namespace is shared only if the pod config requests it.
n.path = if n.r#type == IPC_NAMESPACE
|| n.r#type == UTS_NAMESPACE
|| (n.r#type == PID_NAMESPACE && share_pidns)
{
format!("{}/{}", SANDBOX_NS_PATH, n.r#type)
} else {
"".to_string()
Expand Down
16 changes: 16 additions & 0 deletions vmm/sandbox/src/sandbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::{collections::HashMap, io::ErrorKind, path::Path, sync::Arc};
use anyhow::anyhow;
use async_trait::async_trait;
use containerd_sandbox::{
cri::api::v1::NamespaceMode,
data::SandboxData,
error::{Error, Result},
signal::ExitSignal,
Expand Down Expand Up @@ -667,6 +668,21 @@ fn parse_dnsoptions(servers: &[String], searches: &[String], options: &[String])
resolv_content
}

/// Reports whether the sandbox asks for a pod-level (shared) PID namespace.
///
/// Walks `config -> linux -> security_context -> namespace_options` and returns
/// `true` only when every level is present and the PID namespace mode equals
/// `NamespaceMode::Pod`. Any missing level yields `false`.
pub fn has_shared_pid_namespace(data: &SandboxData) -> bool {
    data.config
        .as_ref()
        .and_then(|cfg| cfg.linux.as_ref())
        .and_then(|linux| linux.security_context.as_ref())
        .and_then(|ctx| ctx.namespace_options.as_ref())
        .map(|opts| opts.pid() == NamespaceMode::Pod)
        .unwrap_or(false)
}

#[derive(Default, Debug, Deserialize)]
pub struct SandboxConfig {
#[serde(default)]
Expand Down
4 changes: 4 additions & 0 deletions vmm/task/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use tokio::fs::read_to_string;
const SHAREFS_TYPE: &str = "task.sharefs_type";
const LOG_LEVEL: &str = "task.log_level";
const TASK_DEBUG: &str = "task.debug";
const SHARE_PIDNS: &str = "task.share_pidns";

macro_rules! parse_cmdline {
($param:ident, $key:ident, $field:expr) => {
Expand All @@ -41,6 +42,7 @@ macro_rules! parse_cmdline {
/// Runtime settings of the task process.
///
/// Each field is filled in from a `task.*` parameter through `parse_cmdline!`
/// (presumably taken from the guest kernel command line — confirm against the
/// loader of this config); fields without a matching parameter keep the values
/// from `Default`.
pub struct TaskConfig {
/// Value of `task.sharefs_type`; defaults to `"9p"`.
pub(crate) sharefs_type: String,
/// Value of `task.log_level`; defaults to `"info"`.
pub(crate) log_level: String,
/// Set by `task.share_pidns`; defaults to `false`. When true, the PID
/// namespace is added to the namespaces set up for the sandbox.
pub(crate) share_pidns: bool,
/// Set by `task.debug`; defaults to `false`.
pub(crate) debug: bool,
}

Expand All @@ -49,6 +51,7 @@ impl Default for TaskConfig {
TaskConfig {
sharefs_type: "9p".to_string(),
log_level: "info".to_string(),
share_pidns: false,
debug: false,
}
}
Expand All @@ -66,6 +69,7 @@ impl TaskConfig {
parse_cmdline!(param, SHAREFS_TYPE, config.sharefs_type, String::from);
parse_cmdline!(param, LOG_LEVEL, config.log_level, String::from);
parse_cmdline!(param, TASK_DEBUG, config.debug);
parse_cmdline!(param, SHARE_PIDNS, config.share_pidns);
}
Ok(config)
}
Expand Down
1 change: 1 addition & 0 deletions vmm/task/src/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub async fn listen_debug_console(addr: &str) -> Result<()> {
tokio::spawn(async move {
let mut incoming = l.incoming();
while let Some(Ok(s)) = incoming.next().await {
debug!("get a debug console request");
if let Err(e) = debug_console(s).await {
error!("failed to open debug console {:?}", e);
}
Expand Down
120 changes: 76 additions & 44 deletions vmm/task/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ limitations under the License.
#![warn(clippy::expect_fun_call, clippy::expect_used)]

use std::{
collections::HashMap, convert::TryFrom, path::Path, process::exit, str::FromStr, sync::Arc,
thread,
collections::HashMap, convert::TryFrom, os::fd::AsRawFd, path::Path, process::exit,
str::FromStr, sync::Arc,
};

use containerd_shim::{
asynchronous::{monitor::monitor_notify_by_pid, util::asyncify},
error::Error,
io_error, other,
io_error, other, other_error,
protos::{shim::shim_ttrpc_async::create_task, ttrpc::asynchronous::Server},
util::{mkdir, IntoOption},
Result,
Expand All @@ -35,17 +35,15 @@ use log::{debug, error, info, warn, LevelFilter};
use nix::{
errno::Errno,
sched::{unshare, CloneFlags},
sys::{
wait,
wait::{WaitPidFlag, WaitStatus},
},
unistd::{getpid, gettid, Pid},
sys::wait::{self, WaitPidFlag, WaitStatus},
unistd::{fork, getpid, pause, pipe, ForkResult, Pid},
};
use signal_hook_tokio::Signals;
use tokio::fs::File;
use vmm_common::{
api::sandbox_ttrpc::create_sandbox_service, mount::mount, ETC_RESOLV, HOSTNAME_FILENAME,
IPC_NAMESPACE, KUASAR_STATE_DIR, RESOLV_FILENAME, SANDBOX_NS_PATH, UTS_NAMESPACE,
IPC_NAMESPACE, KUASAR_STATE_DIR, PID_NAMESPACE, RESOLV_FILENAME, SANDBOX_NS_PATH,
UTS_NAMESPACE,
};

use crate::{
Expand Down Expand Up @@ -137,6 +135,7 @@ lazy_static! {
static ref CLONE_FLAG_TABLE: HashMap<String, CloneFlags> = HashMap::from([
(String::from(IPC_NAMESPACE), CloneFlags::CLONE_NEWIPC),
(String::from(UTS_NAMESPACE), CloneFlags::CLONE_NEWUTS),
(String::from(PID_NAMESPACE), CloneFlags::CLONE_NEWPID),
]);
}

Expand Down Expand Up @@ -170,7 +169,7 @@ async fn start_task_server() -> anyhow::Result<()> {
}
}

late_init_call().await?;
late_init_call(&config).await?;

start_ttrpc_server().await?.start().await?;

Expand Down Expand Up @@ -305,7 +304,7 @@ async fn init_vm_rootfs() -> Result<()> {

// Continue to do initialization that depend on shared path.
// such as adding guest hook, preparing sandbox files and namespaces.
async fn late_init_call() -> Result<()> {
async fn late_init_call(config: &TaskConfig) -> Result<()> {
// Setup DNS, bind mount to /etc/resolv.conf
let dns_file = Path::new(KUASAR_STATE_DIR).join(RESOLV_FILENAME);
if dns_file.exists() {
Expand All @@ -321,7 +320,7 @@ async fn late_init_call() -> Result<()> {
}

// Setup sandbox namespace
setup_sandbox_ns().await?;
setup_sandbox_ns(config.share_pidns).await?;

Ok(())
}
Expand Down Expand Up @@ -368,12 +367,12 @@ async fn start_ttrpc_server() -> anyhow::Result<Server> {
.register_service(sandbox_service))
}

async fn setup_sandbox_ns() -> Result<()> {
setup_persistent_ns(vec![
String::from(IPC_NAMESPACE),
String::from(UTS_NAMESPACE),
])
.await?;
/// Sets up the persistent namespaces used by the sandbox.
///
/// The IPC and UTS namespaces are always requested; the PID namespace is
/// appended only when `share_pidns` is true. Any error from
/// `setup_persistent_ns` is returned to the caller.
async fn setup_sandbox_ns(share_pidns: bool) -> Result<()> {
    let pid_ns = if share_pidns {
        Some(PID_NAMESPACE)
    } else {
        None
    };
    let namespaces: Vec<String> = [IPC_NAMESPACE, UTS_NAMESPACE]
        .iter()
        .copied()
        .chain(pid_ns)
        .map(String::from)
        .collect();
    setup_persistent_ns(namespaces).await
}

Expand All @@ -398,36 +397,69 @@ async fn setup_persistent_ns(ns_types: Vec<String>) -> Result<()> {
.ok_or(other!("bad ns type {}", ns_type))?;
}

let operator = move || -> anyhow::Result<()> {
unshare(clone_type)?;
fork_sandbox(ns_types, clone_type)?;

Ok(())
}

/// Forks a helper process that unshares the namespaces in `clone_type` and has
/// them bind-mounted by `mount_ns`.
///
/// * `ns_types` - namespace names handed to `mount_ns`; also inspected for
///   `PID_NAMESPACE` to decide whether a second fork is needed.
/// * `clone_type` - flags passed to `unshare` inside the forked child.
///
/// The caller blocks on the read end of a pipe until the read returns, then
/// yields `Ok(())` whatever the read produced. Nothing is ever written to the
/// pipe, so in practice the read returns once every copy of the write end has
/// been closed.
///
/// # Errors
///
/// Only a failure to create the pipe or to perform the first `fork` is
/// returned. Failures in the child (`unshare`, the second `fork`) panic there
/// through `unwrap()` and are not reported back to the caller.
/// NOTE(review): confirm that silently continuing after a failed child is intended.
fn fork_sandbox(ns_types: Vec<String>, clone_type: CloneFlags) -> Result<()> {
debug!("fork sandbox process {:?}, {:b}", ns_types, clone_type);
// The pipe is used purely as a completion signal between the processes.
let (r, w) = pipe().map_err(other_error!(e, "create pipe when fork sandbox error"))?;
// NOTE(review): the child goes on to log and format strings after fork();
// if this process is multi-threaded at this point, confirm that is acceptable.
match unsafe { fork().map_err(other_error!(e, "failed to fork"))? } {
ForkResult::Parent { child } => {
debug!("forked process {} for the sandbox", child);
// Close our own write end so the read below is not kept open by this process.
drop(w);
let mut resp = [0u8; 4];
// Block until the pipe is closed by the child side; the bytes read and any
// read error are deliberately ignored.
nix::unistd::read(r.as_raw_fd(), &mut resp).unwrap_or_default();
Ok(())
}
ForkResult::Child => {
// The child never reads from the pipe.
drop(r);
unshare(clone_type).unwrap();
if !ns_types.iter().any(|n| n == PID_NAMESPACE) {
// No PID namespace requested: mount this process's own namespaces and exit.
debug!("mount namespaces in child");
mount_ns(getpid(), &ns_types);
exit(0);
}
// A shared PID namespace is requested: fork a process that only pauses, to act
// as PID 1 of the shared PID namespace.
match unsafe { fork().unwrap() } {
ForkResult::Parent { child } => {
// The intermediate process mounts the namespaces of the pausing grandchild
// (looked up by its pid) and then exits.
mount_ns(child, &ns_types);
exit(0);
}
ForkResult::Child => {
// NOTE(review): despite the log text, this process mounts nothing; the
// mounting is done by the intermediate process in the branch above.
debug!("mount namespaces in grand child");
// Release this process's copy of the write end so the waiting caller is
// not blocked for as long as this process lives.
drop(w);
// Stay alive forever; `pause()` returns after a handled signal, hence the loop.
loop {
pause();
}
}
}
}
}
}

// set hostname
fn mount_ns(pid: Pid, ns_types: &Vec<String>) {
if ns_types.iter().any(|n| n == UTS_NAMESPACE) {
let hostname = std::fs::read_to_string(Path::new(KUASAR_STATE_DIR).join(HOSTNAME_FILENAME))
.map(|s| s.trim().to_string())
.unwrap_or_default();
if !hostname.is_empty() {
nix::unistd::sethostname(hostname)?;
}

for ns_type in &ns_types {
let sandbox_ns_path = format!("{}/{}", SANDBOX_NS_PATH, ns_type);
let ns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), ns_type);
mount(
Some("none"),
Some(ns_path.as_str()),
&["bind".to_string()],
&sandbox_ns_path,
)?;
}
Ok(())
};

thread::spawn(move || {
if let Err(e) = operator() {
error!("setup persistent ns failed: {:?}", e);
exit(-1)
debug!("set hostname for sandbox: {}", hostname);
nix::unistd::sethostname(hostname).unwrap();
}
});

Ok(())
}
for ns_type in ns_types {
let sandbox_ns_path = format!("{}/{}", SANDBOX_NS_PATH, ns_type);
let ns_path = format!("/proc/{}/ns/{}", pid, ns_type);
debug!("mount {} to {}", ns_path, sandbox_ns_path);
mount(
Some("none"),
Some(ns_path.as_str()),
&["bind".to_string()],
&sandbox_ns_path,
)
.unwrap();
}
}
Loading