From 3d7384f37a105d5a40b8f6e2c0464a7649af9812 Mon Sep 17 00:00:00 2001 From: Yashodhan Joshi Date: Tue, 2 Aug 2022 15:03:37 +0530 Subject: [PATCH 1/3] Possible solution for exec return status issue This attempts to solve the problem by bubbling up the exit status form init process to intermediate and from intermediate to the main/ exec calling process. The init process's image will be replaced after exec call, and the intermediate process will wait for this process to exit. Once exited, the intemediate process itself will exit with the same exit status as the init process. THe exec calling process (the main process) will wait for this intermediate process, and will exit with the its status. This also allows calling multiple execs for the same container, without us needing to generate random hash For each pipe, which will potentially return the exit status. --- .../src/container/builder_impl.rs | 5 ++- .../src/process/container_init_process.rs | 11 +----- .../process/container_intermediate_process.rs | 9 +++-- .../src/process/container_main_process.rs | 37 ++++++------------- crates/libcontainer/src/process/fork.rs | 15 ++++---- crates/youki/src/commands/exec.rs | 37 ++++--------------- 6 files changed, 35 insertions(+), 79 deletions(-) diff --git a/crates/libcontainer/src/container/builder_impl.rs b/crates/libcontainer/src/container/builder_impl.rs index fc8f0d9a39..ed8f5de388 100644 --- a/crates/libcontainer/src/container/builder_impl.rs +++ b/crates/libcontainer/src/container/builder_impl.rs @@ -122,7 +122,8 @@ impl<'a> ContainerBuilderImpl<'a> { cgroup_manager: cmanager, }; - let init_pid = process::container_main_process::container_main_process(&container_args)?; + let (intermediate, init_pid) = + process::container_main_process::container_main_process(&container_args)?; // if file to write the pid to is specified, write pid of the child if let Some(pid_file) = &self.pid_file { @@ -139,7 +140,7 @@ impl<'a> ContainerBuilderImpl<'a> { .context("Failed to save container state")?; } - Ok(init_pid) + Ok(intermediate) } fn cleanup_container(&self) -> Result<()> { diff --git a/crates/libcontainer/src/process/container_init_process.rs b/crates/libcontainer/src/process/container_init_process.rs index 2a0ff4d903..ade3cf6623 100644 --- a/crates/libcontainer/src/process/container_init_process.rs +++ b/crates/libcontainer/src/process/container_init_process.rs @@ -15,10 +15,7 @@ use nix::unistd::setsid; use nix::unistd::{self, Gid, Uid}; use oci_spec::runtime::{LinuxNamespaceType, Spec, User}; use std::collections::HashMap; -use std::fs::File; -use std::io::Write; use std::os::unix::io::AsRawFd; -use std::os::unix::prelude::FromRawFd; use std::{ env, fs, path::{Path, PathBuf}, @@ -162,7 +159,6 @@ pub fn container_init_process( args: &ContainerArgs, main_sender: &mut channel::MainSender, init_receiver: &mut channel::InitReceiver, - fifo_fd: i32, ) -> Result<()> { let syscall = args.syscall; let spec = args.spec; @@ -417,12 +413,7 @@ pub fn container_init_process( if proc.args().is_some() { ExecutorManager::exec(spec)?; - if fifo_fd != 0 { - let f = &mut unsafe { File::from_raw_fd(fifo_fd) }; - // TODO: impl - write!(f, "1")?; - } - Ok(()) + unreachable!("process image should have been replaced after exec"); } else { bail!("on non-Windows, at least one process arg entry is required") } diff --git a/crates/libcontainer/src/process/container_intermediate_process.rs b/crates/libcontainer/src/process/container_intermediate_process.rs index 02c349cf6a..be263d60ce 100644 --- a/crates/libcontainer/src/process/container_intermediate_process.rs +++ b/crates/libcontainer/src/process/container_intermediate_process.rs @@ -1,6 +1,7 @@ use crate::{namespaces::Namespaces, process::channel, process::fork}; use anyhow::{Context, Error, Result}; use libcgroups::common::CgroupManager; +use nix::sys::wait::{waitpid, WaitStatus}; use nix::unistd::{Gid, Pid, Uid}; use oci_spec::runtime::{LinuxNamespaceType, LinuxResources}; use procfs::process::Process; @@ -14,8 +15,7 @@ pub fn container_intermediate_process( intermediate_chan: &mut (channel::IntermediateSender, channel::IntermediateReceiver), init_chan: &mut (channel::InitSender, channel::InitReceiver), main_sender: &mut channel::MainSender, - fifo_fd: i32, -) -> Result<()> { +) -> Result { let (inter_sender, inter_receiver) = intermediate_chan; let (init_sender, init_receiver) = init_chan; let command = &args.syscall; @@ -96,7 +96,8 @@ pub fn container_intermediate_process( inter_sender .close() .context("failed to close sender in the intermediate process")?; - container_init_process(args, main_sender, init_receiver, fifo_fd) + container_init_process(args, main_sender, init_receiver)?; + Ok(0) })?; // Once we fork the container init process, the job for intermediate process // is done. We notify the container main process about the pid we just @@ -116,7 +117,7 @@ pub fn container_intermediate_process( .close() .context("failed to close unused init sender")?; - Ok(()) + Ok(waitpid(pid, None)?) } fn apply_cgroups( diff --git a/crates/libcontainer/src/process/container_main_process.rs b/crates/libcontainer/src/process/container_main_process.rs index 294ebed397..c8d345622a 100644 --- a/crates/libcontainer/src/process/container_main_process.rs +++ b/crates/libcontainer/src/process/container_main_process.rs @@ -8,14 +8,14 @@ use anyhow::{Context, Result}; use nix::{ sys::{ socket::{self, UnixAddr}, - stat, + wait::WaitStatus, }, - unistd::{self, mkfifo, Pid}, + unistd::{self,Pid}, }; use oci_spec::runtime; use std::{io::IoSlice, path::Path}; -pub fn container_main_process(container_args: &ContainerArgs) -> Result { +pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pid)> { // We use a set of channels to communicate between parent and child process. // Each channel is uni-directional. Because we will pass these channel to // forked process, we have to be deligent about closing any unused channel. @@ -25,33 +25,18 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result { let inter_chan = &mut channel::intermediate_channel()?; let init_chan = &mut channel::init_channel()?; - // TODO: implement Option version - let mut fifo_fd = 0; - // let container_root = &container_args - // .container - // .as_ref() - // .context("container state is required")? - // .root; - let container_root = &std::path::Path::new("/run/youki/tutorial_container/"); - let fifo_path = container_root.join("state.fifo"); - if container_args.init { - mkfifo(&fifo_path, stat::Mode::S_IRWXU).context("failed to create the fifo file.")?; - } - - let mut open_flags = nix::fcntl::OFlag::empty(); - open_flags.insert(nix::fcntl::OFlag::O_PATH); - open_flags.insert(nix::fcntl::OFlag::O_CLOEXEC); - fifo_fd = nix::fcntl::open(&fifo_path, open_flags, stat::Mode::S_IRWXU)?; - log::debug!("fifo_fd: {}", fifo_fd); - let intermediate_pid = fork::container_fork(|| { - container_intermediate_process::container_intermediate_process( + let t = container_intermediate_process::container_intermediate_process( container_args, inter_chan, init_chan, main_sender, - fifo_fd, - ) + )?; + match t { + WaitStatus::Exited(_, s) => Ok(s), + WaitStatus::Signaled(_, sig, _) => Ok(sig as i32), + _ => Ok(0), + } })?; // Close down unused fds. The corresponding fds are duplicated to the // child process during fork. @@ -113,7 +98,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result { log::debug!("init pid is {:?}", init_pid); - Ok(init_pid) + Ok((intermediate_pid, init_pid)) } fn sync_seccomp( diff --git a/crates/libcontainer/src/process/fork.rs b/crates/libcontainer/src/process/fork.rs index 5ac1f32cc2..ac37ef4404 100644 --- a/crates/libcontainer/src/process/fork.rs +++ b/crates/libcontainer/src/process/fork.rs @@ -8,15 +8,16 @@ use nix::unistd::Pid; // using clone, we would have to manually make sure all the variables are // correctly send to the new process, especially Rust borrow checker will be a // lot of hassel to deal with every details. -pub fn container_fork Result<()>>(cb: F) -> Result { +pub fn container_fork Result>(cb: F) -> Result { match unsafe { unistd::fork()? } { unistd::ForkResult::Parent { child } => Ok(child), unistd::ForkResult::Child => { - let ret = if let Err(error) = cb() { - log::debug!("failed to run fork: {:?}", error); - -1 - } else { - 0 + let ret = match cb() { + Err(error) => { + log::debug!("failed to run fork: {:?}", error); + -1 + } + Ok(ec) => ec, }; std::process::exit(ret); } @@ -31,7 +32,7 @@ mod test { #[test] fn test_container_fork() -> Result<()> { - let pid = container_fork(|| Ok(()))?; + let pid = container_fork(|| Ok(0))?; match waitpid(pid, None).expect("wait pid failed.") { WaitStatus::Exited(p, status) => { assert_eq!(pid, p); diff --git a/crates/youki/src/commands/exec.rs b/crates/youki/src/commands/exec.rs index 1e55bd762b..b24becb931 100644 --- a/crates/youki/src/commands/exec.rs +++ b/crates/youki/src/commands/exec.rs @@ -1,17 +1,11 @@ -use anyhow::{bail, Context, Result}; -use nix::{ - libc, - poll::{PollFd, PollFlags}, -}; -use std::{fs::OpenOptions, io::Read, os::unix::prelude::RawFd, path::PathBuf}; +use anyhow::Result; +use nix::sys::wait::{waitpid, WaitStatus}; +use std::path::PathBuf; use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::create_syscall}; use liboci_cli::Exec; -use super::load_container; - pub fn exec(args: Exec, root_path: PathBuf) -> Result { - let container = load_container(&root_path, &args.container_id)?; let syscall = create_syscall(); let pid = ContainerBuilder::new(args.container_id.clone(), syscall.as_ref()) .with_root_path(root_path)? @@ -25,26 +19,9 @@ pub fn exec(args: Exec, root_path: PathBuf) -> Result { .with_container_args(args.command.clone()) .build()?; - let pidfd = pidfd_open(pid.as_raw(), 0)?; - let poll_fd = PollFd::new(pidfd, PollFlags::POLLIN); - nix::poll::poll(&mut [poll_fd], -1).context("failed to wait for the container id")?; - - let fifo_path = &container.root.join("state.fifo"); - println!("fifo_path: {:?}", fifo_path); - let mut f = OpenOptions::new().read(true).open(fifo_path)?; - let mut contents = String::new(); - f.read_to_string(&mut contents)?; - println!("get the value: {:?}", contents); - - // TODO - Ok(0) -} - -fn pidfd_open(pid: libc::pid_t, flags: libc::c_uint) -> Result { - let fd = unsafe { libc::syscall(libc::SYS_pidfd_open, pid, flags) }; - if fd == -1 { - bail!("faild to pifd_open syscall") - } else { - Ok(fd as RawFd) + match waitpid(pid, None)? { + WaitStatus::Exited(_, status) => Ok(status), + WaitStatus::Signaled(_, sig, _) => Ok(sig as i32), + _ => Ok(0), } } From 862bf852ad34dd50f61a747b87a64301cde6b1f9 Mon Sep 17 00:00:00 2001 From: Yashodhan Joshi Date: Tue, 23 Aug 2022 14:47:31 +0530 Subject: [PATCH 2/3] Make the intermediate process wait conditionally, only for exec Now the intermediate process waits for the main process only when exec is called, not when the container is created. This prevents it from waiting for created contianers, for which exec might not be called. This also fixes some formatting issues --- crates/libcontainer/src/container/builder_impl.rs | 8 ++++++-- crates/libcontainer/src/container/init_builder.rs | 1 + crates/libcontainer/src/container/tenant_builder.rs | 1 + .../src/process/container_intermediate_process.rs | 11 +++++++++-- .../src/process/container_main_process.rs | 5 +++-- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/crates/libcontainer/src/container/builder_impl.rs b/crates/libcontainer/src/container/builder_impl.rs index ed8f5de388..31d7548ca4 100644 --- a/crates/libcontainer/src/container/builder_impl.rs +++ b/crates/libcontainer/src/container/builder_impl.rs @@ -38,6 +38,8 @@ pub(super) struct ContainerBuilderImpl<'a> { pub container: Option, /// File descriptos preserved/passed to the container init process. pub preserve_fds: i32, + /// Denotes if the builder is for the exec call or not + pub is_exec_builder: bool, } impl<'a> ContainerBuilderImpl<'a> { @@ -122,8 +124,10 @@ impl<'a> ContainerBuilderImpl<'a> { cgroup_manager: cmanager, }; - let (intermediate, init_pid) = - process::container_main_process::container_main_process(&container_args)?; + let (intermediate, init_pid) = process::container_main_process::container_main_process( + &container_args, + self.is_exec_builder, + )?; // if file to write the pid to is specified, write pid of the child if let Some(pid_file) = &self.pid_file { diff --git a/crates/libcontainer/src/container/init_builder.rs b/crates/libcontainer/src/container/init_builder.rs index e9ef18bc43..e2bd199798 100644 --- a/crates/libcontainer/src/container/init_builder.rs +++ b/crates/libcontainer/src/container/init_builder.rs @@ -87,6 +87,7 @@ impl<'a> InitContainerBuilder<'a> { notify_path, container: Some(container.clone()), preserve_fds: self.base.preserve_fds, + is_exec_builder: false, }; builder_impl.create()?; diff --git a/crates/libcontainer/src/container/tenant_builder.rs b/crates/libcontainer/src/container/tenant_builder.rs index e9be97b632..0661ce0769 100644 --- a/crates/libcontainer/src/container/tenant_builder.rs +++ b/crates/libcontainer/src/container/tenant_builder.rs @@ -128,6 +128,7 @@ impl<'a> TenantContainerBuilder<'a> { notify_path: notify_path.clone(), container: None, preserve_fds: self.base.preserve_fds, + is_exec_builder: true, }; let pid = builder_impl.create()?; diff --git a/crates/libcontainer/src/process/container_intermediate_process.rs b/crates/libcontainer/src/process/container_intermediate_process.rs index be263d60ce..26c1304a56 100644 --- a/crates/libcontainer/src/process/container_intermediate_process.rs +++ b/crates/libcontainer/src/process/container_intermediate_process.rs @@ -1,7 +1,7 @@ use crate::{namespaces::Namespaces, process::channel, process::fork}; use anyhow::{Context, Error, Result}; use libcgroups::common::CgroupManager; -use nix::sys::wait::{waitpid, WaitStatus}; +use nix::sys::wait::{waitpid, WaitStatus}; use nix::unistd::{Gid, Pid, Uid}; use oci_spec::runtime::{LinuxNamespaceType, LinuxResources}; use procfs::process::Process; @@ -15,6 +15,7 @@ pub fn container_intermediate_process( intermediate_chan: &mut (channel::IntermediateSender, channel::IntermediateReceiver), init_chan: &mut (channel::InitSender, channel::InitReceiver), main_sender: &mut channel::MainSender, + wait: bool, ) -> Result { let (inter_sender, inter_receiver) = intermediate_chan; let (init_sender, init_receiver) = init_chan; @@ -117,7 +118,13 @@ pub fn container_intermediate_process( .close() .context("failed to close unused init sender")?; - Ok(waitpid(pid, None)?) + if wait { + Ok(waitpid(pid, None)?) + } else { + // we don't actually want to wait, so + // pid and status doesn't really matter + Ok(WaitStatus::Exited(Pid::from_raw(0), 0)) + } } fn apply_cgroups( diff --git a/crates/libcontainer/src/process/container_main_process.rs b/crates/libcontainer/src/process/container_main_process.rs index c8d345622a..11ee210bc1 100644 --- a/crates/libcontainer/src/process/container_main_process.rs +++ b/crates/libcontainer/src/process/container_main_process.rs @@ -10,12 +10,12 @@ use nix::{ socket::{self, UnixAddr}, wait::WaitStatus, }, - unistd::{self,Pid}, + unistd::{self, Pid}, }; use oci_spec::runtime; use std::{io::IoSlice, path::Path}; -pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pid)> { +pub fn container_main_process(container_args: &ContainerArgs, wait: bool) -> Result<(Pid, Pid)> { // We use a set of channels to communicate between parent and child process. // Each channel is uni-directional. Because we will pass these channel to // forked process, we have to be deligent about closing any unused channel. @@ -31,6 +31,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pi inter_chan, init_chan, main_sender, + wait, )?; match t { WaitStatus::Exited(_, s) => Ok(s), From 8b36be6271fd949e4e648f2b906067f4fa320719 Mon Sep 17 00:00:00 2001 From: Yashodhan Joshi Date: Mon, 29 Aug 2022 20:39:44 +0530 Subject: [PATCH 3/3] Remove the is_exec_builder as it is redundant, replace with !init --- crates/libcontainer/src/container/builder_impl.rs | 8 ++------ crates/libcontainer/src/container/init_builder.rs | 1 - crates/libcontainer/src/container/tenant_builder.rs | 1 - 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/crates/libcontainer/src/container/builder_impl.rs b/crates/libcontainer/src/container/builder_impl.rs index 31d7548ca4..2bd0ebd3e3 100644 --- a/crates/libcontainer/src/container/builder_impl.rs +++ b/crates/libcontainer/src/container/builder_impl.rs @@ -38,8 +38,6 @@ pub(super) struct ContainerBuilderImpl<'a> { pub container: Option, /// File descriptos preserved/passed to the container init process. pub preserve_fds: i32, - /// Denotes if the builder is for the exec call or not - pub is_exec_builder: bool, } impl<'a> ContainerBuilderImpl<'a> { @@ -124,10 +122,8 @@ impl<'a> ContainerBuilderImpl<'a> { cgroup_manager: cmanager, }; - let (intermediate, init_pid) = process::container_main_process::container_main_process( - &container_args, - self.is_exec_builder, - )?; + let (intermediate, init_pid) = + process::container_main_process::container_main_process(&container_args, !self.init)?; // if file to write the pid to is specified, write pid of the child if let Some(pid_file) = &self.pid_file { diff --git a/crates/libcontainer/src/container/init_builder.rs b/crates/libcontainer/src/container/init_builder.rs index e2bd199798..e9ef18bc43 100644 --- a/crates/libcontainer/src/container/init_builder.rs +++ b/crates/libcontainer/src/container/init_builder.rs @@ -87,7 +87,6 @@ impl<'a> InitContainerBuilder<'a> { notify_path, container: Some(container.clone()), preserve_fds: self.base.preserve_fds, - is_exec_builder: false, }; builder_impl.create()?; diff --git a/crates/libcontainer/src/container/tenant_builder.rs b/crates/libcontainer/src/container/tenant_builder.rs index 0661ce0769..e9be97b632 100644 --- a/crates/libcontainer/src/container/tenant_builder.rs +++ b/crates/libcontainer/src/container/tenant_builder.rs @@ -128,7 +128,6 @@ impl<'a> TenantContainerBuilder<'a> { notify_path: notify_path.clone(), container: None, preserve_fds: self.base.preserve_fds, - is_exec_builder: true, }; let pid = builder_impl.create()?;