Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement basic foreground mode #1656

Merged
merged 4 commits into from
Mar 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions crates/libcontainer/src/container/init_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub struct InitContainerBuilder<'a> {
base: ContainerBuilder<'a>,
bundle: PathBuf,
use_systemd: bool,
detached: bool,
}

impl<'a> InitContainerBuilder<'a> {
Expand All @@ -31,6 +32,7 @@ impl<'a> InitContainerBuilder<'a> {
base: builder,
bundle,
use_systemd: true,
detached: true,
}
}

Expand All @@ -40,6 +42,11 @@ impl<'a> InitContainerBuilder<'a> {
self
}

pub fn with_detach(mut self, detached: bool) -> Self {
self.detached = detached;
self
}

/// Creates a new container
pub fn build(self) -> Result<Container> {
let spec = self.load_spec().context("failed to load spec")?;
Expand Down Expand Up @@ -90,15 +97,11 @@ impl<'a> InitContainerBuilder<'a> {
notify_path,
container: Some(container.clone()),
preserve_fds: self.base.preserve_fds,
// TODO: This should be set properly based on how the command is
// given. For now, set the detached to true because this is what
// `youki create` defaults to.
detached: true,
detached: self.detached,
executor_manager: self.base.executor_manager,
};

// TODO: Fix waiting on this pid (init process) when detached = false.
let _pid = builder_impl.create()?;
builder_impl.create()?;

container.refresh_state()?;

Expand Down
8 changes: 4 additions & 4 deletions crates/libcontainer/src/notify_socket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ impl NotifyListener {
// and chdir back after the socket is created.
let workdir = socket_path.parent().unwrap();
let socket_name = socket_path.file_name().unwrap();
let cwd = unistd::getcwd().context("Failed to get cwd")?;
let cwd = unistd::getcwd().context("failed to get cwd")?;
unistd::chdir(workdir).context(format!(
"Failed to chdir into {}",
"failed to chdir into {}",
workdir.to_str().unwrap()
))?;
let stream = UnixListener::bind(socket_name)
.context(format!("Failed to bind {}", socket_name.to_str().unwrap()))?;
.context(format!("failed to bind {}", socket_name.to_str().unwrap()))?;
unistd::chdir(&cwd)
.context(format!("Failed to chdir back to {}", cwd.to_str().unwrap()))?;
.context(format!("failed to chdir back to {}", cwd.to_str().unwrap()))?;

Ok(Self { socket: stream })
}
Expand Down
3 changes: 3 additions & 0 deletions crates/liboci-cli/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@ pub struct Run {
/// name of the container instance to be started
#[clap(value_parser = clap::builder::NonEmptyStringValueParser::new(), required = true)]
pub container_id: String,
/// Detach from the container process
#[clap(short, long)]
pub detach: bool,
}
1 change: 1 addition & 0 deletions crates/youki/src/commands/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub fn create(args: Create, root_path: PathBuf, systemd_cgroup: bool) -> Result<
.validate_id()?
.as_init(&args.bundle)
.with_systemd(systemd_cgroup)
.with_detach(true)
.build()?;

Ok(())
Expand Down
181 changes: 177 additions & 4 deletions crates/youki/src/commands/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,19 @@ use std::path::PathBuf;
use anyhow::{Context, Result};
use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::create_syscall};
use liboci_cli::Run;
use nix::{
sys::{
signal::{self, kill},
signalfd::SigSet,
wait::{waitpid, WaitPidFlag, WaitStatus},
},
unistd::Pid,
};

use crate::workload::executor::default_executors;

pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> {
pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result<i32> {
let syscall = create_syscall();
// TODO: `youki run` should support passing in `detached` flags. Defaults to
// detached = true right now.
let mut container = ContainerBuilder::new(args.container_id.clone(), syscall.as_ref())
.with_executor(default_executors())?
.with_pid_file(args.pid_file.as_ref())?
Expand All @@ -19,9 +25,176 @@ pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> {
.validate_id()?
.as_init(&args.bundle)
.with_systemd(systemd_cgroup)
.with_detach(args.detach)
.build()?;

container
.start()
.with_context(|| format!("failed to start container {}", args.container_id))
.with_context(|| format!("failed to start container {}", args.container_id))?;

if args.detach {
return Ok(0);
}

// Using `debug_assert` here rather than returning an error because this is
// a invariant. The design when the code path arrives to this point, is that
// the container state must have recorded the container init pid.
debug_assert!(
container.pid().is_some(),
"expects a container init pid in the container state"
);
handle_foreground(container.pid().unwrap())
}

// handle_foreground will match the `runc` behavior running the foreground mode.
// The youki main process will wait and reap the container init process. The
// youki main process also forwards most of the signals to the container init
// process.
fn handle_foreground(init_pid: Pid) -> Result<i32> {
utam0k marked this conversation as resolved.
Show resolved Hide resolved
// We mask all signals here and forward most of the signals to the container
// init process.
let signal_set = SigSet::all();
signal_set
.thread_set_mask()
.with_context(|| "failed to call pthread_sigmask")?;
loop {
match signal_set
.wait()
.with_context(|| "failed to call sigwait")?
{
signal::SIGCHLD => {
// Reap all child until either container init process exits or
// no more child to be reaped. Once the container init process
// exits we can then return.
loop {
match waitpid(None, Some(WaitPidFlag::WNOHANG))? {
WaitStatus::Exited(pid, status) => {
if pid.eq(&init_pid) {
return Ok(status);
}

// Else, some random child process exited, ignoring...
}
WaitStatus::Signaled(pid, signal, _) => {
if pid.eq(&init_pid) {
return Ok(signal as i32);
}

// Else, some random child process exited, ignoring...
}
WaitStatus::StillAlive => {
// No more child to reap.
break;
}
_ => {}
}
}
}
signal::SIGURG => {
// In `runc`, SIGURG is used by go runtime and should not be forwarded to
// the container process. Here, we just ignore the signal.
}
signal::SIGWINCH => {
// TODO: resize the terminal
yihuaf marked this conversation as resolved.
Show resolved Hide resolved
}
signal => {
// There is nothing we can do if we fail to forward the signal.
let _ = kill(init_pid, Some(signal));
}
}
}
}

#[cfg(test)]
mod tests {
use std::time::Duration;

use nix::{
sys::{signal::Signal::SIGKILL, wait},
unistd,
};

use super::*;

#[test]
fn test_foreground_forward_sigkill() -> Result<()> {
// To set up the test correctly, we need to run the test in dedicated
// process, so the rust unit test runtime and other unit tests will not
// mess with the signal handling. We use `sigkill` as a simple way to
// make sure the signal is properly forwarded. In this test, P0 is the
// rust process that runs this unit test (in a thread). P1 mocks youki
// main and P2 mocks the container init process
match unsafe { unistd::fork()? } {
unistd::ForkResult::Parent { child } => {
// Inside P0
//
// We need to make sure that the child process has entered into
// the signal forwarding loops. There is no way to 100% sync
// that the child has executed the for loop waiting to forward
// the signal. There are sync mechanisms with condvar or
// channels to make it as close to calling the handle_foreground
// function as possible, but still have a tiny (highly unlikely
// but probable) window that a race can still happen. So instead
// we just wait for 1 second for everything to settle. In
// general, I don't like sleep in tests to avoid race condition,
// but I'd rather not over-engineer this now. We can revisit
// this later if the test becomes flaky.
std::thread::sleep(Duration::from_secs(1));
// Send the `sigkill` signal to P1 who will forward the signal
// to P2. P2 will then exit and send a sigchld to P1. P1 will
// then reap P2 and exits. In P0, we can then reap P1.
kill(child, SIGKILL)?;
wait::waitpid(child, None)?;
}
unistd::ForkResult::Child => {
// Inside P1. Fork P2 as mock container init process and run
// signal handler process inside.
match unsafe { unistd::fork()? } {
unistd::ForkResult::Parent { child } => {
// Inside P1.
handle_foreground(child)?;
}
unistd::ForkResult::Child => {
// Inside P2. This process block and waits the `sigkill`
// from the parent. Use thread::sleep here with a long
// duration to minimic blocking forever.
std::thread::sleep(Duration::from_secs(3600));
}
};
}
};

Ok(())
}

#[test]
fn test_foreground_exit() -> Result<()> {
// The setup is similar to `handle_foreground`, but instead of
// forwarding signal, the container init process will exit. Again, we
// use `sleep` to simulate the conditions to aovid fine grained
// synchronization for now.
match unsafe { unistd::fork()? } {
unistd::ForkResult::Parent { child } => {
// Inside P0
std::thread::sleep(Duration::from_secs(1));
wait::waitpid(child, None)?;
}
unistd::ForkResult::Child => {
// Inside P1. Fork P2 as mock container init process and run
// signal handler process inside.
match unsafe { unistd::fork()? } {
unistd::ForkResult::Parent { child } => {
// Inside P1.
handle_foreground(child)?;
}
unistd::ForkResult::Child => {
// Inside P2. The process exits after 1 second.
std::thread::sleep(Duration::from_secs(1));
}
};
}
};

Ok(())
}
}
8 changes: 7 additions & 1 deletion crates/youki/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,13 @@ fn main() -> Result<()> {
CommonCmd::Pause(pause) => commands::pause::pause(pause, root_path),
CommonCmd::Ps(ps) => commands::ps::ps(ps, root_path),
CommonCmd::Resume(resume) => commands::resume::resume(resume, root_path),
CommonCmd::Run(run) => commands::run::run(run, root_path, systemd_cgroup),
CommonCmd::Run(run) => match commands::run::run(run, root_path, systemd_cgroup) {
Ok(exit_code) => std::process::exit(exit_code),
Err(e) => {
eprintln!("run failed : {e}");
std::process::exit(-1);
}
},
CommonCmd::Spec(spec) => commands::spec_json::spec(spec),
CommonCmd::Update(update) => commands::update::update(update, root_path),
},
Expand Down