diff --git a/Cargo.lock b/Cargo.lock index b8c340835..f085f0dac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1736,7 +1736,7 @@ dependencies = [ "flate2", "libcgroups", "libcontainer", - "nix", + "nix 0.27.1", "num_cpus", "oci-spec", "once_cell", @@ -1895,7 +1895,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75adb4021282a72ca63ebbc0e4247750ad74ede68ff062d247691072d709ad8b" dependencies = [ "cc", - "nix", + "nix 0.26.2", "num_cpus", "pkg-config", ] @@ -1918,7 +1918,7 @@ dependencies = [ "libbpf-sys", "libc", "mockall", - "nix", + "nix 0.27.1", "oci-spec", "procfs", "quickcheck", @@ -1944,7 +1944,7 @@ dependencies = [ "libc", "libcgroups", "libseccomp", - "nix", + "nix 0.27.1", "oci-spec", "once_cell", "prctl", @@ -2227,6 +2227,18 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.4.0", + "cfg-if", + "libc", + "memoffset 0.9.0", +] + [[package]] name = "no-std-net" version = "0.6.0" @@ -2602,7 +2614,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "059a34f111a9dee2ce1ac2826a68b24601c4298cfeb1a587c3cb493d5ab46f52" dependencies = [ "libc", - "nix", + "nix 0.26.2", ] [[package]] @@ -3105,7 +3117,7 @@ name = "runtimetest" version = "0.0.1" dependencies = [ "anyhow", - "nix", + "nix 0.27.1", "oci-spec", ] @@ -5726,7 +5738,7 @@ dependencies = [ "libcgroups", "libcontainer", "liboci-cli", - "nix", + "nix 0.27.1", "once_cell", "pentacle", "procfs", diff --git a/crates/libcgroups/Cargo.toml b/crates/libcgroups/Cargo.toml index 5ff1bf265..aa38f0822 100644 --- a/crates/libcgroups/Cargo.toml +++ b/crates/libcgroups/Cargo.toml @@ -16,21 +16,21 @@ keywords = ["youki", "container", "cgroups"] default = ["v1", "v2", "systemd"] v1 = [] v2 = [] -systemd = ["v2"] -cgroupsv2_devices = ["rbpf", "libbpf-sys", "errno", "libc"] +systemd = ["v2", "nix/socket", "nix/uio"] +cgroupsv2_devices = ["rbpf", "libbpf-sys", "errno", "libc", "nix/dir"] [dependencies] -nix = "0.26.2" +nix = { version = "0.27.1", features = ["signal", "user", "fs"] } procfs = "0.15.1" oci-spec = { version = "~0.6.2", features = ["runtime"] } fixedbitset = "0.4.2" serde = { version = "1.0", features = ["derive"] } -rbpf = {version = "0.2.0", optional = true } +rbpf = { version = "0.2.0", optional = true } libbpf-sys = { version = "1.2.1", optional = true } errno = { version = "0.3.5", optional = true } libc = { version = "0.2.149", optional = true } thiserror = "1.0.49" -tracing = { version = "0.1.37", features = ["attributes"]} +tracing = { version = "0.1.37", features = ["attributes"] } [dev-dependencies] anyhow = "1.0" diff --git a/crates/libcgroups/src/systemd/dbus_native/dbus.rs b/crates/libcgroups/src/systemd/dbus_native/dbus.rs index df21f191e..3f5a46c72 100644 --- a/crates/libcgroups/src/systemd/dbus_native/dbus.rs +++ b/crates/libcgroups/src/systemd/dbus_native/dbus.rs @@ -7,6 +7,7 @@ use super::utils::{DbusError, Result, SystemdClientError}; use nix::sys::socket; use std::collections::HashMap; use std::io::{IoSlice, IoSliceMut}; +use std::os::fd::AsRawFd; use std::path::PathBuf; use std::sync::atomic::{AtomicU32, Ordering}; @@ -121,17 +122,18 @@ impl DbusConnection { /// Open a new dbus connection to given address /// authenticating as user with given uid pub fn new(addr: &str, uid: u32, system: bool) -> Result { - let socket = socket::socket( + // Use ManuallyDrop to keep the socket open. + let socket = std::mem::ManuallyDrop::new(socket::socket( socket::AddressFamily::Unix, socket::SockType::Stream, socket::SockFlag::empty(), None, - )?; + )?); let addr = socket::UnixAddr::new(addr)?; - socket::connect(socket, &addr)?; + socket::connect(socket.as_raw_fd(), &addr)?; let mut dbus = Self { - socket, + socket: socket.as_raw_fd(), msg_ctr: AtomicU32::new(0), id: None, system, @@ -237,11 +239,11 @@ impl DbusConnection { let mut ret = Vec::with_capacity(512); loop { let mut reply: [u8; REPLY_BUF_SIZE] = [0_u8; REPLY_BUF_SIZE]; - let reply_buffer = IoSliceMut::new(&mut reply[0..]); + let mut reply_buffer = [IoSliceMut::new(&mut reply[0..])]; let reply_rcvd = socket::recvmsg::<()>( self.socket, - &mut [reply_buffer], + &mut reply_buffer, None, socket::MsgFlags::empty(), )?; diff --git a/crates/libcontainer/Cargo.toml b/crates/libcontainer/Cargo.toml index e4b462840..3a1005445 100644 --- a/crates/libcontainer/Cargo.toml +++ b/crates/libcontainer/Cargo.toml @@ -22,23 +22,35 @@ cgroupsv2_devices = ["libcgroups/cgroupsv2_devices"] [dependencies] bitflags = "2.4.0" caps = "0.5.5" -chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] } +chrono = { version = "0.4", default-features = false, features = [ + "clock", + "serde", +] } fastrand = "^2.0.1" futures = { version = "0.3", features = ["thread-pool"] } libc = "0.2.149" -nix = "0.26.2" +nix = { version = "0.27.1", features = [ + "socket", + "sched", + "mount", + "mman", + "resource", + "dir", + "term", + "hostname", +] } oci-spec = { version = "~0.6.2", features = ["runtime"] } once_cell = "1.18.0" procfs = "0.15.1" prctl = "1.0.0" libcgroups = { version = "0.2.0", path = "../libcgroups", default-features = false } -libseccomp = { version = "0.3.0", optional=true } +libseccomp = { version = "0.3.0", optional = true } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" rust-criu = "0.4.0" regex = "1.9.6" thiserror = "1.0.49" -tracing = { version = "0.1.37", features = ["attributes"]} +tracing = { version = "0.1.37", features = ["attributes"] } safe-path = "0.1.0" [dev-dependencies] diff --git a/crates/libcontainer/src/channel.rs b/crates/libcontainer/src/channel.rs index a772a10d3..23d255f94 100644 --- a/crates/libcontainer/src/channel.rs +++ b/crates/libcontainer/src/channel.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use std::{ io::{IoSlice, IoSliceMut}, marker::PhantomData, - os::unix::prelude::RawFd, + os::{fd::AsRawFd, unix::prelude::RawFd}, }; #[derive(Debug, thiserror::Error)] @@ -210,10 +210,16 @@ where // Use socketpair as the underlying pipe. fn unix_channel() -> Result<(RawFd, RawFd), ChannelError> { - Ok(socket::socketpair( + let (f1, f2) = socket::socketpair( socket::AddressFamily::Unix, socket::SockType::SeqPacket, None, socket::SockFlag::SOCK_CLOEXEC, - )?) + )?; + // It is not straightforward to share the OwnedFd across forks, so we + // treat them as i32. We use ManuallyDrop to keep the connection open. + let f1 = std::mem::ManuallyDrop::new(f1); + let f2 = std::mem::ManuallyDrop::new(f2); + + Ok((f1.as_raw_fd(), f2.as_raw_fd())) } diff --git a/crates/libcontainer/src/process/fork.rs b/crates/libcontainer/src/process/fork.rs index 114a22440..c0e055396 100644 --- a/crates/libcontainer/src/process/fork.rs +++ b/crates/libcontainer/src/process/fork.rs @@ -1,4 +1,4 @@ -use std::{ffi::c_int, num::NonZeroUsize}; +use std::{ffi::c_int, fs::File, num::NonZeroUsize}; use libc::SIGCHLD; use nix::{ @@ -164,12 +164,14 @@ fn clone(cb: CloneCb, flags: u64, exit_signal: Option) -> Result` doesn't have any meaning because we won't use it. + mman::mmap::( None, NonZeroUsize::new(default_stack_size).ok_or(CloneError::ZeroStackSize)?, mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE, mman::MapFlags::MAP_PRIVATE | mman::MapFlags::MAP_ANONYMOUS | mman::MapFlags::MAP_STACK, - -1, + None, 0, ) .map_err(CloneError::StackAllocation)? diff --git a/crates/libcontainer/src/process/seccomp_listener.rs b/crates/libcontainer/src/process/seccomp_listener.rs index 608ff725b..a4cf7f46e 100644 --- a/crates/libcontainer/src/process/seccomp_listener.rs +++ b/crates/libcontainer/src/process/seccomp_listener.rs @@ -5,7 +5,7 @@ use nix::{ unistd, }; use oci_spec::runtime; -use std::{io::IoSlice, path::Path}; +use std::{io::IoSlice, os::fd::AsRawFd, path::Path}; use super::channel; @@ -76,7 +76,7 @@ fn sync_seccomp_send_msg(listener_path: &Path, msg: &[u8], fd: i32) -> Result<() ); SeccompListenerError::UnixOther(err) })?; - socket::connect(socket, &unix_addr).map_err(|err| { + socket::connect(socket.as_raw_fd(), &unix_addr).map_err(|err| { tracing::error!( ?err, ?listener_path, @@ -91,15 +91,19 @@ fn sync_seccomp_send_msg(listener_path: &Path, msg: &[u8], fd: i32) -> Result<() let iov = [IoSlice::new(msg)]; let fds = [fd]; let cmsgs = socket::ControlMessage::ScmRights(&fds); - socket::sendmsg::(socket, &iov, &[cmsgs], socket::MsgFlags::empty(), None).map_err( - |err| { - tracing::error!(?err, "failed to write container state to seccomp listener"); - SeccompListenerError::UnixOther(err) - }, - )?; + socket::sendmsg::( + socket.as_raw_fd(), + &iov, + &[cmsgs], + socket::MsgFlags::empty(), + None, + ) + .map_err(|err| { + tracing::error!(?err, "failed to write container state to seccomp listener"); + SeccompListenerError::UnixOther(err) + })?; // The spec requires the listener socket to be closed immediately after sending. - let _ = unistd::close(socket); - + drop(socket); Ok(()) } diff --git a/crates/libcontainer/src/syscall/linux.rs b/crates/libcontainer/src/syscall/linux.rs index 1fc247347..0baa0b952 100644 --- a/crates/libcontainer/src/syscall/linux.rs +++ b/crates/libcontainer/src/syscall/linux.rs @@ -13,6 +13,7 @@ use nix::{ use oci_spec::runtime::LinuxRlimit; use std::ffi::{CStr, CString, OsStr}; use std::fs; +use std::os::fd::BorrowedFd; use std::os::unix::ffi::OsStrExt; use std::os::unix::fs::symlink; use std::os::unix::io::RawFd; @@ -305,7 +306,8 @@ impl Syscall for LinuxSyscall { /// Set namespace for process fn set_ns(&self, rawfd: i32, nstype: CloneFlags) -> Result<()> { - nix::sched::setns(rawfd, nstype)?; + let fd = unsafe { BorrowedFd::borrow_raw(rawfd) }; + nix::sched::setns(fd, nstype)?; Ok(()) } diff --git a/crates/libcontainer/src/tty.rs b/crates/libcontainer/src/tty.rs index e43ecae58..b1023dab9 100644 --- a/crates/libcontainer/src/tty.rs +++ b/crates/libcontainer/src/tty.rs @@ -81,16 +81,18 @@ pub fn setup_console_socket( linked: linked.to_path_buf().into(), console_socket_path: console_socket_path.to_path_buf().into(), })?; - - let mut csocketfd = socket::socket( - socket::AddressFamily::Unix, - socket::SockType::Stream, - socket::SockFlag::empty(), - None, - ) - .map_err(|err| TTYError::CreateConsoleSocketFd { source: err })?; - csocketfd = match socket::connect( - csocketfd, + // Using ManuallyDrop to keep the socket open. + let csocketfd = std::mem::ManuallyDrop::new( + socket::socket( + socket::AddressFamily::Unix, + socket::SockType::Stream, + socket::SockFlag::empty(), + None, + ) + .map_err(|err| TTYError::CreateConsoleSocketFd { source: err })?, + ); + let csocketfd = match socket::connect( + csocketfd.as_raw_fd(), &socket::UnixAddr::new(socket_name).map_err(|err| TTYError::InvalidSocketName { source: err, socket_name: socket_name.to_string(), @@ -101,7 +103,7 @@ pub fn setup_console_socket( source: errno, socket_name: socket_name.to_string(), })?, - Ok(()) => csocketfd, + Ok(()) => csocketfd.as_raw_fd(), }; Ok(csocketfd) } @@ -113,7 +115,13 @@ pub fn setup_console(console_fd: &RawFd) -> Result<()> { .map_err(|err| TTYError::CreatePseudoTerminal { source: err })?; let pty_name: &[u8] = b"/dev/ptmx"; let iov = [IoSlice::new(pty_name)]; - let fds = [openpty_result.master]; + + let [master, slave] = [openpty_result.master, openpty_result.slave]; + // Use ManuallyDrop to keep FDs open. + let master = std::mem::ManuallyDrop::new(master); + let slave = std::mem::ManuallyDrop::new(slave); + + let fds = [master.as_raw_fd()]; let cmsg = socket::ControlMessage::ScmRights(&fds); socket::sendmsg::( console_fd.as_raw_fd(), @@ -124,10 +132,10 @@ pub fn setup_console(console_fd: &RawFd) -> Result<()> { ) .map_err(|err| TTYError::SendPtyMaster { source: err })?; - if unsafe { libc::ioctl(openpty_result.slave, libc::TIOCSCTTY) } < 0 { + if unsafe { libc::ioctl(slave.as_raw_fd(), libc::TIOCSCTTY) } < 0 { tracing::warn!("could not TIOCSCTTY"); }; - let slave = openpty_result.slave; + let slave = slave.as_raw_fd(); connect_stdio(&slave, &slave, &slave)?; close(console_fd.as_raw_fd()).map_err(|err| TTYError::CloseConsoleSocket { source: err })?; diff --git a/crates/libcontainer/src/utils.rs b/crates/libcontainer/src/utils.rs index 78186d093..a15ed933d 100644 --- a/crates/libcontainer/src/utils.rs +++ b/crates/libcontainer/src/utils.rs @@ -4,7 +4,6 @@ use std::collections::HashMap; use std::fs::{self, DirBuilder, File}; use std::os::linux::fs::MetadataExt; use std::os::unix::fs::DirBuilderExt; -use std::os::unix::prelude::AsRawFd; use std::path::{Component, Path, PathBuf}; use nix::sys::stat::Mode; @@ -249,7 +248,7 @@ pub fn ensure_procfs(path: &Path) -> Result<(), EnsureProcfsError> { tracing::error!(?err, ?path, "failed to open procfs file"); err })?; - let fstat_info = statfs::fstatfs(&procfs_fd.as_raw_fd()).map_err(|err| { + let fstat_info = statfs::fstatfs(&procfs_fd).map_err(|err| { tracing::error!(?err, ?path, "failed to fstatfs the procfs"); err })?; diff --git a/crates/youki/Cargo.toml b/crates/youki/Cargo.toml index c4924d20d..d961e5f3d 100644 --- a/crates/youki/Cargo.toml +++ b/crates/youki/Cargo.toml @@ -31,7 +31,7 @@ chrono = { version = "0.4", default-features = false, features = ["clock", "serd libcgroups = { version = "0.2.0", path = "../libcgroups", default-features = false } libcontainer = { version = "0.2.0", path = "../libcontainer", default-features = false } liboci-cli = { version = "0.2.0", path = "../liboci-cli" } -nix = "0.26.2" +nix = "0.27.1" once_cell = "1.18.0" pentacle = "1.0.0" procfs = "0.15.1" diff --git a/tests/rust-integration-tests/integration_test/Cargo.toml b/tests/rust-integration-tests/integration_test/Cargo.toml index c9be1b97f..09707ba5e 100644 --- a/tests/rust-integration-tests/integration_test/Cargo.toml +++ b/tests/rust-integration-tests/integration_test/Cargo.toml @@ -9,7 +9,7 @@ chrono = { version = "0.4", default-features = false, features = ["clock"] } flate2 = "1.0" libcgroups = { path = "../../../crates/libcgroups" } libcontainer = { path = "../../../crates/libcontainer" } -nix = "0.26.2" +nix = "0.27.1" num_cpus = "1.16" oci-spec = { version = "0.6.1", features = ["runtime"] } once_cell = "1.18.0" diff --git a/tests/rust-integration-tests/integration_test/src/tests/seccomp_notify/seccomp_agent.rs b/tests/rust-integration-tests/integration_test/src/tests/seccomp_notify/seccomp_agent.rs index 6d5370f58..10664b2e0 100644 --- a/tests/rust-integration-tests/integration_test/src/tests/seccomp_notify/seccomp_agent.rs +++ b/tests/rust-integration-tests/integration_test/src/tests/seccomp_notify/seccomp_agent.rs @@ -4,7 +4,14 @@ use nix::{ sys::socket::{self, UnixAddr}, unistd, }; -use std::{io::IoSliceMut, os::unix::prelude::RawFd, path::Path}; +use std::{ + io::IoSliceMut, + os::{ + fd::{AsFd, AsRawFd}, + unix::prelude::RawFd, + }, + path::Path, +}; const DEFAULT_BUFFER_SIZE: usize = 4096; @@ -24,14 +31,14 @@ pub fn recv_seccomp_listener(seccomp_listener: &Path) -> SeccompAgentResult { None, ) .context("failed to create seccomp listener socket")?; - socket::bind(socket, &addr).context("failed to bind to seccomp listener socket")?; + + socket::bind(socket.as_raw_fd(), &addr).context("failed to bind to seccomp listener socket")?; // Force the backlog to be 1 so in the case of an error, only one connection // from clients will be waiting. - socket::listen(socket, 1).context("failed to listen on seccomp listener")?; - let conn = match socket::accept(socket) { + socket::listen(&socket.as_fd(), 1).context("failed to listen on seccomp listener")?; + let conn = match socket::accept(socket.as_raw_fd()) { Ok(conn) => conn, Err(e) => { - let _ = unistd::close(socket); bail!("failed to accept connection: {}", e); } }; @@ -47,15 +54,13 @@ pub fn recv_seccomp_listener(seccomp_listener: &Path) -> SeccompAgentResult { Ok(msg) => msg, Err(e) => { let _ = unistd::close(conn); - let _ = unistd::close(socket); bail!("failed to receive message: {}", e); } }; // We received the message correctly here, so we can now safely close the socket and connection. let _ = unistd::close(conn); - let _ = unistd::close(socket); - + drop(socket); // We are expecting 1 SCM_RIGHTS message with 1 fd. let cmsg = msg .cmsgs() diff --git a/tests/rust-integration-tests/runtimetest/Cargo.toml b/tests/rust-integration-tests/runtimetest/Cargo.toml index b0ee75729..786d784c8 100644 --- a/tests/rust-integration-tests/runtimetest/Cargo.toml +++ b/tests/rust-integration-tests/runtimetest/Cargo.toml @@ -5,6 +5,6 @@ edition = "2021" [dependencies] oci-spec = { version = "0.6.1", features = ["runtime"] } -nix = "0.26.2" +nix = "0.27.1" anyhow = "1.0"