diff --git a/Cargo.lock b/Cargo.lock index 5961d1c3ce5..8c21f8bfeb1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -236,7 +236,7 @@ checksum = "a1e9efbe14612da0a19fb983059a0b621e9cf6225d7018ecab4f9988215540dc" dependencies = [ "futures-io", "futures-util", - "log", + "log 0.4.21", "pin-project-lite", "tungstenite", ] @@ -483,7 +483,7 @@ checksum = "4b922faaf31122819ec80c4047cc684c6979a087366c069611e33649bf98e18d" dependencies = [ "heck 0.4.1", "indexmap 1.9.3", - "log", + "log 0.4.21", "proc-macro2", "quote", "serde", @@ -689,7 +689,7 @@ dependencies = [ "getopts", "lazy_static", "libc", - "log", + "log 0.4.21", "miow 0.3.7", "regex", "rustfix", @@ -807,7 +807,7 @@ dependencies = [ "cranelift-isle", "gimli 0.26.2", "hashbrown 0.12.3", - "log", + "log 0.4.21", "regalloc2", "smallvec 1.13.1", "target-lexicon 0.12.14", @@ -838,7 +838,7 @@ dependencies = [ "fxhash", "hashbrown 0.12.3", "indexmap 1.9.3", - "log", + "log 0.4.21", "smallvec 1.13.1", ] @@ -856,7 +856,7 @@ checksum = "0d70abacb8cfef3dc8ff7e8836e9c1d70f7967dfdac824a4cd5e30223415aca6" dependencies = [ "cranelift-codegen", "hashbrown 0.12.3", - "log", + "log 0.4.21", "smallvec 1.13.1", "target-lexicon 0.12.14", ] @@ -1411,7 +1411,7 @@ dependencies = [ "serde_path_to_error", "serde_yaml 0.8.26", "sparx", - "time", + "time 0.3.34", "url", "uuid", "wcgi-host", @@ -1435,7 +1435,7 @@ dependencies = [ "serde_path_to_error", "serde_yaml 0.8.26", "sparx", - "time", + "time 0.3.34", "url", "uuid", "wcgi-host", @@ -1590,7 +1590,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9f0c14694cbd524c8720dd69b0e3179344f04ebb5f90f2e4a440c6ea3b2f1ee" dependencies = [ "colored 1.9.4", - "log", + "log 0.4.21", ] [[package]] @@ -1698,6 +1698,19 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "fuse" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e57070510966bfef93662a81cb8aa2b1c7db0964354fa9921434f04b9e8660" +dependencies = [ + "libc", + "log 0.3.9", + "pkg-config", + "thread-scoped", + "time 0.1.45", +] + [[package]] name = "futures" version = "0.1.31" @@ -1830,7 +1843,7 @@ dependencies = [ "cfg-if 1.0.0", "js-sys", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -1896,7 +1909,7 @@ dependencies = [ "async-tungstenite", "futures 0.3.30", "graphql_client", - "log", + "log 0.4.21", "pin-project", "serde", "serde_json", @@ -2004,7 +2017,7 @@ version = "3.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4498fc115fa7d34de968184e473529abb40eeb6be8bc5f7faba3d08c316cb3e3" dependencies = [ - "log", + "log 0.4.21", "pest", "pest_derive", "quick-error", @@ -2639,6 +2652,15 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" +dependencies = [ + "log 0.4.21", +] + [[package]] name = "log" version = "0.4.21" @@ -2693,6 +2715,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "managed" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ca88d725a0a943b096803bd34e73a4437208b6077654cc4ecb2947a5f91618d" + [[package]] name = "matchers" version = "0.1.0" @@ -2828,7 +2856,7 @@ dependencies = [ "iovec", "kernel32-sys", "libc", - "log", + "log 0.4.21", 
"miow 0.2.2", "net2", "slab", @@ -2842,8 +2870,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", - "log", - "wasi", + "log 0.4.21", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.48.0", ] @@ -2893,7 +2921,7 @@ checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" dependencies = [ "lazy_static", "libc", - "log", + "log 0.4.21", "openssl", "openssl-probe", "openssl-sys", @@ -3677,7 +3705,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "300d4fbfb40c1c66a78ba3ddd41c1110247cf52f97b87d0f2fc9209bd49b030c" dependencies = [ "fxhash", - "log", + "log 0.4.21", "slice-group-by", "smallvec 1.13.1", ] @@ -3773,7 +3801,7 @@ dependencies = [ "hyper-tls", "ipnet", "js-sys", - "log", + "log 0.4.21", "mime", "mime_guess", "native-tls", @@ -3943,7 +3971,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2c50b74badcddeb8f7652fa8323ce440b95286f8e4b64ebfd871c609672704e" dependencies = [ "anyhow", - "log", + "log 0.4.21", "serde", "serde_json", ] @@ -3967,7 +3995,7 @@ version = "0.21.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ - "log", + "log 0.4.21", "ring", "rustls-webpki 0.101.7", "sct", @@ -3979,7 +4007,7 @@ version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" dependencies = [ - "log", + "log 0.4.21", "ring", "rustls-pki-types", "rustls-webpki 0.102.2", @@ -4407,7 +4435,7 @@ dependencies = [ "dashmap", "futures 0.3.30", "lazy_static", - "log", + "log 0.4.21", "parking_lot 0.12.1", "serial_test_derive", ] @@ -4470,6 +4498,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + [[package]] name = "simdutf8" version = "0.1.4" @@ -4512,6 +4549,17 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +[[package]] +name = "smoltcp" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee34c1e1bfc7e9206cc0fb8030a90129b4e319ab53856249bb27642cab914fb3" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "managed", +] + [[package]] name = "socket2" version = "0.4.10" @@ -4790,6 +4838,12 @@ dependencies = [ "syn 2.0.53", ] +[[package]] +name = "thread-scoped" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcbb6aa301e5d3b0b5ef639c9a9c7e2f1c944f177b460c04dc24c69b1fa2bd99" + [[package]] name = "thread_local" version = "1.1.8" @@ -4800,6 +4854,17 @@ dependencies = [ "once_cell", ] +[[package]] +name = "time" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi 0.3.9", +] + [[package]] name = "time" version = 
"0.3.34" @@ -4863,7 +4928,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec03259a0567ad58eed30812bc3e5eda8030f154abc70317ab57b14f00699ca4" dependencies = [ "idna 0.2.3", - "log", + "log 0.4.21", "regex", "serde_json", "thiserror", @@ -4906,6 +4971,7 @@ dependencies = [ "mio 0.8.11", "num_cpus", "pin-project-lite", + "signal-hook-registry", "socket2 0.5.6", "tokio-macros", "windows-sys 0.48.0", @@ -4931,7 +4997,7 @@ dependencies = [ "bytes 0.4.12", "futures 0.1.31", "iovec", - "log", + "log 0.4.21", "mio 0.6.23", "scoped-tls 0.1.2", "tokio 0.1.22", @@ -4980,7 +5046,7 @@ checksum = "57fc868aae093479e3131e3d165c93b1c7474109d13c90ec0dda2a1bbfff0674" dependencies = [ "bytes 0.4.12", "futures 0.1.31", - "log", + "log 0.4.21", ] [[package]] @@ -5013,7 +5079,7 @@ dependencies = [ "crossbeam-utils 0.7.2", "futures 0.1.31", "lazy_static", - "log", + "log 0.4.21", "mio 0.6.23", "num_cpus", "parking_lot 0.9.0", @@ -5051,6 +5117,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio 1.36.0", + "tokio-util", +] + [[package]] name = "tokio-sync" version = "0.1.8" @@ -5086,7 +5164,7 @@ dependencies = [ "crossbeam-utils 0.7.2", "futures 0.1.31", "lazy_static", - "log", + "log 0.4.21", "num_cpus", "slab", "tokio-executor", @@ -5111,7 +5189,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "212d5dcb2a1ce06d81107c3d0ffa3121fe974b73f068c8282cb1c32328113b6c" dependencies = [ "futures-util", - "log", + "log 0.4.21", "rustls 0.21.10", "rustls-native-certs", "tokio 1.36.0", @@ -5128,7 +5206,7 @@ checksum = "e2a0b10e610b39c38b031a2fcab08e4b82f16ece36504988dcbd81dbba650d82" dependencies = [ "bytes 0.4.12", "futures 0.1.31", - "log", + "log 0.4.21", "mio 0.6.23", "tokio-codec", "tokio-io", @@ -5145,7 +5223,7 @@ dependencies = [ "futures 0.1.31", "iovec", "libc", - "log", + "log 0.4.21", "mio 0.6.23", "mio-uds", "tokio-codec", @@ -5289,7 +5367,7 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "log", + "log 0.4.21", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -5322,7 +5400,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" dependencies = [ - "log", + "log 0.4.21", "once_cell", "tracing-core", ] @@ -5437,7 +5515,7 @@ dependencies = [ "data-encoding", "http", "httparse", - "log", + "log 0.4.21", "rand", "rustls 0.21.10", "sha1", @@ -5575,7 +5653,7 @@ checksum = "11f214ce18d8b2cbe84ed3aa6486ed3f5b285cf8d8fbdbce9f3f767a724adc35" dependencies = [ "base64 0.21.7", "flate2", - "log", + "log 0.4.21", "once_cell", "rustls 0.22.2", "rustls-pki-types", @@ -5667,6 +5745,7 @@ dependencies = [ "thiserror", "tokio 1.36.0", "tracing", + "tracing-test", "typetag", "webc", ] @@ -5706,6 +5785,7 @@ dependencies = [ "rkyv", "serde", "serial_test", + "smoltcp", "socket2 0.4.10", "thiserror", "tokio 1.36.0", @@ -5860,6 +5940,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -5901,7 +5987,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", - "log", + "log 0.4.21", "once_cell", "proc-macro2", "quote", @@ -6105,7 +6191,7 @@ dependencies = [ "serde", "serde_json", "serde_path_to_error", - "time", + "time 0.3.34", "tokio 1.36.0", "tracing", "url", @@ -6216,6 +6302,7 @@ dependencies = [ "edge-schema 0.0.2", "edge-util", "flate2", + "fuse", "futures 0.3.30", "futures-util", "hex", @@ -6227,7 +6314,7 @@ dependencies = [ "interfaces", "is-terminal", "libc", - "log", + "log 0.4.21", "mac_address", "mio 0.8.11", "normpath", @@ -6240,16 +6327,19 @@ dependencies = [ "pretty_assertions", "regex", "reqwest", + "rkyv", "semver 1.0.22", "serde", "serde_json", "serde_yaml 0.8.26", "sha2", + "shared-buffer", "tar", "target-lexicon 0.12.14", "tempfile", "thiserror", - "time", + "time 0.1.45", + "time 0.3.34", "tldextract", "tokio 1.36.0", "tokio-tungstenite", @@ -6323,7 +6413,7 @@ dependencies = [ "distance", "fern", "is-terminal", - "log", + "log 0.4.21", "target-lexicon 0.12.14", "unix_mode", "wasmer-compiler", @@ -6412,8 +6502,8 @@ dependencies = [ "getrandom", "lazy_static", "libc", - "log", - "time", + "log 0.4.21", + "time 0.3.34", "wasmer", "wasmer-types", ] @@ -6500,6 +6590,7 @@ dependencies = [ "thiserror", "tracing", "tracing-test", + "virtual-fs", "virtual-net", "wasmer", "wasmer-wasix-types", @@ -6542,7 +6633,7 @@ dependencies = [ "indexmap 1.9.3", "indicatif", "lazy_static", - "log", + "log 0.4.21", "lzma-rs", "minisign", "pretty_assertions", @@ -6556,7 +6647,7 @@ dependencies = [ "tar", "tempfile", "thiserror", - "time", + "time 0.3.34", "tldextract", "tokio 1.36.0", "tokio-tungstenite", @@ -6654,6 +6745,7 @@ dependencies = [ "async-trait", "base64 0.21.7", "bincode", + "blake3", "bytecheck", "bytes 1.5.0", "cfg-if 1.0.0", @@ -6695,6 +6787,7 @@ dependencies = [ "termios", "thiserror", "tokio 1.36.0", + "tokio-stream", "tower", "tower-http", "tracing", @@ -6736,7 +6829,7 @@ dependencies = [ "num_enum", "pretty_assertions", "serde", - "time", + "time 0.3.34", "tracing", "wai-bindgen-gen-core", "wai-bindgen-gen-rust", diff --git a/Cargo.toml b/Cargo.toml index 07912d775cb..94ccd81d580 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,6 +89,8 @@ wasmer-toml = "0.9.2" wasmparser = { version = "0.121.0", default-features = false } webc = { version = "5.8.0", default-features = false, features = ["package"] } shared-buffer = "0.1.4" +rkyv = { version = "0.7.40", features = ["indexmap", "validation", "strict"] } +memmap2 = { version = "0.6.2" } [build-dependencies] test-generator = { path = "tests/lib/test-generator" } diff --git a/lib/cli/Cargo.toml b/lib/cli/Cargo.toml index 598731616f9..ea784e5f0b3 100644 --- a/lib/cli/Cargo.toml +++ b/lib/cli/Cargo.toml @@ -41,6 +41,7 @@ default = [ # Tun-tap client for connecting to Wasmer Edge VPNs tun-tap = ["dep:tun-tap", "virtual-net/tokio-tungstenite", "tokio-tungstenite", "mio", "futures-util", "mac_address", "dep:interfaces"] journal = ["wasmer-wasix/journal"] +fuse = ["dep:fuse", "dep:time01", "dep:shared-buffer", "dep:rkyv"] backend = [] coredump = ["wasm-coredump-builder"] sys = ["compiler", "wasmer-vm"] @@ -79,6 +80,7 @@ wasmer-wasix = { version = "0.18.2", path = "../wasix", features = [ "logging", "webc_runner_rt_wcgi", "webc_runner_rt_dcgi", + 
"webc_runner_rt_dproxy", "webc_runner_rt_emscripten", "host-fs", ] } @@ -104,6 +106,14 @@ wasmer-api = { version = "=0.0.24", path = "../backend-api" } edge-schema = { version = "=0.0.2" } edge-util = { version = "=0.0.1" } +# Used by the mount command + +shared-buffer = { workspace = true, optional = true } +rkyv = { workspace = true, optional = true } +fuse = { version = "0.3", optional = true } +time01 = { package = "time", version = "0.1.45", optional = true } + + # Third-party dependencies. is-terminal = "0.4.7" diff --git a/lib/cli/src/commands/journal/import.rs b/lib/cli/src/commands/journal/import.rs index bd38bad37e1..00bf0e50fe3 100644 --- a/lib/cli/src/commands/journal/import.rs +++ b/lib/cli/src/commands/journal/import.rs @@ -8,13 +8,13 @@ use crate::commands::CliCommand; /// Imports events into a journal file. Events are streamed as JSON /// objects into `stdin` #[derive(Debug, Parser)] -pub struct CmdJournaImport { +pub struct CmdJournalImport { /// Path to the journal that will be printed #[clap(index = 1)] journal_path: PathBuf, } -impl CliCommand for CmdJournaImport { +impl CliCommand for CmdJournalImport { type Output = (); fn run(self) -> Result<(), anyhow::Error> { diff --git a/lib/cli/src/commands/journal/inspect.rs b/lib/cli/src/commands/journal/inspect.rs index 73382c09a2c..9779e1f6f06 100644 --- a/lib/cli/src/commands/journal/inspect.rs +++ b/lib/cli/src/commands/journal/inspect.rs @@ -7,13 +7,13 @@ use crate::commands::CliCommand; /// Prints a summarized version of contents of a journal to stdout #[derive(Debug, Parser)] -pub struct CmdJournaInspect { +pub struct CmdJournalInspect { /// Path to the journal that will be printed #[clap(index = 1)] journal_path: PathBuf, } -impl CliCommand for CmdJournaInspect { +impl CliCommand for CmdJournalInspect { type Output = (); fn run(self) -> Result<(), anyhow::Error> { diff --git a/lib/cli/src/commands/journal/mod.rs b/lib/cli/src/commands/journal/mod.rs index 49d5ce723a8..4e650719576 100644 --- a/lib/cli/src/commands/journal/mod.rs +++ b/lib/cli/src/commands/journal/mod.rs @@ -5,12 +5,16 @@ mod export; mod filter; mod import; mod inspect; +#[cfg(feature = "fuse")] +mod mount; pub use compact::*; pub use export::*; pub use filter::*; pub use import::*; pub use inspect::*; +#[cfg(feature = "fuse")] +pub use mount::*; /// Manage Journal files. 
#[derive(clap::Subcommand, Debug)] @@ -20,11 +24,14 @@ pub enum CmdJournal { /// Exports the contents of a journal to stdout as JSON objects Export(CmdJournalExport), /// Imports the events into a journal as JSON objects - Import(CmdJournaImport), + Import(CmdJournalImport), /// Inspects the contents of a journal and summarizes it to `stdout` - Inspect(CmdJournaInspect), + Inspect(CmdJournalInspect), /// Filters out certain events from a journal Filter(CmdJournalFilter), + /// Mounts the journal at a particular directory + #[cfg(feature = "fuse")] + Mount(CmdJournalMount), } impl CliCommand for CmdJournal { @@ -37,6 +44,8 @@ impl CliCommand for CmdJournal { Self::Export(cmd) => cmd.run(), Self::Inspect(cmd) => cmd.run(), Self::Filter(cmd) => cmd.run(), + #[cfg(feature = "fuse")] + Self::Mount(cmd) => cmd.run(), } } } diff --git a/lib/cli/src/commands/journal/mount/cmd.rs b/lib/cli/src/commands/journal/mount/cmd.rs new file mode 100644 index 00000000000..66d03f0e274 --- /dev/null +++ b/lib/cli/src/commands/journal/mount/cmd.rs @@ -0,0 +1,42 @@ +use std::{path::PathBuf, process::Stdio}; + +use clap::Parser; +use wasmer_wasix::fs::WasiFdSeed; + +use super::fs::JournalFileSystemBuilder; +use crate::commands::CliCommand; + +/// Mounts a journal as a file system on the local machine +#[derive(Debug, Parser)] +pub struct CmdJournalMount { + /// Path to the journal that will be printed + #[clap(index = 1)] + journal_path: PathBuf, + /// Path to the directory where the file system will be mounted + #[clap(index = 2)] + mount_path: PathBuf, +} + +impl CliCommand for CmdJournalMount { + type Output = (); + + fn run(self) -> Result<(), anyhow::Error> { + // First we unmount any existing file system on this path + std::process::Command::new("/bin/umount") + .arg(self.mount_path.to_string_lossy().as_ref()) + .stderr(Stdio::null()) + .stdout(Stdio::null()) + .spawn()? 
+ .wait() + .ok(); + + let fs = JournalFileSystemBuilder::new(&self.journal_path) + .with_fd_seed(WasiFdSeed::default()) + .with_progress_bar(false) + .build()?; + + // Mounts the journal file system at a path + fuse::mount(fs, &self.mount_path, &[])?; + Ok(()) + } +} diff --git a/lib/cli/src/commands/journal/mount/fs.rs b/lib/cli/src/commands/journal/mount/fs.rs new file mode 100644 index 00000000000..f5baaf894d3 --- /dev/null +++ b/lib/cli/src/commands/journal/mount/fs.rs @@ -0,0 +1,1214 @@ +#![allow(unused)] +use std::{ + borrow::Cow, + collections::HashMap, + ffi::OsStr, + fs::File, + hash::{Hash, Hasher}, + io, + path::{Path, PathBuf}, + sync::{atomic::AtomicU32, Arc, Mutex}, + time::Duration, +}; + +use fuse::{ + FileAttr, Filesystem, ReplyAttr, ReplyBmap, ReplyCreate, ReplyData, ReplyDirectory, ReplyEmpty, + ReplyEntry, ReplyLock, ReplyOpen, ReplyStatfs, ReplyWrite, ReplyXattr, Request, +}; +use indicatif::{ProgressBar, ProgressStyle}; +use shared_buffer::OwnedBuffer; +use tokio::runtime::Handle; +use virtual_fs::{ + mem_fs::{self, OffloadBackingStore}, + AsyncReadExt, AsyncSeekExt, AsyncWriteExt, FileOpener, FileSystem, FsError, +}; +use wasmer_wasix::{ + fs::WasiFdSeed, + journal::{ + copy_journal, ArchivedJournalEntry, ArchivedJournalEntryFileDescriptorWriteV1, Journal, + JournalEntry, JournalEntryFileDescriptorWriteV1, LogFileJournal, LogWriteResult, + ReadableJournal, WritableJournal, + }, + types::Oflags, + wasmer_wasix_types::wasi, + VIRTUAL_ROOT_FD, +}; + +#[derive(Debug)] +struct State { + handle: tokio::runtime::Handle, + mem_fs: mem_fs::FileSystem, + inos: HashMap>, + lookup: HashMap< + u32, + Arc>>, + >, + seed: WasiFdSeed, + fake_offset: u64, +} + +#[derive(Debug)] +struct MutexState { + inner: Mutex, +} + +#[derive(Debug)] +pub struct JournalFileSystemBuilder { + path: PathBuf, + fd_seed: WasiFdSeed, + progress_bar: bool, +} + +impl JournalFileSystemBuilder { + pub fn new(path: &Path) -> Self { + Self { + path: path.to_path_buf(), + fd_seed: WasiFdSeed::default(), + progress_bar: false, + } + } + + pub fn with_fd_seed(mut self, fd_seed: WasiFdSeed) -> Self { + self.fd_seed = fd_seed; + self + } + + pub fn with_progress_bar(mut self, val: bool) -> Self { + self.progress_bar = val; + self + } + + // Opens the journal and copies all its contents into + // and memory file system + pub fn build(self) -> anyhow::Result { + let journal = LogFileJournal::new(&self.path)?; + let backing_store = journal.backing_store(); + let file_len = backing_store.owned_buffer().len(); + + let mem_fs = mem_fs::FileSystem::default().with_backing_offload(backing_store)?; + let state = MutexState { + inner: Mutex::new(State { + handle: tokio::runtime::Handle::current(), + mem_fs, + inos: Default::default(), + seed: self.fd_seed, + lookup: Default::default(), + fake_offset: 0, + }), + }; + + let progress = if self.progress_bar { + let mut pb = ProgressBar::new(file_len as u64); + pb.set_style(ProgressStyle::with_template("{msg}\n{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})") + .unwrap() + .progress_chars("#>-")); + pb.set_message("Loading journal..."); + + Some(pb) + } else { + None + }; + + tokio::task::block_in_place(|| { + if let Some(progress) = progress { + copy_journal_with_progress(&journal, &state, progress) + } else { + copy_journal(&journal, &state) + } + })?; + + let ret = JournalFileSystem { + handle: tokio::runtime::Handle::current(), + journal, + state, + }; + + Ok(ret) + } +} + +pub fn copy_journal_with_progress( + 
from: &R, + to: &W, + mut progress: ProgressBar, +) -> anyhow::Result<()> { + while let Some(record) = from.read()? { + progress.set_position(record.record_end); + to.write(record.into_inner())?; + } + progress.finish_and_clear(); + println!("Journal is mounted"); + Ok(()) +} + +#[derive(Debug)] +pub struct JournalFileSystem { + handle: tokio::runtime::Handle, + journal: LogFileJournal, + state: MutexState, +} + +impl JournalFileSystem { + fn reverse_ino(&self, ino: u64) -> Result, libc::c_int> { + if ino == 1 { + return Ok("/".into()); + } + let path = { + let mut state = self.state.inner.lock().unwrap(); + match state.inos.get(&ino).cloned() { + Some(path) => path, + None => { + return Err(libc::ENOENT); + } + } + }; + Ok(path) + } + + fn attr<'a>(&self, path: Cow<'a, str>) -> Result { + let mut state = self.state.inner.lock().unwrap(); + + let res = state.mem_fs.metadata(&Path::new(path.as_ref())); + match res { + Ok(meta) => { + // The ino is just the hash of the name + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + path.hash(&mut hasher); + let ino = hasher.finish(); + state + .inos + .entry(ino) + .or_insert_with(|| path.into_owned().into()); + + // Build a file attr and return it + Ok(FileAttr { + ino, + size: meta.len, + blocks: (1u64.max(meta.len) - 1 / 512) + 1, + atime: time01::Timespec::new(meta.accessed as i64, 0), + mtime: time01::Timespec::new(meta.modified as i64, 0), + ctime: time01::Timespec::new(meta.created as i64, 0), + crtime: time01::Timespec::new(meta.created as i64, 0), + kind: file_type_to_kind(meta.ft), + perm: 0o644, + nlink: 1, + uid: 0, + gid: 0, + rdev: 0, + flags: 0, + }) + } + Err(FsError::EntryNotFound) => Err(libc::ENOENT), + Err(_) => Err(libc::EIO), + } + } +} + +impl WritableJournal for MutexState { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + let mut state = self.inner.lock().unwrap(); + let ret = LogWriteResult { + record_start: state.fake_offset, + record_end: state.fake_offset + entry.estimate_size() as u64, + }; + state.fake_offset += ret.record_size(); + match entry { + JournalEntry::FileDescriptorWriteV1 { + fd, + offset, + data, + is_64bit, + } => { + let handle = state.handle.clone(); + if let Some(file) = state.lookup.get_mut(&fd) { + handle.block_on(async { + let mut file = file.lock().await; + file.seek(io::SeekFrom::Start(offset)).await; + file.write_all(&data).await + })?; + } + } + JournalEntry::CloseFileDescriptorV1 { fd } => { + state.lookup.remove(&fd); + } + JournalEntry::OpenFileDescriptorV1 { + fd, + dirfd, + dirflags, + path, + o_flags, + fs_rights_base, + fs_rights_inheriting, + fs_flags, + } => { + state.seed.clip_val(fd + 1); + let file = state + .mem_fs + .new_open_options() + .create(o_flags.contains(Oflags::CREATE)) + .truncate(o_flags.contains(Oflags::TRUNC)) + .write(true) + .read(true) + .open(path.as_ref())?; + state + .lookup + .insert(fd, Arc::new(tokio::sync::Mutex::new(file))); + } + JournalEntry::RenumberFileDescriptorV1 { old_fd, new_fd } => { + state.seed.clip_val(new_fd + 1); + if let Some(file) = state.lookup.remove(&old_fd) { + state.lookup.insert(new_fd, file); + } + } + JournalEntry::DuplicateFileDescriptorV1 { + original_fd, + copied_fd, + } => { + state.seed.clip_val(copied_fd + 1); + if let Some(file) = state.lookup.get(&original_fd).cloned() { + state.lookup.insert(copied_fd, file); + } + } + JournalEntry::CreateDirectoryV1 { fd, path } => { + state.mem_fs.create_dir(&Path::new(path.as_ref())).ok(); + } + JournalEntry::RemoveDirectoryV1 { fd, path } => { + 
state.mem_fs.remove_dir(&Path::new(path.as_ref()))?; + } + JournalEntry::FileDescriptorSetSizeV1 { fd, st_size } => { + let handle = state.handle.clone(); + if let Some(file) = state.lookup.get(&fd) { + handle.block_on(async { + let mut file = file.lock().await; + file.set_len(st_size) + })?; + } + } + JournalEntry::FileDescriptorAllocateV1 { fd, offset, len } => { + let handle = state.handle.clone(); + if let Some(file) = state.lookup.get(&fd) { + handle.block_on(async { + let mut file = file.lock().await; + file.set_len(offset + len) + })?; + } + } + JournalEntry::UnlinkFileV1 { fd, path } => { + state.mem_fs.remove_file(&Path::new(path.as_ref()))?; + } + JournalEntry::PathRenameV1 { + old_fd, + old_path, + new_fd, + new_path, + } => { + let handle = state.handle.clone(); + handle.block_on(async { + state + .mem_fs + .rename(&Path::new(old_path.as_ref()), &Path::new(new_path.as_ref())) + .await + })?; + } + JournalEntry::SocketOpenV1 { fd, .. } => { + state.seed.clip_val(fd + 1); + } + JournalEntry::CreatePipeV1 { fd1, fd2 } => { + state.seed.clip_val(fd1 + 1); + state.seed.clip_val(fd2 + 1); + } + JournalEntry::CreateEventV1 { fd, .. } => { + state.seed.clip_val(fd + 1); + } + JournalEntry::EpollCreateV1 { fd } => { + state.seed.clip_val(fd + 1); + } + JournalEntry::EpollCtlV1 { + epfd, + op, + fd, + event, + } => { + state.seed.clip_val(fd + 1); + } + JournalEntry::SocketAcceptedV1 { fd, .. } => { + state.seed.clip_val(fd + 1); + } + _ => {} + } + Ok(ret) + } +} + +impl JournalFileSystem { + fn compute_path<'a>(&'a self, parent: u64, name: &'a OsStr) -> Result, i32> { + // Get the path from the ino otherwise it is not a known + // path (this means the other methods have to be hit first) + let path = match self.reverse_ino(parent) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::compute_path reverse_ino({parent}) errno={err}"); + return Err(err); + } + }; + + // Add the name as a postfix + let name = name.to_string_lossy(); + let path = if path.ends_with("/") { + path + name + } else { + path + "/" + name + }; + Ok(path) + } +} + +impl Filesystem for JournalFileSystem { + fn init(&mut self, _req: &Request) -> Result<(), libc::c_int> { + Ok(()) + } + + fn destroy(&mut self, _req: &Request) {} + + fn lookup(&mut self, _req: &Request, parent: u64, name: &OsStr, reply: ReplyEntry) { + let path = match self.compute_path(parent, name) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::lookup err={err}"); + return reply.error(err); + } + }; + + match self.attr(path) { + Ok(meta) => reply.entry(&time01::Timespec::new(1, 0), &meta, 0), + Err(err) => { + tracing::trace!("fs::lookup err={err}"); + reply.error(err) + } + } + } + + fn getattr(&mut self, _req: &Request, ino: u64, reply: ReplyAttr) { + let path = match self.reverse_ino(ino) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::getattr reverse_ino({ino}) errno={err}"); + reply.error(err); + return; + } + }; + + match self.attr(path) { + Ok(meta) => reply.attr(&time01::Timespec::new(1, 0), &meta), + Err(err) => reply.error(err), + } + } + + fn setattr( + &mut self, + _req: &Request, + ino: u64, + _mode: Option, + _uid: Option, + _gid: Option, + size: Option, + _atime: Option, + _mtime: Option, + fh: Option, + _crtime: Option, + _chgtime: Option, + _bkuptime: Option, + _flags: Option, + reply: ReplyAttr, + ) { + let mut entries = Vec::new(); + + let attr = match fh { + Some(fd) => { + let fd = fd as u32; + let mut state = self.state.inner.lock().unwrap(); + let file = match state.lookup.get_mut(&fd) { + Some(f) => f.clone(), + 
None => { + tracing::trace!("fs::getattr noent (fd={fd})"); + reply.error(libc::ENOENT); + return; + } + }; + + self.handle.block_on(async { + let mut file = file.lock().await; + + if let Some(size) = size { + entries.push(JournalEntry::FileDescriptorSetSizeV1 { + fd: fd as u32, + st_size: size, + }) + } + + FileAttr { + ino, + size: file.size(), + blocks: (1u64.max(file.size()) - 1 / 512) + 1, + atime: time01::Timespec::new(file.last_accessed() as i64, 0), + mtime: time01::Timespec::new(file.last_modified() as i64, 0), + ctime: time01::Timespec::new(file.created_time() as i64, 0), + crtime: time01::Timespec::new(file.created_time() as i64, 0), + kind: fuse::FileType::RegularFile, + perm: 0o644, + nlink: 1, + uid: 0, + gid: 0, + rdev: 0, + flags: 0, + } + }) + } + None => { + let path = match self.reverse_ino(ino) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::setattr reverse_ino({ino}) errno={err}"); + reply.error(err); + return; + } + }; + + let fh; + let mut state = self.state.inner.lock().unwrap(); + let file = state + .mem_fs + .new_open_options() + .read(true) + .write(true) + .open(&Path::new(path.as_ref())); + match file { + Ok(file) => { + // Reserve a file descriptor and close the state + fh = state.seed.next_val(); + drop(state); + + entries.push(JournalEntry::OpenFileDescriptorV1 { + fd: fh, + dirfd: VIRTUAL_ROOT_FD, + dirflags: 0, + path, + o_flags: wasi::Oflags::empty(), + fs_rights_base: wasi::Rights::all(), + fs_rights_inheriting: wasi::Rights::all(), + fs_flags: wasi::Fdflags::empty(), + }); + if let Some(size) = size { + entries.push(JournalEntry::FileDescriptorSetSizeV1 { + fd: fh as u32, + st_size: size, + }) + } + entries.push(JournalEntry::CloseFileDescriptorV1 { fd: fh }); + + for entry in entries.iter() { + if self.state.write(entry.clone()).is_err() { + tracing::trace!("fs::open err=EIO"); + reply.error(libc::EIO); + return; + } + } + for entry in entries.iter() { + if self.journal.write(entry.clone()).is_err() { + tracing::trace!("fs::open err=EIO"); + reply.error(libc::EIO); + return; + } + } + FileAttr { + ino, + size: file.size(), + blocks: (1u64.max(file.size()) - 1 / 512) + 1, + atime: time01::Timespec::new(file.last_accessed() as i64, 0), + mtime: time01::Timespec::new(file.last_modified() as i64, 0), + ctime: time01::Timespec::new(file.created_time() as i64, 0), + crtime: time01::Timespec::new(file.created_time() as i64, 0), + kind: fuse::FileType::RegularFile, + perm: 0o644, + nlink: 1, + uid: 0, + gid: 0, + rdev: 0, + flags: 0, + } + } + Err(FsError::EntryNotFound) => { + // Maybe its a directory, in which case we are done + if let Ok(meta) = state.mem_fs.metadata(&Path::new(path.as_ref())) { + FileAttr { + ino, + size: meta.len, + blocks: (1u64.max(meta.len) - 1 / 512) + 1, + atime: time01::Timespec::new(meta.accessed as i64, 0), + mtime: time01::Timespec::new(meta.modified as i64, 0), + ctime: time01::Timespec::new(meta.created as i64, 0), + crtime: time01::Timespec::new(meta.created as i64, 0), + kind: file_type_to_kind(meta.ft), + perm: 0o644, + nlink: 1, + uid: 0, + gid: 0, + rdev: 0, + flags: 0, + } + } else { + tracing::trace!("fs::setattr open_file({path}) err=ENOENT"); + reply.error(libc::ENOENT); + return; + } + } + Err(err) => { + tracing::trace!("fs::setattr open_file({path}) err={err}"); + reply.error(libc::EIO); + return; + } + } + } + }; + + // Return the data + reply.attr(&time01::Timespec::new(1, 0), &attr) + } + + fn setxattr( + &mut self, + _req: &Request, + _ino: u64, + _name: &OsStr, + _value: &[u8], + _flags: u32, + 
_position: u32, + reply: ReplyEmpty, + ) { + tracing::trace!("fs::setxattr err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn getxattr( + &mut self, + _req: &Request, + _ino: u64, + _name: &OsStr, + _size: u32, + reply: ReplyXattr, + ) { + tracing::trace!("fs::getxattr size(0)"); + reply.size(0) + } + + fn open(&mut self, _req: &Request, ino: u64, flags: u32, reply: ReplyOpen) { + let path = match self.reverse_ino(ino) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::open reverse_ino({ino}) errno={err}"); + reply.error(err); + return; + } + }; + + // Reserve a file descriptor + let fh = { + let mut state = self.state.inner.lock().unwrap(); + state.seed.next_val() + }; + + // Write the journals + let entry = JournalEntry::OpenFileDescriptorV1 { + fd: fh, + dirfd: VIRTUAL_ROOT_FD, + dirflags: 0, + path, + o_flags: wasi::Oflags::empty(), + fs_rights_base: wasi::Rights::all(), + fs_rights_inheriting: wasi::Rights::all(), + fs_flags: wasi::Fdflags::empty(), + }; + if self.state.write(entry.clone()).is_err() { + tracing::trace!("fs::open err=EIO"); + reply.error(libc::EIO); + return; + } + if self.journal.write(entry).is_err() { + tracing::trace!("fs::open err=EIO"); + reply.error(libc::EIO); + return; + } + + tracing::trace!("fs::open opened fh={fh}"); + reply.opened(fh as u64, flags); + } + + fn release( + &mut self, + _req: &Request, + _ino: u64, + fh: u64, + _flags: u32, + _lock_owner: u64, + _flush: bool, + reply: ReplyEmpty, + ) { + let fh = fh as u32; + + { + // Check that the file handle exists + let mut state = self.state.inner.lock().unwrap(); + if !state.lookup.contains_key(&fh) { + tracing::trace!("fs::release err=ENOENT (fd={fh})"); + reply.error(libc::ENOENT); + return; + } + } + + // Write the journals + let entry = JournalEntry::CloseFileDescriptorV1 { fd: fh }; + if self.state.write(entry.clone()).is_err() { + tracing::trace!("fs::release err=EIO"); + reply.error(libc::EIO); + return; + } + if self.journal.write(entry).is_err() { + tracing::trace!("fs::release err=EIO"); + reply.error(libc::EIO); + return; + } + + tracing::trace!("fs::release ok"); + reply.ok(); + } + + fn create( + &mut self, + _req: &Request, + parent: u64, + name: &OsStr, + mode: u32, + flags: u32, + reply: ReplyCreate, + ) { + let path = match self.compute_path(parent, name) { + Ok(a) => a, + Err(err) => return reply.error(err), + }; + + // The ino is just the hash of the name + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + path.hash(&mut hasher); + let ino = hasher.finish(); + + // Reserve a file descriptor + let fh = { + let mut state = self.state.inner.lock().unwrap(); + state.seed.next_val() + }; + + // Write the journals + let entry = JournalEntry::OpenFileDescriptorV1 { + fd: fh, + dirfd: VIRTUAL_ROOT_FD, + dirflags: 0, + path, + o_flags: wasi::Oflags::CREATE, + fs_rights_base: wasi::Rights::all(), + fs_rights_inheriting: wasi::Rights::all(), + fs_flags: wasi::Fdflags::empty(), + }; + if let Err(err) = self.state.write(entry.clone()) { + tracing::trace!("fs::create (j1) err=EIO - {err}"); + reply.error(libc::EIO); + return; + } + if let Err(err) = self.journal.write(entry) { + tracing::trace!("fs::create (j2) err=EIO - {err}"); + reply.error(libc::EIO); + return; + } + + let now = time01::get_time(); + reply.created( + &time01::Timespec::new(1, 0), + &FileAttr { + ino, + size: 0, + blocks: 0, + atime: now, + mtime: now, + ctime: now, + crtime: now, + kind: fuse::FileType::RegularFile, + perm: 0o644, + nlink: 1, + uid: 0, + gid: 0, + rdev: 0, + flags: 0, + }, + 0, + fh as 
u64, + flags, + ); + } + + fn read( + &mut self, + _req: &Request, + ino: u64, + fh: u64, + offset: i64, + size: u32, + reply: ReplyData, + ) { + let fh = fh as u32; + + // Grab the file from the file handle + let mut state = self.state.inner.lock().unwrap(); + let file = match state.lookup.get_mut(&fh) { + Some(a) => a, + None => { + tracing::trace!("fs::read lookup(fh={fh}) noent err=EIO"); + reply.error(libc::ENOENT); + return; + } + }; + + // Read the data from the file and return it + let data: Result<_, io::Error> = self.handle.block_on(async { + let mut file = file.lock().await; + + let mut buf = Vec::with_capacity(size as usize); + unsafe { buf.set_len(size as usize) }; + file.seek(io::SeekFrom::Start(offset as u64)).await?; + let amt = file.read(&mut buf).await?; + unsafe { buf.set_len(amt) }; + Ok(buf) + }); + let data = match data { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::read data err=EIO"); + reply.error(libc::EIO); + return; + } + }; + + // Return the data + reply.data(&data); + } + + fn write( + &mut self, + _req: &Request, + _ino: u64, + fh: u64, + offset: i64, + data: &[u8], + _flags: u32, + reply: ReplyWrite, + ) { + let fh = fh as u32; + + { + // Check that the file handle exists + let mut state = self.state.inner.lock().unwrap(); + if !state.lookup.contains_key(&fh) { + tracing::trace!("fs::write err=ENOENT"); + reply.error(libc::ENOENT); + return; + } + } + + // Write the entry to the log file + let fd = fh as u32; + let entry = JournalEntry::FileDescriptorWriteV1 { + fd, + offset: offset as u64, + data: data.into(), + is_64bit: false, + }; + + let res = match self.journal.write(entry) { + Ok(res) => res, + Err(err) => { + tracing::trace!("fs::write err=EIO - {err}"); + reply.error(libc::EIO); + return; + } + }; + + // We load the record from the journal and use this to write to the memory file system + // because the memory file system has an optimization where it will automatically offload + // the data to the mmap of the journal rather than store it in memory. In effect it offloads + // to the disk + { + let mut state = self.state.inner.lock().unwrap(); + let handle = state.handle.clone(); + if let Some(file) = state.lookup.get_mut(&fd) { + let res: Result<_, io::Error> = handle.block_on(async { + let mut file = file.lock().await; + file.seek(io::SeekFrom::Start(offset as u64)).await; + + // Unsafe!!! 
This assumes the structure does not change + // where the first bytes in the entry are an aligned + // array that corresponds to the data itself + let size = data.len() as u64; + let mut mmap_offset = res.record_start; + let align = mmap_offset % 16; + if align != 0 { + mmap_offset += 16 - align; + } + + // Add the entry + if file.write_from_mmap(mmap_offset, size).is_err() { + // We fall back on just writing the data normally + file.seek(io::SeekFrom::Start(offset as u64)).await; + file.write_all(&data).await?; + } + Ok(()) + }); + if let Err(err) = res { + tracing::trace!("fs::write err=EIO"); + reply.error(libc::EIO); + return; + } + } else { + tracing::trace!("fs::write err=EIO"); + reply.error(libc::EIO); + return; + } + } + + reply.written(data.len() as u32); + } + + fn readdir( + &mut self, + _req: &Request, + ino: u64, + _fh: u64, + offset: i64, + mut reply: ReplyDirectory, + ) { + // Get the path from the ino otherwise it is not a known + // path (this means the other methods have to be hit first) + let path = match self.reverse_ino(ino) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::readir reverse_ino({ino}) err={}", err); + reply.error(err); + return; + } + }; + + let mut state = self.state.inner.lock().unwrap(); + let read_dir = state.mem_fs.read_dir(&Path::new(path.as_ref())); + let read_dir = match read_dir { + Ok(a) => a, + Err(FsError::EntryNotFound) => { + tracing::trace!("fs::readir read_dir({}) err=ENOENT", path); + return; + } + Err(err) => { + tracing::trace!("fs::readir read_dir({}) err={}", path, err); + reply.error(libc::EIO); + return; + } + }; + + for (i, entry) in read_dir.into_iter().enumerate().skip(offset as usize) { + let entry = match entry { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::readir direntry(index={i}) err={}", err); + reply.error(libc::EIO); + return; + } + }; + let path = entry.path.to_string_lossy(); + let name = match entry.path.file_name() { + Some(n) => n, + None => { + tracing::trace!("fs::readir file_name err=EIO"); + reply.error(libc::EIO); + return; + } + }; + + // The ino is just the hash of the name + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + path.hash(&mut hasher); + let ino = hasher.finish(); + state + .inos + .entry(ino) + .or_insert_with(|| path.into_owned().into()); + + // Compute the directory kind + let kind = match entry.file_type() { + Ok(ft) => file_type_to_kind(ft), + _ => fuse::FileType::RegularFile, + }; + + // i + 1 means the index of the next entry + reply.add(ino, (i + 1) as i64, kind, name); + } + reply.ok(); + } + + fn mkdir(&mut self, _req: &Request, parent: u64, name: &OsStr, _mode: u32, reply: ReplyEntry) { + let path = match self.compute_path(parent, name) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::mkdir compute_path err={err}"); + return reply.error(err); + } + }; + + let entry = JournalEntry::CreateDirectoryV1 { + fd: VIRTUAL_ROOT_FD, + path: path.clone(), + }; + self.state.write(entry.clone()); + self.journal.write(entry); + + match self.attr(path) { + Ok(meta) => { + tracing::trace!("fs::mkdir ok"); + reply.entry(&time01::Timespec::new(1, 0), &meta, 0) + } + Err(err) => { + tracing::trace!("fs::mkdir attr err={err}"); + reply.error(err) + } + } + } + + fn rmdir(&mut self, _req: &Request, parent: u64, name: &OsStr, reply: ReplyEmpty) { + let path = match self.compute_path(parent, name) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::rmdir err={err}"); + return reply.error(err); + } + }; + + let entry = JournalEntry::RemoveDirectoryV1 { + fd: 
VIRTUAL_ROOT_FD, + path: path.clone(), + }; + self.state.write(entry.clone()); + self.journal.write(entry); + tracing::trace!("fs::rmdir ok"); + reply.ok(); + } + + fn unlink(&mut self, _req: &Request, parent: u64, name: &OsStr, reply: ReplyEmpty) { + let path = match self.compute_path(parent, name) { + Ok(a) => a, + Err(err) => { + tracing::trace!("fs::unlink err={err}"); + return reply.error(err); + } + }; + + let entry = JournalEntry::UnlinkFileV1 { + fd: VIRTUAL_ROOT_FD, + path: path.clone(), + }; + self.state.write(entry.clone()); + self.journal.write(entry); + tracing::trace!("fs::unlink ok"); + reply.ok(); + } + + fn forget(&mut self, _req: &Request, _ino: u64, _nlookup: u64) { + tracing::trace!("fs::forget ok"); + } + + fn readlink(&mut self, _req: &Request, _ino: u64, reply: ReplyData) { + tracing::trace!("fs::readlink err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn mknod( + &mut self, + _req: &Request, + _parent: u64, + _name: &OsStr, + _mode: u32, + _rdev: u32, + reply: ReplyEntry, + ) { + tracing::trace!("fs::mknod err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn symlink( + &mut self, + _req: &Request, + _parent: u64, + _name: &OsStr, + _link: &Path, + reply: ReplyEntry, + ) { + tracing::trace!("fs::symlink err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn rename( + &mut self, + _req: &Request, + _parent: u64, + _name: &OsStr, + _newparent: u64, + _newname: &OsStr, + reply: ReplyEmpty, + ) { + tracing::trace!("fs::rename err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn link( + &mut self, + _req: &Request, + _ino: u64, + _newparent: u64, + _newname: &OsStr, + reply: ReplyEntry, + ) { + tracing::trace!("fs::link err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn fsync(&mut self, _req: &Request, _ino: u64, _fh: u64, _datasync: bool, reply: ReplyEmpty) { + tracing::trace!("fs::fsync err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn opendir(&mut self, _req: &Request, _ino: u64, _flags: u32, reply: ReplyOpen) { + tracing::trace!("fs::opendir opened"); + reply.opened(0, 0); + } + + fn releasedir(&mut self, _req: &Request, _ino: u64, _fh: u64, _flags: u32, reply: ReplyEmpty) { + tracing::trace!("fs::releasedir ok"); + reply.ok(); + } + + fn fsyncdir( + &mut self, + _req: &Request, + _ino: u64, + _fh: u64, + _datasync: bool, + reply: ReplyEmpty, + ) { + tracing::trace!("fs::fsyncdir err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn statfs(&mut self, _req: &Request, _ino: u64, reply: ReplyStatfs) { + tracing::trace!("fs::statfs ok"); + reply.statfs(0, 0, 0, 0, 0, 512, 255, 0); + } + + fn listxattr(&mut self, _req: &Request, _ino: u64, _size: u32, reply: ReplyXattr) { + tracing::trace!("fs::listxattr err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn removexattr(&mut self, _req: &Request, _ino: u64, _name: &OsStr, reply: ReplyEmpty) { + tracing::trace!("fs::removexattr err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn access(&mut self, _req: &Request, _ino: u64, _mask: u32, reply: ReplyEmpty) { + tracing::trace!("fs::access err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn getlk( + &mut self, + _req: &Request, + _ino: u64, + _fh: u64, + _lock_owner: u64, + _start: u64, + _end: u64, + _typ: u32, + _pid: u32, + reply: ReplyLock, + ) { + tracing::trace!("fs::getlk err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + fn setlk( + &mut self, + _req: &Request, + _ino: u64, + _fh: u64, + _lock_owner: u64, + _start: u64, + _end: u64, + _typ: u32, + _pid: u32, + _sleep: bool, + reply: ReplyEmpty, + ) { + tracing::trace!("fs::setlk err=ENOSYS"); + 
reply.error(libc::ENOSYS); + } + + fn bmap(&mut self, _req: &Request, _ino: u64, _blocksize: u32, _idx: u64, reply: ReplyBmap) { + tracing::trace!("fs::bmp err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + #[cfg(target_os = "macos")] + fn setvolname(&mut self, _req: &Request, _name: &OsStr, reply: ReplyEmpty) { + tracing::trace!("fs::setvolname err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + #[cfg(target_os = "macos")] + fn exchange( + &mut self, + _req: &Request, + _parent: u64, + _name: &OsStr, + _newparent: u64, + _newname: &OsStr, + _options: u64, + reply: ReplyEmpty, + ) { + tracing::trace!("fs::exchange err=ENOSYS"); + reply.error(libc::ENOSYS); + } + + #[cfg(target_os = "macos")] + fn getxtimes(&mut self, _req: &Request, _ino: u64, reply: ReplyXTimes) { + tracing::trace!("fs::getxtimes err=ENOSYS"); + reply.error(libc::ENOSYS); + } +} + +fn file_type_to_kind(ft: virtual_fs::FileType) -> fuse::FileType { + if ft.dir { + fuse::FileType::Directory + } else if ft.symlink { + fuse::FileType::Symlink + } else if ft.block_device { + fuse::FileType::BlockDevice + } else if ft.char_device { + fuse::FileType::CharDevice + } else if ft.socket { + fuse::FileType::Socket + } else { + fuse::FileType::RegularFile + } +} diff --git a/lib/cli/src/commands/journal/mount/mod.rs b/lib/cli/src/commands/journal/mount/mod.rs new file mode 100644 index 00000000000..44c28850b14 --- /dev/null +++ b/lib/cli/src/commands/journal/mount/mod.rs @@ -0,0 +1,4 @@ +mod cmd; +mod fs; + +pub use cmd::*; diff --git a/lib/cli/src/commands/run/mod.rs b/lib/cli/src/commands/run/mod.rs index 37cfe27e9a0..7fcd32a8917 100644 --- a/lib/cli/src/commands/run/mod.rs +++ b/lib/cli/src/commands/run/mod.rs @@ -35,6 +35,7 @@ use wasmer_wasix::{ journal::CompactingLogFileJournal, runners::{ dcgi::{DcgiInstanceFactory, DcgiRunner}, + dproxy::DProxyRunner, emscripten::EmscriptenRunner, wasi::WasiRunner, wcgi::{self, AbortHandle, NoOpWcgiCallbacks, WcgiRunner}, @@ -185,6 +186,8 @@ impl Run { if DcgiRunner::can_run_command(cmd.metadata())? { self.run_dcgi(id, pkg, uses, runtime) + } else if DProxyRunner::can_run_command(cmd.metadata())? { + self.run_dproxy(id, pkg, runtime) } else if WcgiRunner::can_run_command(cmd.metadata())? { self.run_wcgi(id, pkg, uses, runtime) } else if WasiRunner::can_run_command(cmd.metadata())? { @@ -302,6 +305,17 @@ impl Run { runner.run_command(command_name, pkg, runtime) } + fn run_dproxy( + &self, + command_name: &str, + pkg: &BinaryPackage, + runtime: Arc, + ) -> Result<(), Error> { + let mut inner = self.build_wasi_runner(&runtime)?; + let mut runner = wasmer_wasix::runners::dproxy::DProxyRunner::new(inner, pkg); + runner.run_command(command_name, pkg, runtime) + } + fn run_emscripten( &self, command_name: &str, diff --git a/lib/cli/src/commands/run/wasi.rs b/lib/cli/src/commands/run/wasi.rs index 2a9b7369e07..17fa452b2b7 100644 --- a/lib/cli/src/commands/run/wasi.rs +++ b/lib/cli/src/commands/run/wasi.rs @@ -155,7 +155,7 @@ pub struct Wasi { pub snapshot_on: Vec, /// Adds a periodic interval (measured in milli-seconds) that the runtime will automatically - /// takes snapshots of the running process and write them to the journal. When specifying + /// take snapshots of the running process and write them to the journal. When specifying /// this parameter it implies that `--snapshot-on interval` has also been specified. 
#[cfg(feature = "journal")] #[clap(long = "snapshot-period")] diff --git a/lib/compiler/Cargo.toml b/lib/compiler/Cargo.toml index d7a4b9c1e36..02edd487019 100644 --- a/lib/compiler/Cargo.toml +++ b/lib/compiler/Cargo.toml @@ -34,7 +34,7 @@ enum-iterator = "0.7.0" bytes = "1.0" self_cell = "1.0" -rkyv = { version = "0.7.40", features = ["indexmap", "validation", "strict"] } +rkyv = { workspace = true } shared-buffer = { workspace = true } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] diff --git a/lib/journal/Cargo.toml b/lib/journal/Cargo.toml index 0558b42c5f6..78e5bc544fc 100644 --- a/lib/journal/Cargo.toml +++ b/lib/journal/Cargo.toml @@ -17,6 +17,7 @@ log-file = [ "shared-buffer" ] wasmer = { default-features = false, path = "../api", version = "=4.2.7" } wasmer-wasix-types = { path = "../wasi-types", version = "0.18.2", features = [ "enable-serde" ] } virtual-net = { path = "../virtual-net", version = "0.6.3", default-features = false, features = ["rkyv"] } +virtual-fs = { path = "../virtual-fs", default-features = false } shared-buffer = { workspace = true, optional = true } thiserror = "1" @@ -28,7 +29,7 @@ base64 = "0.21" bincode = { version = "1.3" } serde = { version = "1.0", default-features = false, features = ["derive"] } anyhow = "1.0" -rkyv = { version = "0.7.40", features = ["indexmap", "validation", "strict"] } +rkyv = { workspace = true } bytecheck = { version = "0.6.8" } lz4_flex = { version = "0.11" } num_enum = "0.5.7" diff --git a/lib/journal/src/base64.rs b/lib/journal/src/base64.rs index 50d251f8329..be558dd6127 100644 --- a/lib/journal/src/base64.rs +++ b/lib/journal/src/base64.rs @@ -14,7 +14,8 @@ pub fn serialize(v: &[u8], s: S) -> Result { pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result, D::Error> { let base64 = String::deserialize(d)?; #[allow(deprecated)] - base64::decode(decompress_size_prepended(base64.as_bytes()).map_err(serde::de::Error::custom)?) 
+ let bytes = base64::decode(base64).map_err(serde::de::Error::custom)?; + decompress_size_prepended(&bytes) .map_err(serde::de::Error::custom) .map(|d| d.into()) } diff --git a/lib/journal/src/concrete/aligned_cow_str.rs b/lib/journal/src/concrete/aligned_cow_str.rs new file mode 100644 index 00000000000..c9676cbaf49 --- /dev/null +++ b/lib/journal/src/concrete/aligned_cow_str.rs @@ -0,0 +1,114 @@ +use std::{borrow::Cow, fmt, ops::Deref}; + +use rkyv::{ + ser::{ScratchSpace, Serializer}, + vec::{ArchivedVec, VecResolver}, + Archive, Archived, Serialize, +}; + +#[derive(Clone)] +pub struct AlignedCowStr<'a> { + inner: Cow<'a, str>, +} + +impl<'a> AlignedCowStr<'a> { + pub const ALIGNMENT: usize = 16; + + pub fn into_inner(self) -> Cow<'a, str> { + self.inner + } + + #[inline] + pub fn as_slice(&self) -> &str { + self.inner.as_ref() + } + + pub fn len(&self) -> usize { + self.inner.len() + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } +} + +impl<'a> Default for AlignedCowStr<'a> { + fn default() -> Self { + Self { + inner: String::new().into(), + } + } +} + +impl<'a> fmt::Debug for AlignedCowStr<'a> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.inner.fmt(f) + } +} + +impl<'a> From<String> for AlignedCowStr<'a> { + fn from(value: String) -> Self { + Self { + inner: value.into(), + } + } +} + +#[allow(clippy::from_over_into)] +impl<'a> Into<String> for AlignedCowStr<'a> { + fn into(self) -> String { + self.inner.into_owned() + } +} + +impl<'a> From<Cow<'a, str>> for AlignedCowStr<'a> { + fn from(value: Cow<'a, str>) -> Self { + Self { inner: value } + } +} + +#[allow(clippy::from_over_into)] +impl<'a> Into<Cow<'a, str>> for AlignedCowStr<'a> { + fn into(self) -> Cow<'a, str> { + self.inner + } +} + +impl<'a> Deref for AlignedCowStr<'a> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl<'a> AsRef<str> for AlignedCowStr<'a> { + #[inline] + fn as_ref(&self) -> &str { + self.inner.as_ref() + } +} + +impl<'a> Archive for AlignedCowStr<'a> { + type Archived = ArchivedVec<u8>; + type Resolver = VecResolver; + + #[inline] + unsafe fn resolve(&self, pos: usize, resolver: Self::Resolver, out: *mut Self::Archived) { + ArchivedVec::resolve_from_len(self.inner.as_bytes().len(), pos, resolver, out); + } +} + +impl<'a, S: ScratchSpace + Serializer + ?Sized> Serialize<S> for AlignedCowStr<'a> { + #[inline] + fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> { + serializer.align(Self::ALIGNMENT)?; + unsafe { + ArchivedVec::<Archived<u8>>::serialize_copy_from_slice( + self.inner.as_bytes(), + serializer, + ) + } + } +} diff --git a/lib/journal/src/concrete/aligned_cow_vec.rs b/lib/journal/src/concrete/aligned_cow_vec.rs new file mode 100644 index 00000000000..49ff1bb93c7 --- /dev/null +++ b/lib/journal/src/concrete/aligned_cow_vec.rs @@ -0,0 +1,213 @@ +use std::{ + borrow::{Borrow, BorrowMut, Cow}, + fmt::{self, Pointer}, + ops::{Deref, DerefMut}, +}; + +use rkyv::{ + ser::{ScratchSpace, Serializer}, + vec::{ArchivedVec, VecResolver}, + Archive, Archived, Serialize, +}; + +/// An aligned COW vector of bytes which avoids copying data +/// when it's constructed.
The vector is aligned on the 16-byte +/// boundary +#[derive(Clone)] +pub struct AlignedCowVec<'a, T> +where + [T]: ToOwned, +{ + inner: Cow<'a, [T]>, +} + +impl<'a, T> AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, +{ + /// The alignment of the vector + pub const ALIGNMENT: usize = 16; + + pub fn into_inner(self) -> Cow<'a, [T]> { + self.inner + } + + #[inline] + pub fn as_slice(&self) -> &[T] { + self.inner.as_ref() + } + + pub fn len(&self) -> usize { + self.inner.len() + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn len_with_padding(&self) -> usize { + let mut ret = self.inner.len() * std::mem::size_of::<T>(); + let padding = ret % Self::ALIGNMENT; + if padding != 0 { + ret += Self::ALIGNMENT - padding; + } + ret + } +} + +impl<'a, T> Default for AlignedCowVec<'a, T> +where + T: 'a + Clone, + [T]: ToOwned, +{ + fn default() -> Self { + Self { + inner: Vec::new().into(), + } + } +} + +impl<'a, T> fmt::Debug for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_slice().fmt(f) + } +} + +impl<'a, T> From<Vec<T>> for AlignedCowVec<'a, T> +where + T: 'a + Clone, + [T]: ToOwned, +{ + fn from(value: Vec<T>) -> Self { + Self { + inner: value.into(), + } + } +} + +#[allow(clippy::from_over_into)] +impl<'a> Into<Vec<u8>> for AlignedCowVec<'a, u8> { + fn into(self) -> Vec<u8> { + self.inner.into_owned() + } } + +impl<'a, T> From<Cow<'a, [T]>> for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, +{ + fn from(value: Cow<'a, [T]>) -> Self { + Self { inner: value } + } +} + +#[allow(clippy::from_over_into)] +impl<'a, T> Into<Cow<'a, [T]>> for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, +{ + fn into(self) -> Cow<'a, [T]> { + self.inner + } +} + +impl<'a, T> Deref for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, +{ + type Target = [T]; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl<'a, T> DerefMut for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, + <[T] as ToOwned>::Owned: BorrowMut<[T]>, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + self.inner.to_mut().borrow_mut() + } +} + +impl<'a, T> AsMut<[T]> for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, + <[T] as ToOwned>::Owned: BorrowMut<[T]>, +{ + #[inline] + fn as_mut(&mut self) -> &mut [T] { + self.inner.to_mut().borrow_mut() + } +} + +impl<'a, T> AsRef<[T]> for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, +{ + #[inline] + fn as_ref(&self) -> &[T] { + self.inner.as_ref() + } +} + +impl<'a, T> Borrow<[T]> for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, +{ + #[inline] + fn borrow(&self) -> &[T] { + self.inner.borrow() + } +} + +impl<'a, T> BorrowMut<[T]> for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, + <[T] as ToOwned>::Owned: BorrowMut<[T]>, +{ + #[inline] + fn borrow_mut(&mut self) -> &mut [T] { + self.inner.to_mut().borrow_mut() + } +} + +impl<'a, T> Archive for AlignedCowVec<'a, T> +where + T: 'a, + [T]: ToOwned, +{ + type Archived = ArchivedVec<T>; + type Resolver = VecResolver; + + #[inline] + unsafe fn resolve(&self, pos: usize, resolver: Self::Resolver, out: *mut Self::Archived) { + ArchivedVec::resolve_from_len(self.len(), pos, resolver, out); + } +} + +impl<'a, S: ScratchSpace + Serializer + ?Sized> Serialize<S> for AlignedCowVec<'a, u8> { + #[inline] + fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> { + serializer.align(Self::ALIGNMENT)?; + unsafe { + ArchivedVec::<Archived<u8>>::serialize_copy_from_slice(self.as_slice(), serializer) + } + } +} diff --git a/lib/journal/src/concrete/arc.rs
b/lib/journal/src/concrete/arc.rs index fa7a84146d8..1790777215a 100644 --- a/lib/journal/src/concrete/arc.rs +++ b/lib/journal/src/concrete/arc.rs @@ -3,7 +3,7 @@ use std::ops::Deref; use std::sync::Arc; impl ReadableJournal for Arc { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.deref().read() } @@ -13,13 +13,13 @@ impl ReadableJournal for Arc { } impl WritableJournal for Arc { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.deref().write(entry) } } impl ReadableJournal for Arc { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.deref().read() } @@ -29,7 +29,7 @@ impl ReadableJournal for Arc { } impl WritableJournal for Arc { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.deref().write(entry) } } diff --git a/lib/journal/src/concrete/archived.rs b/lib/journal/src/concrete/archived.rs index ad0cbf98a88..6a800480389 100644 --- a/lib/journal/src/concrete/archived.rs +++ b/lib/journal/src/concrete/archived.rs @@ -83,6 +83,7 @@ pub enum JournalEntryRecordType { SocketSetOptTimeV1 = 57, SocketShutdownV1 = 58, SnapshotV1 = 59, + ClearEtherealV1 = 60, } impl JournalEntryRecordType { @@ -96,6 +97,11 @@ impl JournalEntryRecordType { JournalEntryRecordType::InitModuleV1 => ArchivedJournalEntry::InitModuleV1( rkyv::archived_root::(data), ), + JournalEntryRecordType::ClearEtherealV1 => { + ArchivedJournalEntry::ClearEtherealV1(rkyv::archived_root::< + JournalEntryClearEtherealV1, + >(data)) + } JournalEntryRecordType::ProcessExitV1 => ArchivedJournalEntry::ProcessExitV1( rkyv::archived_root::(data), ), @@ -342,6 +348,7 @@ impl<'a> JournalEntry<'a> { pub fn archive_record_type(&self) -> JournalEntryRecordType { match self { Self::InitModuleV1 { .. } => JournalEntryRecordType::InitModuleV1, + Self::ClearEtherealV1 { .. } => JournalEntryRecordType::ClearEtherealV1, Self::UpdateMemoryRegionV1 { .. } => JournalEntryRecordType::UpdateMemoryRegionV1, Self::ProcessExitV1 { .. } => JournalEntryRecordType::ProcessExitV1, Self::SetThreadV1 { .. 
} => JournalEntryRecordType::SetThreadV1, @@ -425,34 +432,27 @@ impl<'a> JournalEntry<'a> { pub fn serialize_archive( self, serializer: &mut T, - ) -> anyhow::Result<()> + ) -> anyhow::Result where T::Error: std::fmt::Display, { - let padding = |size: usize| { - let padding = size % 16; - let padding = match padding { - 0 => 0, - a => 16 - a, - }; - vec![0u8; padding] - }; - match self { + let amt = match self { JournalEntry::InitModuleV1 { wasm_hash } => { serializer.serialize_value(&JournalEntryInitModuleV1 { wasm_hash }) } + JournalEntry::ClearEtherealV1 => { + serializer.serialize_value(&JournalEntryClearEtherealV1 {}) + } JournalEntry::UpdateMemoryRegionV1 { region, data } => { serializer.serialize_value(&JournalEntryUpdateMemoryRegionV1 { start: region.start, end: region.end, - _padding: padding(data.len()), - compressed_data: compress_prepend_size(data.as_ref()), + compressed_data: compress_prepend_size(data.as_ref()).into(), }) } JournalEntry::ProcessExitV1 { exit_code } => { serializer.serialize_value(&JournalEntryProcessExitV1 { exit_code: exit_code.map(|e| e.into()), - _padding: 0, }) } JournalEntry::SetThreadV1 { @@ -461,12 +461,15 @@ impl<'a> JournalEntry<'a> { memory_stack, store_data, is_64bit, + start, + layout, } => serializer.serialize_value(&JournalEntrySetThreadV1 { id, - _padding: padding(call_stack.len() + memory_stack.len() + store_data.len()), - call_stack: call_stack.into_owned(), - memory_stack: memory_stack.into_owned(), - store_data: store_data.into_owned(), + call_stack: call_stack.into(), + memory_stack: memory_stack.into(), + store_data: store_data.into(), + start: start.into(), + layout: layout.into(), is_64bit, }), JournalEntry::CloseThreadV1 { id, exit_code } => { @@ -489,8 +492,7 @@ impl<'a> JournalEntry<'a> { } => serializer.serialize_value(&JournalEntryFileDescriptorWriteV1 { fd, offset, - _padding: padding(data.len()), - data: data.into_owned(), + data: data.into(), is_64bit, }), JournalEntry::SetClockTimeV1 { clock_id, time } => { @@ -500,7 +502,7 @@ impl<'a> JournalEntry<'a> { }) } JournalEntry::CloseFileDescriptorV1 { fd } => { - serializer.serialize_value(&JournalEntryCloseFileDescriptorV1 { fd, _padding: 0 }) + serializer.serialize_value(&JournalEntryCloseFileDescriptorV1 { fd }) } JournalEntry::OpenFileDescriptorV1 { fd, @@ -515,8 +517,7 @@ impl<'a> JournalEntry<'a> { fd, dirfd, dirflags, - _padding: padding(path.as_bytes().len()), - path: path.into_owned(), + path: path.into(), o_flags: o_flags.bits(), fs_rights_base: fs_rights_base.bits(), fs_rights_inheriting: fs_rights_inheriting.bits(), @@ -535,15 +536,13 @@ impl<'a> JournalEntry<'a> { JournalEntry::CreateDirectoryV1 { fd, path } => { serializer.serialize_value(&JournalEntryCreateDirectoryV1 { fd, - _padding: padding(path.as_bytes().len()), - path: path.into_owned(), + path: path.into(), }) } JournalEntry::RemoveDirectoryV1 { fd, path } => { serializer.serialize_value(&JournalEntryRemoveDirectoryV1 { fd, - _padding: padding(path.as_bytes().len()), - path: path.into_owned(), + path: path.into(), }) } JournalEntry::PathSetTimesV1 { @@ -556,8 +555,7 @@ impl<'a> JournalEntry<'a> { } => serializer.serialize_value(&JournalEntryPathSetTimesV1 { fd, flags, - _padding: padding(path.as_bytes().len()), - path: path.into_owned(), + path: path.into(), st_atim, st_mtim, fst_flags: fst_flags.bits(), @@ -612,27 +610,24 @@ impl<'a> JournalEntry<'a> { new_path, } => serializer.serialize_value(&JournalEntryCreateHardLinkV1 { old_fd, - _padding: padding(old_path.as_bytes().len() + new_path.as_bytes().len()), - 
old_path: old_path.into_owned(), + old_path: old_path.into(), old_flags, new_fd, - new_path: new_path.into_owned(), + new_path: new_path.into(), }), JournalEntry::CreateSymbolicLinkV1 { old_path, fd, new_path, } => serializer.serialize_value(&JournalEntryCreateSymbolicLinkV1 { - _padding: padding(old_path.as_bytes().len() + new_path.as_bytes().len()), - old_path: old_path.into_owned(), + old_path: old_path.into(), fd, - new_path: new_path.into_owned(), + new_path: new_path.into(), }), JournalEntry::UnlinkFileV1 { fd, path } => { serializer.serialize_value(&JournalEntryUnlinkFileV1 { fd, - _padding: padding(path.as_bytes().len()), - path: path.into_owned(), + path: path.into(), }) } JournalEntry::PathRenameV1 { @@ -642,18 +637,15 @@ impl<'a> JournalEntry<'a> { new_path, } => serializer.serialize_value(&JournalEntryPathRenameV1 { old_fd, - _padding: padding(old_path.as_bytes().len() + new_path.as_bytes().len()), - old_path: old_path.into_owned(), + old_path: old_path.into(), new_fd, - new_path: new_path.into_owned(), + new_path: new_path.into(), }), JournalEntry::ChangeDirectoryV1 { path } => { - serializer.serialize_value(&JournalEntryChangeDirectoryV1 { - path: path.into_owned(), - }) + serializer.serialize_value(&JournalEntryChangeDirectoryV1 { path: path.into() }) } JournalEntry::EpollCreateV1 { fd } => { - serializer.serialize_value(&JournalEntryEpollCreateV1 { fd, _padding: 0 }) + serializer.serialize_value(&JournalEntryEpollCreateV1 { fd }) } JournalEntry::EpollCtlV1 { epfd, @@ -698,19 +690,18 @@ impl<'a> JournalEntry<'a> { JournalEntry::PortDelAddrV1 { addr } => { serializer.serialize_value(&JournalEntryPortDelAddrV1 { addr }) } - JournalEntry::PortAddrClearV1 => return Ok(()), + JournalEntry::PortAddrClearV1 => serializer.serialize_value(&()), JournalEntry::PortBridgeV1 { network, token, security, } => serializer.serialize_value(&JournalEntryPortBridgeV1 { - _padding: padding(network.as_bytes().len() + token.as_bytes().len()), - network: network.into_owned(), - token: token.into_owned(), + network: network.into(), + token: token.into(), security: security.into(), }), - JournalEntry::PortUnbridgeV1 => return Ok(()), - JournalEntry::PortDhcpAcquireV1 => return Ok(()), + JournalEntry::PortUnbridgeV1 => serializer.serialize_value(&()), + JournalEntry::PortDhcpAcquireV1 => serializer.serialize_value(&()), JournalEntry::PortGatewaySetV1 { ip } => { serializer.serialize_value(&JournalEntryPortGatewaySetV1 { ip }) } @@ -725,7 +716,7 @@ impl<'a> JournalEntry<'a> { preferred_until, expires_at, }), - JournalEntry::PortRouteClearV1 => return Ok(()), + JournalEntry::PortRouteClearV1 => serializer.serialize_value(&()), JournalEntry::PortRouteDelV1 { ip } => { serializer.serialize_value(&JournalEntryPortRouteDelV1 { ip }) } @@ -743,18 +734,26 @@ impl<'a> JournalEntry<'a> { JournalEntry::SocketBindV1 { fd, addr } => { serializer.serialize_value(&JournalEntrySocketBindV1 { fd, addr }) } - JournalEntry::SocketConnectedV1 { fd, addr } => { - serializer.serialize_value(&JournalEntrySocketConnectedV1 { fd, addr }) - } + JournalEntry::SocketConnectedV1 { + fd, + local_addr, + peer_addr, + } => serializer.serialize_value(&JournalEntrySocketConnectedV1 { + fd, + local_addr, + peer_addr, + }), JournalEntry::SocketAcceptedV1 { listen_fd, fd, + local_addr: addr, peer_addr, fd_flags, non_blocking: nonblocking, } => serializer.serialize_value(&JournalEntrySocketAcceptedV1 { listen_fd, fd, + local_addr: addr, peer_addr, fd_flags: fd_flags.bits(), nonblocking, @@ -814,8 +813,7 @@ impl<'a> JournalEntry<'a> { 
is_64bit, } => serializer.serialize_value(&JournalEntrySocketSendToV1 { fd, - _padding: padding(data.len()), - data: data.into_owned(), + data: data.into(), flags, addr, is_64bit, @@ -827,8 +825,7 @@ impl<'a> JournalEntry<'a> { is_64bit, } => serializer.serialize_value(&JournalEntrySocketSendV1 { fd, - _padding: padding(data.len()), - data: data.into_owned(), + data: data.into(), flags, is_64bit, }), @@ -869,7 +866,7 @@ impl<'a> JournalEntry<'a> { } } .map_err(|err| anyhow::format_err!("failed to serialize journal record - {}", err))?; - Ok(()) + Ok(amt) } } @@ -888,31 +885,32 @@ pub(crate) struct JournalEntryHeader { pub enum ArchivedJournalEntry<'a> { InitModuleV1(&'a ArchivedJournalEntryInitModuleV1), + ClearEtherealV1(&'a ArchivedJournalEntryClearEtherealV1), ProcessExitV1(&'a ArchivedJournalEntryProcessExitV1), - SetThreadV1(&'a ArchivedJournalEntrySetThreadV1), + SetThreadV1(&'a ArchivedJournalEntrySetThreadV1<'a>), CloseThreadV1(&'a ArchivedJournalEntryCloseThreadV1), FileDescriptorSeekV1(&'a ArchivedJournalEntryFileDescriptorSeekV1), - FileDescriptorWriteV1(&'a ArchivedJournalEntryFileDescriptorWriteV1), - UpdateMemoryRegionV1(&'a ArchivedJournalEntryUpdateMemoryRegionV1), + FileDescriptorWriteV1(&'a ArchivedJournalEntryFileDescriptorWriteV1<'a>), + UpdateMemoryRegionV1(&'a ArchivedJournalEntryUpdateMemoryRegionV1<'a>), SetClockTimeV1(&'a ArchivedJournalEntrySetClockTimeV1), - OpenFileDescriptorV1(&'a ArchivedJournalEntryOpenFileDescriptorV1), + OpenFileDescriptorV1(&'a ArchivedJournalEntryOpenFileDescriptorV1<'a>), CloseFileDescriptorV1(&'a ArchivedJournalEntryCloseFileDescriptorV1), RenumberFileDescriptorV1(&'a ArchivedJournalEntryRenumberFileDescriptorV1), DuplicateFileDescriptorV1(&'a ArchivedJournalEntryDuplicateFileDescriptorV1), - CreateDirectoryV1(&'a ArchivedJournalEntryCreateDirectoryV1), - RemoveDirectoryV1(&'a ArchivedJournalEntryRemoveDirectoryV1), - PathSetTimesV1(&'a ArchivedJournalEntryPathSetTimesV1), + CreateDirectoryV1(&'a ArchivedJournalEntryCreateDirectoryV1<'a>), + RemoveDirectoryV1(&'a ArchivedJournalEntryRemoveDirectoryV1<'a>), + PathSetTimesV1(&'a ArchivedJournalEntryPathSetTimesV1<'a>), FileDescriptorSetTimesV1(&'a ArchivedJournalEntryFileDescriptorSetTimesV1), FileDescriptorSetSizeV1(&'a ArchivedJournalEntryFileDescriptorSetSizeV1), FileDescriptorSetFlagsV1(&'a ArchivedJournalEntryFileDescriptorSetFlagsV1), FileDescriptorSetRightsV1(&'a ArchivedJournalEntryFileDescriptorSetRightsV1), FileDescriptorAdviseV1(&'a ArchivedJournalEntryFileDescriptorAdviseV1), FileDescriptorAllocateV1(&'a ArchivedJournalEntryFileDescriptorAllocateV1), - CreateHardLinkV1(&'a ArchivedJournalEntryCreateHardLinkV1), - CreateSymbolicLinkV1(&'a ArchivedJournalEntryCreateSymbolicLinkV1), - UnlinkFileV1(&'a ArchivedJournalEntryUnlinkFileV1), - PathRenameV1(&'a ArchivedJournalEntryPathRenameV1), - ChangeDirectoryV1(&'a ArchivedJournalEntryChangeDirectoryV1), + CreateHardLinkV1(&'a ArchivedJournalEntryCreateHardLinkV1<'a>), + CreateSymbolicLinkV1(&'a ArchivedJournalEntryCreateSymbolicLinkV1<'a>), + UnlinkFileV1(&'a ArchivedJournalEntryUnlinkFileV1<'a>), + PathRenameV1(&'a ArchivedJournalEntryPathRenameV1<'a>), + ChangeDirectoryV1(&'a ArchivedJournalEntryChangeDirectoryV1<'a>), EpollCreateV1(&'a ArchivedJournalEntryEpollCreateV1), EpollCtlV1(&'a ArchivedJournalEntryEpollCtlV1), TtySetV1(&'a ArchivedJournalEntryTtySetV1), @@ -921,7 +919,7 @@ pub enum ArchivedJournalEntry<'a> { PortAddAddrV1(&'a ArchivedJournalEntryPortAddAddrV1), PortDelAddrV1(&'a ArchivedJournalEntryPortDelAddrV1), 
PortAddrClearV1, - PortBridgeV1(&'a ArchivedJournalEntryPortBridgeV1), + PortBridgeV1(&'a ArchivedJournalEntryPortBridgeV1<'a>), PortUnbridgeV1, PortDhcpAcquireV1, PortGatewaySetV1(&'a ArchivedJournalEntryPortGatewaySetV1), @@ -938,8 +936,8 @@ pub enum ArchivedJournalEntry<'a> { SocketLeaveIpv4MulticastV1(&'a ArchivedJournalEntrySocketLeaveIpv4MulticastV1), SocketLeaveIpv6MulticastV1(&'a ArchivedJournalEntrySocketLeaveIpv6MulticastV1), SocketSendFileV1(&'a ArchivedJournalEntrySocketSendFileV1), - SocketSendToV1(&'a ArchivedJournalEntrySocketSendToV1), - SocketSendV1(&'a ArchivedJournalEntrySocketSendV1), + SocketSendToV1(&'a ArchivedJournalEntrySocketSendToV1<'a>), + SocketSendV1(&'a ArchivedJournalEntrySocketSendV1<'a>), SocketSetOptFlagV1(&'a ArchivedJournalEntrySocketSetOptFlagV1), SocketSetOptSizeV1(&'a ArchivedJournalEntrySocketSetOptSizeV1), SocketSetOptTimeV1(&'a ArchivedJournalEntrySocketSetOptTimeV1), @@ -950,7 +948,7 @@ pub enum ArchivedJournalEntry<'a> { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryInitModuleV1 { pub wasm_hash: [u8; 8], } @@ -958,29 +956,35 @@ pub struct JournalEntryInitModuleV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryClearEtherealV1 {} + +#[repr(C)] +#[repr(align(8))] +#[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryProcessExitV1 { pub exit_code: Option, - pub _padding: u32, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntrySetThreadV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntrySetThreadV1<'a> { pub id: u32, - pub call_stack: Vec, - pub memory_stack: Vec, - pub store_data: Vec, - pub _padding: Vec, + pub call_stack: AlignedCowVec<'a, u8>, + pub memory_stack: AlignedCowVec<'a, u8>, + pub store_data: AlignedCowVec<'a, u8>, + pub start: JournalThreadStartTypeV1, + pub layout: JournalWasiMemoryLayout, pub is_64bit: bool, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryCloseThreadV1 { pub id: u32, pub exit_code: Option, @@ -989,40 +993,44 @@ pub struct JournalEntryCloseThreadV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryFileDescriptorSeekV1 { pub fd: u32, - pub offset: i64, pub whence: JournalWhenceV1, + pub offset: i64, } +/// WARNING!!!! Do not change this structure without updating +/// "/lib/cli/src/commands/journal/mount/fs.rs" +/// +/// The code over there assumes that the aligned vector is the +/// first item in the serialized entry #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryFileDescriptorWriteV1 { - pub fd: u32, +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryFileDescriptorWriteV1<'a> { + /// DO NOT MOVE! 
+ pub data: AlignedCowVec<'a, u8>, pub offset: u64, - pub data: Vec, - pub _padding: Vec, + pub fd: u32, pub is_64bit: bool, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryUpdateMemoryRegionV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryUpdateMemoryRegionV1<'a> { + pub compressed_data: AlignedCowVec<'a, u8>, pub start: u64, pub end: u64, - pub compressed_data: Vec, - pub _padding: Vec, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySetClockTimeV1 { pub clock_id: JournalSnapshot0ClockidV1, pub time: u64, @@ -1031,32 +1039,30 @@ pub struct JournalEntrySetClockTimeV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryOpenFileDescriptorV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryOpenFileDescriptorV1<'a> { pub fd: u32, pub dirfd: u32, pub dirflags: u32, - pub path: String, - pub _padding: Vec, + pub fs_flags: u16, pub o_flags: u16, pub fs_rights_base: u64, pub fs_rights_inheriting: u64, - pub fs_flags: u16, + pub path: AlignedCowStr<'a>, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryCloseFileDescriptorV1 { pub fd: u32, - pub _padding: u32, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryRenumberFileDescriptorV1 { pub old_fd: u32, pub new_fd: u32, @@ -1065,7 +1071,7 @@ pub struct JournalEntryRenumberFileDescriptorV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryDuplicateFileDescriptorV1 { pub original_fd: u32, pub copied_fd: u32, @@ -1074,32 +1080,29 @@ pub struct JournalEntryDuplicateFileDescriptorV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryCreateDirectoryV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryCreateDirectoryV1<'a> { pub fd: u32, - pub path: String, - pub _padding: Vec, + pub path: AlignedCowStr<'a>, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryRemoveDirectoryV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryRemoveDirectoryV1<'a> { pub fd: u32, - pub path: String, - pub _padding: Vec, + pub path: AlignedCowStr<'a>, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryPathSetTimesV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryPathSetTimesV1<'a> { pub fd: u32, pub flags: u32, - pub path: String, - pub _padding: Vec, + pub path: AlignedCowStr<'a>, pub st_atim: u64, pub st_mtim: u64, pub fst_flags: u16, @@ -1108,18 +1111,18 @@ pub struct 
JournalEntryPathSetTimesV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryFileDescriptorSetTimesV1 { pub fd: u32, + pub fst_flags: u16, pub st_atim: u64, pub st_mtim: u64, - pub fst_flags: u16, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryFileDescriptorSetSizeV1 { pub fd: u32, pub st_size: u64, @@ -1128,7 +1131,7 @@ pub struct JournalEntryFileDescriptorSetSizeV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryFileDescriptorSetFlagsV1 { pub fd: u32, pub flags: u16, @@ -1137,7 +1140,7 @@ pub struct JournalEntryFileDescriptorSetFlagsV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryFileDescriptorSetRightsV1 { pub fd: u32, pub fs_rights_base: u64, @@ -1147,7 +1150,7 @@ pub struct JournalEntryFileDescriptorSetRightsV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryFileDescriptorAdviseV1 { pub fd: u32, pub offset: u64, @@ -1158,7 +1161,7 @@ pub struct JournalEntryFileDescriptorAdviseV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryFileDescriptorAllocateV1 { pub fd: u32, pub offset: u64, @@ -1168,70 +1171,65 @@ pub struct JournalEntryFileDescriptorAllocateV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryCreateHardLinkV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryCreateHardLinkV1<'a> { pub old_fd: u32, - pub old_path: String, + pub old_path: AlignedCowStr<'a>, pub old_flags: u32, pub new_fd: u32, - pub new_path: String, - pub _padding: Vec, + pub new_path: AlignedCowStr<'a>, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryCreateSymbolicLinkV1 { - pub old_path: String, +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryCreateSymbolicLinkV1<'a> { pub fd: u32, - pub new_path: String, - pub _padding: Vec, + pub old_path: AlignedCowStr<'a>, + pub new_path: AlignedCowStr<'a>, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryUnlinkFileV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryUnlinkFileV1<'a> { pub fd: u32, - pub path: String, - pub _padding: Vec, + pub path: AlignedCowStr<'a>, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryPathRenameV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct 
JournalEntryPathRenameV1<'a> { pub old_fd: u32, - pub old_path: String, + pub old_path: AlignedCowStr<'a>, pub new_fd: u32, - pub new_path: String, - pub _padding: Vec, + pub new_path: AlignedCowStr<'a>, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryChangeDirectoryV1 { - pub path: String, +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryChangeDirectoryV1<'a> { + pub path: AlignedCowStr<'a>, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryEpollCreateV1 { pub fd: u32, - pub _padding: u32, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryEpollCtlV1 { pub epfd: u32, pub op: JournalEpollCtlV1, @@ -1242,7 +1240,7 @@ pub struct JournalEntryEpollCtlV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryTtySetV1 { pub cols: u32, pub rows: u32, @@ -1259,7 +1257,7 @@ pub struct JournalEntryTtySetV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryCreatePipeV1 { pub fd1: u32, pub fd2: u32, @@ -1268,7 +1266,7 @@ pub struct JournalEntryCreatePipeV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryCreateEventV1 { pub initial_val: u64, pub flags: u16, @@ -1278,7 +1276,7 @@ pub struct JournalEntryCreateEventV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryPortAddAddrV1 { pub cidr: JournalIpCidrV1, } @@ -1286,7 +1284,7 @@ pub struct JournalEntryPortAddAddrV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryPortDelAddrV1 { pub addr: IpAddr, } @@ -1294,18 +1292,17 @@ pub struct JournalEntryPortDelAddrV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntryPortBridgeV1 { - pub network: String, - pub token: String, - pub _padding: Vec, +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntryPortBridgeV1<'a> { + pub network: AlignedCowStr<'a>, + pub token: AlignedCowStr<'a>, pub security: JournalStreamSecurityV1, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryPortGatewaySetV1 { pub ip: IpAddr, } @@ -1313,7 +1310,7 @@ pub struct JournalEntryPortGatewaySetV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] 
+#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryPortRouteAddV1 { pub cidr: JournalIpCidrV1, pub via_router: IpAddr, @@ -1324,7 +1321,7 @@ pub struct JournalEntryPortRouteAddV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntryPortRouteDelV1 { pub ip: IpAddr, } @@ -1332,7 +1329,7 @@ pub struct JournalEntryPortRouteDelV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketOpenV1 { pub af: JournalAddressfamilyV1, pub ty: JournalSocktypeV1, @@ -1343,7 +1340,7 @@ pub struct JournalEntrySocketOpenV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketListenV1 { pub fd: u32, pub backlog: u32, @@ -1352,7 +1349,7 @@ pub struct JournalEntrySocketListenV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketBindV1 { pub fd: u32, pub addr: SocketAddr, @@ -1361,19 +1358,21 @@ pub struct JournalEntrySocketBindV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketConnectedV1 { pub fd: u32, - pub addr: SocketAddr, + pub local_addr: SocketAddr, + pub peer_addr: SocketAddr, } #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketAcceptedV1 { pub listen_fd: u32, pub fd: u32, + pub local_addr: SocketAddr, pub peer_addr: SocketAddr, pub fd_flags: u16, pub nonblocking: bool, @@ -1382,7 +1381,7 @@ pub struct JournalEntrySocketAcceptedV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketJoinIpv4MulticastV1 { pub fd: u32, pub multiaddr: Ipv4Addr, @@ -1392,7 +1391,7 @@ pub struct JournalEntrySocketJoinIpv4MulticastV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketJoinIpv6MulticastV1 { pub fd: u32, pub multiaddr: Ipv6Addr, @@ -1402,7 +1401,7 @@ pub struct JournalEntrySocketJoinIpv6MulticastV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketLeaveIpv4MulticastV1 { pub fd: u32, pub multiaddr: Ipv4Addr, @@ -1412,7 +1411,7 @@ pub struct JournalEntrySocketLeaveIpv4MulticastV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketLeaveIpv6MulticastV1 { pub fd: u32, pub multiaddr: Ipv6Addr, @@ 
-1422,7 +1421,7 @@ pub struct JournalEntrySocketLeaveIpv6MulticastV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketSendFileV1 { pub socket_fd: u32, pub file_fd: u32, @@ -1433,11 +1432,10 @@ pub struct JournalEntrySocketSendFileV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntrySocketSendToV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntrySocketSendToV1<'a> { pub fd: u32, - pub data: Vec, - pub _padding: Vec, + pub data: AlignedCowVec<'a, u8>, pub flags: u16, pub addr: SocketAddr, pub is_64bit: bool, @@ -1446,11 +1444,10 @@ pub struct JournalEntrySocketSendToV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] -pub struct JournalEntrySocketSendV1 { +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct JournalEntrySocketSendV1<'a> { pub fd: u32, - pub data: Vec, - pub _padding: Vec, + pub data: AlignedCowVec<'a, u8>, pub flags: u16, pub is_64bit: bool, } @@ -1458,7 +1455,7 @@ pub struct JournalEntrySocketSendV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketSetOptFlagV1 { pub fd: u32, pub opt: JournalSockoptionV1, @@ -1468,7 +1465,7 @@ pub struct JournalEntrySocketSetOptFlagV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketSetOptSizeV1 { pub fd: u32, pub opt: JournalSockoptionV1, @@ -1478,7 +1475,7 @@ pub struct JournalEntrySocketSetOptSizeV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketSetOptTimeV1 { pub fd: u32, pub ty: JournalTimeTypeV1, @@ -1488,7 +1485,7 @@ pub struct JournalEntrySocketSetOptTimeV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySocketShutdownV1 { pub fd: u32, pub how: JournalSocketShutdownV1, @@ -1497,7 +1494,7 @@ pub struct JournalEntrySocketShutdownV1 { #[repr(C)] #[repr(align(8))] #[derive(Debug, Clone, RkyvSerialize, RkyvDeserialize, Archive)] -#[archive_attr(derive(CheckBytes))] +#[archive_attr(derive(CheckBytes), repr(align(8)))] pub struct JournalEntrySnapshotV1 { pub since_epoch: Duration, pub trigger: JournalSnapshotTriggerV1, @@ -1773,3 +1770,35 @@ pub enum JournalSocketShutdownV1 { Write, Both, } + +#[repr(C)] +#[repr(align(8))] +#[derive( + Debug, + Clone, + Copy, + RkyvSerialize, + RkyvDeserialize, + Archive, + PartialOrd, + Ord, + PartialEq, + Eq, + Hash, +)] +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub enum JournalThreadStartTypeV1 { + MainThread, + ThreadSpawn { start_ptr: u64 }, +} + +#[repr(C)] +#[repr(align(8))] +#[derive(Debug, Clone, Copy, RkyvSerialize, RkyvDeserialize, Archive, PartialEq, Eq, Hash)] +#[archive_attr(derive(CheckBytes), repr(align(8)))] +pub struct 
JournalWasiMemoryLayout { + pub stack_upper: u64, + pub stack_lower: u64, + pub guard_size: u64, + pub stack_size: u64, +} diff --git a/lib/journal/src/concrete/archived_from.rs b/lib/journal/src/concrete/archived_from.rs index b37e53f3c26..1e76643a54f 100644 --- a/lib/journal/src/concrete/archived_from.rs +++ b/lib/journal/src/concrete/archived_from.rs @@ -2,6 +2,7 @@ use lz4_flex::block::decompress_size_prepended; use std::borrow::Cow; use std::time::SystemTime; use wasmer_wasix_types::wasi; +use wasmer_wasix_types::wasix::{ThreadStartType, WasiMemoryLayout}; use super::*; @@ -176,6 +177,7 @@ impl From for JournalSnapshotTriggerV1 { SnapshotTrigger::FirstListen => JournalSnapshotTriggerV1::Listen, SnapshotTrigger::FirstEnviron => JournalSnapshotTriggerV1::Environ, SnapshotTrigger::FirstStdin => JournalSnapshotTriggerV1::Stdin, + SnapshotTrigger::FirstSigint => JournalSnapshotTriggerV1::Sigint, SnapshotTrigger::PeriodicInterval => JournalSnapshotTriggerV1::Timer, SnapshotTrigger::Sigint => JournalSnapshotTriggerV1::Sigint, SnapshotTrigger::Sigalrm => JournalSnapshotTriggerV1::Sigalrm, @@ -569,6 +571,74 @@ impl From<&'_ ArchivedJournalSocketShutdownV1> for SocketShutdownHow { } } +impl From for ThreadStartType { + fn from(value: JournalThreadStartTypeV1) -> Self { + match value { + JournalThreadStartTypeV1::MainThread => ThreadStartType::MainThread, + JournalThreadStartTypeV1::ThreadSpawn { start_ptr } => { + ThreadStartType::ThreadSpawn { start_ptr } + } + } + } +} + +impl From<&'_ ArchivedJournalThreadStartTypeV1> for ThreadStartType { + fn from(value: &'_ ArchivedJournalThreadStartTypeV1) -> Self { + match value { + ArchivedJournalThreadStartTypeV1::MainThread => ThreadStartType::MainThread, + ArchivedJournalThreadStartTypeV1::ThreadSpawn { start_ptr } => { + ThreadStartType::ThreadSpawn { + start_ptr: *start_ptr, + } + } + } + } +} + +impl From for JournalThreadStartTypeV1 { + fn from(value: ThreadStartType) -> Self { + match value { + ThreadStartType::MainThread => JournalThreadStartTypeV1::MainThread, + ThreadStartType::ThreadSpawn { start_ptr } => { + JournalThreadStartTypeV1::ThreadSpawn { start_ptr } + } + } + } +} + +impl From for WasiMemoryLayout { + fn from(value: JournalWasiMemoryLayout) -> Self { + Self { + stack_upper: value.stack_upper, + stack_lower: value.stack_lower, + guard_size: value.guard_size, + stack_size: value.stack_size, + } + } +} + +impl From<&'_ ArchivedJournalWasiMemoryLayout> for WasiMemoryLayout { + fn from(value: &'_ ArchivedJournalWasiMemoryLayout) -> Self { + Self { + stack_upper: value.stack_upper, + stack_lower: value.stack_lower, + guard_size: value.guard_size, + stack_size: value.stack_size, + } + } +} + +impl From for JournalWasiMemoryLayout { + fn from(value: WasiMemoryLayout) -> Self { + Self { + stack_upper: value.stack_upper, + stack_lower: value.stack_lower, + guard_size: value.guard_size, + stack_size: value.stack_size, + } + } +} + impl<'a> TryFrom> for JournalEntry<'a> { type Error = anyhow::Error; @@ -579,12 +649,14 @@ impl<'a> TryFrom> for JournalEntry<'a> { wasm_hash: *wasm_hash, } } + ArchivedJournalEntry::ClearEtherealV1(ArchivedJournalEntryClearEtherealV1 { + .. 
+ }) => Self::ClearEtherealV1, ArchivedJournalEntry::UpdateMemoryRegionV1( ArchivedJournalEntryUpdateMemoryRegionV1 { start, end, compressed_data, - _padding: _, }, ) => Self::UpdateMemoryRegionV1 { region: (*start)..(*end), @@ -592,7 +664,6 @@ impl<'a> TryFrom> for JournalEntry<'a> { }, ArchivedJournalEntry::ProcessExitV1(ArchivedJournalEntryProcessExitV1 { exit_code, - _padding: _, }) => Self::ProcessExitV1 { exit_code: exit_code.as_ref().map(|code| code.into()), }, @@ -601,13 +672,16 @@ impl<'a> TryFrom> for JournalEntry<'a> { call_stack, memory_stack, store_data, - _padding: _, is_64bit, + start, + layout, }) => Self::SetThreadV1 { id: *id, call_stack: call_stack.as_ref().into(), memory_stack: memory_stack.as_ref().into(), store_data: store_data.as_ref().into(), + start: start.into(), + layout: layout.into(), is_64bit: *is_64bit, }, ArchivedJournalEntry::CloseThreadV1(ArchivedJournalEntryCloseThreadV1 { @@ -623,7 +697,6 @@ impl<'a> TryFrom> for JournalEntry<'a> { fd, offset, is_64bit, - _padding: _, }, ) => Self::FileDescriptorWriteV1 { data: data.as_ref().into(), @@ -652,48 +725,43 @@ impl<'a> TryFrom> for JournalEntry<'a> { fs_rights_base, fs_rights_inheriting, fs_flags, - _padding: _, }, ) => Self::OpenFileDescriptorV1 { fd: *fd, dirfd: *dirfd, dirflags: *dirflags, - path: path.as_ref().into(), + path: String::from_utf8_lossy(path.as_ref()), o_flags: wasi::Oflags::from_bits_truncate(*o_flags), fs_rights_base: wasi::Rights::from_bits_truncate(*fs_rights_base), fs_rights_inheriting: wasi::Rights::from_bits_truncate(*fs_rights_inheriting), fs_flags: wasi::Fdflags::from_bits_truncate(*fs_flags), }, ArchivedJournalEntry::CloseFileDescriptorV1( - ArchivedJournalEntryCloseFileDescriptorV1 { fd, _padding: _ }, + ArchivedJournalEntryCloseFileDescriptorV1 { fd }, ) => Self::CloseFileDescriptorV1 { fd: *fd }, ArchivedJournalEntry::RemoveDirectoryV1(ArchivedJournalEntryRemoveDirectoryV1 { fd, path, - _padding: _, }) => Self::RemoveDirectoryV1 { fd: *fd, - path: path.as_ref().into(), - }, - ArchivedJournalEntry::UnlinkFileV1(ArchivedJournalEntryUnlinkFileV1 { - fd, - path, - _padding: _, - }) => Self::UnlinkFileV1 { - fd: *fd, - path: path.as_ref().into(), + path: String::from_utf8_lossy(path.as_ref()), }, + ArchivedJournalEntry::UnlinkFileV1(ArchivedJournalEntryUnlinkFileV1 { fd, path }) => { + Self::UnlinkFileV1 { + fd: *fd, + path: String::from_utf8_lossy(path.as_ref()), + } + } ArchivedJournalEntry::PathRenameV1(ArchivedJournalEntryPathRenameV1 { old_fd, old_path, new_fd, new_path, - _padding: _, }) => Self::PathRenameV1 { old_fd: *old_fd, - old_path: old_path.as_ref().into(), + old_path: String::from_utf8_lossy(old_path.as_ref()), new_fd: *new_fd, - new_path: new_path.as_ref().into(), + new_path: String::from_utf8_lossy(new_path.as_ref()), }, ArchivedJournalEntry::SnapshotV1(ArchivedJournalEntrySnapshotV1 { since_epoch, @@ -729,10 +797,9 @@ impl<'a> TryFrom> for JournalEntry<'a> { ArchivedJournalEntry::CreateDirectoryV1(ArchivedJournalEntryCreateDirectoryV1 { fd, path, - _padding: _, }) => Self::CreateDirectoryV1 { fd: *fd, - path: path.as_ref().into(), + path: String::from_utf8_lossy(path.as_ref()), }, ArchivedJournalEntry::PathSetTimesV1(ArchivedJournalEntryPathSetTimesV1 { fd, @@ -741,10 +808,9 @@ impl<'a> TryFrom> for JournalEntry<'a> { st_atim, st_mtim, fst_flags, - _padding: _, }) => Self::PathSetTimesV1 { fd: *fd, - path: path.as_ref().into(), + path: String::from_utf8_lossy(path.as_ref()), flags: *flags, st_atim: *st_atim, st_mtim: *st_mtim, @@ -812,35 +878,32 @@ impl<'a> TryFrom> 
for JournalEntry<'a> { old_flags, new_fd, new_path, - _padding: _, }) => Self::CreateHardLinkV1 { old_fd: *old_fd, - old_path: old_path.as_ref().into(), + old_path: String::from_utf8_lossy(old_path.as_ref()), old_flags: *old_flags, new_fd: *new_fd, - new_path: new_path.as_ref().into(), + new_path: String::from_utf8_lossy(new_path.as_ref()), }, ArchivedJournalEntry::CreateSymbolicLinkV1( ArchivedJournalEntryCreateSymbolicLinkV1 { old_path, fd, new_path, - _padding: _, }, ) => Self::CreateSymbolicLinkV1 { - old_path: old_path.as_ref().into(), + old_path: String::from_utf8_lossy(old_path.as_ref()), fd: *fd, - new_path: new_path.as_ref().into(), + new_path: String::from_utf8_lossy(new_path.as_ref()), }, ArchivedJournalEntry::ChangeDirectoryV1(ArchivedJournalEntryChangeDirectoryV1 { path, }) => Self::ChangeDirectoryV1 { - path: path.as_ref().into(), + path: String::from_utf8_lossy(path.as_ref()), }, - ArchivedJournalEntry::EpollCreateV1(ArchivedJournalEntryEpollCreateV1 { - fd, - _padding: _, - }) => Self::EpollCreateV1 { fd: *fd }, + ArchivedJournalEntry::EpollCreateV1(ArchivedJournalEntryEpollCreateV1 { fd }) => { + Self::EpollCreateV1 { fd: *fd } + } ArchivedJournalEntry::EpollCtlV1(ArchivedJournalEntryEpollCtlV1 { epfd, ref op, @@ -902,10 +965,9 @@ impl<'a> TryFrom> for JournalEntry<'a> { network, token, ref security, - _padding: _, }) => Self::PortBridgeV1 { - network: network.as_ref().into(), - token: token.as_ref().into(), + network: String::from_utf8_lossy(network.as_ref()), + token: String::from_utf8_lossy(token.as_ref()), security: security.into(), }, ArchivedJournalEntry::PortUnbridgeV1 => Self::PortUnbridgeV1, @@ -960,20 +1022,24 @@ impl<'a> TryFrom> for JournalEntry<'a> { } ArchivedJournalEntry::SocketConnectedV1(ArchivedJournalEntrySocketConnectedV1 { fd, - addr, + local_addr, + peer_addr, }) => Self::SocketConnectedV1 { fd: *fd, - addr: addr.as_socket_addr(), + local_addr: local_addr.as_socket_addr(), + peer_addr: peer_addr.as_socket_addr(), }, ArchivedJournalEntry::SocketAcceptedV1(ArchivedJournalEntrySocketAcceptedV1 { listen_fd, fd, + local_addr, peer_addr, fd_flags, nonblocking, }) => Self::SocketAcceptedV1 { listen_fd: *listen_fd, fd: *fd, + local_addr: local_addr.as_socket_addr(), peer_addr: peer_addr.as_socket_addr(), fd_flags: wasi::Fdflags::from_bits_truncate(*fd_flags), non_blocking: *nonblocking, @@ -1039,7 +1105,6 @@ impl<'a> TryFrom> for JournalEntry<'a> { flags, addr, is_64bit, - _padding: _, }) => Self::SocketSendToV1 { fd: *fd, data: data.as_ref().into(), @@ -1052,7 +1117,6 @@ impl<'a> TryFrom> for JournalEntry<'a> { data, flags, is_64bit, - _padding: _, }) => Self::SocketSendV1 { fd: *fd, data: data.as_ref().into(), diff --git a/lib/journal/src/concrete/boxed.rs b/lib/journal/src/concrete/boxed.rs index 75cb3de356a..9db8cdba87d 100644 --- a/lib/journal/src/concrete/boxed.rs +++ b/lib/journal/src/concrete/boxed.rs @@ -3,7 +3,7 @@ use std::ops::Deref; use super::*; impl ReadableJournal for Box { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.deref().read() } @@ -13,13 +13,13 @@ impl ReadableJournal for Box { } impl WritableJournal for Box { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.deref().write(entry) } } impl ReadableJournal for Box { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.deref().read() } @@ -29,7 +29,7 @@ impl ReadableJournal for Box { } impl WritableJournal for Box { - fn 
write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.deref().write(entry) } } diff --git a/lib/journal/src/concrete/buffered.rs b/lib/journal/src/concrete/buffered.rs index 67b16631120..c5011c417c0 100644 --- a/lib/journal/src/concrete/buffered.rs +++ b/lib/journal/src/concrete/buffered.rs @@ -40,23 +40,32 @@ impl Default for BufferedJournal { } impl WritableJournal for BufferedJournalTx { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { let entry = entry.into_owned(); let state = self.state.lock().unwrap(); let estimate_size = entry.estimate_size(); state.records.lock().unwrap().push(entry); - Ok(estimate_size as u64) + Ok(LogWriteResult { + record_start: state.offset as u64, + record_end: state.offset as u64 + estimate_size as u64, + }) } } impl ReadableJournal for BufferedJournalRx { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { let mut state = self.state.lock().unwrap(); let ret = state.records.lock().unwrap().get(state.offset).cloned(); + + let record_start = state.offset as u64; if ret.is_some() { state.offset += 1; } - Ok(ret) + Ok(ret.map(|r| LogReadResult { + record_start, + record_end: state.offset as u64, + record: r, + })) } fn as_restarted(&self) -> anyhow::Result> { @@ -69,13 +78,13 @@ impl ReadableJournal for BufferedJournalRx { } impl WritableJournal for BufferedJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.tx.write(entry) } } impl ReadableJournal for BufferedJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.rx.read() } diff --git a/lib/journal/src/concrete/compacting.rs b/lib/journal/src/concrete/compacting.rs index 27a94adb7f1..1a002e0bc45 100644 --- a/lib/journal/src/concrete/compacting.rs +++ b/lib/journal/src/concrete/compacting.rs @@ -252,9 +252,9 @@ impl CompactingJournalTx { // Read all the events and feed them into the filtered journal and then // strip off the filter so that its a normal journal again while let Some(entry) = replay_rx.read()? { - let amt = new_journal.write(entry)?; - if amt > 0 { - result.total_size += amt; + let res = new_journal.write(entry.into_inner())?; + if res.record_size() > 0 { + result.total_size += res.record_size(); result.total_events += 1; } } @@ -280,7 +280,7 @@ impl CompactingJournalTx { // extra events are added we strip off the filter again let replay_rx = state.inner_rx.as_restarted()?; while let Some(entry) = replay_rx.read()? 
{ - new_journal.write(entry)?; + new_journal.write(entry.into_inner())?; } let new_journal = new_journal.into_inner(); @@ -301,7 +301,7 @@ impl CompactingJournalTx { } impl WritableJournal for CompactingJournalTx { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { let mut state = self.state.lock().unwrap(); let event_index = state.event_index; state.event_index += 1; @@ -511,7 +511,7 @@ impl CompactingJournal { } impl ReadableJournal for CompactingJournalRx { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.inner.read() } @@ -521,13 +521,13 @@ impl ReadableJournal for CompactingJournalRx { } impl WritableJournal for CompactingJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.tx.write(entry) } } impl ReadableJournal for CompactingJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.rx.read() } diff --git a/lib/journal/src/concrete/compacting_log_file.rs b/lib/journal/src/concrete/compacting_log_file.rs index 1face3b2ecd..7bc33026ce2 100644 --- a/lib/journal/src/concrete/compacting_log_file.rs +++ b/lib/journal/src/concrete/compacting_log_file.rs @@ -189,7 +189,7 @@ impl Drop for CompactingLogFileJournalTx { } impl ReadableJournal for CompactingLogFileJournalRx { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.inner.read() } @@ -199,14 +199,14 @@ impl ReadableJournal for CompactingLogFileJournalRx { } impl WritableJournal for CompactingLogFileJournalTx { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { - let amt = self.inner.write(entry)?; + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + let res = self.inner.write(entry)?; let triggered = { let mut state = self.state.lock().unwrap(); - if amt > 0 { + if res.record_size() > 0 { state.cnt_records += 1; - state.cnt_size += amt; + state.cnt_size += res.record_size(); } let mut triggered = false; @@ -235,12 +235,12 @@ impl WritableJournal for CompactingLogFileJournalTx { self.compact_now()?; } - Ok(amt) + Ok(res) } } impl ReadableJournal for CompactingLogFileJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.rx.read() } @@ -250,7 +250,7 @@ impl ReadableJournal for CompactingLogFileJournal { } impl WritableJournal for CompactingLogFileJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.tx.write(entry) } } diff --git a/lib/journal/src/concrete/counting.rs b/lib/journal/src/concrete/counting.rs index ddeda96a5a7..e7c32c038d1 100644 --- a/lib/journal/src/concrete/counting.rs +++ b/lib/journal/src/concrete/counting.rs @@ -23,7 +23,7 @@ impl CountingJournal { } impl ReadableJournal for CountingJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { Ok(None) } @@ -33,11 +33,14 @@ impl ReadableJournal for CountingJournal { } impl WritableJournal for CountingJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { let size = entry.estimate_size() as u64; - self.n_cnt.fetch_add(1, Ordering::SeqCst); + let offset = self.n_cnt.fetch_add(1, Ordering::SeqCst); self.n_size.fetch_add(size, Ordering::SeqCst); - Ok(size) + Ok(LogWriteResult { + record_start: 
offset as u64, + record_end: offset as u64 + size, + }) } } diff --git a/lib/journal/src/concrete/filter.rs b/lib/journal/src/concrete/filter.rs index 32a8ee3ce7c..f0bef520119 100644 --- a/lib/journal/src/concrete/filter.rs +++ b/lib/journal/src/concrete/filter.rs @@ -207,11 +207,14 @@ impl FilteredJournal { } impl WritableJournal for FilteredJournalTx { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { let event_index = self.config.event_index.fetch_add(1, Ordering::SeqCst); if let Some(events) = self.config.filter_events.as_ref() { if !events.contains(&event_index) { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } } @@ -223,19 +226,29 @@ impl WritableJournal for FilteredJournalTx { | JournalEntry::EpollCtlV1 { .. } | JournalEntry::TtySetV1 { .. } => { if self.config.filter_core { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } entry } + JournalEntry::ClearEtherealV1 => entry, JournalEntry::SetThreadV1 { .. } | JournalEntry::CloseThreadV1 { .. } => { if self.config.filter_threads { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } entry } JournalEntry::UpdateMemoryRegionV1 { .. } => { if self.config.filter_memory { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } entry } @@ -254,10 +267,16 @@ impl WritableJournal for FilteredJournalTx { | JournalEntry::FileDescriptorSetTimesV1 { fd, .. } | JournalEntry::FileDescriptorSetSizeV1 { fd, .. } => { if self.config.filter_stdio && fd <= 2 { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } if self.config.filter_fs { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } entry } @@ -272,13 +291,19 @@ impl WritableJournal for FilteredJournalTx { | JournalEntry::CreatePipeV1 { .. } | JournalEntry::CreateEventV1 { .. } => { if self.config.filter_fs { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } entry } JournalEntry::SnapshotV1 { .. } => { if self.config.filter_snapshots { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } entry } @@ -309,7 +334,10 @@ impl WritableJournal for FilteredJournalTx { | JournalEntry::SocketSetOptTimeV1 { .. } | JournalEntry::SocketShutdownV1 { .. 
} => { if self.config.filter_net { - return Ok(0); + return Ok(LogWriteResult { + record_start: 0, + record_end: 0, + }); } entry } @@ -319,7 +347,7 @@ impl WritableJournal for FilteredJournalTx { } impl ReadableJournal for FilteredJournalRx { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.inner.read() } @@ -331,13 +359,13 @@ impl ReadableJournal for FilteredJournalRx { } impl WritableJournal for FilteredJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.tx.write(entry) } } impl ReadableJournal for FilteredJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.rx.read() } diff --git a/lib/journal/src/concrete/log_file.rs b/lib/journal/src/concrete/log_file.rs index a254b7e7e11..19a7d7ea494 100644 --- a/lib/journal/src/concrete/log_file.rs +++ b/lib/journal/src/concrete/log_file.rs @@ -1,6 +1,7 @@ use bytes::Buf; -use rkyv::ser::serializers::{ - AllocScratch, CompositeSerializer, SharedSerializeMap, WriteSerializer, +use rkyv::ser::{ + serializers::{AllocScratch, CompositeSerializer, SharedSerializeMap, WriteSerializer}, + Serializer, }; use shared_buffer::OwnedBuffer; use std::{ @@ -9,6 +10,7 @@ use std::{ path::Path, sync::{Arc, Mutex}, }; +use virtual_fs::mem_fs::OffloadBackingStore; use super::*; @@ -23,6 +25,7 @@ use super::*; /// /// The logfile snapshot capturer uses a 64bit number as a entry encoding /// delimiter. +#[derive(Debug)] pub struct LogFileJournal { tx: LogFileJournalTx, rx: LogFileJournalRx, @@ -44,6 +47,17 @@ pub struct LogFileJournalRx { tx: LogFileJournalTx, buffer_pos: Mutex, buffer: OwnedBuffer, + store: OffloadBackingStore, +} + +impl LogFileJournalRx { + pub fn owned_buffer(&self) -> OwnedBuffer { + self.store.owned_buffer().clone() + } + + pub fn backing_store(&self) -> OffloadBackingStore { + self.store.clone() + } } impl LogFileJournalTx { @@ -51,7 +65,8 @@ impl LogFileJournalTx { let state = self.state.lock().unwrap(); let file = state.file.try_clone()?; - let buffer = OwnedBuffer::from_file(&file)?; + let store = OffloadBackingStore::from_file(&file); + let buffer = store.owned_buffer(); // If the buffer exists we valid the magic number let mut buffer_pos = 0; @@ -75,6 +90,7 @@ impl LogFileJournalTx { tx: self.clone(), buffer_pos: Mutex::new(buffer_pos), buffer, + store, }) } } @@ -89,21 +105,32 @@ impl LogFileJournal { Self::from_file(file) } + pub fn owned_buffer(&self) -> OwnedBuffer { + self.rx.owned_buffer() + } + + pub fn backing_store(&self) -> OffloadBackingStore { + self.rx.backing_store() + } + pub fn from_file(mut file: std::fs::File) -> anyhow::Result { // Move to the end of the file and write the // magic if one is needed - if file.seek(SeekFrom::End(0)).unwrap() == 0 { + let underlying_file = file.try_clone()?; + let end_pos = file.seek(SeekFrom::End(0))?; + let mut serializer = WriteSerializer::with_pos(file, end_pos as usize); + if serializer.pos() == 0 { let magic = JOURNAL_MAGIC_NUMBER; let magic = magic.to_be_bytes(); - file.write_all(&magic)?; + serializer.write(&magic)?; } // Create the tx let tx = LogFileJournalTx { state: Arc::new(Mutex::new(TxState { - file: file.try_clone()?, + file: underlying_file, serializer: CompositeSerializer::new( - WriteSerializer::new(file), + serializer, AllocScratch::default(), SharedSerializeMap::default(), ), @@ -118,47 +145,47 @@ impl LogFileJournal { } impl WritableJournal for LogFileJournalTx { - fn write<'a>(&'a self, entry: 
JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { tracing::debug!("journal event: {:?}", entry); let mut state = self.state.lock().unwrap(); // Write the header (with a record size of zero) let record_type: JournalEntryRecordType = entry.archive_record_type(); - state.file.write_all(&(record_type as u16).to_be_bytes())?; - let offset_size = state.file.stream_position()?; - state.file.write_all(&[0u8; 6])?; // record and pad size (48 bits) + let offset_header = state.serializer.pos() as u64; + state.serializer.write(&[0u8; 8])?; // Now serialize the actual data to the log - let offset_start = state.file.stream_position()?; + let offset_start = state.serializer.pos() as u64; entry.serialize_archive(&mut state.serializer)?; - let offset_end = state.file.stream_position()?; + let offset_end = state.serializer.pos() as u64; let record_size = offset_end - offset_start; - - // If the alightment is out then fail - if record_size % 8 != 0 { - tracing::error!( - "alignment is out for journal event (type={:?}, record_size={}, alignment={})", - record_type, - record_size, - record_size % 8 - ); - } + tracing::trace!( + "delimiter header={offset_header},start={offset_start},record_size={record_size}" + ); // Write the record and then move back to the end again - state.file.seek(SeekFrom::Start(offset_size))?; - state.file.write_all(&record_size.to_be_bytes()[2..8])?; + state.file.seek(SeekFrom::Start(offset_header))?; + let header_bytes = { + let a = (record_type as u16).to_be_bytes(); + let b = &record_size.to_be_bytes()[2..8]; + [a[0], a[1], b[0], b[1], b[2], b[3], b[4], b[5]] + }; + state.file.write_all(&header_bytes)?; state.file.seek(SeekFrom::Start(offset_end))?; // Now write the actual data and update the offsets - Ok(record_size) + Ok(LogWriteResult { + record_start: offset_start, + record_end: offset_end, + }) } } impl ReadableJournal for LogFileJournalRx { /// UNSAFE: This method uses unsafe operations to remove the need to zero /// the buffer before its read the log entries into it - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { let mut buffer_pos = self.buffer_pos.lock().unwrap(); // Get a memory reference to the data on the disk at @@ -170,6 +197,8 @@ impl ReadableJournal for LogFileJournalRx { if buffer_ptr.len() < 8 { return Ok(None); } + + let record_type: JournalEntryRecordType; let header = { let b = buffer_ptr; @@ -189,40 +218,36 @@ impl ReadableJournal for LogFileJournalRx { record_type: u16::from_be_bytes([b[0], b[1]]), record_size: u64::from_be_bytes([0u8, 0u8, b[2], b[3], b[4], b[5], b[6], b[7]]), }; + + // Now we read the entry + record_type = match header.record_type.try_into() { + Ok(t) => t, + Err(_) => { + tracing::debug!( + "unknown journal entry type ({}) - the journal stops here", + header.record_type + ); + return Ok(None); + } + }; + buffer_ptr.advance(8); *buffer_pos += 8; header }; - - if header.record_size as usize > buffer_ptr.len() { - *buffer_pos += buffer_ptr.len(); - tracing::trace!( - "journal is corrupt (record_size={} vs remaining={})", - header.record_size, - buffer_ptr.len() - ); - return Ok(None); - } + let record_start = *buffer_pos as u64; // Move the buffer position forward past the record let entry = &buffer_ptr[..(header.record_size as usize)]; buffer_ptr.advance(header.record_size as usize); *buffer_pos += header.record_size as usize; - // Now we read the entry - let record_type: JournalEntryRecordType = match header.record_type.try_into() { - Ok(t) => t, - Err(_) 
=> { - tracing::debug!( - "unknown journal entry type ({}) - skipping", - header.record_type - ); - continue; - } - }; - let record = unsafe { record_type.deserialize_archive(entry)? }; - return Ok(Some(record)); + return Ok(Some(LogReadResult { + record_start, + record_end: *buffer_pos as u64, + record, + })); } } @@ -233,13 +258,13 @@ impl ReadableJournal for LogFileJournalRx { } impl WritableJournal for LogFileJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.tx.write(entry) } } impl ReadableJournal for LogFileJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.rx.read() } @@ -256,6 +281,8 @@ impl Journal for LogFileJournal { #[cfg(test)] mod tests { + use wasmer_wasix_types::wasix::WasiMemoryLayout; + use super::*; #[tracing_test::traced_test] @@ -276,6 +303,13 @@ mod tests { memory_stack: vec![22; 16].into(), store_data: vec![33; 136].into(), is_64bit: false, + layout: WasiMemoryLayout { + stack_upper: 0, + stack_lower: 1024, + guard_size: 16, + stack_size: 1024, + }, + start: wasmer_wasix_types::wasix::ThreadStartType::MainThread, }) .unwrap(); journal.write(JournalEntry::PortAddrClearV1).unwrap(); @@ -283,10 +317,10 @@ mod tests { // Read the events and validate let journal = LogFileJournal::new(file.path()).unwrap(); - let event1 = journal.read().unwrap(); - let event2 = journal.read().unwrap(); - let event3 = journal.read().unwrap(); - let event4 = journal.read().unwrap(); + let event1 = journal.read().unwrap().map(LogReadResult::into_inner); + let event2 = journal.read().unwrap().map(LogReadResult::into_inner); + let event3 = journal.read().unwrap().map(LogReadResult::into_inner); + let event4 = journal.read().unwrap().map(LogReadResult::into_inner); // Check the events assert_eq!(event1, Some(JournalEntry::CreatePipeV1 { fd1: 1, fd2: 2 })); @@ -298,6 +332,13 @@ mod tests { memory_stack: vec![22; 16].into(), store_data: vec![33; 136].into(), is_64bit: false, + layout: WasiMemoryLayout { + stack_upper: 0, + stack_lower: 1024, + guard_size: 16, + stack_size: 1024, + }, + start: wasmer_wasix_types::wasix::ThreadStartType::MainThread, }) ); assert_eq!(event3, Some(JournalEntry::PortAddrClearV1)); @@ -314,7 +355,7 @@ mod tests { .unwrap(); // The event should not be visible yet unless we reload the log file - assert_eq!(journal.read().unwrap(), None); + assert_eq!(journal.read().unwrap().map(LogReadResult::into_inner), None); // Reload the load file let journal = LogFileJournal::new(file.path()).unwrap(); @@ -327,11 +368,11 @@ mod tests { }) .unwrap(); - let event1 = journal.read().unwrap(); - let event2 = journal.read().unwrap(); - let event3 = journal.read().unwrap(); - let event4 = journal.read().unwrap(); - let event5 = journal.read().unwrap(); + let event1 = journal.read().unwrap().map(LogReadResult::into_inner); + let event2 = journal.read().unwrap().map(LogReadResult::into_inner); + let event3 = journal.read().unwrap().map(LogReadResult::into_inner); + let event4 = journal.read().unwrap().map(LogReadResult::into_inner); + let event5 = journal.read().unwrap().map(LogReadResult::into_inner); assert_eq!(event1, Some(JournalEntry::CreatePipeV1 { fd1: 1, fd2: 2 })); assert_eq!( event2, @@ -341,6 +382,13 @@ mod tests { memory_stack: vec![22; 16].into(), store_data: vec![33; 136].into(), is_64bit: false, + layout: WasiMemoryLayout { + stack_upper: 0, + stack_lower: 1024, + guard_size: 16, + stack_size: 1024, + }, + start: 
wasmer_wasix_types::wasix::ThreadStartType::MainThread, }) ); assert_eq!(event3, Some(JournalEntry::PortAddrClearV1)); @@ -358,12 +406,12 @@ mod tests { // Load it again let journal = LogFileJournal::new(file.path()).unwrap(); - let event1 = journal.read().unwrap(); - let event2 = journal.read().unwrap(); - let event3 = journal.read().unwrap(); - let event4 = journal.read().unwrap(); - let event5 = journal.read().unwrap(); - let event6 = journal.read().unwrap(); + let event1 = journal.read().unwrap().map(LogReadResult::into_inner); + let event2 = journal.read().unwrap().map(LogReadResult::into_inner); + let event3 = journal.read().unwrap().map(LogReadResult::into_inner); + let event4 = journal.read().unwrap().map(LogReadResult::into_inner); + let event5 = journal.read().unwrap().map(LogReadResult::into_inner); + let event6 = journal.read().unwrap().map(LogReadResult::into_inner); tracing::info!("event1 {:?}", event1); tracing::info!("event2 {:?}", event2); @@ -381,6 +429,13 @@ mod tests { memory_stack: vec![22; 16].into(), store_data: vec![33; 136].into(), is_64bit: false, + layout: WasiMemoryLayout { + stack_upper: 0, + stack_lower: 1024, + guard_size: 16, + stack_size: 1024, + }, + start: wasmer_wasix_types::wasix::ThreadStartType::MainThread, }) ); assert_eq!(event3, Some(JournalEntry::PortAddrClearV1)); diff --git a/lib/journal/src/concrete/mod.rs b/lib/journal/src/concrete/mod.rs index 9cc1abe2224..b564fa1fe81 100644 --- a/lib/journal/src/concrete/mod.rs +++ b/lib/journal/src/concrete/mod.rs @@ -1,3 +1,5 @@ +mod aligned_cow_str; +mod aligned_cow_vec; mod arc; mod archived; mod archived_from; @@ -20,6 +22,8 @@ mod unsupported; pub(super) use super::*; +pub use aligned_cow_str::*; +pub use aligned_cow_vec::*; pub use arc::*; pub use archived::*; pub use boxed::*; diff --git a/lib/journal/src/concrete/null.rs b/lib/journal/src/concrete/null.rs index 779e9d8ce28..f7b34e62abe 100644 --- a/lib/journal/src/concrete/null.rs +++ b/lib/journal/src/concrete/null.rs @@ -9,7 +9,7 @@ pub struct NullJournal { } impl ReadableJournal for NullJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { Ok(None) } @@ -19,11 +19,14 @@ impl ReadableJournal for NullJournal { } impl WritableJournal for NullJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { if self.debug_print { tracing::debug!("journal event: {:?}", entry); } - Ok(entry.estimate_size() as u64) + Ok(LogWriteResult { + record_start: 0, + record_end: entry.estimate_size() as u64, + }) } } diff --git a/lib/journal/src/concrete/pipe.rs b/lib/journal/src/concrete/pipe.rs index 1f9f7d15a31..94a8c851c9a 100644 --- a/lib/journal/src/concrete/pipe.rs +++ b/lib/journal/src/concrete/pipe.rs @@ -14,12 +14,18 @@ pub struct PipeJournal { #[derive(Debug)] pub struct PipeJournalRx { - receiver: Arc>>>, + receiver: Arc>>>, +} + +#[derive(Debug)] +struct SenderState { + offset: u64, + sender: mpsc::Sender>, } #[derive(Debug)] pub struct PipeJournalTx { - sender: Arc>>>, + sender: Arc>, } impl PipeJournal { @@ -29,7 +35,10 @@ impl PipeJournal { let end1 = PipeJournal { tx: PipeJournalTx { - sender: Arc::new(Mutex::new(tx1)), + sender: Arc::new(Mutex::new(SenderState { + offset: 0, + sender: tx1, + })), }, rx: PipeJournalRx { receiver: Arc::new(Mutex::new(rx2)), @@ -38,7 +47,10 @@ impl PipeJournal { let end2 = PipeJournal { tx: PipeJournalTx { - sender: Arc::new(Mutex::new(tx2)), + sender: Arc::new(Mutex::new(SenderState { + offset: 0, + 
sender: tx2, + })), }, rx: PipeJournalRx { receiver: Arc::new(Mutex::new(rx1)), @@ -50,20 +62,31 @@ impl PipeJournal { } impl WritableJournal for PipeJournalTx { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { let entry = entry.into_owned(); - let entry_size = entry.estimate_size(); - - let sender = self.sender.lock().unwrap(); - sender.send(entry).map_err(|err| { - anyhow::format_err!("failed to send journal event through the pipe - {}", err) - })?; - Ok(entry_size as u64) + let entry_size = entry.estimate_size() as u64; + + let mut sender = self.sender.lock().unwrap(); + sender + .sender + .send(LogReadResult { + record_start: sender.offset, + record_end: sender.offset + entry_size, + record: entry, + }) + .map_err(|err| { + anyhow::format_err!("failed to send journal event through the pipe - {}", err) + })?; + sender.offset += entry_size; + Ok(LogWriteResult { + record_start: sender.offset, + record_end: sender.offset + entry_size, + }) } } impl ReadableJournal for PipeJournalRx { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { let rx = self.receiver.lock().unwrap(); match rx.try_recv() { Ok(e) => Ok(Some(e)), @@ -82,13 +105,13 @@ impl ReadableJournal for PipeJournalRx { } impl WritableJournal for PipeJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.tx.write(entry) } } impl ReadableJournal for PipeJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.rx.read() } diff --git a/lib/journal/src/concrete/printing.rs b/lib/journal/src/concrete/printing.rs index c12ea6e2fac..baa29599149 100644 --- a/lib/journal/src/concrete/printing.rs +++ b/lib/journal/src/concrete/printing.rs @@ -29,7 +29,7 @@ impl PrintingJournal { } impl ReadableJournal for PrintingJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { Ok(None) } @@ -39,14 +39,17 @@ impl ReadableJournal for PrintingJournal { } impl WritableJournal for PrintingJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { match self.mode { JournalPrintingMode::Text => println!("{}", entry), JournalPrintingMode::Json => { println!("{}", serde_json::to_string_pretty(&entry)?) } } - Ok(entry.estimate_size() as u64) + Ok(LogWriteResult { + record_start: 0, + record_end: entry.estimate_size() as u64, + }) } } @@ -65,6 +68,9 @@ impl<'a> fmt::Display for JournalEntry<'a> { JournalEntry::InitModuleV1 { wasm_hash } => { write!(f, "init-module (hash={:x?})", wasm_hash) } + JournalEntry::ClearEtherealV1 => { + write!(f, "clear-ethereal") + } JournalEntry::UpdateMemoryRegionV1 { region, data } => write!( f, "memory-update (start={}, end={}, data.len={})", @@ -136,11 +142,11 @@ impl<'a> fmt::Display for JournalEntry<'a> { "fd-duplicate (original={}, copied={})", original_fd, copied_fd ), - JournalEntry::CreateDirectoryV1 { path, .. } => { - write!(f, "path-create-dir (path={})", path) + JournalEntry::CreateDirectoryV1 { fd, path } => { + write!(f, "path-create-dir (fd={}, path={})", fd, path) } - JournalEntry::RemoveDirectoryV1 { path, .. 
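// Sketch: PipeJournalTx now pairs the sender with a running byte offset so
// every entry pushed through the channel carries the same record_start /
// record_end bookkeeping as the file-backed journal. A minimal standalone
// model of that pattern (the types here are illustrative, not the crate's):
use std::sync::mpsc;

struct Record {
    start: u64,
    end: u64,
    payload: Vec<u8>,
}

fn main() {
    let (tx, rx) = mpsc::channel::<Record>();
    let mut offset = 0u64;
    for payload in [vec![1u8; 16], vec![2u8; 8]] {
        let len = payload.len() as u64;
        tx.send(Record { start: offset, end: offset + len, payload }).unwrap();
        offset += len;
    }
    let first = rx.recv().unwrap();
    assert_eq!((first.start, first.end), (0, 16));
}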
} => { - write!(f, "path-remove-dir (path={})", path) + JournalEntry::RemoveDirectoryV1 { fd, path } => { + write!(f, "path-remove-dir (fd={}, path={})", fd, path) } JournalEntry::PathSetTimesV1 { path, @@ -246,18 +252,27 @@ impl<'a> fmt::Display for JournalEntry<'a> { JournalEntry::SocketBindV1 { fd, addr } => { write!(f, "sock-bind (fd={}, addr={})", fd, addr) } - JournalEntry::SocketConnectedV1 { fd, addr } => { - write!(f, "sock-connect (fd={}, addr={})", fd, addr) + JournalEntry::SocketConnectedV1 { + fd, + local_addr, + peer_addr, + } => { + write!( + f, + "sock-connect (fd={}, addr={}, peer={})", + fd, local_addr, peer_addr + ) } JournalEntry::SocketAcceptedV1 { listen_fd, fd, + local_addr, peer_addr, .. } => write!( f, - "sock-accept (listen-fd={}, sock_fd={}, peer={})", - listen_fd, fd, peer_addr + "sock-accept (listen-fd={}, sock_fd={}, addr={}, peer={})", + listen_fd, fd, local_addr, peer_addr ), JournalEntry::SocketJoinIpv4MulticastV1 { fd, diff --git a/lib/journal/src/concrete/recombined.rs b/lib/journal/src/concrete/recombined.rs index 05ae9e53cb9..6a97c9b44e1 100644 --- a/lib/journal/src/concrete/recombined.rs +++ b/lib/journal/src/concrete/recombined.rs @@ -12,13 +12,13 @@ impl RecombinedJournal { } impl WritableJournal for RecombinedJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { self.tx.write(entry) } } impl ReadableJournal for RecombinedJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { self.rx.read() } diff --git a/lib/journal/src/concrete/tests.rs b/lib/journal/src/concrete/tests.rs index cc48dedb4af..faebfcc296d 100644 --- a/lib/journal/src/concrete/tests.rs +++ b/lib/journal/src/concrete/tests.rs @@ -70,6 +70,13 @@ pub fn test_record_set_thread() { memory_stack: vec![4, 5, 6, 7].into(), store_data: vec![10, 11].into(), is_64bit: true, + layout: wasmer_wasix_types::wasix::WasiMemoryLayout { + stack_upper: 0, + stack_lower: 1024, + guard_size: 16, + stack_size: 1024, + }, + start: wasmer_wasix_types::wasix::ThreadStartType::MainThread, }); } @@ -481,7 +488,8 @@ pub fn test_record_socket_bind() { pub fn test_record_socket_connected() { run_test(JournalEntry::SocketConnectedV1 { fd: 12341, - addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 1234), + local_addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 1234), + peer_addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 1234), }); } @@ -491,6 +499,7 @@ pub fn test_record_socket_accepted() { run_test(JournalEntry::SocketAcceptedV1 { listen_fd: 21234, fd: 1, + local_addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 3452), peer_addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 3452), fd_flags: wasi::Fdflags::all(), non_blocking: true, diff --git a/lib/journal/src/concrete/unsupported.rs b/lib/journal/src/concrete/unsupported.rs index a7e52a78f21..3ac2abd0b34 100644 --- a/lib/journal/src/concrete/unsupported.rs +++ b/lib/journal/src/concrete/unsupported.rs @@ -8,7 +8,7 @@ pub static UNSUPPORTED_JOURNAL: UnsupportedJournal = UnsupportedJournal {}; pub struct UnsupportedJournal {} impl ReadableJournal for UnsupportedJournal { - fn read(&self) -> anyhow::Result>> { + fn read(&self) -> anyhow::Result>> { Ok(None) } @@ -18,7 +18,7 @@ impl ReadableJournal for UnsupportedJournal { } impl WritableJournal for UnsupportedJournal { - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result { 
tracing::debug!("journal event: {:?}", entry); Err(anyhow::format_err!("unsupported")) } diff --git a/lib/journal/src/entry.rs b/lib/journal/src/entry.rs index aace436d22d..29190d9d350 100644 --- a/lib/journal/src/entry.rs +++ b/lib/journal/src/entry.rs @@ -10,6 +10,7 @@ use wasmer_wasix_types::wasi::{ Filesize, Fstflags, LookupFlags, Oflags, Rights, SiFlags, Snapshot0Clockid, SockProto, Sockoption, Socktype, Timestamp, Tty, Whence, }; +use wasmer_wasix_types::wasix::{ThreadStartType, WasiMemoryLayout}; use crate::{base64, SnapshotTrigger}; @@ -85,6 +86,7 @@ pub enum JournalEntry<'a> { InitModuleV1 { wasm_hash: [u8; 8], }, + ClearEtherealV1, UpdateMemoryRegionV1 { region: Range, #[derivative(Debug = "ignore")] @@ -105,6 +107,8 @@ pub enum JournalEntry<'a> { #[derivative(Debug = "ignore")] #[serde(with = "base64")] store_data: Cow<'a, [u8]>, + start: ThreadStartType, + layout: WasiMemoryLayout, is_64bit: bool, }, CloseThreadV1 { @@ -287,11 +291,13 @@ pub enum JournalEntry<'a> { }, SocketConnectedV1 { fd: Fd, - addr: SocketAddr, + local_addr: SocketAddr, + peer_addr: SocketAddr, }, SocketAcceptedV1 { listen_fd: Fd, fd: Fd, + local_addr: SocketAddr, peer_addr: SocketAddr, fd_flags: Fdflags, non_blocking: bool, @@ -369,6 +375,7 @@ impl<'a> JournalEntry<'a> { pub fn into_owned(self) -> JournalEntry<'static> { match self { Self::InitModuleV1 { wasm_hash } => JournalEntry::InitModuleV1 { wasm_hash }, + Self::ClearEtherealV1 => JournalEntry::ClearEtherealV1, Self::UpdateMemoryRegionV1 { region, data } => JournalEntry::UpdateMemoryRegionV1 { region, data: data.into_owned().into(), @@ -380,11 +387,15 @@ impl<'a> JournalEntry<'a> { memory_stack, store_data, is_64bit, + start, + layout, } => JournalEntry::SetThreadV1 { id, call_stack: call_stack.into_owned().into(), memory_stack: memory_stack.into_owned().into(), store_data: store_data.into_owned().into(), + start, + layout, is_64bit, }, Self::CloseThreadV1 { id, exit_code } => JournalEntry::CloseThreadV1 { id, exit_code }, @@ -592,16 +603,26 @@ impl<'a> JournalEntry<'a> { Self::SocketOpenV1 { af, ty, pt, fd } => JournalEntry::SocketOpenV1 { af, ty, pt, fd }, Self::SocketListenV1 { fd, backlog } => JournalEntry::SocketListenV1 { fd, backlog }, Self::SocketBindV1 { fd, addr } => JournalEntry::SocketBindV1 { fd, addr }, - Self::SocketConnectedV1 { fd, addr } => JournalEntry::SocketConnectedV1 { fd, addr }, + Self::SocketConnectedV1 { + fd, + local_addr, + peer_addr, + } => JournalEntry::SocketConnectedV1 { + fd, + local_addr, + peer_addr, + }, Self::SocketAcceptedV1 { listen_fd, fd, + local_addr, peer_addr, fd_flags, non_blocking: nonblocking, } => JournalEntry::SocketAcceptedV1 { listen_fd, fd, + local_addr, peer_addr, fd_flags, non_blocking: nonblocking, @@ -695,6 +716,7 @@ impl<'a> JournalEntry<'a> { let base_size = std::mem::size_of_val(self); match self { JournalEntry::InitModuleV1 { .. } => base_size, + JournalEntry::ClearEtherealV1 => base_size, JournalEntry::UpdateMemoryRegionV1 { data, .. } => base_size + data.len(), JournalEntry::ProcessExitV1 { .. 
} => base_size, JournalEntry::SetThreadV1 { diff --git a/lib/journal/src/lib.rs b/lib/journal/src/lib.rs index d92fa4cf8cf..c7e04c06ea3 100644 --- a/lib/journal/src/lib.rs +++ b/lib/journal/src/lib.rs @@ -10,7 +10,22 @@ pub use snapshot::*; pub use util::*; use serde::{Deserialize, Serialize}; -use std::str::FromStr; +use std::{ops::Deref, str::FromStr}; + +/// The results of an operation to write a log entry to the log +#[derive(Debug)] +pub struct LogWriteResult { + // Start of the actual entry + pub record_start: u64, + // End of the actual entry + pub record_end: u64, +} + +impl LogWriteResult { + pub fn record_size(&self) -> u64 { + self.record_end - self.record_start + } +} /// The snapshot capturer will take a series of objects that represents the state of /// a WASM process at a point in time and saves it so that it can be restored. @@ -19,7 +34,32 @@ use std::str::FromStr; pub trait WritableJournal { /// Takes in a stream of snapshot log entries and saves them so that they /// may be restored at a later moment - fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result; + fn write<'a>(&'a self, entry: JournalEntry<'a>) -> anyhow::Result; +} + +/// The results of an operation to read a log entry from the log +#[derive(Debug)] +pub struct LogReadResult<'a> { + /// Offset into the journal where this entry exists + pub record_start: u64, + /// Offset of the end of the entry + pub record_end: u64, + /// Represents the journal entry + pub record: JournalEntry<'a>, +} + +impl<'a> LogReadResult<'a> { + pub fn into_inner(self) -> JournalEntry<'a> { + self.record + } +} + +impl<'a> Deref for LogReadResult<'a> { + type Target = JournalEntry<'a>; + + fn deref(&self) -> &Self::Target { + &self.record + } } /// The snapshot capturer will take a series of objects that represents the state of @@ -29,7 +69,7 @@ pub trait WritableJournal { pub trait ReadableJournal { /// Returns a stream of snapshot objects that the runtime will use /// to restore the state of a WASM process to a previous moment in time - fn read(&self) -> anyhow::Result>>; + fn read(&self) -> anyhow::Result>>; /// Resets the journal so that reads will start from the /// beginning again diff --git a/lib/journal/src/snapshot.rs b/lib/journal/src/snapshot.rs index 15afcb6ae92..ce5b7029028 100644 --- a/lib/journal/src/snapshot.rs +++ b/lib/journal/src/snapshot.rs @@ -12,6 +12,8 @@ pub enum SnapshotTrigger { FirstEnviron, /// Triggered when the process reads stdin for the first time FirstStdin, + /// Issued on the first interrupt signal (Ctrl + C) the process receives, after that normal CTRL-C will apply. + FirstSigint, /// Triggered periodically based on a interval (default 10 seconds) which can be specified using the `snapshot-interval` option PeriodicInterval, /// Issued if the user sends an interrupt signal (Ctrl + C). 
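// Sketch: consuming the new read() shape. LogReadResult derefs to the entry and
// into_inner() hands it back by value, so replay loops keep working while also
// seeing each record's byte range. Crate name and re-exports assumed from the
// declarations above.
use wasmer_journal::{JournalEntry, ReadableJournal}; // crate name assumed

fn drain<R: ReadableJournal>(rx: &R) -> anyhow::Result<Vec<JournalEntry<'static>>> {
    let mut entries = Vec::new();
    while let Some(next) = rx.read()? {
        // The byte range now travels with every record.
        debug_assert!(next.record_end >= next.record_start);
        entries.push(next.into_inner().into_owned());
    }
    Ok(entries)
}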
@@ -30,7 +32,7 @@ impl SnapshotTrigger { pub fn only_once(&self) -> bool { matches!( self, - Self::FirstListen | Self::FirstEnviron | Self::FirstStdin + Self::FirstListen | Self::FirstEnviron | Self::FirstStdin | Self::FirstSigint ) } } @@ -52,6 +54,7 @@ impl FromStr for SnapshotTrigger { "first-listen" => Self::FirstListen, "first-stdin" => Self::FirstStdin, "first-environ" => Self::FirstEnviron, + "first-intr" | "first-sigint" | "first-ctrlc" | "first-ctrl-c" => Self::FirstSigint, "periodic-interval" => Self::PeriodicInterval, "intr" | "sigint" | "ctrlc" | "ctrl-c" => Self::Sigint, "alarm" | "timer" | "sigalrm" => Self::Sigalrm, diff --git a/lib/journal/src/util.rs b/lib/journal/src/util.rs index 528c131fb2c..6f550a36622 100644 --- a/lib/journal/src/util.rs +++ b/lib/journal/src/util.rs @@ -5,7 +5,7 @@ pub fn copy_journal( to: &W, ) -> anyhow::Result<()> { while let Some(record) = from.read()? { - to.write(record)?; + to.write(record.into_inner())?; } Ok(()) } diff --git a/lib/types/Cargo.toml b/lib/types/Cargo.toml index 140c8c8b76b..2775e02e0cb 100644 --- a/lib/types/Cargo.toml +++ b/lib/types/Cargo.toml @@ -18,7 +18,7 @@ serde_bytes = { version = "0.11", optional = true } thiserror = "1.0" more-asserts = "0.2" indexmap = { version = "1.6" } -rkyv = { version = "0.7.40", features = ["indexmap", "validation", "strict"] } +rkyv = { workspace = true } enum-iterator = "0.7.0" target-lexicon = { version = "0.12.2", default-features = false } enumset.workspace = true diff --git a/lib/virtual-fs/Cargo.toml b/lib/virtual-fs/Cargo.toml index 6bad8a5c101..93536d325f4 100644 --- a/lib/virtual-fs/Cargo.toml +++ b/lib/virtual-fs/Cargo.toml @@ -40,6 +40,7 @@ getrandom = { version = "0.2", features = [ "js" ] } [dev-dependencies] pretty_assertions = "1.3.0" tempfile = "3.6.0" +tracing-test = "0.2.4" tokio = { version = "1", features = ["io-util", "rt"], default_features = false } [features] diff --git a/lib/virtual-fs/src/lib.rs b/lib/virtual-fs/src/lib.rs index 06d89156a00..406ab585959 100644 --- a/lib/virtual-fs/src/lib.rs +++ b/lib/virtual-fs/src/lib.rs @@ -359,6 +359,12 @@ pub trait VirtualFile: None } + /// Writes to this file using an mmap offset and reference + /// (this method only works for mmap optimized file systems) + fn write_from_mmap(&mut self, _offset: u64, _len: u64) -> std::io::Result<()> { + Err(std::io::ErrorKind::Unsupported.into()) + } + /// This method will copy a file from a source to this destination where /// the default is to do a straight byte copy however file system implementors /// may optimize this to do a zero copy diff --git a/lib/virtual-fs/src/mem_fs/file.rs b/lib/virtual-fs/src/mem_fs/file.rs index fe3c5779875..43f51b97c1b 100644 --- a/lib/virtual-fs/src/mem_fs/file.rs +++ b/lib/virtual-fs/src/mem_fs/file.rs @@ -6,6 +6,8 @@ use futures::future::BoxFuture; use tokio::io::AsyncRead; use tokio::io::{AsyncSeek, AsyncWrite}; +use self::offloaded_file::OffloadWrite; + use super::*; use crate::limiter::TrackedVec; use crate::{CopyOnWriteFile, FsError, Result, VirtualFile}; @@ -151,6 +153,7 @@ impl VirtualFile for FileHandle { let inode = fs.storage.get(self.inode); match inode { Some(Node::File(node)) => node.file.len().try_into().unwrap_or(0), + Some(Node::OffloadedFile(node)) => node.file.len(), Some(Node::ReadOnlyFile(node)) => node.file.len().try_into().unwrap_or(0), Some(Node::CustomFile(node)) => { let file = node.file.lock().unwrap(); @@ -182,6 +185,10 @@ impl VirtualFile for FileHandle { .resize(new_size.try_into().map_err(|_| FsError::UnknownError)?, 0)?; 
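// Sketch: the new FirstSigint trigger accepts several spellings and, like the
// other First* triggers, fires only once; plain "sigint" keeps firing on every
// Ctrl-C. Crate name and re-export assumed from the snapshot module above.
use std::str::FromStr;
use wasmer_journal::SnapshotTrigger; // crate name assumed

fn main() {
    for s in ["first-intr", "first-sigint", "first-ctrlc", "first-ctrl-c"] {
        let trigger = SnapshotTrigger::from_str(s).unwrap();
        assert!(trigger.only_once());
        assert!(matches!(trigger, SnapshotTrigger::FirstSigint));
    }
    assert!(!SnapshotTrigger::from_str("sigint").unwrap().only_once());
}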
metadata.len = new_size; } + Some(Node::OffloadedFile(OffloadedFileNode { file, metadata, .. })) => { + file.resize(new_size, 0); + metadata.len = new_size; + } Some(Node::CustomFile(node)) => { let mut file = node.file.lock().unwrap(); file.set_len(new_size)?; @@ -337,6 +344,10 @@ impl VirtualFile for FileHandle { let remaining = node.file.buffer.len() - (self.cursor as usize); Poll::Ready(Ok(remaining)) } + Some(Node::OffloadedFile(node)) => { + let remaining = node.file.len() as usize - (self.cursor as usize); + Poll::Ready(Ok(remaining)) + } Some(Node::ReadOnlyFile(node)) => { let remaining = node.file.buffer.len() - (self.cursor as usize); Poll::Ready(Ok(remaining)) @@ -385,6 +396,7 @@ impl VirtualFile for FileHandle { let inode = fs.storage.get_mut(self.inode); match inode { Some(Node::File(_)) => Poll::Ready(Ok(8192)), + Some(Node::OffloadedFile(_)) => Poll::Ready(Ok(8192)), Some(Node::ReadOnlyFile(_)) => Poll::Ready(Ok(0)), Some(Node::CustomFile(node)) => { let mut file = node.file.lock().unwrap(); @@ -410,6 +422,39 @@ impl VirtualFile for FileHandle { ))), } } + + fn write_from_mmap(&mut self, offset: u64, size: u64) -> std::io::Result<()> { + if !self.writable { + return Err(io::Error::new( + io::ErrorKind::PermissionDenied, + format!( + "the file (inode `{}) doesn't have the `write` permission", + self.inode + ), + )); + } + + let mut cursor = self.cursor; + { + let mut fs = self.filesystem.inner.write().map_err(|_| { + io::Error::new(io::ErrorKind::Other, "failed to acquire a write lock") + })?; + + let inode = fs.storage.get_mut(self.inode); + match inode { + Some(Node::OffloadedFile(node)) => { + node.file + .write(OffloadWrite::MmapOffset { offset, size }, &mut cursor)?; + node.metadata.len = node.file.len(); + } + _ => { + return Err(io::ErrorKind::Unsupported.into()); + } + } + } + self.cursor = cursor; + Ok(()) + } } #[cfg(test)] @@ -624,6 +669,17 @@ impl AsyncRead for FileHandle { } Poll::Ready(read.map(|_| ())) } + Some(Node::OffloadedFile(node)) => { + let read = unsafe { + node.file + .read(std::mem::transmute(buf.unfilled_mut()), &mut cursor) + }; + if let Ok(read) = &read { + unsafe { buf.assume_init(*read) }; + buf.advance(*read); + } + Poll::Ready(read.map(|_| ())) + } Some(Node::ReadOnlyFile(node)) => { let read = unsafe { node.file @@ -686,6 +742,10 @@ impl AsyncSeek for FileHandle { node.file.seek(position, &mut cursor)?; Ok(()) } + Some(Node::OffloadedFile(node)) => { + node.file.seek(position, &mut cursor)?; + Ok(()) + } Some(Node::ReadOnlyFile(node)) => { node.file.seek(position, &mut cursor)?; Ok(()) @@ -749,6 +809,7 @@ impl AsyncSeek for FileHandle { let inode = fs.storage.get_mut(self.inode); match inode { Some(Node::File { .. }) => Poll::Ready(Ok(self.cursor)), + Some(Node::OffloadedFile { .. }) => Poll::Ready(Ok(self.cursor)), Some(Node::ReadOnlyFile { .. 
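// Sketch: write_from_mmap is an opt-in fast path. The default trait method
// returns ErrorKind::Unsupported and only mmap-aware backends (the OffloadedFile
// nodes above) override it, so callers can probe it and fall back to a normal
// buffered write. Crate name assumed.
use std::io;
use virtual_fs::VirtualFile; // crate name assumed

fn try_mmap_write(file: &mut dyn VirtualFile, offset: u64, len: u64) -> io::Result<bool> {
    match file.write_from_mmap(offset, len) {
        Ok(()) => Ok(true),                                             // zero-copy path taken
        Err(e) if e.kind() == io::ErrorKind::Unsupported => Ok(false), // caller should copy instead
        Err(e) => Err(e),
    }
}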
}) => Poll::Ready(Ok(self.cursor)), Some(Node::CustomFile(node)) => { let mut file = node.file.lock().unwrap(); @@ -805,6 +866,11 @@ impl AsyncWrite for FileHandle { node.metadata.len = node.file.len().try_into().unwrap(); bytes_written } + Some(Node::OffloadedFile(node)) => { + let bytes_written = node.file.write(OffloadWrite::Buffer(buf), &mut cursor)?; + node.metadata.len = node.file.len(); + bytes_written + } Some(Node::ReadOnlyFile(node)) => { let bytes_written = node.file.write(buf, &mut cursor)?; node.metadata.len = node.file.len().try_into().unwrap(); @@ -880,6 +946,15 @@ impl AsyncWrite for FileHandle { node.metadata.len = node.file.buffer.len() as u64; Poll::Ready(Ok(bytes_written)) } + Some(Node::OffloadedFile(node)) => { + let buf = bufs + .iter() + .find(|b| !b.is_empty()) + .map_or(&[][..], |b| &**b); + let bytes_written = node.file.write(OffloadWrite::Buffer(buf), &mut cursor)?; + node.metadata.len = node.file.len(); + Poll::Ready(Ok(bytes_written)) + } Some(Node::ReadOnlyFile(node)) => { let buf = bufs .iter() @@ -926,6 +1001,7 @@ impl AsyncWrite for FileHandle { let inode = fs.storage.get_mut(self.inode); match inode { Some(Node::File(node)) => Poll::Ready(node.file.flush()), + Some(Node::OffloadedFile(node)) => Poll::Ready(node.file.flush()), Some(Node::ReadOnlyFile(node)) => Poll::Ready(node.file.flush()), Some(Node::CustomFile(node)) => { let mut file = node.file.lock().unwrap(); @@ -961,6 +1037,7 @@ impl AsyncWrite for FileHandle { let inode = fs.storage.get_mut(self.inode); match inode { Some(Node::File { .. }) => Poll::Ready(Ok(())), + Some(Node::OffloadedFile { .. }) => Poll::Ready(Ok(())), Some(Node::ReadOnlyFile { .. }) => Poll::Ready(Ok(())), Some(Node::CustomFile(node)) => { let mut file = node.file.lock().unwrap(); @@ -996,6 +1073,7 @@ impl AsyncWrite for FileHandle { let inode = fs.storage.get_mut(self.inode); match inode { Some(Node::File { .. }) => false, + Some(Node::OffloadedFile { .. }) => false, Some(Node::ReadOnlyFile { .. }) => false, Some(Node::CustomFile(node)) => { let file = node.file.lock().unwrap(); @@ -1303,6 +1381,8 @@ impl fmt::Debug for FileHandle { formatter .debug_struct("FileHandle") .field("inode", &self.inode) + .field("readable", &self.readable) + .field("writable", &self.writable) .finish() } } diff --git a/lib/virtual-fs/src/mem_fs/file_opener.rs b/lib/virtual-fs/src/mem_fs/file_opener.rs index 6a070d24486..f2a7da582e7 100644 --- a/lib/virtual-fs/src/mem_fs/file_opener.rs +++ b/lib/virtual-fs/src/mem_fs/file_opener.rs @@ -402,6 +402,22 @@ impl crate::FileOpener for FileSystem { } } + Some(Node::OffloadedFile(OffloadedFileNode { metadata, file, .. })) => { + // Update the accessed time. + metadata.accessed = time(); + + // Truncate if needed. + if truncate { + file.truncate(); + metadata.len = 0; + } + + // Move the cursor to the end if needed. + if append { + cursor = file.len(); + } + } + Some(Node::ReadOnlyFile(node)) => { // Update the accessed time. node.metadata.accessed = time(); @@ -470,29 +486,42 @@ impl crate::FileOpener for FileSystem { // Write lock. let mut fs = self.inner.write().map_err(|_| FsError::Lock)?; - let file = File::new(fs.limiter.clone()); - - // Creating the file in the storage. 
+ let metadata = { + let time = time(); + Metadata { + ft: FileType { + file: true, + ..Default::default() + }, + accessed: time, + created: time, + modified: time, + len: 0, + } + }; let inode_of_file = fs.storage.vacant_entry().key(); - let real_inode_of_file = fs.storage.insert(Node::File(FileNode { - inode: inode_of_file, - name: name_of_file, - file, - metadata: { - let time = time(); - Metadata { - ft: FileType { - file: true, - ..Default::default() - }, - accessed: time, - created: time, - modified: time, - len: 0, - } - }, - })); + // We might be in optimized mode + let file = if let Some(offload) = fs.backing_offload.clone() { + let file = OffloadedFile::new(fs.limiter.clone(), offload); + Node::OffloadedFile(OffloadedFileNode { + inode: inode_of_file, + name: name_of_file, + file, + metadata, + }) + } else { + let file = File::new(fs.limiter.clone()); + Node::File(FileNode { + inode: inode_of_file, + name: name_of_file, + file, + metadata, + }) + }; + + // Creating the file in the storage. + let real_inode_of_file = fs.storage.insert(file); assert_eq!( inode_of_file, real_inode_of_file, diff --git a/lib/virtual-fs/src/mem_fs/filesystem.rs b/lib/virtual-fs/src/mem_fs/filesystem.rs index bf81d2093bd..c1d72bc493b 100644 --- a/lib/virtual-fs/src/mem_fs/filesystem.rs +++ b/lib/virtual-fs/src/mem_fs/filesystem.rs @@ -1,5 +1,7 @@ //! This module contains the [`FileSystem`] type itself. +use self::offloaded_file::OffloadBackingStore; + use super::*; use crate::{DirEntry, FileSystem as _, FileType, FsError, Metadata, OpenOptions, ReadDir, Result}; use futures::future::BoxFuture; @@ -29,6 +31,20 @@ impl FileSystem { self } + /// Uses a mmap'ed file as a cache for file data thus removing the + /// need to copy the data into memory. + /// + /// This is especially important for journals as it means that the + /// data stored within the journals does not need to be copied + /// into memory, for very large journals this would otherwise be + /// a problem. + pub fn with_backing_offload(self, buffer: OffloadBackingStore) -> Result { + let mut lock = self.inner.write().map_err(|_| FsError::Lock)?; + lock.backing_offload.replace(buffer); + drop(lock); + Ok(self) + } + /// Canonicalize a path without validating that it actually exists. pub fn canonicalize_unchecked(&self, path: &Path) -> Result { let lock = self.inner.read().map_err(|_| FsError::Lock)?; @@ -619,6 +635,7 @@ impl fmt::Debug for FileSystem { /// indexed by their respective `Inode` in a slab. pub(super) struct FileSystemInner { pub(super) storage: Slab, + pub(super) backing_offload: Option, pub(super) limiter: Option, } @@ -754,6 +771,7 @@ impl FileSystemInner { .filter_map(|(nth, inode)| self.storage.get(*inode).map(|node| (nth, node))) .find_map(|(nth, node)| match node { Node::File(FileNode { inode, name, .. }) + | Node::OffloadedFile(OffloadedFileNode { inode, name, .. }) | Node::ReadOnlyFile(ReadOnlyFileNode { inode, name, .. }) | Node::CustomFile(CustomFileNode { inode, name, .. }) | Node::ArcFile(ArcFileNode { inode, name, .. }) @@ -793,6 +811,7 @@ impl FileSystemInner { .filter_map(|(nth, inode)| self.storage.get(*inode).map(|node| (nth, node))) .find_map(|(nth, node)| match node { Node::File(FileNode { inode, name, .. }) + | Node::OffloadedFile(OffloadedFileNode { inode, name, .. }) | Node::Directory(DirectoryNode { inode, name, .. }) | Node::ReadOnlyFile(ReadOnlyFileNode { inode, name, .. }) | Node::CustomFile(CustomFileNode { inode, name, .. 
}) @@ -956,6 +975,7 @@ impl fmt::Debug for FileSystemInner { inode = node.inode(), ty = match node { Node::File { .. } => "file", + Node::OffloadedFile { .. } => "offloaded-file", Node::ReadOnlyFile { .. } => "ro-file", Node::ArcFile { .. } => "arc-file", Node::CustomFile { .. } => "custom-file", @@ -1015,6 +1035,7 @@ impl Default for FileSystemInner { Self { storage: slab, + backing_offload: None, limiter: None, } } diff --git a/lib/virtual-fs/src/mem_fs/mod.rs b/lib/virtual-fs/src/mem_fs/mod.rs index 77e85d9db26..33ea3ef51a1 100644 --- a/lib/virtual-fs/src/mem_fs/mod.rs +++ b/lib/virtual-fs/src/mem_fs/mod.rs @@ -1,10 +1,12 @@ mod file; mod file_opener; mod filesystem; +mod offloaded_file; mod stdio; use file::{File, FileHandle, ReadOnlyFile}; pub use filesystem::FileSystem; +pub use offloaded_file::OffloadBackingStore; pub use stdio::{Stderr, Stdin, Stdout}; use crate::Metadata; @@ -14,6 +16,8 @@ use std::{ sync::{Arc, Mutex}, }; +use self::offloaded_file::OffloadedFile; + type Inode = usize; const ROOT_INODE: Inode = 0; @@ -33,6 +37,14 @@ struct ReadOnlyFileNode { metadata: Metadata, } +#[derive(Debug)] +struct OffloadedFileNode { + inode: Inode, + name: OsString, + file: OffloadedFile, + metadata: Metadata, +} + #[derive(Debug)] struct ArcFileNode { inode: Inode, @@ -70,6 +82,7 @@ struct ArcDirectoryNode { #[derive(Debug)] enum Node { File(FileNode), + OffloadedFile(OffloadedFileNode), ReadOnlyFile(ReadOnlyFileNode), ArcFile(ArcFileNode), CustomFile(CustomFileNode), @@ -81,6 +94,7 @@ impl Node { fn inode(&self) -> Inode { *match self { Self::File(FileNode { inode, .. }) => inode, + Self::OffloadedFile(OffloadedFileNode { inode, .. }) => inode, Self::ReadOnlyFile(ReadOnlyFileNode { inode, .. }) => inode, Self::ArcFile(ArcFileNode { inode, .. }) => inode, Self::CustomFile(CustomFileNode { inode, .. }) => inode, @@ -92,6 +106,7 @@ impl Node { fn name(&self) -> &OsStr { match self { Self::File(FileNode { name, .. }) => name.as_os_str(), + Self::OffloadedFile(OffloadedFileNode { name, .. }) => name.as_os_str(), Self::ReadOnlyFile(ReadOnlyFileNode { name, .. }) => name.as_os_str(), Self::ArcFile(ArcFileNode { name, .. }) => name.as_os_str(), Self::CustomFile(CustomFileNode { name, .. }) => name.as_os_str(), @@ -103,6 +118,7 @@ impl Node { fn metadata(&self) -> &Metadata { match self { Self::File(FileNode { metadata, .. }) => metadata, + Self::OffloadedFile(OffloadedFileNode { metadata, .. }) => metadata, Self::ReadOnlyFile(ReadOnlyFileNode { metadata, .. }) => metadata, Self::ArcFile(ArcFileNode { metadata, .. }) => metadata, Self::CustomFile(CustomFileNode { metadata, .. }) => metadata, @@ -114,6 +130,7 @@ impl Node { fn metadata_mut(&mut self) -> &mut Metadata { match self { Self::File(FileNode { metadata, .. }) => metadata, + Self::OffloadedFile(OffloadedFileNode { metadata, .. }) => metadata, Self::ReadOnlyFile(ReadOnlyFileNode { metadata, .. }) => metadata, Self::ArcFile(ArcFileNode { metadata, .. }) => metadata, Self::CustomFile(CustomFileNode { metadata, .. }) => metadata, @@ -125,6 +142,7 @@ impl Node { fn set_name(&mut self, new_name: OsString) { match self { Self::File(FileNode { name, .. }) => *name = new_name, + Self::OffloadedFile(OffloadedFileNode { name, .. }) => *name = new_name, Self::ReadOnlyFile(ReadOnlyFileNode { name, .. }) => *name = new_name, Self::ArcFile(ArcFileNode { name, .. }) => *name = new_name, Self::CustomFile(CustomFileNode { name, .. 
}) => *name = new_name, diff --git a/lib/virtual-fs/src/mem_fs/offloaded_file.rs b/lib/virtual-fs/src/mem_fs/offloaded_file.rs new file mode 100644 index 00000000000..a4308be32bd --- /dev/null +++ b/lib/virtual-fs/src/mem_fs/offloaded_file.rs @@ -0,0 +1,481 @@ +use bytes::Bytes; +use shared_buffer::OwnedBuffer; +use std::{ + cmp, + fs::File, + io, + ops::Range, + sync::{Arc, Mutex, MutexGuard}, +}; + +use crate::limiter::DynFsMemoryLimiter; + +#[derive(Debug)] +pub enum FileExtent { + MmapOffload { offset: u64, size: u64 }, + RepeatingBytes { value: u8, cnt: u64 }, + InMemory { data: Bytes }, +} + +impl FileExtent { + pub fn size(&self) -> u64 { + match self { + FileExtent::MmapOffload { size, .. } => *size, + FileExtent::RepeatingBytes { cnt, .. } => *cnt, + FileExtent::InMemory { data } => data.len() as u64, + } + } + + pub fn resize(&mut self, new_size: u64) { + match self { + FileExtent::MmapOffload { size, .. } => *size = new_size.min(*size), + FileExtent::RepeatingBytes { cnt, .. } => *cnt = new_size, + FileExtent::InMemory { data } => { + *data = data.slice(..(new_size as usize)); + } + } + } +} + +#[derive(Debug)] +struct OffloadBackingStoreState { + mmap_file: Option, + mmap_offload: OwnedBuffer, +} + +impl OffloadBackingStoreState { + fn get_slice(&mut self, range: Range) -> io::Result<&[u8]> { + let offset = range.start; + let size = match range.end { + u64::MAX => { + let len = self.mmap_offload.len() as u64; + if len < offset { + tracing::trace!("range out of bounds {} vs {}", len, offset); + return Err(io::ErrorKind::UnexpectedEof.into()); + } + len - offset + } + end => end - offset, + }; + + let end = offset + size; + if end > self.mmap_offload.len() as u64 { + let mmap_file = match self.mmap_file.as_ref() { + Some(a) => a, + None => { + tracing::trace!( + "mmap buffer out of bounds and no mmap file to reload {} vs {}", + end, + self.mmap_offload.len() + ); + return Err(io::ErrorKind::UnexpectedEof.into()); + } + }; + self.mmap_offload = OwnedBuffer::from_file(mmap_file) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; + if end > self.mmap_offload.len() as u64 { + tracing::trace!( + "mmap buffer out of bounds {} vs {} for {:?}", + end, + self.mmap_offload.len(), + range + ); + return Err(io::ErrorKind::UnexpectedEof.into()); + } + } + Ok(&self.mmap_offload[offset as usize..end as usize]) + } +} + +#[derive(Debug, Clone)] +pub struct OffloadBackingStore { + state: Arc>, +} + +impl OffloadBackingStore { + pub fn new(mmap_offload: OwnedBuffer, mmap_file: Option) -> Self { + Self { + state: Arc::new(Mutex::new(OffloadBackingStoreState { + mmap_file, + mmap_offload, + })), + } + } + + pub fn from_file(file: &File) -> Self { + let file = file.try_clone().unwrap(); + let buffer = OwnedBuffer::from_file(&file).unwrap(); + Self::new(buffer, Some(file)) + } + + pub fn owned_buffer(&self) -> OwnedBuffer { + let guard = self.state.lock().unwrap(); + guard.mmap_offload.clone() + } + + fn lock(&self) -> MutexGuard<'_, OffloadBackingStoreState> { + self.state.lock().unwrap() + } +} + +#[derive(Debug)] +pub struct OffloadedFile { + backing: OffloadBackingStore, + #[allow(dead_code)] + limiter: Option, + extents: Vec, + size: u64, +} + +pub enum OffloadWrite<'a> { + MmapOffset { offset: u64, size: u64 }, + Buffer(&'a [u8]), +} + +impl<'a> OffloadWrite<'a> { + fn len(&self) -> usize { + match self { + OffloadWrite::MmapOffset { size, .. 
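// Sketch: wiring the offload pieces together. A mem_fs FileSystem created with
// with_backing_offload() stores new file contents as extents over this mmap'ed
// backing file instead of copying bytes into memory; OffloadBackingStore keeps
// a clone of the file handle so the mapping can be refreshed if the file grows
// (see get_slice above). Module paths assumed from the re-exports shown earlier.
use virtual_fs::mem_fs::{FileSystem, OffloadBackingStore}; // paths assumed

fn offloaded_fs(backing_file: &std::fs::File) -> anyhow::Result<FileSystem> {
    let backing = OffloadBackingStore::from_file(backing_file);
    let fs = FileSystem::default().with_backing_offload(backing)?;
    Ok(fs)
}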
} => *size as usize, + OffloadWrite::Buffer(data) => data.len(), + } + } +} + +impl OffloadedFile { + pub fn new(limiter: Option, backing: OffloadBackingStore) -> Self { + Self { + backing, + limiter, + extents: Vec::new(), + size: 0, + } + } + + pub fn seek(&self, position: io::SeekFrom, cursor: &mut u64) -> io::Result { + let to_err = |_| io::ErrorKind::InvalidInput; + + // Calculate the next cursor. + let next_cursor: i64 = match position { + io::SeekFrom::Start(offset) => offset.try_into().map_err(to_err)?, + io::SeekFrom::End(offset) => self.len() as i64 + offset, + io::SeekFrom::Current(offset) => { + TryInto::::try_into(*cursor).map_err(to_err)? + offset + } + }; + + // It's an error to seek before byte 0. + if next_cursor < 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "seeking before the byte 0", + )); + } + + // In this implementation, it's an error to seek beyond the + // end of the buffer. + let next_cursor = next_cursor.try_into().map_err(to_err)?; + *cursor = cmp::min(self.len(), next_cursor); + Ok(*cursor) + } + + pub fn read(&self, mut buf: &mut [u8], cursor: &mut u64) -> io::Result { + let cursor_start = *cursor; + + let mut extent_offset = cursor_start; + let mut extent_index = 0usize; + while !buf.is_empty() && extent_index < self.extents.len() { + let extent = &self.extents[extent_index]; + + if extent_offset >= extent.size() { + extent_offset -= extent.size(); + extent_index += 1; + continue; + } + + let read = match extent { + FileExtent::MmapOffload { + offset: mmap_offset, + size: extent_size, + } => { + let mut backing = self.backing.lock(); + let mmap_offset_plus_extent = mmap_offset + extent_offset; + let data = backing.get_slice( + mmap_offset_plus_extent + ..(mmap_offset_plus_extent + *extent_size - extent_offset), + )?; + let data_len = cmp::min(buf.len(), data.len()); + buf[..data_len].copy_from_slice(&data[..data_len]); + data_len + } + FileExtent::RepeatingBytes { value, cnt } => { + let cnt = cmp::min(buf.len() as u64, cnt - extent_offset) as usize; + buf[..cnt].iter_mut().for_each(|d| { + *d = *value; + }); + cnt + } + FileExtent::InMemory { data } => { + let data = &data.as_ref()[extent_offset as usize..]; + let data_len = cmp::min(buf.len(), data.len()); + buf[..data_len].copy_from_slice(&data[..data_len]); + data_len + } + }; + + *cursor += read as u64; + extent_offset = 0; + extent_index += 1; + buf = &mut buf[read..]; + } + Ok((*cursor - cursor_start) as usize) + } + + pub fn write(&mut self, data: OffloadWrite<'_>, cursor: &mut u64) -> io::Result { + let original_extent_offset = *cursor; + let mut extent_offset = original_extent_offset; + let mut data_len = data.len() as u64; + + // We need to split any extents that are intersecting with the + // start or end of the new block of data we are about to write + let mut split_extents = |mut split_at: u64| { + let mut index = 0usize; + while split_at > 0 && index < self.extents.len() { + let extent = &mut self.extents[index]; + if split_at >= extent.size() { + split_at -= extent.size(); + index += 1; + continue; + } else if split_at == 0 { + break; + } else { + let new_extent = match extent { + FileExtent::MmapOffload { + offset: other_offset, + size: other_size, + } => FileExtent::MmapOffload { + offset: *other_offset + split_at, + size: *other_size - split_at, + }, + FileExtent::RepeatingBytes { + value: other_value, + cnt: other_cnt, + } => FileExtent::RepeatingBytes { + value: *other_value, + cnt: *other_cnt - split_at, + }, + FileExtent::InMemory { data: other_data } => 
FileExtent::InMemory { + data: other_data.slice((split_at as usize)..), + }, + }; + extent.resize(split_at); + self.extents.insert(index + 1, new_extent); + break; + } + } + }; + + // If the extent is below the actual size of the file then we need to split it + let mut index = if extent_offset < self.size { + split_extents(extent_offset); + split_extents(extent_offset + data_len); + + // Now we delete all the extents that exist between the + // range that we are about to insert + let mut index = 0usize; + while index < self.extents.len() { + let extent = &self.extents[index]; + if extent_offset >= extent.size() { + extent_offset -= extent.size(); + index += 1; + continue; + } else { + break; + } + } + while index < self.extents.len() { + let extent = &self.extents[index]; + if data_len < extent.size() { + break; + } + data_len -= extent.size(); + self.extents.remove(index); + } + index + } else { + self.extents.len() + }; + + // If we have a gap that needs to be filled then do so + if extent_offset > self.size { + self.extents.insert( + index, + FileExtent::RepeatingBytes { + value: 0, + cnt: extent_offset - self.size, + }, + ); + self.size = extent_offset; + index += 1; + } + + // Insert the extent into the buffer + match data { + OffloadWrite::MmapOffset { offset, size } => { + self.extents + .insert(index, FileExtent::MmapOffload { offset, size }); + } + OffloadWrite::Buffer(data) => { + // Finally we add the new extent + let data_start = data.as_ptr() as u64; + let data_end = data_start + data.len() as u64; + let mut backing = self.backing.lock(); + let backing_data = backing.get_slice(0u64..u64::MAX)?; + + let mmap_start = backing_data.as_ptr() as u64; + let mmap_end = mmap_start + backing_data.len() as u64; + + // If the data is within the mmap buffer then we use a extent range + // to represent the data, otherwise we fall back on copying the data + let new_extent = if data_start >= mmap_start && data_end <= mmap_end { + FileExtent::MmapOffload { + offset: data_start - mmap_start, + size: data_end - data_start, + } + } else { + FileExtent::InMemory { + data: data.to_vec().into(), + } + }; + self.extents.insert(index, new_extent); + } + } + self.size = self.size.max(original_extent_offset + data.len() as u64); + + // Update the cursor + *cursor += data.len() as u64; + Ok(data.len()) + } + + pub fn flush(&mut self) -> io::Result<()> { + Ok(()) + } + + pub fn resize(&mut self, new_len: u64, value: u8) { + let mut cur_len = self.len(); + if new_len > cur_len { + self.extents.push(FileExtent::RepeatingBytes { + value, + cnt: new_len - cur_len, + }); + } + while cur_len > new_len && !self.extents.is_empty() { + let extent: &mut FileExtent = self.extents.last_mut().unwrap(); + let diff = extent.size().min(cur_len - new_len); + extent.resize(extent.size() - diff); + cur_len -= diff; + if extent.size() == 0 { + self.extents.pop(); + } + } + self.size = new_len; + } + + pub fn len(&self) -> u64 { + self.size + } + + pub fn truncate(&mut self) { + self.extents.clear(); + self.size = 0; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[tracing_test::traced_test] + pub fn test_offload_file() -> anyhow::Result<()> { + let buffer = OwnedBuffer::from_bytes(std::iter::repeat(12u8).take(100).collect::>()); + let test_data2 = buffer.clone(); + + let backing = OffloadBackingStore::new(buffer, None); + let mut file = OffloadedFile::new(None, backing); + + let mut cursor = 0u64; + let test_data = std::iter::repeat(56u8).take(100).collect::>(); + 
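// Sketch: reads above walk the extent list by first translating the cursor into
// (extent index, offset inside that extent) and then copying extents out until
// the destination buffer is full. A simplified, self-contained version of that
// cursor translation:
fn locate(extent_sizes: &[u64], cursor: u64) -> Option<(usize, u64)> {
    let mut remaining = cursor;
    for (index, size) in extent_sizes.iter().copied().enumerate() {
        if remaining < size {
            return Some((index, remaining));
        }
        remaining -= size;
    }
    None // the cursor sits at or past the end of the file
}

fn main() {
    // e.g. extents of 50, 100 and 50 bytes
    assert_eq!(locate(&[50, 100, 50], 75), Some((1, 25)));
    assert_eq!(locate(&[50, 100, 50], 200), None);
}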
file.write(OffloadWrite::Buffer(&test_data), &mut cursor)?; + + assert_eq!(file.len(), 100); + + cursor = 0; + let mut result = std::iter::repeat(0u8).take(100).collect::>(); + file.read(&mut result, &mut cursor)?; + assert_eq!( + &result, + &std::iter::repeat(56u8).take(100).collect::>() + ); + + cursor = 50; + file.write(OffloadWrite::Buffer(&test_data2), &mut cursor)?; + + assert_eq!(file.len(), 150); + + cursor = 0; + let mut result = std::iter::repeat(0u8).take(150).collect::>(); + file.read(&mut result, &mut cursor)?; + assert_eq!( + &result, + &std::iter::repeat(56u8) + .take(50) + .chain(std::iter::repeat(12u8).take(100)) + .collect::>() + ); + + file.resize(200, 99u8); + assert_eq!(file.len(), 200); + + cursor = 0; + let mut result = std::iter::repeat(0u8).take(200).collect::>(); + file.read(&mut result, &mut cursor)?; + assert_eq!( + &result, + &std::iter::repeat(56u8) + .take(50) + .chain(std::iter::repeat(12u8).take(100)) + .chain(std::iter::repeat(99u8).take(50)) + .collect::>() + ); + + file.resize(33, 01u8); + + cursor = 0; + let mut result = std::iter::repeat(0u8).take(33).collect::>(); + file.read(&mut result, &mut cursor)?; + assert_eq!( + &result, + &std::iter::repeat(56u8).take(33).collect::>() + ); + + let mut cursor = 10u64; + let test_data = std::iter::repeat(74u8).take(10).collect::>(); + file.write(OffloadWrite::Buffer(&test_data), &mut cursor)?; + + assert_eq!(file.len(), 33); + + cursor = 0; + let mut result = std::iter::repeat(0u8).take(33).collect::>(); + file.read(&mut result, &mut cursor)?; + assert_eq!( + &result, + &std::iter::repeat(56u8) + .take(10) + .chain(std::iter::repeat(74u8).take(10)) + .chain(std::iter::repeat(56u8).take(13)) + .collect::>() + ); + + Ok(()) + } +} diff --git a/lib/virtual-io/src/arc.rs b/lib/virtual-io/src/arc.rs new file mode 100644 index 00000000000..7abc45f58c0 --- /dev/null +++ b/lib/virtual-io/src/arc.rs @@ -0,0 +1,43 @@ +use std::sync::Arc; +use std::sync::Mutex; + +use derivative::Derivative; + +use crate::{InterestHandler, InterestType}; + +#[derive(Derivative)] +#[derivative(Debug)] +struct ArcInterestHandlerState { + #[derivative(Debug = "ignore")] + handler: Box, +} + +#[derive(Debug, Clone)] +pub struct ArcInterestHandler { + state: Arc>, +} + +impl ArcInterestHandler { + pub fn new(handler: Box) -> Self { + Self { + state: Arc::new(Mutex::new(ArcInterestHandlerState { handler })), + } + } +} + +impl InterestHandler for ArcInterestHandler { + fn push_interest(&mut self, interest: InterestType) { + let mut state = self.state.lock().unwrap(); + state.handler.push_interest(interest) + } + + fn pop_interest(&mut self, interest: InterestType) -> bool { + let mut state = self.state.lock().unwrap(); + state.handler.pop_interest(interest) + } + + fn has_interest(&self, interest: InterestType) -> bool { + let state = self.state.lock().unwrap(); + state.handler.has_interest(interest) + } +} diff --git a/lib/virtual-io/src/lib.rs b/lib/virtual-io/src/lib.rs index f5fa03bbc8e..e2e52b9a6f6 100644 --- a/lib/virtual-io/src/lib.rs +++ b/lib/virtual-io/src/lib.rs @@ -1,3 +1,4 @@ +mod arc; #[cfg(feature = "sys")] mod guard; mod interest; @@ -5,6 +6,7 @@ mod interest; mod selector; pub mod waker; +pub use arc::*; #[cfg(feature = "sys")] pub use guard::*; pub use interest::*; diff --git a/lib/virtual-net/Cargo.toml b/lib/virtual-net/Cargo.toml index 5aa15a95428..6bbd2a47c14 100644 --- a/lib/virtual-net/Cargo.toml +++ b/lib/virtual-net/Cargo.toml @@ -14,7 +14,7 @@ thiserror = "1" bytes = "1.1" async-trait = { version = "^0.1" } tracing = 
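// Sketch: ArcInterestHandler lets several I/O sources share one underlying
// handler; each source holds a cheap clone and all clones feed the same
// Mutex-protected inner handler. The trait shape below mirrors the methods
// implemented above; the crate name matches the imports used elsewhere in this
// patch, everything else is illustrative.
use virtual_mio::{ArcInterestHandler, InterestHandler, InterestType};

#[derive(Debug, Default)]
struct FlagHandler {
    readable: bool,
}

impl InterestHandler for FlagHandler {
    fn push_interest(&mut self, interest: InterestType) {
        if let InterestType::Readable = interest {
            self.readable = true;
        }
    }
    fn pop_interest(&mut self, interest: InterestType) -> bool {
        let hit = self.has_interest(interest);
        if hit {
            self.readable = false;
        }
        hit
    }
    fn has_interest(&self, interest: InterestType) -> bool {
        matches!(interest, InterestType::Readable) && self.readable
    }
}

fn main() {
    let mut shared = ArcInterestHandler::new(Box::new(FlagHandler::default()));
    let mut clone = shared.clone();
    clone.push_interest(InterestType::Readable);
    assert!(shared.has_interest(InterestType::Readable));
    assert!(shared.pop_interest(InterestType::Readable));
}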
"0.1" -tokio = { version = "1", default_features = false, optional = true } +tokio = { version = "1", default_features = false, features = ["io-util"] } libc = { version = "0.2.139", optional = true } mio = { version = "0.8", optional = true } socket2 = { version = "0.4", optional = true } @@ -31,9 +31,14 @@ tokio-util = { version = "0.7.8", features = ["codec"], optional = true } hyper-tungstenite = { version = "0.11", optional = true } hyper = { version = "0.14", optional = true } tokio-tungstenite = { version = "0.20", optional = true } -rkyv = { version = "0.7.40", features = ["indexmap", "validation", "strict"], optional = true } +rkyv = { workspace = true, optional = true } bytecheck = { version = "0.6.8", optional = true } +[dependencies.smoltcp] +version = "0.8" +default-features = false +features = ["proto-ipv4", "std", "alloc"] + [dev-dependencies] tokio = { version = "1", default_features = false, features = [ "macros", "rt-multi-thread" ] } tracing-test = { version = "0.2" } @@ -41,8 +46,8 @@ serial_test = "2.0.0" [features] default = [ "host-net", "remote", "json", "messagepack", "cbor", "hyper", "tokio-tungstenite" ] -host-net = [ "tokio", "libc", "tokio/io-util", "virtual-mio/sys", "tokio/net", "tokio/rt", "socket2", "mio" ] -remote = [ "tokio", "libc", "tokio/io-util", "tokio/sync", "tokio-serde", "tokio-util" ] +host-net = [ "libc", "tokio/io-util", "virtual-mio/sys", "tokio/net", "tokio/rt", "socket2", "mio" ] +remote = [ "libc", "tokio/io-util", "tokio/sync", "tokio-serde", "tokio-util" ] json = [ "tokio-serde/json" ] messagepack = [ "tokio-serde/messagepack" ] cbor = [ "tokio-serde/cbor" ] diff --git a/lib/virtual-net/src/composite.rs b/lib/virtual-net/src/composite.rs new file mode 100644 index 00000000000..6901bca32b7 --- /dev/null +++ b/lib/virtual-net/src/composite.rs @@ -0,0 +1,121 @@ +use std::net::SocketAddr; +use std::task::{Context, Poll}; + +use crate::{Ipv4Addr, Ipv6Addr, NetworkError, VirtualIoSource, VirtualTcpListener}; +use derivative::Derivative; +use virtual_mio::ArcInterestHandler; + +#[derive(Derivative)] +#[derivative(Debug)] +pub struct CompositeTcpListener { + #[derivative(Debug = "ignore")] + ports: Vec>, +} + +impl CompositeTcpListener { + pub fn new() -> Self { + Self { ports: Vec::new() } + } + + pub fn add_port(&mut self, port: Box) { + self.ports.push(port); + } +} + +impl Default for CompositeTcpListener { + fn default() -> Self { + Self::new() + } +} + +impl VirtualIoSource for CompositeTcpListener { + fn remove_handler(&mut self) { + for port in self.ports.iter_mut() { + port.remove_handler(); + } + } + + fn poll_read_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + for port in self.ports.iter_mut() { + if let Poll::Ready(ready) = port.poll_read_ready(cx) { + return Poll::Ready(ready); + } + } + Poll::Pending + } + + fn poll_write_ready( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + for port in self.ports.iter_mut() { + if let Poll::Ready(ready) = port.poll_write_ready(cx) { + return Poll::Ready(ready); + } + } + Poll::Pending + } +} + +impl VirtualTcpListener for CompositeTcpListener { + fn try_accept( + &mut self, + ) -> crate::Result<(Box, SocketAddr)> { + let mut ret = NetworkError::Unsupported; + for port in self.ports.iter_mut() { + match port.try_accept() { + Ok(ret) => return Ok(ret), + Err(err) => { + ret = err; + } + } + } + Err(ret) + } + + fn set_handler( + &mut self, + handler: Box, + ) -> crate::Result<()> { + let handler = ArcInterestHandler::new(handler); + for port in self.ports.iter_mut() 
{ + port.set_handler(Box::new(handler.clone()))?; + } + Ok(()) + } + + fn addr_local(&self) -> crate::Result { + if self.ports.len() > 1 { + let addr = self.ports.first().unwrap().addr_local()?; + if addr.is_ipv4() { + Ok(SocketAddr::new(Ipv4Addr::UNSPECIFIED.into(), addr.port())) + } else { + Ok(SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), addr.port())) + } + } else if let Some(addr) = self.ports.first() { + addr.addr_local() + } else { + Err(NetworkError::Unsupported) + } + } + + fn set_ttl(&mut self, ttl: u8) -> crate::Result<()> { + for port in self.ports.iter_mut() { + match port.set_ttl(ttl) { + Ok(()) | Err(NetworkError::Unsupported) => {} + Err(err) => return Err(err), + } + } + Ok(()) + } + + fn ttl(&self) -> crate::Result { + let mut ret = u8::MAX; + for port in self.ports.iter() { + if let Ok(ttl) = port.ttl() { + ret = ret.min(ttl) + } + } + Ok(ret) + } +} diff --git a/lib/virtual-net/src/lib.rs b/lib/virtual-net/src/lib.rs index 307f49772cd..782d84110d4 100644 --- a/lib/virtual-net/src/lib.rs +++ b/lib/virtual-net/src/lib.rs @@ -2,19 +2,24 @@ #[cfg(feature = "remote")] pub mod client; +pub mod composite; #[cfg(feature = "host-net")] pub mod host; +pub mod loopback; pub mod meta; #[cfg(feature = "remote")] pub mod rx_tx; #[cfg(feature = "remote")] pub mod server; +pub mod tcp_pair; #[cfg(feature = "tokio")] #[cfg(test)] mod tests; #[cfg(feature = "remote")] pub use client::{RemoteNetworkingClient, RemoteNetworkingClientDriver}; +pub use composite::CompositeTcpListener; +pub use loopback::LoopbackNetworking; use pin_project_lite::pin_project; #[cfg(feature = "rkyv")] use rkyv::{Archive, CheckBytes, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize}; diff --git a/lib/virtual-net/src/loopback.rs b/lib/virtual-net/src/loopback.rs new file mode 100644 index 00000000000..93a68b10ad0 --- /dev/null +++ b/lib/virtual-net/src/loopback.rs @@ -0,0 +1,240 @@ +use std::collections::VecDeque; +use std::net::SocketAddr; +use std::sync::Mutex; +use std::task::{Context, Poll, Waker}; +use std::{collections::HashMap, sync::Arc}; + +use crate::tcp_pair::TcpSocketHalf; +use crate::{ + InterestHandler, IpAddr, IpCidr, Ipv4Addr, Ipv6Addr, NetworkError, VirtualIoSource, + VirtualNetworking, VirtualTcpListener, VirtualTcpSocket, +}; +use derivative::Derivative; +use virtual_mio::InterestType; + +const DEFAULT_MAX_BUFFER_SIZE: usize = 1_048_576; + +#[derive(Debug, Default)] +struct LoopbackNetworkingState { + tcp_listeners: HashMap, + ip_addresses: Vec, +} + +#[derive(Debug, Clone)] +pub struct LoopbackNetworking { + state: Arc>, +} + +impl LoopbackNetworking { + pub fn new() -> Self { + LoopbackNetworking { + state: Arc::new(Mutex::new(Default::default())), + } + } + + pub fn loopback_connect_to( + &self, + mut local_addr: SocketAddr, + peer_addr: SocketAddr, + ) -> Option { + let mut port = local_addr.port(); + if port == 0 { + port = peer_addr.port(); + } + + local_addr = match local_addr.ip() { + IpAddr::V4(Ipv4Addr::UNSPECIFIED) => { + SocketAddr::new(Ipv4Addr::new(127, 0, 0, 100).into(), port) + } + IpAddr::V6(Ipv6Addr::UNSPECIFIED) => { + SocketAddr::new(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 100).into(), port) + } + ip => SocketAddr::new(ip, port), + }; + + let state = self.state.lock().unwrap(); + if let Some(listener) = state.tcp_listeners.get(&peer_addr) { + Some(listener.connect_to(local_addr)) + } else { + state + .tcp_listeners + .iter() + .next() + .map(|listener| listener.1.connect_to(local_addr)) + } + } +} + +impl Default for LoopbackNetworking { + fn default() -> Self { + 
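// Sketch: CompositeTcpListener fans one accept loop out over several underlying
// listeners (whichever port is ready first wins), which pairs naturally with
// the loopback network introduced here. Crate paths assumed; call this from an
// async runtime.
use std::net::SocketAddr;
use virtual_net::{CompositeTcpListener, LoopbackNetworking, VirtualNetworking};

async fn listen_on_both_stacks(net: &LoopbackNetworking) -> anyhow::Result<CompositeTcpListener> {
    let mut composite = CompositeTcpListener::new();
    for addr in ["127.0.0.1:8080", "[::1]:8080"] {
        let addr: SocketAddr = addr.parse()?;
        // listen_tcp(addr, only_v6, reuse_port, reuse_addr)
        let port = net.listen_tcp(addr, false, false, false).await?;
        composite.add_port(port);
    }
    Ok(composite)
}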
Self::new() + } +} + +#[allow(unused_variables)] +#[async_trait::async_trait] +impl VirtualNetworking for LoopbackNetworking { + async fn dhcp_acquire(&self) -> crate::Result> { + let mut state: std::sync::MutexGuard<'_, LoopbackNetworkingState> = + self.state.lock().unwrap(); + state.ip_addresses.clear(); + state.ip_addresses.push(IpCidr { + ip: IpAddr::V4(Ipv4Addr::LOCALHOST), + prefix: 32, + }); + state.ip_addresses.push(IpCidr { + ip: IpAddr::V6(Ipv6Addr::LOCALHOST), + prefix: 128, + }); + Ok(state.ip_addresses.iter().map(|cidr| cidr.ip).collect()) + } + + async fn ip_add(&self, ip: IpAddr, prefix: u8) -> crate::Result<()> { + let mut state = self.state.lock().unwrap(); + state.ip_addresses.push(IpCidr { ip, prefix }); + Ok(()) + } + + async fn ip_remove(&self, ip: IpAddr) -> crate::Result<()> { + let mut state: std::sync::MutexGuard<'_, LoopbackNetworkingState> = + self.state.lock().unwrap(); + state.ip_addresses.retain(|cidr| cidr.ip != ip); + Ok(()) + } + + async fn ip_clear(&self) -> crate::Result<()> { + let mut state: std::sync::MutexGuard<'_, LoopbackNetworkingState> = + self.state.lock().unwrap(); + state.ip_addresses.clear(); + Ok(()) + } + + async fn ip_list(&self) -> crate::Result> { + let state: std::sync::MutexGuard<'_, LoopbackNetworkingState> = self.state.lock().unwrap(); + Ok(state.ip_addresses.clone()) + } + + async fn listen_tcp( + &self, + mut addr: SocketAddr, + _only_v6: bool, + _reuse_port: bool, + _reuse_addr: bool, + ) -> crate::Result> { + let listener = LoopbackTcpListener::new(addr); + + if addr.ip() == IpAddr::V4(Ipv4Addr::UNSPECIFIED) { + addr = SocketAddr::new(Ipv4Addr::LOCALHOST.into(), addr.port()); + } else if addr.ip() == IpAddr::V6(Ipv6Addr::UNSPECIFIED) { + addr = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), addr.port()); + } + + let mut state = self.state.lock().unwrap(); + state.tcp_listeners.insert(addr, listener.clone()); + + Ok(Box::new(listener)) + } +} + +#[derive(Derivative)] +#[derivative(Debug)] +struct LoopbackTcpListenerState { + #[derivative(Debug = "ignore")] + handler: Option>, + addr_local: SocketAddr, + backlog: VecDeque, + wakers: Vec, +} + +#[derive(Debug, Clone)] +pub struct LoopbackTcpListener { + state: Arc>, +} + +impl LoopbackTcpListener { + pub fn new(addr_local: SocketAddr) -> Self { + Self { + state: Arc::new(Mutex::new(LoopbackTcpListenerState { + handler: None, + addr_local, + backlog: Default::default(), + wakers: Default::default(), + })), + } + } + + pub fn connect_to(&self, addr_local: SocketAddr) -> TcpSocketHalf { + let mut state = self.state.lock().unwrap(); + let (half1, half2) = + TcpSocketHalf::channel(DEFAULT_MAX_BUFFER_SIZE, state.addr_local, addr_local); + + state.backlog.push_back(half1); + if let Some(handler) = state.handler.as_mut() { + handler.push_interest(InterestType::Readable); + } + state.wakers.drain(..).for_each(|w| w.wake()); + + half2 + } +} + +impl VirtualIoSource for LoopbackTcpListener { + fn remove_handler(&mut self) { + let mut state = self.state.lock().unwrap(); + state.handler.take(); + } + + fn poll_read_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + let mut state = self.state.lock().unwrap(); + if !state.backlog.is_empty() { + return Poll::Ready(Ok(state.backlog.len())); + } + if !state.wakers.iter().any(|w| w.will_wake(cx.waker())) { + state.wakers.push(cx.waker().clone()); + } + Poll::Pending + } + + fn poll_write_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Pending + } +} + +impl VirtualTcpListener for LoopbackTcpListener { + fn try_accept( + &mut self, + ) -> 
crate::Result<(Box, SocketAddr)> { + let mut state = self.state.lock().unwrap(); + let next = state.backlog.pop_front(); + if let Some(next) = next { + let peer = next.addr_peer()?; + return Ok((Box::new(next), peer)); + } + Err(NetworkError::WouldBlock) + } + + fn set_handler( + &mut self, + mut handler: Box, + ) -> crate::Result<()> { + let mut state = self.state.lock().unwrap(); + if !state.backlog.is_empty() { + handler.push_interest(InterestType::Readable); + } + state.handler.replace(handler); + Ok(()) + } + + fn addr_local(&self) -> crate::Result { + let state = self.state.lock().unwrap(); + Ok(state.addr_local) + } + + fn set_ttl(&mut self, _ttl: u8) -> crate::Result<()> { + Ok(()) + } + + fn ttl(&self) -> crate::Result { + Ok(64) + } +} diff --git a/lib/virtual-net/src/tcp_pair.rs b/lib/virtual-net/src/tcp_pair.rs new file mode 100644 index 00000000000..567d4c1db03 --- /dev/null +++ b/lib/virtual-net/src/tcp_pair.rs @@ -0,0 +1,633 @@ +use crate::{ + net_error_into_io_err, InterestHandler, NetworkError, SocketStatus, VirtualConnectedSocket, + VirtualIoSource, VirtualSocket, VirtualTcpSocket, +}; +use bytes::{Buf, Bytes}; +use futures_util::Future; +use smoltcp::storage::RingBuffer; +use std::io::{self}; +use std::pin::Pin; +use std::sync::Arc; +use std::sync::Mutex; +use std::task::{Context, Waker}; +use std::time::Duration; +use std::{net::SocketAddr, task::Poll}; +use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite, BufReader}; +use virtual_mio::{ArcInterestHandler, InterestType}; + +#[derive(Debug)] +struct SocketBufferState { + buffer: RingBuffer<'static, u8>, + push_handler: Option, + pull_handler: Option, + wakers: Vec, + dead: bool, + // This flag prevents a poll write ready storm + halt_immediate_poll_write: bool, +} + +impl SocketBufferState { + fn add_waker(&mut self, waker: &Waker) { + if !self.wakers.iter().any(|w| w.will_wake(waker)) { + self.wakers.push(waker.clone()); + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct SocketBuffer { + state: Arc>, + dead_on_drop: bool, +} + +impl Drop for SocketBuffer { + fn drop(&mut self) { + if self.dead_on_drop { + self.set_dead(); + } + } +} + +impl SocketBuffer { + fn new(max_size: usize) -> Self { + Self { + state: Arc::new(Mutex::new(SocketBufferState { + buffer: RingBuffer::new(vec![0; max_size]), + push_handler: None, + pull_handler: None, + wakers: Vec::new(), + dead: false, + halt_immediate_poll_write: false, + })), + dead_on_drop: false, + } + } + + pub fn set_push_handler(&self, mut handler: ArcInterestHandler) { + let mut state = self.state.lock().unwrap(); + if state.dead { + handler.push_interest(InterestType::Closed); + } + if !state.buffer.is_empty() { + handler.push_interest(InterestType::Readable); + } + state.push_handler.replace(handler); + } + + pub fn set_pull_handler(&self, mut handler: ArcInterestHandler) { + let mut state = self.state.lock().unwrap(); + if state.dead { + handler.push_interest(InterestType::Closed); + } + if !state.buffer.is_full() && state.pull_handler.is_none() { + handler.push_interest(InterestType::Writable); + } + state.pull_handler.replace(handler); + } + + pub fn clear_push_handler(&self) { + let mut state = self.state.lock().unwrap(); + state.push_handler.take(); + } + + pub fn clear_pull_handler(&self) { + let mut state = self.state.lock().unwrap(); + state.pull_handler.take(); + } + + pub fn poll_read_ready(&self, cx: &mut Context<'_>) -> Poll> { + let mut state = self.state.lock().unwrap(); + if !state.buffer.is_empty() { + return Poll::Ready(Ok(state.buffer.len())); + } + 
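// ---------------------------------------------------------------------------
// Illustrative sketch (annotation, not part of this patch): how the loopback
// network and the composite listener introduced above might be wired together.
// It assumes the crate is consumed as `virtual_net` and that `listen_tcp` and
// `add_port` exchange boxed `VirtualTcpListener` trait objects, as the code
// above suggests; treat names and paths here as assumptions, not API promises.
use std::net::SocketAddr;
use virtual_net::{CompositeTcpListener, LoopbackNetworking, VirtualNetworking, VirtualTcpListener};

async fn demo_loopback_accept() {
    let net = LoopbackNetworking::new();

    // Register a TCP listener on the in-memory loopback network.
    let addr: SocketAddr = "127.0.0.1:8080".parse().unwrap();
    let listener = net.listen_tcp(addr, false, false, false).await.unwrap();

    // A composite listener fans several listeners in behind one accept loop.
    let mut acceptor = CompositeTcpListener::new();
    acceptor.add_port(listener);

    // "Dial" the listener: the caller keeps one half of an in-memory TCP pair
    // while the other half is queued on the listener's backlog.
    let _client = net
        .loopback_connect_to("127.0.0.1:0".parse().unwrap(), addr)
        .expect("a listener was registered above");

    // The queued half can now be accepted like any ordinary connection.
    let (_server, _peer) = acceptor.try_accept().unwrap();
}
// ---------------------------------------------------------------------------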
if state.dead { + return Poll::Ready(Ok(0)); + } + if !state.wakers.iter().any(|w| w.will_wake(cx.waker())) { + state.wakers.push(cx.waker().clone()); + } + Poll::Pending + } + + pub fn poll_write_ready(&self, cx: &mut Context<'_>) -> Poll> { + let mut state = self.state.lock().unwrap(); + if state.dead { + return Poll::Ready(Ok(0)); + } + if !state.buffer.is_full() && !state.halt_immediate_poll_write { + state.halt_immediate_poll_write = true; + return Poll::Ready(Ok(state.buffer.window())); + } + if !state.wakers.iter().any(|w| w.will_wake(cx.waker())) { + state.wakers.push(cx.waker().clone()); + } + Poll::Pending + } + + pub fn set_dead(&self) { + let mut state = self.state.lock().unwrap(); + state.dead = true; + if let Some(handler) = state.pull_handler.as_mut() { + handler.push_interest(InterestType::Closed); + } + if let Some(handler) = state.push_handler.as_mut() { + handler.push_interest(InterestType::Closed); + } + state.wakers.drain(..).for_each(|w| w.wake()); + } + + pub fn is_dead(&self) -> bool { + let state = self.state.lock().unwrap(); + state.dead + } + + pub fn try_send( + &self, + data: &[u8], + all_or_nothing: bool, + waker: Option<&Waker>, + ) -> crate::Result { + let mut state = self.state.lock().unwrap(); + if state.dead { + return Err(NetworkError::ConnectionReset); + } + state.halt_immediate_poll_write = false; + let available = state.buffer.window(); + if available == 0 { + if let Some(waker) = waker { + state.add_waker(waker) + } + return Err(NetworkError::WouldBlock); + } + if data.len() > available { + if all_or_nothing { + if let Some(waker) = waker { + state.add_waker(waker) + } + return Err(NetworkError::WouldBlock); + } + let amt = state.buffer.enqueue_slice(&data[..available]); + return Ok(amt); + } + let amt = state.buffer.enqueue_slice(data); + + if let Some(handler) = state.push_handler.as_mut() { + handler.push_interest(InterestType::Readable); + } + state.wakers.drain(..).for_each(|w| w.wake()); + Ok(amt) + } + + pub async fn send(&self, data: Bytes) -> crate::Result<()> { + struct Poller<'a> { + this: &'a SocketBuffer, + data: Bytes, + } + impl<'a> Future for Poller<'a> { + type Output = crate::Result<()>; + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + loop { + if self.data.is_empty() { + return Poll::Ready(Ok(())); + } + return match self.this.try_send(&self.data, false, Some(cx.waker())) { + Ok(amt) => { + self.data.advance(amt); + continue; + } + Err(NetworkError::WouldBlock) => Poll::Pending, + Err(err) => Poll::Ready(Err(err)), + }; + } + } + } + Poller { this: self, data }.await + } + + pub fn try_read( + &self, + buf: &mut [std::mem::MaybeUninit], + waker: Option<&Waker>, + ) -> crate::Result { + let mut state = self.state.lock().unwrap(); + if state.buffer.is_empty() { + if state.dead { + return Err(NetworkError::ConnectionReset); + } + + if let Some(waker) = waker { + state.add_waker(waker) + } + return Err(NetworkError::WouldBlock); + } + + let buf: &mut [u8] = unsafe { std::mem::transmute(buf) }; + let amt = buf.len().min(state.buffer.len()); + let amt = state.buffer.dequeue_slice(&mut buf[..amt]); + + if let Some(handler) = state.pull_handler.as_mut() { + handler.push_interest(InterestType::Writable); + } + state.wakers.drain(..).for_each(|w| w.wake()); + Ok(amt) + } + + pub fn set_max_size(&self, new_size: usize) { + let mut state = self.state.lock().unwrap(); + state.halt_immediate_poll_write = false; + + let mut existing: Vec = vec![0; state.buffer.len()]; + if !state.buffer.is_empty() { + let amt = 
state.buffer.dequeue_slice(&mut existing[..]); + existing.resize(amt, 0); + } + + state.buffer = RingBuffer::new(vec![0; new_size]); + if !existing.is_empty() { + let _ = state.buffer.enqueue_slice(&existing[..]); + } + } + + pub fn max_size(&self) -> usize { + let state = self.state.lock().unwrap(); + state.buffer.capacity() + } +} + +impl AsyncWrite for SocketBuffer { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + match self.try_send(buf, false, Some(cx.waker())) { + Ok(amt) => Poll::Ready(Ok(amt)), + Err(NetworkError::WouldBlock) => Poll::Pending, + Err(err) => Poll::Ready(Err(net_error_into_io_err(err))), + } + } + + fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + self.set_dead(); + Poll::Ready(Ok(())) + } +} + +impl AsyncRead for SocketBuffer { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + match self.try_read(unsafe { buf.unfilled_mut() }, Some(cx.waker())) { + Ok(amt) => { + unsafe { buf.assume_init(amt) }; + buf.advance(amt); + Poll::Ready(Ok(())) + } + Err(NetworkError::WouldBlock) => Poll::Pending, + Err(err) => Poll::Ready(Err(net_error_into_io_err(err))), + } + } +} + +#[derive(Debug)] +pub struct TcpSocketHalf { + addr_local: SocketAddr, + addr_peer: SocketAddr, + tx: SocketBuffer, + rx: SocketBuffer, + ttl: u32, +} + +impl TcpSocketHalf { + pub fn channel( + max_buffer_size: usize, + addr1: SocketAddr, + addr2: SocketAddr, + ) -> (TcpSocketHalf, TcpSocketHalf) { + let mut buffer1 = SocketBuffer::new(max_buffer_size); + buffer1.dead_on_drop = true; + + let mut buffer2 = SocketBuffer::new(max_buffer_size); + buffer2.dead_on_drop = true; + + let half1 = Self { + tx: buffer1.clone(), + rx: buffer2.clone(), + addr_local: addr1, + addr_peer: addr2, + ttl: 64, + }; + let half2 = Self { + tx: buffer2, + rx: buffer1, + addr_local: addr2, + addr_peer: addr1, + ttl: 64, + }; + (half1, half2) + } + + pub fn is_active(&self) -> bool { + !self.tx.is_dead() + } + + pub fn close(&self) -> crate::Result<()> { + self.tx.set_dead(); + self.rx.set_dead(); + Ok(()) + } +} + +impl VirtualIoSource for TcpSocketHalf { + fn remove_handler(&mut self) { + self.tx.clear_pull_handler(); + self.rx.clear_push_handler(); + } + + fn poll_read_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.rx.poll_read_ready(cx) + } + + fn poll_write_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.tx.poll_write_ready(cx) + } +} + +impl VirtualSocket for TcpSocketHalf { + fn set_ttl(&mut self, ttl: u32) -> crate::Result<()> { + self.ttl = ttl; + Ok(()) + } + + fn ttl(&self) -> crate::Result { + Ok(self.ttl) + } + + fn addr_local(&self) -> crate::Result { + Ok(self.addr_local) + } + + fn status(&self) -> crate::Result { + Ok(if self.tx.is_dead() { + SocketStatus::Closed + } else { + SocketStatus::Opened + }) + } + + fn set_handler( + &mut self, + handler: Box, + ) -> crate::Result<()> { + let handler = ArcInterestHandler::new(handler); + self.tx.set_pull_handler(handler.clone()); + self.rx.set_push_handler(handler); + Ok(()) + } +} + +impl VirtualConnectedSocket for TcpSocketHalf { + fn set_linger(&mut self, _linger: Option) -> crate::Result<()> { + Ok(()) + } + + fn linger(&self) -> crate::Result> { + Ok(None) + } + + fn try_send(&mut self, data: &[u8]) -> crate::Result { + self.tx.try_send(data, false, None) + } + + fn try_flush(&mut self) -> crate::Result<()> { + 
Ok(()) + } + + fn close(&mut self) -> crate::Result<()> { + self.tx.set_dead(); + self.rx.set_dead(); + Ok(()) + } + + fn try_recv(&mut self, buf: &mut [std::mem::MaybeUninit]) -> crate::Result { + self.rx.try_read(buf, None) + } +} + +impl VirtualTcpSocket for TcpSocketHalf { + fn set_recv_buf_size(&mut self, size: usize) -> crate::Result<()> { + self.rx.set_max_size(size); + Ok(()) + } + + fn recv_buf_size(&self) -> crate::Result { + Ok(self.rx.max_size()) + } + + fn set_send_buf_size(&mut self, size: usize) -> crate::Result<()> { + self.tx.set_max_size(size); + Ok(()) + } + + fn send_buf_size(&self) -> crate::Result { + Ok(self.tx.max_size()) + } + + fn set_nodelay(&mut self, _reuse: bool) -> crate::Result<()> { + Ok(()) + } + + fn nodelay(&self) -> crate::Result { + Ok(true) + } + + fn set_keepalive(&mut self, _keepalive: bool) -> crate::Result<()> { + Ok(()) + } + + fn keepalive(&self) -> crate::Result { + Ok(false) + } + + fn set_dontroute(&mut self, _keepalive: bool) -> crate::Result<()> { + Ok(()) + } + + fn dontroute(&self) -> crate::Result { + Ok(false) + } + + fn addr_peer(&self) -> crate::Result { + Ok(self.addr_peer) + } + + fn shutdown(&mut self, how: std::net::Shutdown) -> crate::Result<()> { + match how { + std::net::Shutdown::Both => { + self.tx.set_dead(); + self.rx.set_dead(); + } + std::net::Shutdown::Read => { + self.rx.set_dead(); + } + std::net::Shutdown::Write => { + self.tx.set_dead(); + } + } + Ok(()) + } + + fn is_closed(&self) -> bool { + self.tx.is_dead() + } +} + +#[allow(unused)] +#[derive(Debug)] +pub struct TcpSocketHalfTx { + addr_local: SocketAddr, + addr_peer: SocketAddr, + tx: SocketBuffer, + ttl: u32, +} + +impl TcpSocketHalfTx { + pub fn poll_send(&self, cx: &mut Context<'_>, data: &[u8]) -> Poll> { + match self.tx.try_send(data, false, Some(cx.waker())) { + Ok(amt) => Poll::Ready(Ok(amt)), + Err(NetworkError::WouldBlock) => Poll::Pending, + Err(err) => Poll::Ready(Err(net_error_into_io_err(err))), + } + } + + pub fn try_send(&self, data: &[u8]) -> io::Result { + self.tx + .try_send(data, false, None) + .map_err(net_error_into_io_err) + } + + pub async fn send_ext(&self, data: Bytes, non_blocking: bool) -> io::Result<()> { + if non_blocking { + self.tx + .try_send(&data, true, None) + .map_err(net_error_into_io_err) + .map(|_| ()) + } else { + self.tx.send(data).await.map_err(net_error_into_io_err) + } + } + + pub async fn send(&self, data: Bytes) -> io::Result<()> { + self.send_ext(data, false).await + } + + pub fn close(&self) -> crate::Result<()> { + self.tx.set_dead(); + Ok(()) + } +} + +impl AsyncWrite for TcpSocketHalfTx { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + Pin::new(&mut self.tx).poll_write(cx, buf) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.tx).poll_flush(cx) + } + + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + Pin::new(&mut self.tx).poll_shutdown(cx) + } +} + +#[allow(unused)] +#[derive(Debug)] +pub struct TcpSocketHalfRx { + addr_local: SocketAddr, + addr_peer: SocketAddr, + rx: BufReader, + ttl: u32, +} + +impl TcpSocketHalfRx { + pub fn buffer(&self) -> &[u8] { + self.rx.buffer() + } + + pub fn close(&mut self) -> crate::Result<()> { + self.rx.get_mut().set_dead(); + Ok(()) + } + + #[allow(dead_code)] + pub(crate) fn inner(&mut self) -> &BufReader { + &self.rx + } + + #[allow(dead_code)] + pub(crate) fn inner_mut(&mut self) -> &mut BufReader { + &mut self.rx + } +} + +impl 
AsyncRead for TcpSocketHalfRx { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + Pin::new(&mut self.rx).poll_read(cx, buf) + } +} + +impl TcpSocketHalfRx { + pub fn poll_fill_buf(&mut self, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.rx).poll_fill_buf(cx) + } + + pub fn consume(&mut self, amt: usize) { + Pin::new(&mut self.rx).consume(amt) + } +} + +impl TcpSocketHalf { + pub fn split(self) -> (TcpSocketHalfTx, TcpSocketHalfRx) { + let tx = TcpSocketHalfTx { + tx: self.tx, + addr_local: self.addr_local, + addr_peer: self.addr_peer, + ttl: self.ttl, + }; + let rx = TcpSocketHalfRx { + rx: BufReader::new(self.rx), + addr_local: self.addr_local, + addr_peer: self.addr_peer, + ttl: self.ttl, + }; + (tx, rx) + } + + pub fn combine(tx: TcpSocketHalfTx, rx: TcpSocketHalfRx) -> Self { + Self { + tx: tx.tx, + rx: rx.rx.into_inner(), + addr_local: tx.addr_local, + addr_peer: tx.addr_peer, + ttl: tx.ttl, + } + } +} diff --git a/lib/wasi-types/src/lib.rs b/lib/wasi-types/src/lib.rs index 7e725ccc5d9..f1c745d1fe3 100644 --- a/lib/wasi-types/src/lib.rs +++ b/lib/wasi-types/src/lib.rs @@ -5,3 +5,4 @@ pub mod asyncify; pub mod types; pub mod wasi; +pub mod wasix; diff --git a/lib/wasi-types/src/wasix/mod.rs b/lib/wasi-types/src/wasix/mod.rs new file mode 100644 index 00000000000..8f601ed578b --- /dev/null +++ b/lib/wasi-types/src/wasix/mod.rs @@ -0,0 +1,29 @@ +#[cfg(feature = "enable-serde")] +use serde::*; + +// pub mod wasix_http_client_v1; + +#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "enable-serde", serde(rename_all = "snake_case"))] +pub enum ThreadStartType { + MainThread, + ThreadSpawn { start_ptr: u64 }, +} + +/// Represents the memory layout of the parts that the thread itself uses +#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "enable-serde", serde(rename_all = "snake_case"))] +pub struct WasiMemoryLayout { + /// This is the top part of the stack (stacks go backwards) + pub stack_upper: u64, + /// This is the bottom part of the stack (anything more below this is a stack overflow) + pub stack_lower: u64, + /// Piece of memory that is marked as none readable/writable so stack overflows cause an exception + /// TODO: This field will need to be used to mark the guard memory as inaccessible + #[allow(dead_code)] + pub guard_size: u64, + /// Total size of the stack + pub stack_size: u64, +} diff --git a/lib/wasix/Cargo.toml b/lib/wasix/Cargo.toml index 289ebf53f9b..05dacf6fb0e 100644 --- a/lib/wasix/Cargo.toml +++ b/lib/wasix/Cargo.toml @@ -47,6 +47,7 @@ tokio = { version = "1", features = [ "time", "rt", ], default_features = false } +tokio-stream = { version = "0.1", features = [ "sync" ] } futures = { version = "0.3" } # used by feature='os' async-trait = { version = "^0.1" } @@ -78,9 +79,10 @@ tower-http = { version = "0.4.0", features = [ ], optional = true } tower = { version = "0.4.13", features = ["make", "util"], optional = true } url = "2.3.1" -rkyv = { version = "0.7.40", features = ["indexmap", "validation", "strict"] } +rkyv = { workspace = true } bytecheck = "0.6.8" -shared-buffer = "0.1" +blake3 = "1.0" +shared-buffer = { workspace = true } petgraph = "0.6.3" base64 = "0.21" lz4_flex = { version = "0.11" } @@ -138,9 +140,11 @@ wasmer = { path = "../api", version = "=4.2.7", default-features 
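// ---------------------------------------------------------------------------
// Illustrative sketch (annotation, not part of this patch): the in-memory TCP
// pair from `tcp_pair.rs` in use. It assumes the crate is consumed as
// `virtual_net` and that `VirtualConnectedSocket` must be in scope for
// `try_send`/`try_recv`, as the trait impls above suggest.
use std::mem::MaybeUninit;
use std::net::SocketAddr;
use virtual_net::tcp_pair::TcpSocketHalf;
use virtual_net::VirtualConnectedSocket;

fn demo_tcp_pair() {
    let a: SocketAddr = "127.0.0.1:1000".parse().unwrap();
    let b: SocketAddr = "127.0.0.1:2000".parse().unwrap();

    // Two connected halves backed by a pair of in-memory ring buffers.
    let (mut left, mut right) = TcpSocketHalf::channel(64 * 1024, a, b);

    // Bytes written into one half become readable on the other.
    left.try_send(b"hello").unwrap();

    let mut buf = [MaybeUninit::<u8>::uninit(); 16];
    let n = right.try_recv(&mut buf).unwrap();
    assert_eq!(n, 5);

    // `split()` turns a half into `TcpSocketHalfTx`/`TcpSocketHalfRx`, which
    // implement tokio's `AsyncWrite`/`AsyncRead` for use in async code.
    let (_tx, _rx) = right.split();
}
// ---------------------------------------------------------------------------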
= false, featur default = ["sys-default"] time = ["tokio/time"] +ctrlc = ["tokio/signal"] webc_runner_rt_wcgi = ["hyper", "wcgi", "wcgi-host", "tower", "tower-http"] webc_runner_rt_dcgi = ["webc_runner_rt_wcgi", "journal"] +webc_runner_rt_dproxy = ["hyper", "tower", "tower-http", "journal"] webc_runner_rt_emscripten = ["wasmer-emscripten"] sys = ["webc/mmap", "time", "virtual-mio/sys"] @@ -154,6 +158,7 @@ sys-default = [ "host-vnet", "host-threads", "host-reqwest", + "ctrlc" ] sys-poll = [] sys-thread = ["tokio/rt", "tokio/time", "tokio/rt-multi-thread", "rusty_pool"] @@ -193,6 +198,7 @@ features = [ "wasmer/sys", "webc_runner_rt_wcgi", "webc_runner_rt_dcgi", + "webc_runner_rt_dproxy", "webc_runner_rt_emscripten", "sys-default", ] diff --git a/lib/wasix/src/bin_factory/exec.rs b/lib/wasix/src/bin_factory/exec.rs index 86c77077c9f..21829b548f8 100644 --- a/lib/wasix/src/bin_factory/exec.rs +++ b/lib/wasix/src/bin_factory/exec.rs @@ -1,7 +1,10 @@ use std::{pin::Pin, sync::Arc}; use crate::{ - os::task::{thread::WasiThreadRunGuard, TaskJoinHandle}, + os::task::{ + thread::{RewindResultType, WasiThreadRunGuard}, + TaskJoinHandle, + }, runtime::{ task_manager::{ TaskWasm, TaskWasmRecycle, TaskWasmRecycleProperties, TaskWasmRunProperties, @@ -11,7 +14,6 @@ use crate::{ syscalls::rewind_ext, RewindState, SpawnError, WasiError, WasiRuntimeError, }; -use bytes::Bytes; use futures::Future; use tracing::*; use wasmer::{Function, FunctionEnvMut, Memory32, Memory64, Module, Store}; @@ -154,6 +156,7 @@ pub fn run_exec(props: TaskWasmRunProperties) { let rewind_state = match unsafe { ctx.bootstrap(&mut store) } { Ok(r) => r, Err(err) => { + tracing::warn!("failed to bootstrap - {}", err); thread.thread.set_status_finished(Err(err)); ctx.data(&store) .blocking_on_exit(Some(Errno::Noexec.into())); @@ -184,7 +187,7 @@ fn call_module( ctx: WasiFunctionEnv, mut store: Store, handle: WasiThreadRunGuard, - rewind_state: Option<(RewindState, Option)>, + rewind_state: Option<(RewindState, RewindResultType)>, recycle: Option>, ) { let env = ctx.data(&store); @@ -255,7 +258,7 @@ fn call_module( ctx, store, handle, - Some((rewind, Some(rewind_result))), + Some((rewind, RewindResultType::RewindWithResult(rewind_result))), recycle, ); } diff --git a/lib/wasix/src/fs/mod.rs b/lib/wasix/src/fs/mod.rs index fd8db0d5978..c10420cf086 100644 --- a/lib/wasix/src/fs/mod.rs +++ b/lib/wasix/src/fs/mod.rs @@ -43,6 +43,18 @@ use crate::syscalls::map_io_err; use crate::{bin_factory::BinaryPackage, state::PreopenedDir, ALL_RIGHTS}; /// the fd value of the virtual root +/// +/// Used for interacting with the file system when it has no +/// pre-opened file descriptors at the root level. Normally +/// a WASM process will do this in the libc initialization stage +/// however that does not happen when the WASM process has never +/// been run. Further that logic could change at any time in libc +/// which would then break functionality. 
Instead we use this fixed +/// file descriptor +/// +/// This is especially important for fuse mounting journals which +/// use the same syscalls as a normal WASI application but do not +/// run the libc initialization logic pub const VIRTUAL_ROOT_FD: WasiFd = 3; const STDIN_DEFAULT_RIGHTS: Rights = { @@ -411,6 +423,63 @@ fn create_dir_all(fs: &dyn FileSystem, path: &Path) -> Result<(), virtual_fs::Fs Ok(()) } +/// This needs to be exposed so that the multiple use-cases are able +/// to generated unique file descriptors and update the seed during +/// journal restoration +#[derive(Debug, Clone)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct WasiFdSeed { + next_fd: Arc, +} + +impl Default for WasiFdSeed { + fn default() -> Self { + Self::new(3) + } +} + +impl WasiFdSeed { + pub fn new(initial_val: u32) -> Self { + Self { + next_fd: Arc::new(AtomicU32::new(initial_val)), + } + } + + pub fn fork(&self) -> Self { + Self { + next_fd: Arc::new(AtomicU32::new(self.next_fd.load(Ordering::SeqCst))), + } + } + + pub fn next_val(&self) -> WasiFd { + self.next_fd.fetch_add(1, Ordering::SeqCst) + } + + pub fn set_val(&self, val: WasiFd) { + self.next_fd.store(val, std::sync::atomic::Ordering::SeqCst) + } + + pub fn cur_val(&self) -> WasiFd { + self.next_fd.load(Ordering::SeqCst) + } + + pub fn clip_val(&self, fd: WasiFd) { + loop { + let existing = self.next_fd.load(Ordering::SeqCst); + if existing >= fd { + return; + } + if self + .next_fd + .compare_exchange(existing, fd, Ordering::SeqCst, Ordering::Relaxed) + .is_ok() + { + break; + } + } + } +} + /// Warning, modifying these fields directly may cause invariants to break and /// should be considered unsafe. These fields may be made private in a future release #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] @@ -418,7 +487,7 @@ pub struct WasiFs { //pub repo: Repo, pub preopen_fds: RwLock>, pub fd_map: Arc>>, - pub next_fd: AtomicU32, + pub next_fd: WasiFdSeed, pub current_dir: Mutex, #[cfg_attr(feature = "enable-serde", serde(skip, default))] pub root_fs: WasiFsRoot, @@ -454,7 +523,7 @@ impl WasiFs { Self { preopen_fds: RwLock::new(self.preopen_fds.read().unwrap().clone()), fd_map: Arc::new(RwLock::new(fd_map)), - next_fd: AtomicU32::new(self.next_fd.load(Ordering::SeqCst)), + next_fd: self.next_fd.fork(), current_dir: Mutex::new(self.current_dir.lock().unwrap().clone()), is_wasix: AtomicBool::new(self.is_wasix.load(Ordering::Acquire)), root_fs: self.root_fs.clone(), @@ -557,7 +626,7 @@ impl WasiFs { let wasi_fs = Self { preopen_fds: RwLock::new(vec![]), fd_map: Arc::new(RwLock::new(HashMap::new())), - next_fd: AtomicU32::new(3), + next_fd: WasiFdSeed::default(), current_dir: Mutex::new("/".to_string()), is_wasix: AtomicBool::new(false), root_fs: fs_backing, @@ -683,7 +752,7 @@ impl WasiFs { let kind = Kind::File { handle: Some(Arc::new(RwLock::new(file))), path: PathBuf::from(""), - fd: Some(self.next_fd.fetch_add(1, Ordering::SeqCst)), + fd: Some(self.next_fd.next_val()), }; drop(guard); @@ -1037,6 +1106,9 @@ impl WasiFs { if let Some(entry) = entries.get(component.as_ref()) { cur_inode = entry.clone(); + } else if let Some(root) = entries.get(&"/".to_string()) { + cur_inode = root.clone(); + continue 'symlink_resolution; } else { // Root is not capable of having something other then preopenned folders return Err(Errno::Notcapable); @@ -1236,15 +1308,34 @@ impl WasiFs { } pub fn get_fd(&self, fd: WasiFd) -> Result { - self.fd_map + let ret = self + .fd_map .read() .unwrap() .get(&fd) 
.ok_or(Errno::Badf) - .map(|a| a.clone()) + .map(|a| a.clone()); + + if ret.is_err() && fd == VIRTUAL_ROOT_FD { + Ok(Fd { + rights: ALL_RIGHTS, + rights_inheriting: ALL_RIGHTS, + flags: Fdflags::empty(), + offset: Arc::new(AtomicU64::new(0)), + open_flags: 0, + inode: self.root_inode.clone(), + is_stdio: false, + }) + } else { + ret + } } pub fn get_fd_inode(&self, fd: WasiFd) -> Result { + // see `VIRTUAL_ROOT_FD` for details as to why this exists + if fd == VIRTUAL_ROOT_FD { + return Ok(self.root_inode.clone()); + } self.fd_map .read() .unwrap() @@ -1305,10 +1396,10 @@ impl WasiFs { Kind::File { .. } => Filetype::RegularFile, Kind::Dir { .. } => Filetype::Directory, Kind::Symlink { .. } => Filetype::SymbolicLink, - Kind::Socket { socket } => match socket.inner.protected.read().unwrap().kind { + Kind::Socket { socket } => match &socket.inner.protected.read().unwrap().kind { InodeSocketKind::TcpStream { .. } => Filetype::SocketStream, InodeSocketKind::Raw { .. } => Filetype::SocketRaw, - InodeSocketKind::PreSocket { ty, .. } => match ty { + InodeSocketKind::PreSocket { props, .. } => match props.ty { Socktype::Stream => Filetype::SocketStream, Socktype::Dgram => Filetype::SocketDgram, Socktype::Raw => Filetype::SocketRaw, @@ -1466,25 +1557,13 @@ impl WasiFs { open_flags: u16, inode: InodeGuard, ) -> Result { - let idx = self.next_fd.fetch_add(1, Ordering::SeqCst); + let idx = self.next_fd.next_val(); self.create_fd_ext(rights, rights_inheriting, flags, open_flags, inode, idx)?; Ok(idx) } pub fn make_max_fd(&self, fd: u32) { - loop { - let existing = self.next_fd.load(Ordering::SeqCst); - if existing >= fd { - return; - } - if self - .next_fd - .compare_exchange(existing, fd, Ordering::SeqCst, Ordering::Relaxed) - .is_ok() - { - break; - } - } + self.next_fd.clip_val(fd); } pub fn create_fd_ext( @@ -1517,7 +1596,7 @@ impl WasiFs { pub fn clone_fd(&self, fd: WasiFd) -> Result { let fd = self.get_fd(fd)?; - let idx = self.next_fd.fetch_add(1, Ordering::SeqCst); + let idx = self.next_fd.next_val(); self.fd_map.write().unwrap().insert( idx, Fd { @@ -1920,7 +1999,7 @@ impl std::fmt::Debug for WasiFs { } else { write!(f, "current_dir=(locked) ")?; } - write!(f, "next_fd={} ", self.next_fd.load(Ordering::Relaxed))?; + write!(f, "next_fd={} ", self.next_fd.cur_val())?; write!(f, "{:?}", self.root_fs) } } diff --git a/lib/wasix/src/journal/effector/memory_and_snapshot.rs b/lib/wasix/src/journal/effector/memory_and_snapshot.rs index a53ec4465d2..145f5ce4419 100644 --- a/lib/wasix/src/journal/effector/memory_and_snapshot.rs +++ b/lib/wasix/src/journal/effector/memory_and_snapshot.rs @@ -1,9 +1,38 @@ +use std::collections::{hash_map, BTreeMap}; + +use crate::os::task::process::MemorySnapshotRegion; + use super::*; +/// This value is tweaked to minimize the amount of journal +/// entries for a nominal workload but keep the resolution +/// high enough that it reduces overhead and inefficiency. +/// +/// The test case used to tune this value was a HTTP server +/// serving a HTTP web page on hyper compiled to WASM. 
The +/// server was first warmed up with a bunch of requests then +/// the journal entries measured on subsequent requests, these +/// are the values +/// +/// Resolution | Journal Size | Memory Overhead +/// -----------|--------------|---------------- +/// 128 bytes | 3584 bytes | 12.5% +/// 256 bytes | 4096 bytes | 6.25% +/// 512 bytes | 7680 bytes | 3.12% +/// 1024 bytes | 12288 bytes | 1.56% +/// 2048 bytes | 22528 bytes | 0.78% +/// 4096 bytes | 32769 bytes | 0.39% +/// +/// Based on this data we have settled on 512 byte memory resolution +/// for region extents which keeps the journal size to a reasonable +/// value and the memory overhead of the hash table within an acceptable +/// limit +const MEMORY_REGION_RESOLUTION: u64 = 512; + impl JournalEffector { pub fn save_memory_and_snapshot( ctx: &mut FunctionEnvMut<'_, WasiEnv>, - process: &mut MutexGuard<'_, WasiProcessInner>, + guard: &mut MutexGuard<'_, WasiProcessInner>, trigger: SnapshotTrigger, ) -> anyhow::Result<()> { let env = ctx.data(); @@ -16,11 +45,13 @@ impl JournalEffector { // otherwise create too much inefficiency. We choose 64KB as its // aligned with the standard WASM page size. let mut cur = 0u64; - let mut regions = LinkedList::>::new(); + let mut regions = Vec::::new(); while cur < memory.data_size() { let mut again = false; - let mut end = memory.data_size().min(cur + 65536); - for (_, thread) in process.threads.iter() { + let next = ((cur + MEMORY_REGION_RESOLUTION) / MEMORY_REGION_RESOLUTION) + * MEMORY_REGION_RESOLUTION; + let mut end = memory.data_size().min(next); + for (_, thread) in guard.threads.iter() { let layout = thread.memory_layout(); if cur >= layout.stack_lower && cur < layout.stack_upper { cur = layout.stack_upper; @@ -34,28 +65,97 @@ impl JournalEffector { if again { continue; } - regions.push_back(cur..end); + + let region = cur..end; + regions.push(region.into()); cur = end; } + // Next we examine the dirty page manager and filter out any pages + // that have not been explicitly written to (according to the + // PTE) + // + // # TODO + // https://docs.kernel.org/admin-guide/mm/soft-dirty.html + // Now that we know all the regions that need to be saved we // enter a processing loop that dumps all the data to the log // file in an orderly manner. let memory = unsafe { env.memory_view(ctx) }; let journal = ctx.data().active_journal()?; + let mut regions_phase2 = BTreeMap::new(); + for region in regions.drain(..) { + // We grab this region of memory as a vector and hash + // it, which allows us to make some logging efficiency + // gains. 
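// The dedup key computed below is the first eight bytes of the region's
// BLAKE3 digest interpreted as a big-endian u64. One key is tracked per
// region (at most `MEMORY_REGION_RESOLUTION` = 512 bytes) in
// `snapshot_memory_hash`; regions whose key is unchanged since the last
// snapshot are skipped, and the surviving regions are coalesced with their
// neighbours before being written to the journal.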
+ #[cfg(not(feature = "sys"))] + let data = memory + .copy_range_to_vec(region.into()) + .map_err(mem_error_to_wasi)?; + + // For x86 implementations running natively we have a + // performance optimization that avoids a copy of the + // memory when hashing for changed regions + #[cfg(feature = "sys")] + let data = { + let d = unsafe { memory.data_unchecked() }; + if region.end > d.len() as u64 { + return Err(anyhow::anyhow!( + "memory access out of bounds ({} vs {})", + region.end, + d.len() + )); + } + &d[region.start as usize..region.end as usize] + }; + + // Compute a checksum and skip the memory if its already + // been saved to the journal once already + let hash = { + let h: [u8; 32] = blake3::hash(data).into(); + u64::from_be_bytes([h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]]) + }; + match guard.snapshot_memory_hash.entry(region) { + hash_map::Entry::Occupied(mut val) => { + if *val.get() == hash { + continue; + } + val.insert(hash); + } + hash_map::Entry::Vacant(vacant) => { + vacant.insert(hash); + } + } + + regions_phase2.insert(region, ()); + } + + // Combine regions together that are next to each other + regions.clear(); + let mut last_end = None; + for (region, _) in regions_phase2.iter() { + if Some(region.start) == last_end { + regions.last_mut().unwrap().end = region.end; + } else { + regions.push(*region); + } + last_end = Some(region.end); + } + + // Perform the writes for region in regions { // We grab this region of memory as a vector and hash // it, which allows us to make some logging efficiency // gains. let data = memory - .copy_range_to_vec(region.clone()) + .copy_range_to_vec(region.into()) .map_err(mem_error_to_wasi)?; // Now we write it to the snap snapshot capturer journal .write(JournalEntry::UpdateMemoryRegionV1 { - region, + region: region.into(), data: data.into(), }) .map_err(map_snapshot_err)?; @@ -78,17 +178,44 @@ impl JournalEffector { pub unsafe fn apply_memory( ctx: &mut FunctionEnvMut<'_, WasiEnv>, region: Range, - data: &[u8], + mut data: &[u8], ) -> anyhow::Result<()> { let (env, mut store) = ctx.data_and_store_mut(); let memory = unsafe { env.memory() }; memory.grow_at_least(&mut store, region.end + data.len() as u64)?; + // Write the data to the memory let memory = unsafe { env.memory_view(&store) }; memory - .write(region.start, data.as_ref()) + .write(region.start, data) .map_err(|err| WasiRuntimeError::Runtime(RuntimeError::user(err.into())))?; + + // Break the region down into chunks that align with the resolution + let mut offset = region.start; + while offset < region.end { + let next = region.end.min(offset + MEMORY_REGION_RESOLUTION); + let region = offset..next; + offset = next; + + // Compute the hash and update it + let size = region.end - region.start; + let hash = { + let h: [u8; 32] = blake3::hash(&data[..size as usize]).into(); + u64::from_be_bytes([h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]]) + }; + env.process + .inner + .0 + .lock() + .unwrap() + .snapshot_memory_hash + .insert(region.into(), hash); + + // Shift the data pointer + data = &data[size as usize..]; + } + Ok(()) } } diff --git a/lib/wasix/src/journal/effector/mod.rs b/lib/wasix/src/journal/effector/mod.rs index bf9e464da14..632207a8adb 100644 --- a/lib/wasix/src/journal/effector/mod.rs +++ b/lib/wasix/src/journal/effector/mod.rs @@ -1,6 +1,4 @@ -pub(super) use std::{ - borrow::Cow, collections::LinkedList, ops::Range, sync::MutexGuard, time::SystemTime, -}; +pub(super) use std::{borrow::Cow, ops::Range, sync::MutexGuard, time::SystemTime}; pub(super) use 
anyhow::bail; pub(super) use bytes::Bytes; diff --git a/lib/wasix/src/journal/effector/syscalls/path_create_directory.rs b/lib/wasix/src/journal/effector/syscalls/path_create_directory.rs index 447c60393af..223bce3fd30 100644 --- a/lib/wasix/src/journal/effector/syscalls/path_create_directory.rs +++ b/lib/wasix/src/journal/effector/syscalls/path_create_directory.rs @@ -1,3 +1,9 @@ +use std::path::Path; + +use virtual_fs::FileSystem; + +use crate::VIRTUAL_ROOT_FD; + use super::*; impl JournalEffector { @@ -20,14 +26,19 @@ impl JournalEffector { fd: Fd, path: &str, ) -> anyhow::Result<()> { - crate::syscalls::path_create_directory_internal(ctx, fd, path).map_err(|err| { - anyhow::format_err!( - "journal restore error: failed to create directory path (fd={}, path={}) - {}", - fd, - path, - err - ) - })?; + // see `VIRTUAL_ROOT_FD` for details as to why this exists + if fd == VIRTUAL_ROOT_FD { + ctx.data().state.fs.root_fs.create_dir(Path::new(path))?; + } else { + crate::syscalls::path_create_directory_internal(ctx, fd, path).map_err(|err| { + anyhow::format_err!( + "journal restore error: failed to create directory path (fd={}, path={}) - {}", + fd, + path, + err + ) + })?; + } Ok(()) } } diff --git a/lib/wasix/src/journal/effector/syscalls/path_remove_directory.rs b/lib/wasix/src/journal/effector/syscalls/path_remove_directory.rs index b45ec07d1a4..f26fb5d99f1 100644 --- a/lib/wasix/src/journal/effector/syscalls/path_remove_directory.rs +++ b/lib/wasix/src/journal/effector/syscalls/path_remove_directory.rs @@ -1,3 +1,9 @@ +use std::path::Path; + +use virtual_fs::FileSystem; + +use crate::VIRTUAL_ROOT_FD; + use super::*; impl JournalEffector { @@ -20,7 +26,10 @@ impl JournalEffector { fd: Fd, path: &str, ) -> anyhow::Result<()> { - if let Err(err) = crate::syscalls::path_remove_directory_internal(ctx, fd, path) { + // see `VIRTUAL_ROOT_FD` for details as to why this exists + if fd == VIRTUAL_ROOT_FD { + ctx.data().state.fs.root_fs.remove_dir(Path::new(path))?; + } else if let Err(err) = crate::syscalls::path_remove_directory_internal(ctx, fd, path) { bail!( "journal restore error: failed to remove directory - {}", err diff --git a/lib/wasix/src/journal/effector/syscalls/path_rename.rs b/lib/wasix/src/journal/effector/syscalls/path_rename.rs index 41ca232f8b8..63d66dd6538 100644 --- a/lib/wasix/src/journal/effector/syscalls/path_rename.rs +++ b/lib/wasix/src/journal/effector/syscalls/path_rename.rs @@ -1,3 +1,5 @@ +use crate::{syscalls::__asyncify_light, VIRTUAL_ROOT_FD}; + use super::*; impl JournalEffector { @@ -26,16 +28,27 @@ impl JournalEffector { new_fd: Fd, new_path: &str, ) -> anyhow::Result<()> { - let ret = crate::syscalls::path_rename_internal(ctx, old_fd, old_path, new_fd, new_path)?; - if ret != Errno::Success { - bail!( - "journal restore error: failed to rename path (old_fd={}, old_path={}, new_fd={}, new_path={}) - {}", - old_fd, - old_path, - new_fd, - new_path, - ret - ); + // see `VIRTUAL_ROOT_FD` for details as to why this exists + if old_fd == VIRTUAL_ROOT_FD && new_fd == VIRTUAL_ROOT_FD { + let state = ctx.data().state.clone(); + let old_path = old_path.to_string(); + let new_path = new_path.to_string(); + __asyncify_light(ctx.data(), None, async move { + state.fs_rename(old_path, new_path).await + })??; + } else { + let ret = + crate::syscalls::path_rename_internal(ctx, old_fd, old_path, new_fd, new_path)?; + if ret != Errno::Success { + bail!( + "journal restore error: failed to rename path (old_fd={}, old_path={}, new_fd={}, new_path={}) - {}", + old_fd, + old_path, + 
new_fd, + new_path, + ret + ); + } } Ok(()) } diff --git a/lib/wasix/src/journal/effector/syscalls/path_set_times.rs b/lib/wasix/src/journal/effector/syscalls/path_set_times.rs index e40b3bae698..440acc2c8e8 100644 --- a/lib/wasix/src/journal/effector/syscalls/path_set_times.rs +++ b/lib/wasix/src/journal/effector/syscalls/path_set_times.rs @@ -1,3 +1,5 @@ +use crate::VIRTUAL_ROOT_FD; + use super::*; impl JournalEffector { @@ -32,19 +34,24 @@ impl JournalEffector { st_mtim: Timestamp, fst_flags: Fstflags, ) -> anyhow::Result<()> { - crate::syscalls::path_filestat_set_times_internal(ctx, fd, flags, path, st_atim, st_mtim, fst_flags) - .map_err(|err| { - anyhow::format_err!( - "journal restore error: failed to set path times (fd={}, flags={}, path={}, st_atim={}, st_mtim={}, fst_flags={:?}) - {}", - fd, - flags, - path, - st_atim, - st_mtim, - fst_flags, - err - ) - })?; + // see `VIRTUAL_ROOT_FD` for details as to why this exists + if fd == VIRTUAL_ROOT_FD { + // we ignore this record as its not implemented yet + } else { + crate::syscalls::path_filestat_set_times_internal(ctx, fd, flags, path, st_atim, st_mtim, fst_flags) + .map_err(|err| { + anyhow::format_err!( + "journal restore error: failed to set path times (fd={}, flags={}, path={}, st_atim={}, st_mtim={}, fst_flags={:?}) - {}", + fd, + flags, + path, + st_atim, + st_mtim, + fst_flags, + err + ) + })?; + } Ok(()) } } diff --git a/lib/wasix/src/journal/effector/syscalls/path_unlink.rs b/lib/wasix/src/journal/effector/syscalls/path_unlink.rs index 9141a7fe8cf..035d2e905bf 100644 --- a/lib/wasix/src/journal/effector/syscalls/path_unlink.rs +++ b/lib/wasix/src/journal/effector/syscalls/path_unlink.rs @@ -1,3 +1,9 @@ +use std::path::Path; + +use virtual_fs::FileSystem; + +use crate::VIRTUAL_ROOT_FD; + use super::*; impl JournalEffector { @@ -20,14 +26,19 @@ impl JournalEffector { fd: Fd, path: &str, ) -> anyhow::Result<()> { - let ret = crate::syscalls::path_unlink_file_internal(ctx, fd, path)?; - if ret != Errno::Success { - bail!( - "journal restore error: failed to remove file (fd={}, path={}) - {}", - fd, - path, - ret - ); + // see `VIRTUAL_ROOT_FD` for details as to why this exists + if fd == VIRTUAL_ROOT_FD { + ctx.data().state.fs.root_fs.remove_file(Path::new(path))?; + } else { + let ret = crate::syscalls::path_unlink_file_internal(ctx, fd, path)?; + if ret != Errno::Success { + bail!( + "journal restore error: failed to remove file (fd={}, path={}) - {}", + fd, + path, + ret + ); + } } Ok(()) } diff --git a/lib/wasix/src/journal/effector/syscalls/sock_accept.rs b/lib/wasix/src/journal/effector/syscalls/sock_accept.rs index 3482074dcfe..b66d4407d00 100644 --- a/lib/wasix/src/journal/effector/syscalls/sock_accept.rs +++ b/lib/wasix/src/journal/effector/syscalls/sock_accept.rs @@ -1,8 +1,10 @@ use std::net::SocketAddr; +use wasmer_wasix_types::wasi::{Addressfamily, SockProto, Socktype}; + use crate::{ fs::Kind, - net::socket::{InodeSocket, InodeSocketKind}, + net::socket::{InodeSocket, InodeSocketKind, SocketProperties}, }; use super::*; @@ -12,6 +14,7 @@ impl JournalEffector { ctx: &mut FunctionEnvMut<'_, WasiEnv>, listen_fd: Fd, fd: Fd, + addr: SocketAddr, peer_addr: SocketAddr, fd_flags: Fdflags, nonblocking: bool, @@ -21,6 +24,7 @@ impl JournalEffector { JournalEntry::SocketAcceptedV1 { listen_fd, fd, + local_addr: addr, peer_addr, fd_flags, non_blocking: nonblocking, @@ -32,12 +36,39 @@ impl JournalEffector { ctx: &mut FunctionEnvMut<'_, WasiEnv>, _listen_fd: Fd, fd: Fd, + addr: SocketAddr, peer_addr: SocketAddr, fd_flags: 
Fdflags, nonblocking: bool, ) -> anyhow::Result<()> { let kind = Kind::Socket { - socket: InodeSocket::new(InodeSocketKind::RemoteTcpStream { peer_addr }), + socket: InodeSocket::new(InodeSocketKind::RemoteSocket { + local_addr: addr, + peer_addr, + ttl: 0, + multicast_ttl: 0, + props: SocketProperties { + family: match peer_addr.is_ipv4() { + true => Addressfamily::Inet4, + false => Addressfamily::Inet6, + }, + ty: Socktype::Stream, + pt: SockProto::Tcp, + only_v6: false, + reuse_port: false, + reuse_addr: false, + no_delay: None, + keep_alive: None, + dont_route: None, + send_buf_size: None, + recv_buf_size: None, + write_timeout: None, + read_timeout: None, + accept_timeout: None, + connect_timeout: None, + handler: None, + }, + }), }; let env = ctx.data(); diff --git a/lib/wasix/src/journal/effector/syscalls/sock_connect.rs b/lib/wasix/src/journal/effector/syscalls/sock_connect.rs index 3c7af982efd..a72763497b6 100644 --- a/lib/wasix/src/journal/effector/syscalls/sock_connect.rs +++ b/lib/wasix/src/journal/effector/syscalls/sock_connect.rs @@ -1,8 +1,10 @@ use std::net::SocketAddr; +use wasmer_wasix_types::wasi::{Addressfamily, SockProto, Socktype}; + use crate::{ fs::Kind, - net::socket::{InodeSocket, InodeSocketKind}, + net::socket::{InodeSocket, InodeSocketKind, SocketProperties}, }; use super::*; @@ -11,18 +13,53 @@ impl JournalEffector { pub fn save_sock_connect( ctx: &mut FunctionEnvMut<'_, WasiEnv>, fd: Fd, - addr: SocketAddr, + local_addr: SocketAddr, + peer_addr: SocketAddr, ) -> anyhow::Result<()> { - Self::save_event(ctx, JournalEntry::SocketConnectedV1 { fd, addr }) + Self::save_event( + ctx, + JournalEntry::SocketConnectedV1 { + fd, + local_addr, + peer_addr, + }, + ) } pub fn apply_sock_connect( ctx: &mut FunctionEnvMut<'_, WasiEnv>, fd: Fd, - addr: SocketAddr, + local_addr: SocketAddr, + peer_addr: SocketAddr, ) -> anyhow::Result<()> { let kind = Kind::Socket { - socket: InodeSocket::new(InodeSocketKind::RemoteTcpStream { peer_addr: addr }), + socket: InodeSocket::new(InodeSocketKind::RemoteSocket { + local_addr, + peer_addr, + ttl: 0, + multicast_ttl: 0, + props: SocketProperties { + family: match peer_addr.is_ipv4() { + true => Addressfamily::Inet4, + false => Addressfamily::Inet6, + }, + ty: Socktype::Stream, + pt: SockProto::Tcp, + only_v6: false, + reuse_port: false, + reuse_addr: false, + no_delay: None, + keep_alive: None, + dont_route: None, + send_buf_size: None, + recv_buf_size: None, + write_timeout: None, + read_timeout: None, + accept_timeout: None, + connect_timeout: None, + handler: None, + }, + }), }; let env = ctx.data(); diff --git a/lib/wasix/src/journal/effector/thread_state.rs b/lib/wasix/src/journal/effector/thread_state.rs index 93b4d3cf9fe..0c824b6bb3c 100644 --- a/lib/wasix/src/journal/effector/thread_state.rs +++ b/lib/wasix/src/journal/effector/thread_state.rs @@ -1,3 +1,13 @@ +use std::sync::Arc; + +use wasmer_wasix_types::wasix::ThreadStartType; + +use crate::{ + os::task::thread::{RewindResultType, WasiMemoryLayout}, + syscalls::thread_spawn_internal_using_layout, + RewindState, +}; + use super::*; impl JournalEffector { @@ -7,6 +17,8 @@ impl JournalEffector { memory_stack: Bytes, rewind_stack: Bytes, store_data: Bytes, + start: ThreadStartType, + layout: WasiMemoryLayout, ) -> anyhow::Result<()> { Self::save_event( ctx, @@ -15,8 +27,64 @@ impl JournalEffector { call_stack: Cow::Owned(rewind_stack.into()), memory_stack: Cow::Owned(memory_stack.into()), store_data: Cow::Owned(store_data.into()), + start, + layout, is_64bit: M::is_64bit(), }, 
) } + + /// This will take the supplied stacks and apply them to the memory region + /// dedicated to this thread. After that it will spawn a WASM thread and + // continue the thread where it left off, which may even mean it goes + // straight back to sleep. + pub fn apply_thread_state( + ctx: &mut FunctionEnvMut<'_, WasiEnv>, + tid: WasiThreadId, + memory_stack: Bytes, + rewind_stack: Bytes, + store_data: Bytes, + start: ThreadStartType, + layout: WasiMemoryLayout, + ) -> anyhow::Result<()> { + let start_ptr: M::Offset = match start { + ThreadStartType::MainThread => { + return Err(anyhow::format_err!( + "unable to restore a main thread via this method" + )); + } + ThreadStartType::ThreadSpawn { start_ptr } => start_ptr + .try_into() + .map_err(|_| anyhow::format_err!("overflow while processing thread restoration"))?, + }; + + // Create the thread for this ID + let thread_handle = Arc::new(ctx.data().process.new_thread_with_id( + layout.clone(), + start, + tid, + )?); + + // Now spawn the thread itself + thread_spawn_internal_using_layout::( + ctx, + thread_handle, + layout.clone(), + start_ptr, + Some(( + RewindState { + memory_stack, + rewind_stack, + store_data, + start, + layout, + is_64bit: M::is_64bit(), + }, + RewindResultType::RewindRestart, + )), + ) + .map_err(|err| anyhow::format_err!("failed to spawn thread - {}", err))?; + + Ok(()) + } } diff --git a/lib/wasix/src/lib.rs b/lib/wasix/src/lib.rs index 25a56334d0a..a73916d5477 100644 --- a/lib/wasix/src/lib.rs +++ b/lib/wasix/src/lib.rs @@ -99,11 +99,11 @@ pub use crate::{ WasiEnv, WasiEnvBuilder, WasiEnvInit, WasiFunctionEnv, WasiInstanceHandles, WasiStateCreationError, ALL_RIGHTS, }, - syscalls::{rewind, rewind_ext, types, unwind}, + syscalls::{journal::wait_for_snapshot, rewind, rewind_ext, types, unwind}, utils::is_wasix_module, utils::{ get_wasi_version, get_wasi_versions, is_wasi_module, - store::{capture_instance_snapshot, restore_instance_snapshot, InstanceSnapshot}, + store::{capture_store_snapshot, restore_store_snapshot, StoreSnapshot}, WasiVersion, }, }; diff --git a/lib/wasix/src/net/socket.rs b/lib/wasix/src/net/socket.rs index 958a01578b1..bbf6c8dd9e9 100644 --- a/lib/wasix/src/net/socket.rs +++ b/lib/wasix/src/net/socket.rs @@ -35,27 +35,32 @@ pub enum InodeHttpSocketType { #[derive(Derivative)] #[derivative(Debug)] +pub struct SocketProperties { + pub family: Addressfamily, + pub ty: Socktype, + pub pt: SockProto, + pub only_v6: bool, + pub reuse_port: bool, + pub reuse_addr: bool, + pub no_delay: Option, + pub keep_alive: Option, + pub dont_route: Option, + pub send_buf_size: Option, + pub recv_buf_size: Option, + pub write_timeout: Option, + pub read_timeout: Option, + pub accept_timeout: Option, + pub connect_timeout: Option, + #[derivative(Debug = "ignore")] + pub handler: Option>, +} + +#[derive(Debug)] //#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum InodeSocketKind { PreSocket { - family: Addressfamily, - ty: Socktype, - pt: SockProto, + props: SocketProperties, addr: Option, - only_v6: bool, - reuse_port: bool, - reuse_addr: bool, - no_delay: Option, - keep_alive: Option, - dont_route: Option, - send_buf_size: Option, - recv_buf_size: Option, - write_timeout: Option, - read_timeout: Option, - accept_timeout: Option, - connect_timeout: Option, - #[derivative(Debug = "ignore")] - handler: Option>, }, Icmp(Box), Raw(Box), @@ -72,8 +77,12 @@ pub enum InodeSocketKind { socket: Box, peer: Option, }, - RemoteTcpStream { + RemoteSocket { + props: SocketProperties, + local_addr: 
SocketAddr, peer_addr: SocketAddr, + ttl: u32, + multicast_ttl: u32, }, } @@ -241,15 +250,8 @@ impl InodeSocket { let socket = { let mut inner = self.inner.protected.write().unwrap(); match &mut inner.kind { - InodeSocketKind::PreSocket { - family, - ty, - addr, - reuse_port, - reuse_addr, - .. - } => { - match *family { + InodeSocketKind::PreSocket { props, addr, .. } => { + match props.family { Addressfamily::Inet4 => { if !set_addr.is_ipv4() { tracing::debug!( @@ -274,15 +276,61 @@ impl InodeSocket { addr.replace(set_addr); let addr = (*addr).unwrap(); - match *ty { + match props.ty { + Socktype::Stream => { + // we already set the socket address - next we need a listen or connect so nothing + // more to do at this time + return Ok(None); + } + Socktype::Dgram => { + let reuse_port = props.reuse_port; + let reuse_addr = props.reuse_addr; + drop(inner); + + net.bind_udp(addr, reuse_port, reuse_addr) + } + _ => return Err(Errno::Inval), + } + } + InodeSocketKind::RemoteSocket { + props, + local_addr: addr, + .. + } => { + match props.family { + Addressfamily::Inet4 => { + if !set_addr.is_ipv4() { + tracing::debug!( + "IP address is the wrong type IPv4 ({set_addr}) vs IPv6 family" + ); + return Err(Errno::Inval); + } + } + Addressfamily::Inet6 => { + if !set_addr.is_ipv6() { + tracing::debug!( + "IP address is the wrong type IPv6 ({set_addr}) vs IPv4 family" + ); + return Err(Errno::Inval); + } + } + _ => { + return Err(Errno::Notsup); + } + } + + *addr = set_addr; + let addr = *addr; + + match props.ty { Socktype::Stream => { // we already set the socket address - next we need a listen or connect so nothing // more to do at this time return Ok(None); } Socktype::Dgram => { - let reuse_port = *reuse_port; - let reuse_addr = *reuse_addr; + let reuse_port = props.reuse_port; + let reuse_addr = props.reuse_addr; drop(inner); net.bind_udp(addr, reuse_port, reuse_addr) @@ -318,34 +366,70 @@ impl InodeSocket { let socket = { let inner = self.inner.protected.read().unwrap(); match &inner.kind { - InodeSocketKind::PreSocket { - ty, - addr, - only_v6, - reuse_port, - reuse_addr, - .. - } => match *ty { + InodeSocketKind::PreSocket { props, addr, .. } => match props.ty { Socktype::Stream => { if addr.is_none() { tracing::warn!("wasi[?]::sock_listen - failed - address not set"); return Err(Errno::Inval); } let addr = *addr.as_ref().unwrap(); - let only_v6 = *only_v6; - let reuse_port = *reuse_port; - let reuse_addr = *reuse_addr; + let only_v6 = props.only_v6; + let reuse_port = props.reuse_port; + let reuse_addr = props.reuse_addr; drop(inner); net.listen_tcp(addr, only_v6, reuse_port, reuse_addr) } - _ => { - tracing::warn!("wasi[?]::sock_listen - failed - not supported(1)"); + ty => { + tracing::warn!( + "wasi[?]::sock_listen - failed - not supported(pre-socket:{:?})", + ty + ); return Err(Errno::Notsup); } }, - _ => { - tracing::warn!("wasi[?]::sock_listen - failed - not supported(2)"); + InodeSocketKind::RemoteSocket { + props, + local_addr: addr, + .. 
+ } => match props.ty { + Socktype::Stream => { + let addr = *addr; + let only_v6 = props.only_v6; + let reuse_port = props.reuse_port; + let reuse_addr = props.reuse_addr; + drop(inner); + + net.listen_tcp(addr, only_v6, reuse_port, reuse_addr) + } + ty => { + tracing::warn!( + "wasi[?]::sock_listen - failed - not supported(remote-socket:{:?})", + ty + ); + return Err(Errno::Notsup); + } + }, + InodeSocketKind::Icmp(_) => { + tracing::warn!("wasi[?]::sock_listen - failed - not supported(icmp)"); + return Err(Errno::Notsup); + } + InodeSocketKind::Raw(_) => { + tracing::warn!("wasi[?]::sock_listen - failed - not supported(raw)"); + return Err(Errno::Notsup); + } + InodeSocketKind::TcpListener { .. } => { + tracing::warn!( + "wasi[?]::sock_listen - failed - already listening (tcp-listener)" + ); + return Err(Errno::Notsup); + } + InodeSocketKind::TcpStream { .. } => { + tracing::warn!("wasi[?]::sock_listen - failed - not supported(tcp-stream)"); + return Err(Errno::Notsup); + } + InodeSocketKind::UdpSocket { .. } => { + tracing::warn!("wasi[?]::sock_listen - failed - not supported(udp-socket)"); return Err(Errno::Notsup); } } @@ -441,7 +525,7 @@ impl InodeSocket { InodeSocketKind::UdpSocket { .. } => {} InodeSocketKind::Raw(_) => {} InodeSocketKind::PreSocket { .. } => return Err(Errno::Notconn), - InodeSocketKind::RemoteTcpStream { .. } => {} + InodeSocketKind::RemoteSocket { .. } => {} }; Ok(()) } @@ -462,25 +546,15 @@ impl InodeSocket { let connect = { let mut inner = self.inner.protected.write().unwrap(); match &mut inner.kind { - InodeSocketKind::PreSocket { - ty, - addr, - write_timeout, - read_timeout, - no_delay, - keep_alive, - dont_route, - handler: h, - .. - } => { - handler = h.take(); - new_write_timeout = *write_timeout; - new_read_timeout = *read_timeout; - match *ty { + InodeSocketKind::PreSocket { props, addr, .. } => { + handler = props.handler.take(); + new_write_timeout = props.write_timeout; + new_read_timeout = props.read_timeout; + match props.ty { Socktype::Stream => { - let no_delay = *no_delay; - let keep_alive = *keep_alive; - let dont_route = *dont_route; + let no_delay = props.no_delay; + let keep_alive = props.keep_alive; + let dont_route = props.dont_route; let addr = match addr { Some(a) => *a, None => { @@ -515,6 +589,10 @@ impl InodeSocket { target_peer.replace(peer); return Ok(None); } + InodeSocketKind::RemoteSocket { peer_addr, .. } => { + *peer_addr = peer; + return Ok(None); + } _ => return Err(Errno::Notsup), } }; @@ -546,6 +624,7 @@ impl InodeSocket { InodeSocketKind::TcpListener { .. } => WasiSocketStatus::Opened, InodeSocketKind::TcpStream { .. } => WasiSocketStatus::Opened, InodeSocketKind::UdpSocket { .. } => WasiSocketStatus::Opened, + InodeSocketKind::RemoteSocket { .. } => WasiSocketStatus::Opened, _ => WasiSocketStatus::Failed, }) } @@ -553,12 +632,12 @@ impl InodeSocket { pub fn addr_local(&self) -> Result { let inner = self.inner.protected.read().unwrap(); Ok(match &inner.kind { - InodeSocketKind::PreSocket { family, addr, .. } => { + InodeSocketKind::PreSocket { props, addr, .. } => { if let Some(addr) = addr { *addr } else { SocketAddr::new( - match *family { + match props.family { Addressfamily::Inet4 => IpAddr::V4(Ipv4Addr::UNSPECIFIED), Addressfamily::Inet6 => IpAddr::V6(Ipv6Addr::UNSPECIFIED), _ => return Err(Errno::Inval), @@ -577,6 +656,9 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => { socket.addr_local().map_err(net_error_into_wasi_err)? } + InodeSocketKind::RemoteSocket { + local_addr: addr, .. 
+ } => *addr, _ => return Err(Errno::Notsup), }) } @@ -584,8 +666,8 @@ impl InodeSocket { pub fn addr_peer(&self) -> Result { let inner = self.inner.protected.read().unwrap(); Ok(match &inner.kind { - InodeSocketKind::PreSocket { family, .. } => SocketAddr::new( - match *family { + InodeSocketKind::PreSocket { props, .. } => SocketAddr::new( + match props.family { Addressfamily::Inet4 => IpAddr::V4(Ipv4Addr::UNSPECIFIED), Addressfamily::Inet6 => IpAddr::V6(Ipv6Addr::UNSPECIFIED), _ => return Err(Errno::Inval), @@ -613,6 +695,7 @@ impl InodeSocket { ) }) })?, + InodeSocketKind::RemoteSocket { peer_addr, .. } => *peer_addr, _ => return Err(Errno::Notsup), }) } @@ -620,22 +703,15 @@ impl InodeSocket { pub fn set_opt_flag(&mut self, option: WasiSocketOption, val: bool) -> Result<(), Errno> { let mut inner = self.inner.protected.write().unwrap(); match &mut inner.kind { - InodeSocketKind::PreSocket { - only_v6, - reuse_port, - reuse_addr, - no_delay, - keep_alive, - dont_route, - .. - } => { + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => { match option { - WasiSocketOption::OnlyV6 => *only_v6 = val, - WasiSocketOption::ReusePort => *reuse_port = val, - WasiSocketOption::ReuseAddr => *reuse_addr = val, - WasiSocketOption::NoDelay => *no_delay = Some(val), - WasiSocketOption::KeepAlive => *keep_alive = Some(val), - WasiSocketOption::DontRoute => *dont_route = Some(val), + WasiSocketOption::OnlyV6 => props.only_v6 = val, + WasiSocketOption::ReusePort => props.reuse_port = val, + WasiSocketOption::ReuseAddr => props.reuse_addr = val, + WasiSocketOption::NoDelay => props.no_delay = Some(val), + WasiSocketOption::KeepAlive => props.keep_alive = Some(val), + WasiSocketOption::DontRoute => props.dont_route = Some(val), _ => return Err(Errno::Inval), }; } @@ -678,19 +754,13 @@ impl InodeSocket { pub fn get_opt_flag(&self, option: WasiSocketOption) -> Result { let mut inner = self.inner.protected.write().unwrap(); Ok(match &mut inner.kind { - InodeSocketKind::PreSocket { - only_v6, - reuse_port, - reuse_addr, - no_delay, - keep_alive, - .. - } => match option { - WasiSocketOption::OnlyV6 => *only_v6, - WasiSocketOption::ReusePort => *reuse_port, - WasiSocketOption::ReuseAddr => *reuse_addr, - WasiSocketOption::NoDelay => no_delay.unwrap_or_default(), - WasiSocketOption::KeepAlive => keep_alive.unwrap_or_default(), + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => match option { + WasiSocketOption::OnlyV6 => props.only_v6, + WasiSocketOption::ReusePort => props.reuse_port, + WasiSocketOption::ReuseAddr => props.reuse_addr, + WasiSocketOption::NoDelay => props.no_delay.unwrap_or_default(), + WasiSocketOption::KeepAlive => props.keep_alive.unwrap_or_default(), _ => return Err(Errno::Inval), }, InodeSocketKind::Raw(sock) => match option { @@ -728,8 +798,9 @@ impl InodeSocket { pub fn set_send_buf_size(&mut self, size: usize) -> Result<(), Errno> { let mut inner = self.inner.protected.write().unwrap(); match &mut inner.kind { - InodeSocketKind::PreSocket { send_buf_size, .. } => { - *send_buf_size = Some(size); + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => { + props.send_buf_size = Some(size); } InodeSocketKind::TcpStream { socket, .. } => { socket @@ -744,8 +815,9 @@ impl InodeSocket { pub fn send_buf_size(&self) -> Result { let inner = self.inner.protected.read().unwrap(); match &inner.kind { - InodeSocketKind::PreSocket { send_buf_size, .. 
} => { - Ok((*send_buf_size).unwrap_or_default()) + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => { + Ok(props.send_buf_size.unwrap_or_default()) } InodeSocketKind::TcpStream { socket, .. } => { socket.send_buf_size().map_err(net_error_into_wasi_err) @@ -757,8 +829,9 @@ impl InodeSocket { pub fn set_recv_buf_size(&mut self, size: usize) -> Result<(), Errno> { let mut inner = self.inner.protected.write().unwrap(); match &mut inner.kind { - InodeSocketKind::PreSocket { recv_buf_size, .. } => { - *recv_buf_size = Some(size); + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => { + props.recv_buf_size = Some(size); } InodeSocketKind::TcpStream { socket, .. } => { socket @@ -773,8 +846,9 @@ impl InodeSocket { pub fn recv_buf_size(&self) -> Result { let inner = self.inner.protected.read().unwrap(); match &inner.kind { - InodeSocketKind::PreSocket { recv_buf_size, .. } => { - Ok((*recv_buf_size).unwrap_or_default()) + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => { + Ok(props.recv_buf_size.unwrap_or_default()) } InodeSocketKind::TcpStream { socket, .. } => { socket.recv_buf_size().map_err(net_error_into_wasi_err) @@ -789,6 +863,7 @@ impl InodeSocket { InodeSocketKind::TcpStream { socket, .. } => { socket.set_linger(linger).map_err(net_error_into_wasi_err) } + InodeSocketKind::RemoteSocket { .. } => Ok(()), InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -831,18 +906,13 @@ impl InodeSocket { } Ok(()) } - InodeSocketKind::PreSocket { - read_timeout, - write_timeout, - connect_timeout, - accept_timeout, - .. - } => { + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => { match ty { - TimeType::ConnectTimeout => *connect_timeout = timeout, - TimeType::AcceptTimeout => *accept_timeout = timeout, - TimeType::ReadTimeout => *read_timeout = timeout, - TimeType::WriteTimeout => *write_timeout = timeout, + TimeType::ConnectTimeout => props.connect_timeout = timeout, + TimeType::AcceptTimeout => props.accept_timeout = timeout, + TimeType::ReadTimeout => props.read_timeout = timeout, + TimeType::WriteTimeout => props.write_timeout = timeout, _ => return Err(Errno::Io), } Ok(()) @@ -867,17 +937,12 @@ impl InodeSocket { TimeType::AcceptTimeout => *accept_timeout, _ => return Err(Errno::Inval), }), - InodeSocketKind::PreSocket { - read_timeout, - write_timeout, - connect_timeout, - accept_timeout, - .. - } => match ty { - TimeType::ConnectTimeout => Ok(*connect_timeout), - TimeType::AcceptTimeout => Ok(*accept_timeout), - TimeType::ReadTimeout => Ok(*read_timeout), - TimeType::WriteTimeout => Ok(*write_timeout), + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => match ty { + TimeType::ConnectTimeout => Ok(props.connect_timeout), + TimeType::AcceptTimeout => Ok(props.accept_timeout), + TimeType::ReadTimeout => Ok(props.read_timeout), + TimeType::WriteTimeout => Ok(props.write_timeout), _ => Err(Errno::Inval), }, _ => Err(Errno::Notsup), @@ -893,6 +958,10 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => { socket.set_ttl(ttl).map_err(net_error_into_wasi_err) } + InodeSocketKind::RemoteSocket { ttl: set_ttl, .. } => { + *set_ttl = ttl; + Ok(()) + } InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -907,6 +976,7 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. 
} => { socket.ttl().map_err(net_error_into_wasi_err) } + InodeSocketKind::RemoteSocket { ttl, .. } => Ok(*ttl), InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -918,6 +988,13 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => socket .set_multicast_ttl_v4(ttl) .map_err(net_error_into_wasi_err), + InodeSocketKind::RemoteSocket { + multicast_ttl: set_ttl, + .. + } => { + *set_ttl = ttl; + Ok(()) + } InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -929,6 +1006,7 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => { socket.multicast_ttl_v4().map_err(net_error_into_wasi_err) } + InodeSocketKind::RemoteSocket { multicast_ttl, .. } => Ok(*multicast_ttl), InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -940,6 +1018,7 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => socket .join_multicast_v4(multiaddr, iface) .map_err(net_error_into_wasi_err), + InodeSocketKind::RemoteSocket { .. } => Ok(()), InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -951,6 +1030,7 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => socket .leave_multicast_v4(multiaddr, iface) .map_err(net_error_into_wasi_err), + InodeSocketKind::RemoteSocket { .. } => Ok(()), InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -962,6 +1042,7 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => socket .join_multicast_v6(multiaddr, iface) .map_err(net_error_into_wasi_err), + InodeSocketKind::RemoteSocket { .. } => Ok(()), InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -973,6 +1054,7 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => socket .leave_multicast_v6(multiaddr, iface) .map_err(net_error_into_wasi_err), + InodeSocketKind::RemoteSocket { .. } => Ok(()), InodeSocketKind::PreSocket { .. } => Err(Errno::Io), _ => Err(Errno::Notsup), } @@ -1020,7 +1102,7 @@ impl InodeSocket { InodeSocketKind::PreSocket { .. } => { return Poll::Ready(Err(Errno::Notconn)) } - InodeSocketKind::RemoteTcpStream { .. } => { + InodeSocketKind::RemoteSocket { .. } => { return Poll::Ready(Ok(self.data.len())) } _ => return Poll::Ready(Err(Errno::Notsup)), @@ -1100,7 +1182,7 @@ impl InodeSocket { InodeSocketKind::PreSocket { .. } => { return Poll::Ready(Err(Errno::Notconn)) } - InodeSocketKind::RemoteTcpStream { .. } => { + InodeSocketKind::RemoteSocket { .. } => { return Poll::Ready(Ok(self.data.len())) } _ => return Poll::Ready(Err(Errno::Notsup)), @@ -1188,6 +1270,9 @@ impl InodeSocket { } } } + InodeSocketKind::RemoteSocket { .. } => { + return Poll::Pending; + } InodeSocketKind::PreSocket { .. } => { return Poll::Ready(Err(Errno::Notconn)) } @@ -1264,6 +1349,9 @@ impl InodeSocket { InodeSocketKind::UdpSocket { socket, .. } => { socket.try_recv_from(self.data) } + InodeSocketKind::RemoteSocket { .. } => { + return Poll::Pending; + } InodeSocketKind::PreSocket { .. } => { return Poll::Ready(Err(Errno::Notconn)) } @@ -1310,6 +1398,7 @@ impl InodeSocket { InodeSocketKind::TcpStream { socket, .. } => { socket.shutdown(how).map_err(net_error_into_wasi_err)?; } + InodeSocketKind::RemoteSocket { .. } => return Ok(()), InodeSocketKind::PreSocket { .. } => return Err(Errno::Notconn), _ => return Err(Errno::Notsup), } @@ -1321,6 +1410,7 @@ impl InodeSocket { #[allow(clippy::match_like_matches_macro)] match &mut guard.kind { InodeSocketKind::TcpStream { .. 
} + | InodeSocketKind::RemoteSocket { .. } | InodeSocketKind::UdpSocket { .. } | InodeSocketKind::Raw(..) => true, _ => false, @@ -1339,10 +1429,12 @@ impl InodeSocketProtected { InodeSocketKind::UdpSocket { socket, .. } => socket.remove_handler(), InodeSocketKind::Raw(socket) => socket.remove_handler(), InodeSocketKind::Icmp(socket) => socket.remove_handler(), - InodeSocketKind::PreSocket { handler, .. } => { - handler.take(); + InodeSocketKind::PreSocket { props, .. } => { + props.handler.take(); + } + InodeSocketKind::RemoteSocket { props, .. } => { + props.handler.take(); } - InodeSocketKind::RemoteTcpStream { .. } => {} } } @@ -1354,7 +1446,7 @@ impl InodeSocketProtected { InodeSocketKind::Raw(socket) => socket.poll_read_ready(cx), InodeSocketKind::Icmp(socket) => socket.poll_read_ready(cx), InodeSocketKind::PreSocket { .. } => Poll::Pending, - InodeSocketKind::RemoteTcpStream { .. } => Poll::Pending, + InodeSocketKind::RemoteSocket { .. } => Poll::Pending, } .map_err(net_error_into_io_err) } @@ -1367,7 +1459,7 @@ impl InodeSocketProtected { InodeSocketKind::Raw(socket) => socket.poll_write_ready(cx), InodeSocketKind::Icmp(socket) => socket.poll_write_ready(cx), InodeSocketKind::PreSocket { .. } => Poll::Pending, - InodeSocketKind::RemoteTcpStream { .. } => Poll::Pending, + InodeSocketKind::RemoteSocket { .. } => Poll::Pending, } .map_err(net_error_into_io_err) } @@ -1382,11 +1474,11 @@ impl InodeSocketProtected { InodeSocketKind::UdpSocket { socket, .. } => socket.set_handler(handler), InodeSocketKind::Raw(socket) => socket.set_handler(handler), InodeSocketKind::Icmp(socket) => socket.set_handler(handler), - InodeSocketKind::PreSocket { handler: h, .. } => { - h.replace(handler); + InodeSocketKind::PreSocket { props, .. } + | InodeSocketKind::RemoteSocket { props, .. } => { + props.handler.replace(handler); Ok(()) } - InodeSocketKind::RemoteTcpStream { .. 
} => Ok(()), } } } diff --git a/lib/wasix/src/os/task/control_plane.rs b/lib/wasix/src/os/task/control_plane.rs index aca8a0731eb..8a27069289f 100644 --- a/lib/wasix/src/os/task/control_plane.rs +++ b/lib/wasix/src/os/task/control_plane.rs @@ -210,6 +210,8 @@ pub enum ControlPlaneError { #[cfg(test)] mod tests { + use wasmer_wasix_types::wasix::ThreadStartType; + use crate::os::task::thread::WasiMemoryLayout; use super::*; @@ -224,8 +226,12 @@ mod tests { }); let p1 = p.new_process(ModuleHash::random()).unwrap(); - let _t1 = p1.new_thread(WasiMemoryLayout::default()).unwrap(); - let _t2 = p1.new_thread(WasiMemoryLayout::default()).unwrap(); + let _t1 = p1 + .new_thread(WasiMemoryLayout::default(), ThreadStartType::MainThread) + .unwrap(); + let _t2 = p1 + .new_thread(WasiMemoryLayout::default(), ThreadStartType::MainThread) + .unwrap(); assert_eq!( p.new_process(ModuleHash::random()).unwrap_err(), @@ -245,11 +251,17 @@ mod tests { let p1 = p.new_process(ModuleHash::random()).unwrap(); for _ in 0..10 { - let _thread = p1.new_thread(WasiMemoryLayout::default()).unwrap(); + let _thread = p1 + .new_thread(WasiMemoryLayout::default(), ThreadStartType::MainThread) + .unwrap(); } - let _t1 = p1.new_thread(WasiMemoryLayout::default()).unwrap(); - let _t2 = p1.new_thread(WasiMemoryLayout::default()).unwrap(); + let _t1 = p1 + .new_thread(WasiMemoryLayout::default(), ThreadStartType::MainThread) + .unwrap(); + let _t2 = p1 + .new_thread(WasiMemoryLayout::default(), ThreadStartType::MainThread) + .unwrap(); assert_eq!( p.new_process(ModuleHash::random()).unwrap_err(), diff --git a/lib/wasix/src/os/task/mod.rs b/lib/wasix/src/os/task/mod.rs index b3cf7cc4235..755645f0e32 100644 --- a/lib/wasix/src/os/task/mod.rs +++ b/lib/wasix/src/os/task/mod.rs @@ -7,6 +7,8 @@ pub mod signal; mod task_join_handle; pub mod thread; +#[allow(unused_imports)] +pub(crate) use process::WasiProcessInner; pub use task_join_handle::{ OwnedTaskStatus, TaskJoinHandle, TaskStatus, TaskTerminatedError, VirtualTaskHandle, }; diff --git a/lib/wasix/src/os/task/process.rs b/lib/wasix/src/os/task/process.rs index 8a2aa17f7c1..3d56365748f 100644 --- a/lib/wasix/src/os/task/process.rs +++ b/lib/wasix/src/os/task/process.rs @@ -1,16 +1,20 @@ #[cfg(feature = "journal")] -use crate::{journal::JournalEffector, unwind, WasiResult}; +use crate::{journal::JournalEffector, syscalls::do_checkpoint_from_outside, unwind, WasiResult}; use crate::{ journal::SnapshotTrigger, runtime::module_cache::ModuleHash, WasiEnv, WasiRuntimeError, }; use serde::{Deserialize, Serialize}; +#[cfg(feature = "journal")] +use std::collections::HashSet; use std::{ collections::HashMap, convert::TryInto, + ops::Range, sync::{ atomic::{AtomicU32, Ordering}, Arc, Condvar, Mutex, MutexGuard, RwLock, Weak, }, + task::Waker, time::Duration, }; use tracing::trace; @@ -18,6 +22,7 @@ use wasmer::FunctionEnvMut; use wasmer_wasix_types::{ types::Signal, wasi::{Errno, ExitCode, Snapshot0Clockid}, + wasix::ThreadStartType, }; use crate::{ @@ -31,6 +36,7 @@ use super::{ signal::{SignalDeliveryError, SignalHandlerAbi}, task_join_handle::OwnedTaskStatus, thread::WasiMemoryLayout, + TaskStatus, }; /// Represents the ID of a sub-process @@ -92,7 +98,7 @@ pub struct WasiProcess { /// List of all the children spawned from this thread pub(crate) parent: Option>>, /// The inner protected region of the process with a conditional - /// variable that is used for coordination such as checksums. + /// variable that is used for coordination such as snapshots. 
pub(crate) inner: LockableWasiProcessInner, /// Reference back to the compute engine // TODO: remove this reference, access should happen via separate state instead @@ -122,11 +128,36 @@ pub enum WasiProcessCheckpoint { Snapshot { trigger: SnapshotTrigger }, } +#[repr(C)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MemorySnapshotRegion { + pub start: u64, + pub end: u64, +} + +impl From> for MemorySnapshotRegion { + fn from(value: Range) -> Self { + Self { + start: value.start, + end: value.end, + } + } +} + +#[allow(clippy::from_over_into)] +impl Into> for MemorySnapshotRegion { + fn into(self) -> Range { + self.start..self.end + } +} + // TODO: fields should be private and only accessed via methods. #[derive(Debug)] pub struct WasiProcessInner { /// Unique ID of this process pub pid: WasiProcessId, + /// Number of threads waiting for children to exit + pub(crate) waiting: Arc, /// The threads that make up this process pub threads: HashMap, /// Number of threads running for this process @@ -138,6 +169,15 @@ pub struct WasiProcessInner { /// Represents a checkpoint which blocks all the threads /// and then executes some maintenance action pub checkpoint: WasiProcessCheckpoint, + /// List of situations that the process will checkpoint on + #[cfg(feature = "journal")] + pub snapshot_on: HashSet, + /// Any wakers waiting on this process (for example for a checkpoint) + pub wakers: Vec, + /// The snapshot memory significantly reduce the amount of + /// duplicate entries in the journal for memory that has not changed + #[cfg(feature = "journal")] + pub snapshot_memory_hash: HashMap, /// Represents all the backoff properties for this process /// which will be used to determine if the CPU should be /// throttled or not @@ -160,8 +200,12 @@ impl WasiProcessInner { ) -> WasiResult> { // Set the checkpoint flag and then enter the normal processing loop { - let mut inner = inner.0.lock().unwrap(); - inner.checkpoint = for_what; + let mut guard = inner.0.lock().unwrap(); + guard.checkpoint = for_what; + for waker in guard.wakers.drain(..) 
{ + waker.wake(); + } + inner.1.notify_all(); } Self::maybe_checkpoint::(inner, ctx) @@ -180,7 +224,7 @@ impl WasiProcessInner { use wasmer::AsStoreMut; use wasmer_types::OnCalledAction; - use crate::{rewind_ext, WasiError}; + use crate::{os::task::thread::RewindResultType, rewind_ext, WasiError}; let guard = inner.0.lock().unwrap(); if guard.checkpoint == WasiProcessCheckpoint::Execute { // No checkpoint so just carry on @@ -190,12 +234,12 @@ impl WasiProcessInner { drop(guard); // Perform the unwind action + let thread_layout = ctx.data().thread.memory_layout().clone(); unwind::(ctx, move |mut ctx, memory_stack, rewind_stack| { // Grab all the globals and serialize them - let store_data = - crate::utils::store::capture_instance_snapshot(&mut ctx.as_store_mut()) - .serialize() - .unwrap(); + let store_data = crate::utils::store::capture_store_snapshot(&mut ctx.as_store_mut()) + .serialize() + .unwrap(); let memory_stack = memory_stack.freeze(); let rewind_stack = rewind_stack.freeze(); let store_data = Bytes::from(store_data); @@ -208,6 +252,7 @@ impl WasiProcessInner { ); // Write our thread state to the snapshot + let thread_start = ctx.data().thread.thread_start_type(); let tid = ctx.data().thread.tid(); if let Err(err) = JournalEffector::save_thread_state::( &mut ctx, @@ -215,6 +260,8 @@ impl WasiProcessInner { memory_stack.clone(), rewind_stack.clone(), store_data.clone(), + thread_start, + thread_layout, ) { return wasmer_types::OnCalledAction::Trap(err.into()); } @@ -225,10 +272,13 @@ impl WasiProcessInner { // to freeze then we have to execute the checksum operation) loop { if let WasiProcessCheckpoint::Snapshot { trigger } = guard.checkpoint { - ctx.data().thread.set_check_pointing(true); + ctx.data().thread.set_checkpointing(true); // Now if we are the last thread we also write the memory - let is_last_thread = guard.threads.values().all(WasiThread::is_check_pointing); + let is_last_thread = guard + .threads + .values() + .all(|t| t.is_check_pointing() || t.is_deep_sleeping()); if is_last_thread { if let Err(err) = JournalEffector::save_memory_and_snapshot(&mut ctx, &mut guard, trigger) @@ -238,9 +288,12 @@ impl WasiProcessInner { } // Clear the checkpointing flag and notify everyone to wake up - ctx.data().thread.set_check_pointing(false); - guard.checkpoint = WasiProcessCheckpoint::Execute; + ctx.data().thread.set_checkpointing(false); trace!("checkpoint complete"); + guard.checkpoint = WasiProcessCheckpoint::Execute; + for waker in guard.wakers.drain(..) 
{ + waker.wake(); + } inner.1.notify_all(); } else { guard = inner.1.wait(guard).unwrap(); @@ -248,7 +301,7 @@ impl WasiProcessInner { continue; } - ctx.data().thread.set_check_pointing(false); + ctx.data().thread.set_checkpointing(false); trace!("checkpoint finished"); // Rewind the stack and carry on @@ -257,7 +310,7 @@ impl WasiProcessInner { Some(memory_stack), rewind_stack, store_data, - None, + RewindResultType::RewindWithoutResult, ) { Errno::Success => OnCalledAction::InvokeAgain, err => { @@ -273,6 +326,53 @@ impl WasiProcessInner { Ok(Ok(MaybeCheckpointResult::Unwinding)) } + + // Execute any checkpoints that can be executed while outside of the WASM process + #[cfg(not(feature = "journal"))] + pub fn do_checkpoints_from_outside(_ctx: &mut FunctionEnvMut<'_, WasiEnv>) {} + + // Execute any checkpoints that can be executed while outside of the WASM process + #[cfg(feature = "journal")] + pub fn do_checkpoints_from_outside(ctx: &mut FunctionEnvMut<'_, WasiEnv>) { + let inner = ctx.data().process.inner.clone(); + let mut guard = inner.0.lock().unwrap(); + + // Wait for the checkpoint to finish (or if we are the last thread + // to freeze then we have to execute the checksum operation) + while let WasiProcessCheckpoint::Snapshot { trigger } = guard.checkpoint { + ctx.data().thread.set_checkpointing(true); + + // Now if we are the last thread we also write the memory + let is_last_thread = guard + .threads + .values() + .all(|t| t.is_check_pointing() || t.is_deep_sleeping()); + if is_last_thread { + if let Err(err) = + JournalEffector::save_memory_and_snapshot(ctx, &mut guard, trigger) + { + inner.1.notify_all(); + tracing::error!("failed to snapshot memory and threads - {}", err); + return; + } + + // Clear the checkpointing flag and notify everyone to wake up + ctx.data().thread.set_checkpointing(false); + trace!("checkpoint complete"); + guard.checkpoint = WasiProcessCheckpoint::Execute; + for waker in guard.wakers.drain(..) { + waker.wake(); + } + inner.1.notify_all(); + } else { + guard = inner.1.wait(guard).unwrap(); + } + continue; + } + + ctx.data().thread.set_checkpointing(false); + trace!("checkpoint finished"); + } } // TODO: why do we need this, how is it used? 
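
As a rough illustration of the coordination used by maybe_checkpoint and do_checkpoints_from_outside above: the process keeps a Mutex/Condvar pair, every thread freezes itself at the checkpoint, the last thread to arrive performs the snapshot work, and then all waiters are released. The standalone sketch below shows only that pattern; CheckpointState and park_for_checkpoint are illustrative names standing in for the wasix types, and the closure stands in for save_memory_and_snapshot.

use std::sync::{Arc, Condvar, Mutex};

// Illustrative stand-ins; not the real wasix types.
#[derive(Default)]
struct CheckpointState {
    parked: usize, // threads currently frozen at the checkpoint
    total: usize,  // threads that must reach the checkpoint
    done: bool,    // set once the snapshot work has been performed
}

type Shared = Arc<(Mutex<CheckpointState>, Condvar)>;

// Each thread calls this when it reaches a checkpoint. The last thread to
// arrive runs `work` (standing in for save_memory_and_snapshot) and wakes
// the others, which wait on the condvar until the work is done.
fn park_for_checkpoint(shared: &Shared, work: impl FnOnce()) {
    let (lock, cvar) = &**shared;
    let mut guard = lock.lock().unwrap();
    guard.parked += 1;
    if guard.parked == guard.total {
        work();
        guard.done = true;
        cvar.notify_all();
    } else {
        while !guard.done {
            guard = cvar.wait(guard).unwrap();
        }
    }
}

fn main() {
    let state = CheckpointState { total: 4, ..Default::default() };
    let shared: Shared = Arc::new((Mutex::new(state), Condvar::new()));
    let handles: Vec<_> = (0..4)
        .map(|i| {
            let shared = shared.clone();
            std::thread::spawn(move || {
                park_for_checkpoint(&shared, || println!("thread {i} ran the snapshot"));
            })
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
}

The real implementation additionally drains a list of Wakers before notifying the condvar, so async callers parked on the same checkpoint wake up alongside the blocked threads.
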
@@ -302,28 +402,51 @@ impl WasiProcess { .and_then(|p| p.config().enable_exponential_cpu_backoff) .unwrap_or(Duration::from_secs(30)); let max_cpu_cool_off_time = Duration::from_millis(500); + + let waiting = Arc::new(AtomicU32::new(0)); + let inner = Arc::new(( + Mutex::new(WasiProcessInner { + pid, + threads: Default::default(), + thread_count: Default::default(), + signal_intervals: Default::default(), + children: Default::default(), + checkpoint: WasiProcessCheckpoint::Execute, + wakers: Default::default(), + waiting: waiting.clone(), + #[cfg(feature = "journal")] + snapshot_on: Default::default(), + #[cfg(feature = "journal")] + snapshot_memory_hash: Default::default(), + backoff: WasiProcessCpuBackoff::new(max_cpu_backoff_time, max_cpu_cool_off_time), + }), + Condvar::new(), + )); + + #[derive(Debug)] + struct SignalHandler(LockableWasiProcessInner); + impl SignalHandlerAbi for SignalHandler { + fn signal(&self, signal: u8) -> Result<(), SignalDeliveryError> { + if let Ok(signal) = signal.try_into() { + signal_process_internal(&self.0, signal); + Ok(()) + } else { + Err(SignalDeliveryError) + } + } + } + WasiProcess { pid, module_hash, parent: None, compute: plane, - inner: Arc::new(( - Mutex::new(WasiProcessInner { - pid, - threads: Default::default(), - thread_count: Default::default(), - signal_intervals: Default::default(), - children: Default::default(), - checkpoint: WasiProcessCheckpoint::Execute, - backoff: WasiProcessCpuBackoff::new( - max_cpu_backoff_time, - max_cpu_cool_off_time, - ), - }), - Condvar::new(), - )), - finished: Arc::new(OwnedTaskStatus::default()), - waiting: Arc::new(AtomicU32::new(0)), + inner: inner.clone(), + finished: Arc::new( + OwnedTaskStatus::new(TaskStatus::Pending) + .with_signal_handler(Arc::new(SignalHandler(inner))), + ), + waiting, cpu_run_tokens: Arc::new(AtomicU32::new(0)), } } @@ -357,15 +480,12 @@ impl WasiProcess { pub fn new_thread( &self, layout: WasiMemoryLayout, + start: ThreadStartType, ) -> Result { let control_plane = self.compute.must_upgrade(); - let task_count_guard = control_plane.register_task()?; // Determine if its the main thread or not - let is_main = { - let inner = self.inner.0.lock().unwrap(); - inner.thread_count == 0 - }; + let is_main = matches!(start, ThreadStartType::MainThread); // Generate a new process ID (this is because the process ID and thread ID // address space must not overlap in libc). 
For the main proecess the TID=PID @@ -376,6 +496,21 @@ impl WasiProcess { tid.into() }; + self.new_thread_with_id(layout, start, tid) + } + + /// Creates a a thread and returns it + pub fn new_thread_with_id( + &self, + layout: WasiMemoryLayout, + start: ThreadStartType, + tid: WasiThreadId, + ) -> Result { + let control_plane = self.compute.must_upgrade(); + let task_count_guard = control_plane.register_task()?; + + let is_main = matches!(start, ThreadStartType::MainThread); + // The wait finished should be the process version if its the main thread let mut inner = self.inner.0.lock().unwrap(); let finished = if is_main { @@ -385,7 +520,15 @@ impl WasiProcess { }; // Insert the thread into the pool - let ctrl = WasiThread::new(self.pid(), tid, is_main, finished, task_count_guard, layout); + let ctrl = WasiThread::new( + self.pid(), + tid, + is_main, + finished, + task_count_guard, + layout, + start, + ); inner.threads.insert(tid, ctrl.clone()); inner.thread_count += 1; @@ -425,26 +568,7 @@ impl WasiProcess { /// Signals all the threads in this process pub fn signal_process(&self, signal: Signal) { - let pid = self.pid(); - tracing::trace!(%pid, "signal-process({:?})", signal); - - { - let inner = self.inner.0.lock().unwrap(); - if self.waiting.load(Ordering::Acquire) > 0 { - let mut triggered = false; - for child in inner.children.iter() { - child.signal_process(signal); - triggered = true; - } - if triggered { - return; - } - } - } - let inner = self.inner.0.lock().unwrap(); - for thread in inner.threads.values() { - thread.signal(signal); - } + signal_process_internal(&self.inner, signal); } /// Signals one of the threads every interval @@ -560,6 +684,54 @@ impl WasiProcess { } } +/// Signals all the threads in this process +fn signal_process_internal(process: &LockableWasiProcessInner, signal: Signal) { + #[allow(unused_mut)] + let mut guard = process.0.lock().unwrap(); + let pid = guard.pid; + tracing::trace!(%pid, "signal-process({:?})", signal); + + // If the snapshot on ctrl-c is currently registered then we need + // to take a snapshot and exit + #[cfg(feature = "journal")] + { + if signal == Signal::Sigint + && (guard.snapshot_on.contains(&SnapshotTrigger::Sigint) + || guard.snapshot_on.remove(&SnapshotTrigger::FirstSigint)) + { + drop(guard); + + tracing::debug!(%pid, "snapshot-on-interrupt-signal"); + + do_checkpoint_from_outside( + process, + WasiProcessCheckpoint::Snapshot { + trigger: SnapshotTrigger::Sigint, + }, + ); + return; + }; + } + + // Check if there are subprocesses that will receive this signal + // instead of this process + if guard.waiting.load(Ordering::Acquire) > 0 { + let mut triggered = false; + for child in guard.children.iter() { + child.signal_process(signal); + triggered = true; + } + if triggered { + return; + } + } + + // Otherwise just send the signal to all the threads + for thread in guard.threads.values() { + thread.signal(signal); + } +} + impl SignalHandlerAbi for WasiProcess { fn signal(&self, sig: u8) -> Result<(), SignalDeliveryError> { if let Ok(sig) = sig.try_into() { diff --git a/lib/wasix/src/os/task/signal.rs b/lib/wasix/src/os/task/signal.rs index 28462829bab..d6b729b888f 100644 --- a/lib/wasix/src/os/task/signal.rs +++ b/lib/wasix/src/os/task/signal.rs @@ -1,4 +1,4 @@ -use std::time::Duration; +use std::{sync::Arc, time::Duration}; use wasmer_wasix_types::types::Signal; @@ -15,6 +15,8 @@ where fn signal(&self, signal: u8) -> Result<(), SignalDeliveryError>; } +pub type DynSignalHandlerAbi = dyn SignalHandlerAbi + Send + Sync + 'static; 
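
The DynSignalHandlerAbi alias above, together with the signal_handler field the diff adds to OwnedTaskStatus further down, follows the usual Arc<dyn Trait + Send + Sync> shape: code outside the guest (for example the Ctrl-C listener added below) can deliver a raw signal number without knowing anything about process internals. A minimal standalone sketch of that shape, using illustrative names (SignalSink, LoggingSink, TaskHandle) rather than the real wasix types:

use std::sync::Arc;

// Illustrative only; SignalHandlerAbi is the real trait in wasix.
trait SignalSink: std::fmt::Debug {
    fn signal(&self, signal: u8) -> Result<(), String>;
}

// Mirrors DynSignalHandlerAbi: a shareable, thread-safe trait object.
type DynSignalSink = dyn SignalSink + Send + Sync + 'static;

#[derive(Debug)]
struct LoggingSink;

impl SignalSink for LoggingSink {
    fn signal(&self, signal: u8) -> Result<(), String> {
        println!("delivering signal {signal}");
        Ok(())
    }
}

#[derive(Debug)]
struct TaskHandle {
    // Cloning the handle only bumps the Arc, so many handles share one sink.
    signal_handler: Arc<DynSignalSink>,
}

fn main() {
    let handle = TaskHandle {
        signal_handler: Arc::new(LoggingSink),
    };
    // 2 is SIGINT on most platforms; the raw u8 stands in for the wasi Signal enum.
    handle.signal_handler.signal(2).unwrap();
}

In the patch itself the concrete handler wraps the lockable process state and forwards to signal_process_internal, which is what lets a task join handle raise signals into a process it otherwise knows nothing about.
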
+ #[derive(Debug)] pub struct WasiSignalInterval { /// Signal that will be raised @@ -26,3 +28,30 @@ pub struct WasiSignalInterval { /// Last time that a signal was triggered pub last_signal: u128, } + +pub fn default_signal_handler() -> Arc { + #[derive(Debug)] + struct DefaultHandler {} + impl SignalHandlerAbi for DefaultHandler { + fn signal(&self, signal: u8) -> Result<(), SignalDeliveryError> { + if let Ok(signal) = TryInto::::try_into(signal) { + match signal { + Signal::Sigkill + | Signal::Sigterm + | Signal::Sigabrt + | Signal::Sigquit + | Signal::Sigint + | Signal::Sigstop => { + tracing::debug!("handling terminate signal"); + std::process::exit(1); + } + signal => tracing::info!("unhandled signal - {:?}", signal), + } + } else { + tracing::info!("unknown signal - {}", signal) + } + Ok(()) + } + } + Arc::new(DefaultHandler {}) +} diff --git a/lib/wasix/src/os/task/task_join_handle.rs b/lib/wasix/src/os/task/task_join_handle.rs index 18d7271bac6..5c2329e7f42 100644 --- a/lib/wasix/src/os/task/task_join_handle.rs +++ b/lib/wasix/src/os/task/task_join_handle.rs @@ -8,6 +8,8 @@ use wasmer_wasix_types::wasi::{Errno, ExitCode}; use crate::WasiRuntimeError; +use super::signal::{default_signal_handler, DynSignalHandlerAbi}; + #[derive(Clone, Debug)] pub enum TaskStatus { Pending, @@ -70,6 +72,9 @@ pub trait VirtualTaskHandle: std::fmt::Debug + Send + Sync + 'static { /// A handle that allows awaiting the termination of a task, and retrieving its exit code. #[derive(Debug)] pub struct OwnedTaskStatus { + // The signal handler that can be invoked for this owned task + signal_handler: Arc, + watch_tx: tokio::sync::watch::Sender, // Even through unused, without this receive there is a race condition // where the previously sent values are lost. @@ -81,11 +86,23 @@ impl OwnedTaskStatus { pub fn new(status: TaskStatus) -> Self { let (tx, rx) = tokio::sync::watch::channel(status); Self { + signal_handler: default_signal_handler(), watch_tx: tx, watch_rx: rx, } } + /// Sets the signal handler used for this owned task + pub fn set_signal_handler(&mut self, handler: Arc) { + self.signal_handler = handler; + } + + /// Attaches a signal handler + pub fn with_signal_handler(mut self, handler: Arc) -> Self { + self.set_signal_handler(handler); + self + } + pub fn new_finished_with_code(code: ExitCode) -> Self { Self::new(TaskStatus::Finished(Ok(code))) } @@ -144,6 +161,7 @@ impl OwnedTaskStatus { pub fn handle(&self) -> TaskJoinHandle { TaskJoinHandle { + signal_handler: self.signal_handler.clone(), watch: self.watch_tx.subscribe(), } } @@ -158,6 +176,8 @@ impl Default for OwnedTaskStatus { /// A handle that allows awaiting the termination of a task, and retrieving its exit code. #[derive(Clone, Debug)] pub struct TaskJoinHandle { + #[allow(unused)] + signal_handler: Arc, watch: tokio::sync::watch::Receiver, } @@ -167,6 +187,24 @@ impl TaskJoinHandle { self.watch.borrow().clone() } + #[cfg(feature = "ctrlc")] + pub fn install_ctrlc_handler(&self) { + use wasmer::FromToNativeWasmType; + use wasmer_wasix_types::wasi::Signal; + + let signal_handler = self.signal_handler.clone(); + + tokio::spawn(async move { + // Loop sending ctrl-c presses as signals to the signal handler + while tokio::signal::ctrl_c().await.is_ok() { + if let Err(err) = signal_handler.signal(Signal::Sigint.to_native() as u8) { + tracing::error!("failed to process signal - {}", err); + std::process::exit(1); + } + } + }); + } + /// Wait until the task finishes. 
pub async fn wait_finished(&mut self) -> Result> { loop { diff --git a/lib/wasix/src/os/task/thread.rs b/lib/wasix/src/os/task/thread.rs index 91e628a08b6..69837851f39 100644 --- a/lib/wasix/src/os/task/thread.rs +++ b/lib/wasix/src/os/task/thread.rs @@ -1,5 +1,4 @@ use serde::{Deserialize, Serialize}; -#[cfg(feature = "journal")] use std::sync::atomic::{AtomicBool, Ordering}; use std::{ collections::HashMap, @@ -13,6 +12,7 @@ use wasmer::{ExportError, InstantiationError, MemoryError}; use wasmer_wasix_types::{ types::Signal, wasi::{Errno, ExitCode}, + wasix::ThreadStartType, }; use crate::{ @@ -100,6 +100,7 @@ pub struct ThreadStack { pub struct WasiThread { state: Arc, layout: WasiMemoryLayout, + start: ThreadStartType, // This is used for stack rewinds rewind: Option, @@ -116,23 +117,50 @@ impl WasiThread { self.rewind.take() } - pub(crate) fn has_rewind_of_type(&self, _type: HandleRewindType) -> bool { - match _type { + /// Gets the thread start type for this thread + pub fn thread_start_type(&self) -> ThreadStartType { + self.start + } + + /// Returns true if a rewind of a particular type has been queued + /// for processed by a rewind operation + pub(crate) fn has_rewind_of_type(&self, type_: HandleRewindType) -> bool { + match type_ { HandleRewindType::ResultDriven => match &self.rewind { - Some(rewind) => rewind.rewind_result.is_some(), + Some(rewind) => match rewind.rewind_result { + RewindResultType::RewindRestart => true, + RewindResultType::RewindWithoutResult => false, + RewindResultType::RewindWithResult(_) => true, + }, None => false, }, - HandleRewindType::Resultless => match &self.rewind { - Some(rewind) => rewind.rewind_result.is_none(), + HandleRewindType::ResultLess => match &self.rewind { + Some(rewind) => match rewind.rewind_result { + RewindResultType::RewindRestart => true, + RewindResultType::RewindWithoutResult => true, + RewindResultType::RewindWithResult(_) => false, + }, None => false, }, } } + /// Sets a flag that tells others if this thread is currently + /// deep sleeping + pub(crate) fn set_deep_sleeping(&self, val: bool) { + self.state.deep_sleeping.store(val, Ordering::SeqCst); + } + + /// Reads a flag that determines if this thread is currently + /// deep sleeping + pub(crate) fn is_deep_sleeping(&self) -> bool { + self.state.deep_sleeping.load(Ordering::SeqCst) + } + /// Sets a flag that tells others that this thread is currently /// check pointing itself #[cfg(feature = "journal")] - pub(crate) fn set_check_pointing(&self, val: bool) { + pub(crate) fn set_checkpointing(&self, val: bool) { self.state.check_pointing.store(val, Ordering::SeqCst); } @@ -181,18 +209,16 @@ impl Drop for WasiThreadRunGuard { } /// Represents the memory layout of the parts that the thread itself uses -#[derive(Debug, Default, Clone)] -pub struct WasiMemoryLayout { - /// This is the top part of the stack (stacks go backwards) - pub stack_upper: u64, - /// This is the bottom part of the stack (anything more below this is a stack overflow) - pub stack_lower: u64, - /// Piece of memory that is marked as none readable/writable so stack overflows cause an exception - /// TODO: This field will need to be used to mark the guard memory as inaccessible - #[allow(dead_code)] - pub guard_size: u64, - /// Total size of the stack - pub stack_size: u64, +pub use wasmer_wasix_types::wasix::WasiMemoryLayout; + +#[derive(Clone, Debug)] +pub enum RewindResultType { + // The rewind must restart the operation it had already started + RewindRestart, + // The rewind has been triggered and should be 
handled but has not result + RewindWithoutResult, + // The rewind has been triggered and should be handled with the supplied result + RewindWithResult(Bytes), } // Contains the result of a rewind operation @@ -202,7 +228,7 @@ pub(crate) struct RewindResult { pub memory_stack: Option, /// Generic serialized object passed back to the rewind resumption code /// (uses the bincode serializer) - pub rewind_result: Option, + pub rewind_result: RewindResultType, } #[derive(Debug)] @@ -215,6 +241,7 @@ struct WasiThreadState { status: Arc, #[cfg(feature = "journal")] check_pointing: AtomicBool, + deep_sleeping: AtomicBool, // Registers the task termination with the ControlPlane on drop. // Never accessed, since it's a drop guard. @@ -231,6 +258,7 @@ impl WasiThread { status: Arc, guard: TaskCountGuard, layout: WasiMemoryLayout, + start: ThreadStartType, ) -> Self { Self { state: Arc::new(WasiThreadState { @@ -242,9 +270,11 @@ impl WasiThread { stack: Mutex::new(ThreadStack::default()), #[cfg(feature = "journal")] check_pointing: AtomicBool::new(false), + deep_sleeping: AtomicBool::new(false), _task_count_guard: guard, }), layout, + start, rewind: None, } } @@ -329,6 +359,27 @@ impl WasiThread { false } + /// Waits for a signal to arrive + pub async fn wait_for_signal(&self) { + // This poller will process any signals when the main working function is idle + struct SignalPoller<'a> { + thread: &'a WasiThread, + } + impl<'a> std::future::Future for SignalPoller<'a> { + type Output = (); + fn poll( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll { + if self.thread.has_signals_or_subscribe(cx.waker()) { + return std::task::Poll::Ready(()); + } + std::task::Poll::Pending + } + } + SignalPoller { thread: self }.await + } + /// Returns all the signals that are waiting to be processed pub fn pop_signals_or_subscribe(&self, waker: &Waker) -> Option> { let mut guard = self.state.signals.lock().unwrap(); diff --git a/lib/wasix/src/rewind.rs b/lib/wasix/src/rewind.rs index 92f72a6edc3..fb3810b517a 100644 --- a/lib/wasix/src/rewind.rs +++ b/lib/wasix/src/rewind.rs @@ -2,7 +2,12 @@ use std::pin::Pin; use bytes::Bytes; use futures::Future; -use wasmer_wasix_types::wasi::Errno; +use wasmer_wasix_types::{ + wasi::Errno, + wasix::{ThreadStartType, WasiMemoryLayout}, +}; + +use crate::os::task::thread::RewindResultType; /// Future that will be polled by asyncify methods #[doc(hidden)] @@ -24,11 +29,15 @@ pub struct RewindState { pub rewind_stack: Bytes, /// All the global data stored in the store pub store_data: Bytes, + /// Describes the type of thread start + pub start: ThreadStartType, + /// Layout of the memory, + pub layout: WasiMemoryLayout, /// Flag that indicates if this rewind is 64-bit or 32-bit memory based pub is_64bit: bool, } -pub type RewindStateOption = Option<(RewindState, Option)>; +pub type RewindStateOption = Option<(RewindState, RewindResultType)>; /// Represents the work that will be done when a thread goes to deep sleep and /// includes the things needed to restore it again diff --git a/lib/wasix/src/runners/dproxy/factory.rs b/lib/wasix/src/runners/dproxy/factory.rs new file mode 100644 index 00000000000..f5f492fbe9c --- /dev/null +++ b/lib/wasix/src/runners/dproxy/factory.rs @@ -0,0 +1,127 @@ +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, + task::Context, + time::Instant, +}; + +use derivative::Derivative; +use wasmer_journal::{DynJournal, RecombinedJournal}; + +use crate::{ + runners::Runner, + runtime::{DynRuntime, OverriddenRuntime}, 
+}; + +use super::{ + handler::Handler, hyper_proxy::HyperProxyConnectorBuilder, instance::DProxyInstance, + networking::LocalWithLoopbackNetworking, shard::Shard, socket_manager::SocketManager, +}; + +#[derive(Debug, Default)] +struct State { + instance: HashMap, +} + +/// This factory will store and reuse instances between invocations thus +/// allowing for the instances to be stateful. +#[derive(Derivative, Clone, Default)] +#[derivative(Debug)] +pub struct DProxyInstanceFactory { + state: Arc>, +} + +impl DProxyInstanceFactory { + pub fn new() -> Self { + Default::default() + } + + pub async fn acquire(&self, handler: &Handler, shard: Shard) -> anyhow::Result { + loop { + { + let state = self.state.lock().unwrap(); + if let Some(instance) = state.instance.get(&shard).cloned() { + return Ok(instance); + } + } + + let instance = self.spin_up(handler, shard.clone()).await?; + + let mut state = self.state.lock().unwrap(); + state.instance.insert(shard.clone(), instance); + } + } + + pub async fn spin_up(&self, handler: &Handler, shard: Shard) -> anyhow::Result { + // Get the runtime with its already wired local networking + let runtime = handler.runtime.clone(); + + // DProxy is able to resume execution of the stateful workload using memory + // snapshots hence the journals it stores are complete journals + let journals = runtime + .journals() + .clone() + .into_iter() + .map(|journal| { + let tx = Box::new(journal.clone()); + let rx = journal.as_restarted()?; + anyhow::Result::Ok(Arc::new(RecombinedJournal::new(tx, rx)) as Arc) + }) + .collect::>>()?; + let mut runtime = OverriddenRuntime::new(runtime).with_journals(journals); + + // We attach a composite networking to the runtime which includes a loopback + // networking implementation connected to a socket manager + let composite_networking = LocalWithLoopbackNetworking::new(); + let poll_listening = { + let networking = composite_networking.clone(); + Arc::new(move |cx: &mut Context<'_>| networking.poll_listening(cx)) + }; + let socket_manager = Arc::new(SocketManager::new( + poll_listening, + composite_networking.loopback_networking(), + handler.config.proxy_connect_init_timeout, + handler.config.proxy_connect_nominal_timeout, + )); + runtime = runtime.with_networking(Arc::new(composite_networking)); + + // The connector uses the socket manager to open sockets to the instance + let connector = HyperProxyConnectorBuilder::new(socket_manager.clone()) + .build() + .await; + + // Now we run the actual instance under a WasiRunner + #[cfg(feature = "sys")] + let handle = tokio::runtime::Handle::current(); + let this = self.clone(); + let pkg = handler.config.pkg.clone(); + let command_name = handler.command_name.clone(); + let connector_inner = connector.clone(); + let runtime = Arc::new(runtime) as Arc; + let mut runner = handler.config.inner.clone(); + runtime + .task_manager() + .clone() + .task_dedicated(Box::new(move || { + #[cfg(feature = "sys")] + let _guard = handle.enter(); + if let Err(err) = runner.run_command(&command_name, &pkg, runtime) { + tracing::error!("Instance Exited: {}", err); + } else { + tracing::info!("Instance Exited: Nominal"); + } + { + let mut state = this.state.lock().unwrap(); + state.instance.remove(&shard); + } + connector_inner.shutdown(); + }))?; + + // Return an instance + Ok(DProxyInstance { + last_used: Arc::new(Mutex::new(Instant::now())), + socket_manager, + client: hyper::Client::builder().build(connector), + }) + } +} diff --git a/lib/wasix/src/runners/dproxy/handler.rs 
b/lib/wasix/src/runners/dproxy/handler.rs new file mode 100644 index 00000000000..ea0cf3d6fbb --- /dev/null +++ b/lib/wasix/src/runners/dproxy/handler.rs @@ -0,0 +1,121 @@ +use std::pin::Pin; +use std::sync::Arc; +use std::task::Poll; + +use futures::{Future, FutureExt}; +use http::{Request, Response, Uri}; +use hyper::Body; +use tower::Service; + +use crate::runners::dproxy::shard::Shard; +use crate::Runtime; + +use super::factory::DProxyInstanceFactory; +use super::Config; + +#[derive(derivative::Derivative)] +#[derivative(Debug)] +pub struct SharedState { + pub(crate) config: Config, + pub(crate) command_name: String, + #[derivative(Debug = "ignore")] + pub(crate) runtime: Arc, + pub(crate) factory: DProxyInstanceFactory, +} + +/// Handler which will process DProxy requests +#[derive(Clone, Debug)] +pub struct Handler(Arc); + +impl Handler { + pub(crate) fn new( + config: Config, + command_name: String, + factory: DProxyInstanceFactory, + runtime: Arc, + ) -> Self { + Handler(Arc::new(SharedState { + config, + command_name, + runtime, + factory, + })) + } + + #[tracing::instrument(level = "debug", skip_all, err)] + pub(crate) async fn handle( + &self, + mut req: Request, + _token: T, + ) -> anyhow::Result> + where + T: Send + 'static, + { + tracing::debug!(headers=?req.headers()); + + // Determine the shard we are using + let shard = req + .headers() + .get("X-Shard") + .map(|v| String::from_utf8_lossy(v.as_bytes())) + .map(|s| match s.parse::() { + Ok(id) => Ok(Shard::ById(id)), + Err(err) => Err(err), + }) + .unwrap_or(Ok(Shard::Singleton))?; + + // Modify the request URI so that it will work with the hyper proxy + let mut new_uri = Uri::builder() + .scheme("http") + .authority( + req.uri() + .authority() + .cloned() + .unwrap_or_else(|| "localhost".parse().unwrap()), + ) + .path_and_query( + req.uri() + .path_and_query() + .cloned() + .unwrap_or_else(|| "/".parse().unwrap()), + ) + .build() + .unwrap(); + std::mem::swap(req.uri_mut(), &mut new_uri); + + // Acquire a DProxy instance + tracing::debug!("Acquiring DProxy instance instance"); + let instance = self.factory.acquire(self, shard).await?; + + tracing::debug!("Calling into the DProxy instance"); + let client = instance.client.clone(); + + // Perform the request + Ok(client.request(req).await?) 
+ } +} + +impl std::ops::Deref for Handler { + type Target = Arc; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Service> for Handler { + type Response = Response; + type Error = anyhow::Error; + type Future = Pin>> + Send>>; + + fn poll_ready(&mut self, _cx: &mut std::task::Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, request: Request) -> Self::Future { + // Note: all fields are reference-counted so cloning is pretty cheap + let handler = self.clone(); + let fut = async move { handler.handle(request, ()).await }; + fut.boxed() + } +} diff --git a/lib/wasix/src/runners/dproxy/hyper_proxy/builder.rs b/lib/wasix/src/runners/dproxy/hyper_proxy/builder.rs new file mode 100644 index 00000000000..00265d228bd --- /dev/null +++ b/lib/wasix/src/runners/dproxy/hyper_proxy/builder.rs @@ -0,0 +1,22 @@ +use std::sync::Arc; + +use crate::runners::dproxy::socket_manager::SocketManager; + +use super::*; + +#[derive(Debug)] +pub struct HyperProxyConnectorBuilder { + socket_manager: Arc, +} + +impl HyperProxyConnectorBuilder { + pub fn new(socket_manager: Arc) -> Self { + Self { socket_manager } + } + + pub async fn build(self) -> HyperProxyConnector { + HyperProxyConnector { + socket_manager: self.socket_manager, + } + } +} diff --git a/lib/wasix/src/runners/dproxy/hyper_proxy/connector.rs b/lib/wasix/src/runners/dproxy/hyper_proxy/connector.rs new file mode 100644 index 00000000000..a309c8c18c8 --- /dev/null +++ b/lib/wasix/src/runners/dproxy/hyper_proxy/connector.rs @@ -0,0 +1,46 @@ +use std::sync::Arc; + +use tokio_stream::wrappers::BroadcastStream; + +use super::socket_manager::SocketManager; + +use super::*; + +/// A Connector for the WASM processes behind a socket. +#[derive(Debug, Clone)] +pub struct HyperProxyConnector { + pub(super) socket_manager: Arc, +} + +impl HyperProxyConnector { + pub fn shutdown(&self) { + self.socket_manager.shutdown(); + } +} + +impl Service for HyperProxyConnector { + type Response = HyperProxyStream; + type Error = BoxError; + + #[allow(clippy::type_complexity)] + type Future = Pin> + Send>>; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, _dst: Uri) -> Self::Future { + let this = self.clone(); + Box::pin(async move { + let terminate_rx = this.socket_manager.terminate_rx(); + let socket = this.socket_manager.acquire_http_socket().await?; + let (tx, rx) = socket.split(); + Ok(HyperProxyStream { + tx, + rx, + terminate: BroadcastStream::new(terminate_rx), + terminated: false, + }) + }) + } +} diff --git a/lib/wasix/src/runners/dproxy/hyper_proxy/mod.rs b/lib/wasix/src/runners/dproxy/hyper_proxy/mod.rs new file mode 100644 index 00000000000..5dcc7c785b4 --- /dev/null +++ b/lib/wasix/src/runners/dproxy/hyper_proxy/mod.rs @@ -0,0 +1,18 @@ +mod builder; +mod connector; +mod stream; + +pub use builder::*; +pub use connector::*; +pub use stream::*; + +use super::*; + +pub(super) use hyper::{service::Service, Uri}; +pub(super) use std::pin::Pin; +pub(super) type BoxError = Box; +pub(super) use std::{ + future::Future, + task::{Context, Poll}, +}; +pub(super) use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; diff --git a/lib/wasix/src/runners/dproxy/hyper_proxy/stream.rs b/lib/wasix/src/runners/dproxy/hyper_proxy/stream.rs new file mode 100644 index 00000000000..fd5d0d17493 --- /dev/null +++ b/lib/wasix/src/runners/dproxy/hyper_proxy/stream.rs @@ -0,0 +1,93 @@ +use std::io; + +use futures::Stream; +use hyper::client::connect::Connected; +use 
tokio_stream::wrappers::BroadcastStream; +use virtual_net::tcp_pair::{TcpSocketHalfRx, TcpSocketHalfTx}; + +use super::*; + +#[derive(Debug)] +pub struct HyperProxyStream { + pub(super) tx: TcpSocketHalfTx, + pub(super) rx: TcpSocketHalfRx, + pub(super) terminate: BroadcastStream<()>, + pub(super) terminated: bool, +} + +impl AsyncRead for HyperProxyStream { + #[inline] + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + if let Poll::Ready(ret) = Pin::new(&mut self.rx).poll_read(cx, buf) { + return Poll::Ready(ret); + } + if self.terminated { + return Poll::Ready(Err(io::ErrorKind::ConnectionReset.into())); + } + if let Poll::Ready(Some(_)) = Pin::new(&mut self.terminate).poll_next(cx) { + return Poll::Ready(Err(io::ErrorKind::ConnectionReset.into())); + } + Poll::Pending + } +} + +impl AsyncWrite for HyperProxyStream { + #[inline] + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + if let Poll::Ready(ret) = Pin::new(&mut self.tx).poll_write(cx, buf) { + return Poll::Ready(ret); + } + if self.terminated { + return Poll::Ready(Err(io::ErrorKind::ConnectionReset.into())); + } + if let Poll::Ready(Some(_)) = Pin::new(&mut self.terminate).poll_next(cx) { + return Poll::Ready(Err(io::ErrorKind::ConnectionReset.into())); + } + Poll::Pending + } + + #[inline] + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + if let Poll::Ready(ret) = Pin::new(&mut self.tx).poll_flush(cx) { + return Poll::Ready(ret); + } + if self.terminated { + return Poll::Ready(Err(io::ErrorKind::ConnectionReset.into())); + } + if let Poll::Ready(Some(_)) = Pin::new(&mut self.terminate).poll_next(cx) { + return Poll::Ready(Err(io::ErrorKind::ConnectionReset.into())); + } + Poll::Pending + } + + #[inline] + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + if let Poll::Ready(ret) = Pin::new(&mut self.tx).poll_shutdown(cx) { + return Poll::Ready(ret); + } + if self.terminated { + return Poll::Ready(Err(io::ErrorKind::ConnectionReset.into())); + } + if let Poll::Ready(Some(_)) = Pin::new(&mut self.terminate).poll_next(cx) { + return Poll::Ready(Err(io::ErrorKind::ConnectionReset.into())); + } + Poll::Pending + } +} + +impl hyper::client::connect::Connection for HyperProxyStream { + fn connected(&self) -> Connected { + Connected::new().proxy(true) + } +} diff --git a/lib/wasix/src/runners/dproxy/instance.rs b/lib/wasix/src/runners/dproxy/instance.rs new file mode 100644 index 00000000000..edbb8e656fe --- /dev/null +++ b/lib/wasix/src/runners/dproxy/instance.rs @@ -0,0 +1,15 @@ +use std::{ + sync::{Arc, Mutex}, + time::Instant, +}; + +use super::{hyper_proxy::HyperProxyConnector, socket_manager::SocketManager}; + +#[derive(Debug, Clone)] +pub struct DProxyInstance { + #[allow(unused)] + pub(super) last_used: Arc>, + #[allow(unused)] + pub(super) socket_manager: Arc, + pub(super) client: hyper::Client, +} diff --git a/lib/wasix/src/runners/dproxy/mod.rs b/lib/wasix/src/runners/dproxy/mod.rs new file mode 100644 index 00000000000..56376089c8c --- /dev/null +++ b/lib/wasix/src/runners/dproxy/mod.rs @@ -0,0 +1,12 @@ +mod factory; +pub(super) mod handler; +mod hyper_proxy; +mod instance; +mod networking; +mod runner; +mod shard; +mod socket_manager; + +pub use self::factory::DProxyInstanceFactory; +pub use self::instance::DProxyInstance; +pub use self::runner::{Config, DProxyRunner}; diff --git a/lib/wasix/src/runners/dproxy/networking.rs 
b/lib/wasix/src/runners/dproxy/networking.rs new file mode 100644 index 00000000000..8e46f1ce526 --- /dev/null +++ b/lib/wasix/src/runners/dproxy/networking.rs @@ -0,0 +1,223 @@ +use std::{ + net::{IpAddr, SocketAddr}, + sync::{Arc, Mutex}, + task::{Context, Poll, Waker}, + time::Duration, +}; + +use virtual_net::{ + host::LocalNetworking, loopback::LoopbackNetworking, IpCidr, IpRoute, NetworkError, + StreamSecurity, VirtualIcmpSocket, VirtualNetworking, VirtualRawSocket, VirtualTcpListener, + VirtualTcpSocket, VirtualUdpSocket, +}; + +#[derive(Debug, Default)] +struct LocalWithLoopbackNetworkingListening { + addresses: Vec, + wakers: Vec, +} + +#[derive(Debug, Clone)] +pub struct LocalWithLoopbackNetworking { + inner_networking: Arc, + local_listening: Arc>, + loopback_networking: LoopbackNetworking, +} + +impl LocalWithLoopbackNetworking { + pub fn new() -> Self { + lazy_static::lazy_static! { + static ref LOCAL_NETWORKING: Arc = Arc::new(LocalNetworking::default()); + } + Self { + local_listening: Default::default(), + inner_networking: LOCAL_NETWORKING.clone(), + loopback_networking: LoopbackNetworking::new(), + } + } + + pub fn poll_listening(&self, cx: &mut Context<'_>) -> Poll { + let mut listening = self.local_listening.lock().unwrap(); + + if let Some(addr) = listening.addresses.first() { + return Poll::Ready(*addr); + } + + if !listening.wakers.iter().any(|w| w.will_wake(cx.waker())) { + listening.wakers.push(cx.waker().clone()); + } + + Poll::Pending + } + + pub fn register_listener(&self, addr: SocketAddr) { + let mut listening = self.local_listening.lock().unwrap(); + listening.addresses.push(addr); + listening.addresses.sort_by_key(|a| a.port()); + listening.wakers.drain(..).for_each(|w| w.wake()); + } + + pub fn loopback_networking(&self) -> LoopbackNetworking { + self.loopback_networking.clone() + } +} + +#[allow(unused_variables)] +#[async_trait::async_trait] +impl VirtualNetworking for LocalWithLoopbackNetworking { + /// Bridges this local network with a remote network, which is required in + /// order to make lower level networking calls (such as UDP/TCP) + async fn bridge( + &self, + network: &str, + access_token: &str, + security: StreamSecurity, + ) -> Result<(), NetworkError> { + self.inner_networking + .bridge(network, access_token, security) + .await + } + + /// Disconnects from the remote network essentially unbridging it + async fn unbridge(&self) -> Result<(), NetworkError> { + self.inner_networking.unbridge().await + } + + /// Acquires an IP address on the network and configures the routing tables + async fn dhcp_acquire(&self) -> Result, NetworkError> { + self.inner_networking.dhcp_acquire().await + } + + /// Adds a static IP address to the interface with a netmask prefix + async fn ip_add(&self, ip: IpAddr, prefix: u8) -> Result<(), NetworkError> { + self.inner_networking.ip_add(ip, prefix).await + } + + /// Removes a static (or dynamic) IP address from the interface + async fn ip_remove(&self, ip: IpAddr) -> Result<(), NetworkError> { + self.inner_networking.ip_remove(ip).await + } + + /// Clears all the assigned IP addresses for this interface + async fn ip_clear(&self) -> Result<(), NetworkError> { + self.inner_networking.ip_clear().await + } + + /// Lists all the IP addresses currently assigned to this interface + async fn ip_list(&self) -> Result, NetworkError> { + self.inner_networking.ip_list().await + } + + /// Returns the hardware MAC address for this interface + async fn mac(&self) -> Result<[u8; 6], NetworkError> { + 
self.inner_networking.mac().await + } + + /// Adds a default gateway to the routing table + async fn gateway_set(&self, ip: IpAddr) -> Result<(), NetworkError> { + self.inner_networking.gateway_set(ip).await + } + + /// Adds a specific route to the routing table + async fn route_add( + &self, + cidr: IpCidr, + via_router: IpAddr, + preferred_until: Option, + expires_at: Option, + ) -> Result<(), NetworkError> { + self.inner_networking + .route_add(cidr, via_router, preferred_until, expires_at) + .await + } + + /// Removes a routing rule from the routing table + async fn route_remove(&self, cidr: IpAddr) -> Result<(), NetworkError> { + self.inner_networking.route_remove(cidr).await + } + + /// Clears the routing table for this interface + async fn route_clear(&self) -> Result<(), NetworkError> { + self.inner_networking.route_clear().await + } + + /// Lists all the routes defined in the routing table for this interface + async fn route_list(&self) -> Result, NetworkError> { + self.inner_networking.route_list().await + } + + /// Creates a low level socket that can read and write Ethernet packets + /// directly to the interface + async fn bind_raw(&self) -> Result, NetworkError> { + self.inner_networking.bind_raw().await + } + + /// Listens for TCP connections on a specific IP and Port combination + /// Multiple servers (processes or threads) can bind to the same port if they each set + /// the reuse-port and-or reuse-addr flags + async fn listen_tcp( + &self, + addr: SocketAddr, + only_v6: bool, + reuse_port: bool, + reuse_addr: bool, + ) -> Result, NetworkError> { + let backlog = 1024; + + tracing::debug!("registering listener on loopback networking"); + + let ret: Result, NetworkError> = self + .loopback_networking + .listen_tcp(addr, only_v6, reuse_port, reuse_addr) + .await; + + if ret.is_ok() { + tracing::debug!("registering listener on loopback networking"); + self.register_listener(addr); + } + + ret + } + + /// Opens a UDP socket that listens on a specific IP and Port combination + /// Multiple servers (processes or threads) can bind to the same port if they each set + /// the reuse-port and-or reuse-addr flags + async fn bind_udp( + &self, + addr: SocketAddr, + reuse_port: bool, + reuse_addr: bool, + ) -> Result, NetworkError> { + self.inner_networking + .bind_udp(addr, reuse_port, reuse_addr) + .await + } + + /// Creates a socket that can be used to send and receive ICMP packets + /// from a paritcular IP address + async fn bind_icmp( + &self, + addr: IpAddr, + ) -> Result, NetworkError> { + self.inner_networking.bind_icmp(addr).await + } + + /// Opens a TCP connection to a particular destination IP address and port + async fn connect_tcp( + &self, + addr: SocketAddr, + peer: SocketAddr, + ) -> Result, NetworkError> { + self.inner_networking.connect_tcp(addr, peer).await + } + + /// Performs DNS resolution for a specific hostname + async fn resolve( + &self, + host: &str, + port: Option, + dns_server: Option, + ) -> Result, NetworkError> { + self.inner_networking.resolve(host, port, dns_server).await + } +} diff --git a/lib/wasix/src/runners/dproxy/runner.rs b/lib/wasix/src/runners/dproxy/runner.rs new file mode 100644 index 00000000000..5fced6201f1 --- /dev/null +++ b/lib/wasix/src/runners/dproxy/runner.rs @@ -0,0 +1,143 @@ +use std::{net::SocketAddr, sync::Arc, time::Duration}; + +use anyhow::{Context, Error}; +use http::{Request, Response}; +use hyper::Body; +use tower::{make::Shared, ServiceBuilder}; +use tower_http::{catch_panic::CatchPanicLayer, cors::CorsLayer, 
trace::TraceLayer}; +use tracing::Span; +use webc::metadata::Command; + +use crate::{ + bin_factory::BinaryPackage, + runners::wasi::WasiRunner, + runtime::{task_manager::VirtualTaskManagerExt, DynRuntime}, +}; + +use super::factory::DProxyInstanceFactory; + +#[derive(Debug)] +pub struct DProxyRunner { + config: Config, + factory: DProxyInstanceFactory, +} + +impl DProxyRunner { + pub fn new(inner: WasiRunner, pkg: &BinaryPackage) -> Self { + Self { + config: Config::new(inner, pkg), + factory: DProxyInstanceFactory::new(), + } + } + + pub fn config(&mut self) -> &mut Config { + &mut self.config + } +} + +/// The base URI used by a [`DProxy`] runner. +pub const DPROXY_RUNNER_URI: &str = "https://webc.org/runner/dproxy"; + +impl crate::runners::Runner for DProxyRunner { + fn can_run_command(command: &Command) -> Result { + Ok(command.runner.starts_with(DPROXY_RUNNER_URI)) + } + + fn run_command( + &mut self, + command_name: &str, + _pkg: &BinaryPackage, + runtime: Arc, + ) -> Result<(), Error> { + // Create the handler that will process the HTTP requests + let handler = super::handler::Handler::new( + self.config.clone(), + command_name.to_string(), + self.factory.clone(), + runtime.clone(), + ); + + // We create a HTTP server which will reverse proxy all the requests + // to the proxy workload + let service = ServiceBuilder::new() + .layer( + TraceLayer::new_for_http() + .make_span_with(|request: &Request| { + tracing::info_span!( + "request", + method = %request.method(), + uri = %request.uri(), + status_code = tracing::field::Empty, + ) + }) + .on_response(|response: &Response<_>, _latency: Duration, span: &Span| { + span.record("status_code", &tracing::field::display(response.status())); + tracing::info!("response generated") + }), + ) + .layer(CatchPanicLayer::new()) + .layer(CorsLayer::permissive()) + .service(handler); + + let address = self.config.addr; + tracing::info!(%address, "Starting the DProxy server"); + + runtime + .task_manager() + .spawn_and_block_on(async move { + let (shutdown, _abort_handle) = + futures::future::abortable(futures::future::pending::<()>()); + + hyper::Server::bind(&address) + .serve(Shared::new(service)) + .with_graceful_shutdown(async { + let _ = shutdown.await; + tracing::info!("Shutting down gracefully"); + }) + .await + }) + .context("Unable to start the server")??; + + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub struct Config { + pub(crate) inner: WasiRunner, + pub(crate) addr: SocketAddr, + pub(crate) pkg: BinaryPackage, + pub(crate) proxy_connect_init_timeout: Duration, + pub(crate) proxy_connect_nominal_timeout: Duration, +} + +impl Config { + pub fn new(inner: WasiRunner, pkg: &BinaryPackage) -> Self { + Self { + inner, + pkg: pkg.clone(), + addr: ([127, 0, 0, 1], 8000).into(), + proxy_connect_init_timeout: Duration::from_secs(30), + proxy_connect_nominal_timeout: Duration::from_secs(30), + } + } + + pub fn addr(&mut self, addr: SocketAddr) -> &mut Self { + self.addr = addr; + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn send_and_sync() { + fn assert_send() {} + fn assert_sync() {} + + assert_send::(); + assert_sync::(); + } +} diff --git a/lib/wasix/src/runners/dproxy/shard.rs b/lib/wasix/src/runners/dproxy/shard.rs new file mode 100644 index 00000000000..bcea1333ae3 --- /dev/null +++ b/lib/wasix/src/runners/dproxy/shard.rs @@ -0,0 +1,6 @@ +#[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Shard { + #[default] + Singleton, + ById(u64), +} diff --git 
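
// ---------------------------------------------------------------------------
// [editor's note] Illustrative sketch, not part of the patch above. It shows
// the overall shape of the HTTP front-end that `DProxyRunner::run_command`
// starts: bind a hyper server (0.14-style API, matching the `hyper::Body` and
// `hyper::Server::bind` usage in this patch), serve a service, and tie
// shutdown to a future. The tower layers (TraceLayer, CatchPanicLayer,
// CorsLayer) and the WASIX-backed handler are omitted; the handler below is a
// placeholder.
// ---------------------------------------------------------------------------
use std::{convert::Infallible, net::SocketAddr};

use hyper::{
    service::{make_service_fn, service_fn},
    Body, Request, Response, Server,
};

async fn handle(_req: Request<Body>) -> Result<Response<Body>, Infallible> {
    // The real runner forwards the request to the guest over a loopback socket.
    Ok(Response::new(Body::from("proxied response placeholder")))
}

#[tokio::main]
async fn main() -> Result<(), hyper::Error> {
    // Same default address as Config::new in runner.rs.
    let addr: SocketAddr = ([127, 0, 0, 1], 8000).into();

    let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();

    Server::bind(&addr)
        .serve(make_service_fn(|_conn| async {
            Ok::<_, Infallible>(service_fn(handle))
        }))
        .with_graceful_shutdown(async {
            // The runner drives this with an abortable pending future instead.
            let _ = shutdown_rx.await;
        })
        .await
}
// --------------------------- end of editor's sketch -------------------------
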
a/lib/wasix/src/runners/dproxy/socket_manager.rs b/lib/wasix/src/runners/dproxy/socket_manager.rs new file mode 100644 index 00000000000..658832fde1a --- /dev/null +++ b/lib/wasix/src/runners/dproxy/socket_manager.rs @@ -0,0 +1,102 @@ +use std::{ + future::poll_fn, + net::{IpAddr, Ipv4Addr, SocketAddr}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + task::{Context, Poll}, + time::Duration, +}; + +use derivative::Derivative; +use tokio::sync::broadcast; +use virtual_net::{tcp_pair::TcpSocketHalf, LoopbackNetworking}; + +pub type PollListeningFn = + Arc) -> Poll + Send + Sync + 'static>; + +#[derive(Derivative)] +#[derivative(Debug)] +pub struct SocketManager { + #[derivative(Debug = "ignore")] + poll_listening: PollListeningFn, + loopback_networking: LoopbackNetworking, + proxy_connect_init_timeout: Duration, + proxy_connect_nominal_timeout: Duration, + is_running: AtomicBool, + is_terminated: AtomicBool, + terminate_all: broadcast::Sender<()>, +} + +impl SocketManager { + pub fn new( + poll_listening: PollListeningFn, + loopback_networking: LoopbackNetworking, + proxy_connect_init_timeout: Duration, + proxy_connect_nominal_timeout: Duration, + ) -> Self { + Self { + poll_listening, + loopback_networking, + proxy_connect_init_timeout, + proxy_connect_nominal_timeout, + is_running: AtomicBool::new(false), + is_terminated: AtomicBool::new(false), + terminate_all: broadcast::channel(1).0, + } + } + + pub fn shutdown(&self) { + self.is_terminated.store(true, Ordering::SeqCst); + self.terminate_all.send(()).ok(); + } + + pub fn terminate_rx(&self) -> broadcast::Receiver<()> { + self.terminate_all.subscribe() + } + + pub async fn acquire_http_socket(&self) -> anyhow::Result { + let mut rx_terminate = self.terminate_all.subscribe(); + + if self.is_terminated.load(Ordering::SeqCst) { + return Err(anyhow::anyhow!( + "failed to open HTTP socket as the instance has terminated" + )); + } + let connect_timeout = if self.is_running.load(Ordering::SeqCst) { + self.proxy_connect_nominal_timeout + } else { + self.proxy_connect_init_timeout + }; + + let ret = tokio::select! 
{ + socket = tokio::time::timeout(connect_timeout, self.open_proxy_http_socket()) => socket??, + _ = rx_terminate.recv() => { + return Err(anyhow::anyhow!( + "failed to open HTTP socket as the instance has terminated" + )); + } + }; + self.is_running.store(true, Ordering::Relaxed); + Ok(ret) + } + + pub async fn open_proxy_http_socket(&self) -> anyhow::Result { + // We need to find the destination address + let poll_listening = self.poll_listening.clone(); + let port = poll_fn(|cx| poll_listening(cx)).await.port(); + let dst = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), port); + + // Open a connection directly to the loopback port + // (or at least try to) + self.loopback_networking + .loopback_connect_to(SocketAddr::new(Ipv4Addr::UNSPECIFIED.into(), 0), dst) + .ok_or_else(|| { + tracing::debug!( + "proxy connection attempt failed - could not connect to http server socket as the loopback socket is not open", + ); + anyhow::anyhow!("failed to open HTTP socket as the loopback socket is not open") + }) + } +} diff --git a/lib/wasix/src/runners/mod.rs b/lib/wasix/src/runners/mod.rs index fa64d05a8cd..a2bf9eb4128 100644 --- a/lib/wasix/src/runners/mod.rs +++ b/lib/wasix/src/runners/mod.rs @@ -2,6 +2,8 @@ mod runner; #[cfg(feature = "webc_runner_rt_dcgi")] pub mod dcgi; +#[cfg(feature = "webc_runner_rt_dproxy")] +pub mod dproxy; #[cfg(feature = "webc_runner_rt_emscripten")] pub mod emscripten; pub mod wasi; diff --git a/lib/wasix/src/runners/wasi.rs b/lib/wasix/src/runners/wasi.rs index fb7fcb5d2e6..c36b95cbb65 100644 --- a/lib/wasix/src/runners/wasi.rs +++ b/lib/wasix/src/runners/wasi.rs @@ -326,6 +326,9 @@ impl crate::runners::Runner for WasiRunner { .await .context("Spawn failed")?; + #[cfg(feature = "ctrlc")] + task_handle.install_ctrlc_handler(); + task_handle .wait_finished() .await diff --git a/lib/wasix/src/runtime/task_manager/mod.rs b/lib/wasix/src/runtime/task_manager/mod.rs index b55e0701c4a..49699c42f85 100644 --- a/lib/wasix/src/runtime/task_manager/mod.rs +++ b/lib/wasix/src/runtime/task_manager/mod.rs @@ -15,7 +15,7 @@ use wasmer_wasix_types::wasi::{Errno, ExitCode}; use crate::os::task::thread::WasiThreadError; use crate::syscalls::AsyncifyFuture; -use crate::{capture_instance_snapshot, InstanceSnapshot, WasiEnv, WasiFunctionEnv, WasiThread}; +use crate::{capture_store_snapshot, StoreSnapshot, WasiEnv, WasiFunctionEnv, WasiThread}; pub use virtual_mio::waker::*; @@ -76,7 +76,7 @@ pub struct TaskWasm<'a, 'b> { pub recycle: Option>, pub env: WasiEnv, pub module: Module, - pub snapshot: Option<&'b InstanceSnapshot>, + pub globals: Option<&'b StoreSnapshot>, pub spawn_type: SpawnMemoryType<'a>, pub trigger: Option>, pub update_layout: bool, @@ -89,7 +89,7 @@ impl<'a, 'b> TaskWasm<'a, 'b> { run, env, module, - snapshot: None, + globals: None, spawn_type: match shared_memory { Some(ty) => SpawnMemoryType::CreateMemoryOfType(ty), None => SpawnMemoryType::CreateMemory, @@ -112,8 +112,8 @@ impl<'a, 'b> TaskWasm<'a, 'b> { self } - pub fn with_snapshot(mut self, snapshot: &'b InstanceSnapshot) -> Self { - self.snapshot.replace(snapshot); + pub fn with_globals(mut self, snapshot: &'b StoreSnapshot) -> Self { + self.globals.replace(snapshot); self } @@ -346,7 +346,7 @@ impl dyn VirtualTaskManager { } } - let snapshot = capture_instance_snapshot(&mut store.as_store_mut()); + let snapshot = capture_store_snapshot(&mut store.as_store_mut()); let env = ctx.data(&store); let module = env.inner().module_clone(); let memory = env.inner().memory_clone(); @@ -374,7 +374,7 @@ impl dyn 
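
// ---------------------------------------------------------------------------
// [editor's note] Illustrative sketch, not part of the patch above. It shows
// the waker handshake between `register_listener`/`poll_listening` in
// networking.rs and the `poll_fn(...)` wait in socket_manager.rs: the socket
// manager parks until the guest binds its first TCP listener, then learns the
// port to proxy to. Types are simplified stand-ins for the wasmer ones, and
// the timeout is a trimmed-down version of the connect-timeout guard in
// `SocketManager::acquire_http_socket`.
// ---------------------------------------------------------------------------
use std::{
    future::poll_fn,
    net::SocketAddr,
    sync::{Arc, Mutex},
    task::{Context, Poll, Waker},
    time::Duration,
};

#[derive(Default)]
struct Listening {
    addresses: Vec<SocketAddr>,
    wakers: Vec<Waker>,
}

#[derive(Clone, Default)]
struct ListenerRegistry(Arc<Mutex<Listening>>);

impl ListenerRegistry {
    // Ready once the guest has bound a socket; otherwise park the caller's
    // waker so `register` can wake it later.
    fn poll_listening(&self, cx: &mut Context<'_>) -> Poll<SocketAddr> {
        let mut inner = self.0.lock().unwrap();
        if let Some(addr) = inner.addresses.first() {
            return Poll::Ready(*addr);
        }
        if !inner.wakers.iter().any(|w| w.will_wake(cx.waker())) {
            inner.wakers.push(cx.waker().clone());
        }
        Poll::Pending
    }

    // Called from the listen path (compare `listen_tcp` in networking.rs).
    fn register(&self, addr: SocketAddr) {
        let mut inner = self.0.lock().unwrap();
        inner.addresses.push(addr);
        for waker in inner.wakers.drain(..) {
            waker.wake();
        }
    }
}

#[tokio::main]
async fn main() {
    let registry = ListenerRegistry::default();

    // Stand-in for the guest binding its HTTP listener a little later.
    let guest_side = registry.clone();
    tokio::spawn(async move {
        tokio::time::sleep(Duration::from_millis(50)).await;
        guest_side.register(([127, 0, 0, 1], 8080).into());
    });

    // The socket manager side: wait (with a timeout) for the first listener.
    let addr = tokio::time::timeout(
        Duration::from_secs(30),
        poll_fn(|cx| registry.poll_listening(cx)),
    )
    .await
    .expect("timed out waiting for the guest to open its HTTP listener");

    println!("guest HTTP server is listening on {addr}");
}
// --------------------------- end of editor's sketch -------------------------
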
VirtualTaskManager { false, ) .with_memory(SpawnMemoryType::ShareMemory(memory, store.as_store_ref())) - .with_snapshot(&snapshot) + .with_globals(&snapshot) .with_trigger(Box::new(move || { Box::pin(async move { let mut poller = AsyncifyPollerOwned { @@ -390,7 +390,7 @@ impl dyn VirtualTaskManager { } }; - tracing::trace!("deep sleep woken - {:?}", res); + tracing::trace!("deep sleep woken - res.len={}", res.len()); Ok(res) }) })), diff --git a/lib/wasix/src/runtime/task_manager/tokio.rs b/lib/wasix/src/runtime/task_manager/tokio.rs index 9f6b6752d64..39408896890 100644 --- a/lib/wasix/src/runtime/task_manager/tokio.rs +++ b/lib/wasix/src/runtime/task_manager/tokio.rs @@ -148,10 +148,10 @@ impl VirtualTaskManager for TokioTaskManager { // Create the context on a new store let run = task.run; let recycle = task.recycle; - let (ctx, store) = WasiFunctionEnv::new_with_store( + let (ctx, mut store) = WasiFunctionEnv::new_with_store( task.module, task.env, - task.snapshot, + task.globals, task.spawn_type, task.update_layout, )?; @@ -161,10 +161,38 @@ impl VirtualTaskManager for TokioTaskManager { if let Some(trigger) = task.trigger { tracing::trace!("spawning task_wasm trigger in async pool"); - let trigger = trigger(); + let mut trigger = trigger(); let pool = self.pool.clone(); self.rt.handle().spawn(async move { - let result = trigger.await; + // We wait for either the trigger or for a snapshot to take place + let result = loop { + let env = ctx.data(&store); + break tokio::select! { + r = &mut trigger => r, + _ = env.thread.wait_for_signal() => { + tracing::debug!("wait-for-signal(triggered)"); + let mut ctx = ctx.env.clone().into_mut(&mut store); + if let Err(err) = crate::WasiEnv::process_signals_and_exit(&mut ctx) { + match err { + crate::WasiError::Exit(code) => Err(code), + err => { + tracing::error!("failed to process signals - {}", err); + continue; + } + } + } else { + continue; + } + } + _ = crate::wait_for_snapshot(env) => { + tracing::debug!("wait-for-snapshot(triggered)"); + let mut ctx = ctx.env.clone().into_mut(&mut store); + crate::os::task::WasiProcessInner::do_checkpoints_from_outside(&mut ctx); + continue; + } + }; + }; + // Build the task that will go on the callback pool.execute(move || { // Invoke the callback diff --git a/lib/wasix/src/state/env.rs b/lib/wasix/src/state/env.rs index df95f341449..ce69a63d974 100644 --- a/lib/wasix/src/state/env.rs +++ b/lib/wasix/src/state/env.rs @@ -1,5 +1,3 @@ -#[cfg(feature = "journal")] -use std::collections::HashSet; use std::{ collections::HashMap, ops::Deref, @@ -20,6 +18,7 @@ use wasmer::{ use wasmer_wasix_types::{ types::Signal, wasi::{Errno, ExitCode, Snapshot0Clockid}, + wasix::ThreadStartType, }; #[cfg(feature = "journal")] @@ -332,10 +331,6 @@ pub struct WasiEnv { /// (this is normally used so that the instance can be reused later on) pub(crate) disable_fs_cleanup: bool, - /// List of situations that the process will checkpoint on - #[cfg(feature = "journal")] - snapshot_on: HashSet, - /// Inner functions and references that are loaded before the environment starts /// (inner is not safe to send between threads and so it is private and will /// not be cloned when `WasiEnv` is cloned) @@ -368,8 +363,6 @@ impl Clone for WasiEnv { enable_journal: self.enable_journal, enable_exponential_cpu_backoff: self.enable_exponential_cpu_backoff, replaying_journal: self.replaying_journal, - #[cfg(feature = "journal")] - snapshot_on: self.snapshot_on.clone(), disable_fs_cleanup: self.disable_fs_cleanup, } } @@ -384,7 +377,7 @@ impl 
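
// ---------------------------------------------------------------------------
// [editor's note] Illustrative sketch, not part of the patch above. It shows
// the shape of the new wait loop in the tokio task manager: the deep-sleep
// trigger is raced against "a signal arrived" and "a snapshot was requested",
// and only the trigger arm leaves the loop. The Notify-based futures below
// are stand-ins for the real WasiEnv signal/snapshot futures.
// ---------------------------------------------------------------------------
use std::{sync::Arc, time::Duration};

use tokio::sync::{oneshot, Notify};

async fn wait_for_signal(signals: &Notify) {
    signals.notified().await
}

async fn wait_for_snapshot(snapshots: &Notify) {
    snapshots.notified().await
}

#[tokio::main]
async fn main() {
    let (trigger_tx, trigger_rx) = oneshot::channel::<&'static str>();
    let signals = Arc::new(Notify::new());
    let snapshots = Arc::new(Notify::new());

    // Pretend a snapshot is requested while the guest sleeps, and the guest
    // wakes up shortly afterwards.
    let snapshots_remote = snapshots.clone();
    tokio::spawn(async move {
        tokio::time::sleep(Duration::from_millis(10)).await;
        snapshots_remote.notify_one();
        tokio::time::sleep(Duration::from_millis(10)).await;
        let _ = trigger_tx.send("deep sleep woken");
    });

    let mut trigger = trigger_rx;
    let result = loop {
        tokio::select! {
            r = &mut trigger => break r,
            _ = wait_for_signal(&signals) => {
                // Process the signals (possibly exiting), then keep waiting.
                continue;
            }
            _ = wait_for_snapshot(&snapshots) => {
                // Take the checkpoint from outside the guest, then keep waiting.
                println!("checkpoint requested while waiting for the trigger");
                continue;
            }
        }
    };

    println!("trigger resolved: {:?}", result);
}
// --------------------------- end of editor's sketch -------------------------
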
WasiEnv { /// Forking the WasiState is used when either fork or vfork is called pub fn fork(&self) -> Result<(Self, WasiThreadHandle), ControlPlaneError> { let process = self.control_plane.new_process(self.process.module_hash)?; - let handle = process.new_thread(self.layout.clone())?; + let handle = process.new_thread(self.layout.clone(), ThreadStartType::MainThread)?; let thread = handle.as_thread(); thread.copy_stack_from(&self.thread); @@ -410,8 +403,6 @@ impl WasiEnv { enable_journal: self.enable_journal, enable_exponential_cpu_backoff: self.enable_exponential_cpu_backoff, replaying_journal: false, - #[cfg(feature = "journal")] - snapshot_on: self.snapshot_on.clone(), disable_fs_cleanup: self.disable_fs_cleanup, }; Ok((new_env, handle)) @@ -437,10 +428,7 @@ impl WasiEnv { map.clear(); } self.state.fs.preopen_fds.write().unwrap().clear(); - self.state - .fs - .next_fd - .store(3, std::sync::atomic::Ordering::SeqCst); + self.state.fs.next_fd.set_val(3); *self.state.fs.current_dir.lock().unwrap() = "/".to_string(); // We need to rebuild the basic file descriptors @@ -470,6 +458,7 @@ impl WasiEnv { self.process.finished.clone(), self.process.compute.must_upgrade().register_task()?, self.thread.memory_layout().clone(), + self.thread.thread_start_type(), ); Ok(()) @@ -508,11 +497,16 @@ impl WasiEnv { init.control_plane.new_process(module_hash)? }; + #[cfg(feature = "journal")] + { + process.inner.0.lock().unwrap().snapshot_on = init.snapshot_on.into_iter().collect(); + } + let layout = WasiMemoryLayout::default(); let thread = if let Some(t) = init.thread { t } else { - process.new_thread(layout.clone())? + process.new_thread(layout.clone(), ThreadStartType::MainThread)? }; let mut env = Self { @@ -538,8 +532,6 @@ impl WasiEnv { runtime: init.runtime, bin_factory: init.bin_factory, capabilities: init.capabilities, - #[cfg(feature = "journal")] - snapshot_on: init.snapshot_on.into_iter().collect(), disable_fs_cleanup: false, }; env.owned_handles.push(thread); @@ -692,7 +684,7 @@ impl WasiEnv { } /// Porcesses any signals that are batched up or any forced exit codes - pub(crate) fn process_signals_and_exit(ctx: &mut FunctionEnvMut<'_, Self>) -> WasiResult { + pub fn process_signals_and_exit(ctx: &mut FunctionEnvMut<'_, Self>) -> WasiResult { // If a signal handler has never been set then we need to handle signals // differently let env = ctx.data(); @@ -820,6 +812,7 @@ impl WasiEnv { } Ok(true) } else { + tracing::trace!("no signal handler"); Ok(false) } } @@ -977,16 +970,18 @@ impl WasiEnv { /// Returns true if a particular snapshot trigger is enabled #[cfg(feature = "journal")] pub fn has_snapshot_trigger(&self, trigger: SnapshotTrigger) -> bool { - self.snapshot_on.contains(&trigger) + let guard = self.process.inner.0.lock().unwrap(); + guard.snapshot_on.contains(&trigger) } /// Returns true if a particular snapshot trigger is enabled #[cfg(feature = "journal")] pub fn pop_snapshot_trigger(&mut self, trigger: SnapshotTrigger) -> bool { + let mut guard = self.process.inner.0.lock().unwrap(); if trigger.only_once() { - self.snapshot_on.remove(&trigger) + guard.snapshot_on.remove(&trigger) } else { - self.snapshot_on.contains(&trigger) + guard.snapshot_on.contains(&trigger) } } diff --git a/lib/wasix/src/state/func_env.rs b/lib/wasix/src/state/func_env.rs index cff6ed1918a..001f3290f94 100644 --- a/lib/wasix/src/state/func_env.rs +++ b/lib/wasix/src/state/func_env.rs @@ -6,14 +6,16 @@ use wasmer::{ }; use wasmer_wasix_types::wasi::ExitCode; +#[allow(unused_imports)] +use 
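
// ---------------------------------------------------------------------------
// [editor's note] Illustrative sketch, not part of the patch above. It shows
// why `snapshot_on` moves from each `WasiEnv` clone into the shared process
// state: every thread (and every cloned env) must observe the same trigger
// set, and one-shot triggers must be consumed exactly once for the whole
// process. The `Trigger` enum and its variants are hypothetical stand-ins for
// `SnapshotTrigger`.
// ---------------------------------------------------------------------------
use std::collections::HashSet;
use std::sync::{Arc, Mutex};

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Trigger {
    FirstListen,
    PeriodicInterval,
}

impl Trigger {
    fn only_once(self) -> bool {
        matches!(self, Trigger::FirstListen)
    }
}

// Shared, process-level trigger set; clones hand out the same underlying state.
#[derive(Clone, Default)]
struct ProcessTriggers(Arc<Mutex<HashSet<Trigger>>>);

impl ProcessTriggers {
    fn insert(&self, t: Trigger) {
        self.0.lock().unwrap().insert(t);
    }
    fn has(&self, t: Trigger) -> bool {
        self.0.lock().unwrap().contains(&t)
    }
    // Mirrors pop_snapshot_trigger: one-shot triggers are removed, repeating
    // triggers are only observed.
    fn pop(&self, t: Trigger) -> bool {
        let mut guard = self.0.lock().unwrap();
        if t.only_once() {
            guard.remove(&t)
        } else {
            guard.contains(&t)
        }
    }
}

fn main() {
    let triggers = ProcessTriggers::default();
    triggers.insert(Trigger::FirstListen);

    // A cloned handle (think: another thread's WasiEnv) sees the same state.
    let clone = triggers.clone();
    assert!(clone.has(Trigger::FirstListen));
    assert!(clone.pop(Trigger::FirstListen));
    assert!(!triggers.pop(Trigger::FirstListen)); // one-shot: consumed for everyone
    println!("snapshot triggers are shared at the process level");
}
// --------------------------- end of editor's sketch -------------------------
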
crate::os::task::thread::RewindResultType; #[cfg(feature = "journal")] use crate::syscalls::restore_snapshot; use crate::{ import_object_for_all_wasi_versions, runtime::SpawnMemoryType, state::WasiInstanceHandles, - utils::{get_wasi_version, get_wasi_versions, store::restore_instance_snapshot}, - InstanceSnapshot, RewindStateOption, WasiEnv, WasiError, WasiRuntimeError, WasiThreadError, + utils::{get_wasi_version, get_wasi_versions, store::restore_store_snapshot}, + RewindStateOption, StoreSnapshot, WasiEnv, WasiError, WasiRuntimeError, WasiThreadError, }; /// The default stack size for WASIX - the number itself is the default that compilers @@ -39,7 +41,7 @@ impl WasiFunctionEnv { pub fn new_with_store( module: Module, env: WasiEnv, - snapshot: Option<&InstanceSnapshot>, + store_snapshot: Option<&StoreSnapshot>, spawn_type: SpawnMemoryType, update_layout: bool, ) -> Result<(Self, Store), WasiThreadError> { @@ -76,9 +78,8 @@ impl WasiFunctionEnv { })?; // Set all the globals - if let Some(snapshot) = snapshot { - tracing::trace!("restoring snapshot for new thread"); - restore_instance_snapshot(&mut store, snapshot); + if let Some(snapshot) = store_snapshot { + restore_store_snapshot(&mut store, snapshot); } Ok((ctx, store)) @@ -269,27 +270,47 @@ impl WasiFunctionEnv { return Err(err); } }; - rewind_state = rewind.map(|rewind| (rewind, None)); + rewind_state = rewind.map(|rewind| (rewind, RewindResultType::RewindRestart)); } self.data_mut(&mut store).replaying_journal = false; } - // The first event we save is an event that records the module hash. - // Note: This is used to detect if an incorrect journal is used on the wrong - // process or if a process has been recompiled - let wasm_hash = self.data(&store).process.module_hash.as_bytes(); - let mut ctx = self.env.clone().into_mut(&mut store); - crate::journal::JournalEffector::save_event( - &mut ctx, - crate::journal::JournalEntry::InitModuleV1 { wasm_hash }, - ) - .map_err(|err| { - WasiRuntimeError::Runtime(wasmer::RuntimeError::new(format!( - "journal failied to save the module initialization event - {}", - err - ))) - })?; + // If there is no rewind state then the journal is being replayed + // and hence we do not need to write an init module event + // + // But otherwise we need to notify the journal of the module hash + // so that recompiled modules will restart + if rewind_state.is_none() { + // The first event we save is an event that records the module hash. 
+ // Note: This is used to detect if an incorrect journal is used on the wrong + // process or if a process has been recompiled + let wasm_hash = self.data(&store).process.module_hash.as_bytes(); + let mut ctx = self.env.clone().into_mut(&mut store); + crate::journal::JournalEffector::save_event( + &mut ctx, + crate::journal::JournalEntry::InitModuleV1 { wasm_hash }, + ) + .map_err(|err| { + WasiRuntimeError::Runtime(wasmer::RuntimeError::new(format!( + "journal failed to save the module initialization event - {}", + err + ))) + })?; + } else { + // Otherwise we should emit a clear ethereal event + let mut ctx = self.env.clone().into_mut(&mut store); + crate::journal::JournalEffector::save_event( + &mut ctx, + crate::journal::JournalEntry::ClearEtherealV1, + ) + .map_err(|err| { + WasiRuntimeError::Runtime(wasmer::RuntimeError::new(format!( + "journal failed to save clear ethereal event - {}", + err + ))) + })?; + } } Ok(rewind_state) diff --git a/lib/wasix/src/state/run.rs b/lib/wasix/src/state/run.rs index d2ec22dc35b..f878f4422d3 100644 --- a/lib/wasix/src/state/run.rs +++ b/lib/wasix/src/state/run.rs @@ -2,7 +2,7 @@ use virtual_mio::InlineWaker; use wasmer::{RuntimeError, Store}; use wasmer_wasix_types::wasi::ExitCode; -use crate::{RewindStateOption, WasiError, WasiRuntimeError}; +use crate::{os::task::thread::RewindResultType, RewindStateOption, WasiError, WasiRuntimeError}; use super::*; @@ -42,6 +42,7 @@ impl WasiFunctionEnv { match this.bootstrap(&mut store) { Ok(a) => a, Err(err) => { + tracing::warn!("failed to bootstrap - {}", err); this.on_exit(&mut store, None); tx.send(Err(err)).ok(); return; @@ -157,7 +158,12 @@ fn handle_result( let tasks = env.data(&store).tasks().clone(); let rewind = work.rewind; let respawn = move |ctx, store, res| { - run_with_deep_sleep(store, Some((rewind, Some(res))), ctx, sender) + run_with_deep_sleep( + store, + Some((rewind, RewindResultType::RewindWithResult(res))), + ctx, + sender, + ) }; // Spawns the WASM process after a trigger diff --git a/lib/wasix/src/syscalls/journal.rs b/lib/wasix/src/syscalls/journal.rs deleted file mode 100644 index 8d9cbe01314..00000000000 --- a/lib/wasix/src/syscalls/journal.rs +++ /dev/null @@ -1,665 +0,0 @@ -use super::*; - -#[allow(clippy::extra_unused_type_parameters)] -#[cfg(not(feature = "journal"))] -pub fn maybe_snapshot_once( - ctx: FunctionEnvMut<'_, WasiEnv>, - _trigger: crate::journal::SnapshotTrigger, -) -> WasiResult> { - Ok(Ok(ctx)) -} - -#[cfg(feature = "journal")] -pub fn maybe_snapshot_once( - mut ctx: FunctionEnvMut<'_, WasiEnv>, - trigger: crate::journal::SnapshotTrigger, -) -> WasiResult> { - use crate::os::task::process::{WasiProcessCheckpoint, WasiProcessInner}; - - unsafe { handle_rewind_ext::(&mut ctx, HandleRewindType::Resultless) }; - - if !ctx.data().enable_journal { - return Ok(Ok(ctx)); - } - - if ctx.data_mut().pop_snapshot_trigger(trigger) { - let inner = ctx.data().process.inner.clone(); - let res = wasi_try_ok_ok!(WasiProcessInner::checkpoint::( - inner, - ctx, - WasiProcessCheckpoint::Snapshot { trigger }, - )?); - match res { - MaybeCheckpointResult::Unwinding => return Ok(Err(Errno::Success)), - MaybeCheckpointResult::NotThisTime(c) => { - ctx = c; - } - } - } - Ok(Ok(ctx)) -} - -#[allow(clippy::extra_unused_type_parameters)] -#[cfg(not(feature = "journal"))] -pub fn maybe_snapshot( - ctx: FunctionEnvMut<'_, WasiEnv>, -) -> WasiResult> { - Ok(Ok(ctx)) -} - -#[cfg(feature = "journal")] -pub fn maybe_snapshot( - mut ctx: FunctionEnvMut<'_, WasiEnv>, -) -> WasiResult> { - use 
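
// ---------------------------------------------------------------------------
// [editor's note] Illustrative sketch, not part of the patch above. It
// condenses the branch added to `WasiFunctionEnv::bootstrap` in func_env.rs:
// a fresh start (no rewind state) records the module hash so a recompiled
// module invalidates the journal, while a start that resumes from rewind
// state records a clear-ethereal event instead. The enum is a simplified
// stand-in for the relevant `JournalEntry` variants.
// ---------------------------------------------------------------------------
#[derive(Debug, PartialEq)]
enum BootstrapEvent {
    InitModule { wasm_hash: [u8; 8] },
    ClearEthereal,
}

fn first_journal_event(rewinding: bool, wasm_hash: [u8; 8]) -> BootstrapEvent {
    if rewinding {
        BootstrapEvent::ClearEthereal
    } else {
        BootstrapEvent::InitModule { wasm_hash }
    }
}

fn main() {
    assert_eq!(
        first_journal_event(false, *b"deadbeef"),
        BootstrapEvent::InitModule { wasm_hash: *b"deadbeef" }
    );
    assert_eq!(
        first_journal_event(true, *b"deadbeef"),
        BootstrapEvent::ClearEthereal
    );
    println!("bootstrap journal event selection behaves as expected");
}
// --------------------------- end of editor's sketch -------------------------
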
crate::os::task::process::{WasiProcessCheckpoint, WasiProcessInner}; - - if !ctx.data().enable_journal { - return Ok(Ok(ctx)); - } - - let inner = ctx.data().process.inner.clone(); - let res = wasi_try_ok_ok!(WasiProcessInner::maybe_checkpoint::(inner, ctx)?); - match res { - MaybeCheckpointResult::Unwinding => return Ok(Err(Errno::Success)), - MaybeCheckpointResult::NotThisTime(c) => { - ctx = c; - } - } - Ok(Ok(ctx)) -} - -/// Safety: This function manipulates the memory of the process and thus must -/// be executed by the WASM process thread itself. -/// -#[allow(clippy::result_large_err)] -#[cfg(feature = "journal")] -pub unsafe fn restore_snapshot( - mut ctx: FunctionEnvMut<'_, WasiEnv>, - journal: Arc, - bootstrapping: bool, -) -> Result, WasiRuntimeError> { - use std::ops::Range; - - use crate::journal::Journal; - - // We delay the spawning of threads until the end as its - // possible that the threads will be cancelled before all the - // events finished the streaming process - let mut spawn_threads: HashMap = Default::default(); - - // We delay the memory updates until the end as its possible the - // memory will be cleared before all the events finished the - // streaming process - let mut update_memory: HashMap, Cow<'_, [u8]>> = Default::default(); - let mut update_tty = None; - - // We capture the stdout and stderr while we replay - let mut stdout = Vec::new(); - let mut stderr = Vec::new(); - let mut stdout_fds = HashSet::new(); - let mut stderr_fds = HashSet::new(); - stdout_fds.insert(1 as WasiFd); - stderr_fds.insert(2 as WasiFd); - - // Loop through all the events and process them - let cur_module_hash = ctx.data().process.module_hash.as_bytes(); - let mut journal_module_hash = None; - let mut rewind = None; - while let Some(next) = journal.read().map_err(anyhow_err_to_runtime_err)? 
{ - tracing::trace!("Restoring snapshot event - {next:?}"); - match next { - crate::journal::JournalEntry::InitModuleV1 { wasm_hash } => { - journal_module_hash.replace(wasm_hash); - } - crate::journal::JournalEntry::ProcessExitV1 { exit_code } => { - if bootstrapping { - rewind = None; - spawn_threads.clear(); - update_memory.clear(); - update_tty.take(); - stdout.clear(); - stderr.clear(); - stdout_fds.clear(); - stderr_fds.clear(); - stdout_fds.insert(1 as WasiFd); - stderr_fds.insert(2 as WasiFd); - } else { - JournalEffector::apply_process_exit(&mut ctx, exit_code) - .map_err(anyhow_err_to_runtime_err)?; - } - } - crate::journal::JournalEntry::FileDescriptorWriteV1 { - fd, - offset, - data, - is_64bit, - } => { - if stdout_fds.contains(&fd) { - stdout.push((offset, data, is_64bit)); - continue; - } - if stderr_fds.contains(&fd) { - stderr.push((offset, data, is_64bit)); - continue; - } - - if is_64bit { - JournalEffector::apply_fd_write::(&ctx, fd, offset, data) - } else { - JournalEffector::apply_fd_write::(&ctx, fd, offset, data) - } - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::FileDescriptorSeekV1 { fd, offset, whence } => { - JournalEffector::apply_fd_seek(&mut ctx, fd, offset, whence) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::UpdateMemoryRegionV1 { region, data } => { - if Some(cur_module_hash) != journal_module_hash { - continue; - } - - if bootstrapping { - update_memory.insert(region, data.clone()); - } else { - JournalEffector::apply_memory(&mut ctx, region, &data) - .map_err(anyhow_err_to_runtime_err)?; - } - } - crate::journal::JournalEntry::CloseThreadV1 { id, exit_code } => { - if id == ctx.data().tid().raw() { - if bootstrapping { - rewind = None; - spawn_threads.clear(); - update_memory.clear(); - update_tty.take(); - stdout.clear(); - stderr.clear(); - stdout_fds.clear(); - stderr_fds.clear(); - stdout_fds.insert(1 as WasiFd); - stderr_fds.insert(2 as WasiFd); - } else { - JournalEffector::apply_process_exit(&mut ctx, exit_code) - .map_err(anyhow_err_to_runtime_err)?; - } - } else if bootstrapping { - spawn_threads.remove(&Into::::into(id)); - } else { - JournalEffector::apply_thread_exit( - &mut ctx, - Into::::into(id), - exit_code, - ) - .map_err(anyhow_err_to_runtime_err)?; - } - } - crate::journal::JournalEntry::SetThreadV1 { - id, - call_stack, - memory_stack, - store_data, - is_64bit, - } => { - if Some(cur_module_hash) != journal_module_hash { - continue; - } - - let state = RewindState { - memory_stack: memory_stack.to_vec().into(), - rewind_stack: call_stack.to_vec().into(), - store_data: store_data.to_vec().into(), - is_64bit, - }; - - let id = Into::::into(id); - if id == ctx.data().tid() { - rewind.replace(state); - } else if bootstrapping { - spawn_threads.insert(id, state); - } else { - return Err(WasiRuntimeError::Runtime(RuntimeError::user( - anyhow::format_err!( - "Snapshot restoration does not currently support live updates of running threads." 
- ) - .into(), - ))); - } - } - crate::journal::JournalEntry::CloseFileDescriptorV1 { fd } => { - stdout_fds.remove(&fd); - stderr_fds.remove(&fd); - JournalEffector::apply_fd_close(&mut ctx, fd).map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::OpenFileDescriptorV1 { - fd, - dirfd, - dirflags, - path, - o_flags, - fs_rights_base, - fs_rights_inheriting, - fs_flags, - } => { - JournalEffector::apply_path_open( - &mut ctx, - fd, - dirfd, - dirflags, - &path, - o_flags, - fs_rights_base, - fs_rights_inheriting, - fs_flags, - ) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::RemoveDirectoryV1 { fd, path } => { - JournalEffector::apply_path_remove_directory(&mut ctx, fd, &path) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::UnlinkFileV1 { fd, path } => { - JournalEffector::apply_path_unlink(&mut ctx, fd, &path) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::PathRenameV1 { - old_fd, - old_path, - new_fd, - new_path, - } => { - JournalEffector::apply_path_rename(&mut ctx, old_fd, &old_path, new_fd, &new_path) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::SnapshotV1 { when: _, trigger } => { - if Some(cur_module_hash) != journal_module_hash { - continue; - } - ctx.data_mut().pop_snapshot_trigger(trigger); - } - crate::journal::JournalEntry::SetClockTimeV1 { clock_id, time } => { - JournalEffector::apply_clock_time_set(&mut ctx, clock_id, time) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::RenumberFileDescriptorV1 { old_fd, new_fd } => { - if old_fd != new_fd { - stdout_fds.remove(&new_fd); - stderr_fds.remove(&new_fd); - } - if stdout_fds.remove(&old_fd) { - stdout_fds.insert(new_fd); - } - if stderr_fds.remove(&old_fd) { - stderr_fds.insert(new_fd); - } - JournalEffector::apply_fd_renumber(&mut ctx, old_fd, new_fd) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::DuplicateFileDescriptorV1 { - original_fd, - copied_fd, - } => { - if original_fd != copied_fd { - stdout_fds.remove(&copied_fd); - stderr_fds.remove(&copied_fd); - } - if stdout_fds.contains(&original_fd) { - stdout_fds.insert(copied_fd); - } - if stderr_fds.contains(&original_fd) { - stderr_fds.insert(copied_fd); - } - JournalEffector::apply_fd_duplicate(&mut ctx, original_fd, copied_fd) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::CreateDirectoryV1 { fd, path } => { - JournalEffector::apply_path_create_directory(&mut ctx, fd, &path) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::PathSetTimesV1 { - fd, - flags, - path, - st_atim, - st_mtim, - fst_flags, - } => { - JournalEffector::apply_path_set_times( - &mut ctx, fd, flags, &path, st_atim, st_mtim, fst_flags, - ) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::FileDescriptorSetTimesV1 { - fd, - st_atim, - st_mtim, - fst_flags, - } => { - JournalEffector::apply_fd_set_times(&mut ctx, fd, st_atim, st_mtim, fst_flags) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::FileDescriptorSetSizeV1 { fd, st_size } => { - JournalEffector::apply_fd_set_size(&mut ctx, fd, st_size) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::FileDescriptorSetFlagsV1 { fd, flags } => { - JournalEffector::apply_fd_set_flags(&mut ctx, fd, flags) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::FileDescriptorSetRightsV1 { - fd, - fs_rights_base, - 
fs_rights_inheriting, - } => { - JournalEffector::apply_fd_set_rights( - &mut ctx, - fd, - fs_rights_base, - fs_rights_inheriting, - ) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::FileDescriptorAdviseV1 { - fd, - offset, - len, - advice, - } => { - JournalEffector::apply_fd_advise(&mut ctx, fd, offset, len, advice) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::FileDescriptorAllocateV1 { fd, offset, len } => { - JournalEffector::apply_fd_allocate(&mut ctx, fd, offset, len) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::CreateHardLinkV1 { - old_fd, - old_path, - old_flags, - new_fd, - new_path, - } => { - JournalEffector::apply_path_link( - &mut ctx, old_fd, old_flags, &old_path, new_fd, &new_path, - ) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::CreateSymbolicLinkV1 { - old_path, - fd, - new_path, - } => { - JournalEffector::apply_path_symlink(&mut ctx, &old_path, fd, &new_path) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::ChangeDirectoryV1 { path } => { - JournalEffector::apply_chdir(&mut ctx, &path).map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::CreatePipeV1 { fd1, fd2 } => { - JournalEffector::apply_fd_pipe(&mut ctx, fd1, fd2) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::EpollCreateV1 { fd } => { - JournalEffector::apply_epoll_create(&mut ctx, fd) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::EpollCtlV1 { - epfd, - op, - fd, - event, - } => { - JournalEffector::apply_epoll_ctl(&mut ctx, epfd, op, fd, event) - .map_err(anyhow_err_to_runtime_err)?; - } - crate::journal::JournalEntry::TtySetV1 { tty, line_feeds } => { - let state = crate::WasiTtyState { - cols: tty.cols, - rows: tty.rows, - width: tty.width, - height: tty.height, - stdin_tty: tty.stdin_tty, - stdout_tty: tty.stdout_tty, - stderr_tty: tty.stderr_tty, - echo: tty.echo, - line_buffered: tty.line_buffered, - line_feeds, - }; - - if bootstrapping { - update_tty.replace(state); - } else { - JournalEffector::apply_tty_set(&mut ctx, state) - .map_err(anyhow_err_to_runtime_err)?; - } - } - crate::journal::JournalEntry::PortAddAddrV1 { cidr } => { - JournalEffector::apply_port_addr_add(&mut ctx, cidr) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::PortDelAddrV1 { addr } => { - JournalEffector::apply_port_addr_remove(&mut ctx, addr) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::PortAddrClearV1 => { - JournalEffector::apply_port_addr_clear(&mut ctx) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::PortBridgeV1 { - network, - token, - security, - } => JournalEffector::apply_port_bridge(&mut ctx, &network, &token, security) - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::PortUnbridgeV1 => { - JournalEffector::apply_port_unbridge(&mut ctx).map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::PortDhcpAcquireV1 => { - JournalEffector::apply_port_dhcp_acquire(&mut ctx) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::PortGatewaySetV1 { ip } => { - JournalEffector::apply_port_gateway_set(&mut ctx, ip) - .map_err(anyhow_err_to_runtime_err)? 
- } - crate::journal::JournalEntry::PortRouteAddV1 { - cidr, - via_router, - preferred_until, - expires_at, - } => JournalEffector::apply_port_route_add( - &mut ctx, - cidr, - via_router, - preferred_until, - expires_at, - ) - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::PortRouteClearV1 => { - JournalEffector::apply_port_route_clear(&mut ctx) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::PortRouteDelV1 { ip } => { - JournalEffector::apply_port_route_remove(&mut ctx, ip) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::SocketOpenV1 { af, ty, pt, fd } => { - JournalEffector::apply_sock_open(&mut ctx, af, ty, pt, fd) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::SocketListenV1 { fd, backlog } => { - JournalEffector::apply_sock_listen(&mut ctx, fd, backlog as usize) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::SocketBindV1 { fd, addr } => { - JournalEffector::apply_sock_bind(&mut ctx, fd, addr) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::SocketConnectedV1 { fd, addr } => { - JournalEffector::apply_sock_connect(&mut ctx, fd, addr) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::SocketAcceptedV1 { - listen_fd, - fd, - peer_addr, - fd_flags, - non_blocking: nonblocking, - } => JournalEffector::apply_sock_accepted( - &mut ctx, - listen_fd, - fd, - peer_addr, - fd_flags, - nonblocking, - ) - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::SocketJoinIpv4MulticastV1 { - fd, - multiaddr, - iface, - } => JournalEffector::apply_sock_join_ipv4_multicast(&mut ctx, fd, multiaddr, iface) - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::SocketJoinIpv6MulticastV1 { - fd, - multi_addr: multiaddr, - iface, - } => JournalEffector::apply_sock_join_ipv6_multicast(&mut ctx, fd, multiaddr, iface) - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::SocketLeaveIpv4MulticastV1 { - fd, - multi_addr: multiaddr, - iface, - } => JournalEffector::apply_sock_leave_ipv4_multicast(&mut ctx, fd, multiaddr, iface) - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::SocketLeaveIpv6MulticastV1 { - fd, - multi_addr: multiaddr, - iface, - } => JournalEffector::apply_sock_leave_ipv6_multicast(&mut ctx, fd, multiaddr, iface) - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::SocketSendFileV1 { - socket_fd, - file_fd, - offset, - count, - } => JournalEffector::apply_sock_send_file(&mut ctx, socket_fd, file_fd, offset, count) - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::SocketSendToV1 { - fd, - data, - flags, - addr, - is_64bit, - } => if is_64bit { - JournalEffector::apply_sock_send_to::(&ctx, fd, data, flags, addr) - } else { - JournalEffector::apply_sock_send_to::(&ctx, fd, data, flags, addr) - } - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::SocketSendV1 { - fd, - data, - flags, - is_64bit, - } => if is_64bit { - JournalEffector::apply_sock_send::(&ctx, fd, data, flags) - } else { - JournalEffector::apply_sock_send::(&ctx, fd, data, flags) - } - .map_err(anyhow_err_to_runtime_err)?, - crate::journal::JournalEntry::SocketSetOptFlagV1 { fd, opt, flag } => { - JournalEffector::apply_sock_set_opt_flag(&mut ctx, fd, opt, flag) - .map_err(anyhow_err_to_runtime_err)? 
- } - crate::journal::JournalEntry::SocketSetOptSizeV1 { fd, opt, size } => { - JournalEffector::apply_sock_set_opt_size(&mut ctx, fd, opt, size) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::SocketSetOptTimeV1 { fd, ty, time } => { - JournalEffector::apply_sock_set_opt_time(&mut ctx, fd, ty.into(), time) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::SocketShutdownV1 { fd, how } => { - JournalEffector::apply_sock_shutdown(&mut ctx, fd, how.into()) - .map_err(anyhow_err_to_runtime_err)? - } - crate::journal::JournalEntry::CreateEventV1 { - initial_val, - flags, - fd, - } => JournalEffector::apply_fd_event(&mut ctx, initial_val, flags, fd) - .map_err(anyhow_err_to_runtime_err)?, - } - } - - // If we are not in the same module then we fire off an exit - // that simulates closing the process (hence keeps everything - // in a clean state) - if journal_module_hash.is_some() && Some(cur_module_hash) != journal_module_hash { - tracing::error!( - "The WASM module hash does not match the journal module hash (journal_hash={:x?} vs module_hash{:x?}) - forcing a restart", - journal_module_hash.unwrap(), - cur_module_hash - ); - if bootstrapping { - rewind = None; - spawn_threads.clear(); - update_memory.clear(); - update_tty.take(); - stdout.clear(); - stderr.clear(); - stdout_fds.clear(); - stderr_fds.clear(); - stdout_fds.insert(1 as WasiFd); - stderr_fds.insert(2 as WasiFd); - } else { - JournalEffector::apply_process_exit(&mut ctx, None) - .map_err(anyhow_err_to_runtime_err)?; - } - } else { - tracing::debug!( - "journal used on a different module - the process will simulate a restart." - ); - } - - // We do not yet support multi threading - if !spawn_threads.is_empty() { - return Err(WasiRuntimeError::Runtime(RuntimeError::user( - anyhow::format_err!( - "Snapshot restoration does not currently support multiple threads." - ) - .into(), - ))); - } - - // Now output the stdout and stderr - for (offset, data, is_64bit) in stdout { - if is_64bit { - JournalEffector::apply_fd_write::(&ctx, 1, offset, data) - } else { - JournalEffector::apply_fd_write::(&ctx, 1, offset, data) - } - .map_err(anyhow_err_to_runtime_err)?; - } - - for (offset, data, is_64bit) in stderr { - if is_64bit { - JournalEffector::apply_fd_write::(&ctx, 2, offset, data) - } else { - JournalEffector::apply_fd_write::(&ctx, 2, offset, data) - } - .map_err(anyhow_err_to_runtime_err)?; - } - // Next we apply all the memory updates that were delayed while the logs - // were processed to completion. 
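
// ---------------------------------------------------------------------------
// [editor's note] Illustrative sketch, not part of the patch above. Both the
// old monolithic restore_snapshot being deleted here and the new per-action
// player (actions/fd_write.rs, fd_close.rs, fd_renumber.rs, ...) buffer
// writes aimed at stdout/stderr during replay and only emit them afterwards,
// tracking which fds currently alias stdout/stderr. This is a cut-down
// version of that bookkeeping for stdout only; the fd-renumber handling is
// simplified relative to the real code.
// ---------------------------------------------------------------------------
use std::collections::HashSet;

enum Entry {
    FdWrite { fd: u32, data: Vec<u8> },
    FdRenumber { old_fd: u32, new_fd: u32 },
    FdClose { fd: u32 },
}

struct Replayer {
    stdout_fds: HashSet<u32>,
    stdout: Vec<Vec<u8>>,
}

impl Replayer {
    fn new() -> Self {
        let mut stdout_fds = HashSet::new();
        stdout_fds.insert(1u32);
        Self { stdout_fds, stdout: Vec::new() }
    }

    fn play(&mut self, entry: Entry) {
        match entry {
            // Writes to a known stdout fd are captured and deferred.
            Entry::FdWrite { fd, data } if self.stdout_fds.contains(&fd) => {
                self.stdout.push(data);
            }
            // Other writes would be applied through the journal effector.
            Entry::FdWrite { .. } => {}
            // Renumbering carries the "this fd is stdout" marker along.
            Entry::FdRenumber { old_fd, new_fd } => {
                if self.stdout_fds.remove(&old_fd) {
                    self.stdout_fds.insert(new_fd);
                } else {
                    self.stdout_fds.remove(&new_fd);
                }
            }
            Entry::FdClose { fd } => {
                self.stdout_fds.remove(&fd);
            }
        }
    }
}

fn main() {
    let mut replayer = Replayer::new();
    replayer.play(Entry::FdWrite { fd: 1, data: b"hello ".to_vec() });
    replayer.play(Entry::FdRenumber { old_fd: 1, new_fd: 7 });
    replayer.play(Entry::FdWrite { fd: 7, data: b"world".to_vec() });
    replayer.play(Entry::FdClose { fd: 7 });

    let captured: Vec<u8> = replayer.stdout.concat();
    println!("replayed stdout: {}", String::from_utf8_lossy(&captured));
}
// --------------------------- end of editor's sketch -------------------------
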
- for (region, data) in update_memory { - JournalEffector::apply_memory(&mut ctx, region, &data) - .map_err(anyhow_err_to_runtime_err)?; - } - if let Some(state) = update_tty { - JournalEffector::apply_tty_set(&mut ctx, state).map_err(anyhow_err_to_runtime_err)?; - } - - Ok(rewind) -} diff --git a/lib/wasix/src/syscalls/journal/actions/close_thread.rs b/lib/wasix/src/syscalls/journal/actions/close_thread.rs new file mode 100644 index 00000000000..159cafb02b3 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/close_thread.rs @@ -0,0 +1,36 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_close_thread( + &mut self, + id: u32, + exit_code: Option, + differ_ethereal: Option<&mut Vec>>, + ) -> Result<(), WasiRuntimeError> { + if id == self.ctx.data().tid().raw() { + tracing::trace!(%id, ?exit_code, "Replay journal - CloseThread(main)"); + if self.bootstrapping { + self.clear_ethereal(differ_ethereal); + self.staged_differ_memory.clear(); + self.differ_memory.clear(); + self.rewind = None; + } else { + JournalEffector::apply_process_exit(&mut self.ctx, exit_code) + .map_err(anyhow_err_to_runtime_err)?; + } + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%id, ?exit_code, "Differ(end) journal - CloseThread"); + differ_ethereal.push(JournalEntry::CloseThreadV1 { id, exit_code }); + } else { + tracing::trace!(%id, ?exit_code, "Replay journal - CloseThread"); + JournalEffector::apply_thread_exit( + &mut self.ctx, + Into::::into(id), + exit_code, + ) + .map_err(anyhow_err_to_runtime_err)?; + } + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_advise.rs b/lib/wasix/src/syscalls/journal/actions/fd_advise.rs new file mode 100644 index 00000000000..2e094bc9e7f --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_advise.rs @@ -0,0 +1,17 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_advise( + &mut self, + fd: Fd, + offset: Filesize, + len: Filesize, + advice: Advice, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, %offset, %len, ?advice, "Replay journal - FdAdvise"); + JournalEffector::apply_fd_advise(&mut self.ctx, fd, offset, len, advice) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_allocate.rs b/lib/wasix/src/syscalls/journal/actions/fd_allocate.rs new file mode 100644 index 00000000000..dbf1896003d --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_allocate.rs @@ -0,0 +1,16 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_allocate( + &mut self, + fd: Fd, + offset: Filesize, + len: Filesize, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, %offset, %len, "Replay journal - FdAllocate"); + JournalEffector::apply_fd_allocate(&mut self.ctx, fd, offset, len) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_close.rs b/lib/wasix/src/syscalls/journal/actions/fd_close.rs new file mode 100644 index 00000000000..26b308c5aad --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_close.rs @@ -0,0 +1,12 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_close(&mut self, fd: u32) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, "Replay journal 
- FdClose"); + self.stdout_fds.remove(&fd); + self.stderr_fds.remove(&fd); + JournalEffector::apply_fd_close(&mut self.ctx, fd).map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_dup.rs b/lib/wasix/src/syscalls/journal/actions/fd_dup.rs new file mode 100644 index 00000000000..dd4bd8d3baf --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_dup.rs @@ -0,0 +1,26 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_dup( + &mut self, + original_fd: u32, + copied_fd: u32, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%original_fd, %copied_fd, "Replay journal - FdDuplicate"); + self.real_fd.insert(copied_fd); + if original_fd != copied_fd { + self.stdout_fds.remove(&copied_fd); + self.stderr_fds.remove(&copied_fd); + } + if self.stdout_fds.contains(&original_fd) { + self.stdout_fds.insert(copied_fd); + } + if self.stderr_fds.contains(&original_fd) { + self.stderr_fds.insert(copied_fd); + } + JournalEffector::apply_fd_duplicate(&mut self.ctx, original_fd, copied_fd) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_open.rs b/lib/wasix/src/syscalls/journal/actions/fd_open.rs new file mode 100644 index 00000000000..2393bac14a0 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_open.rs @@ -0,0 +1,31 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_open( + &mut self, + fd: u32, + dirfd: u32, + dirflags: u32, + path: Cow<'a, str>, + o_flags: Oflags, + fs_rights_base: Rights, + fs_rights_inheriting: Rights, + fs_flags: Fdflags, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, %dirfd, %dirflags, "Replay journal - FdOpen {}", path); + JournalEffector::apply_path_open( + &mut self.ctx, + fd, + dirfd, + dirflags, + &path, + o_flags, + fs_rights_base, + fs_rights_inheriting, + fs_flags, + ) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_renumber.rs b/lib/wasix/src/syscalls/journal/actions/fd_renumber.rs new file mode 100644 index 00000000000..b2b485f075c --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_renumber.rs @@ -0,0 +1,26 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_renumber( + &mut self, + old_fd: u32, + new_fd: u32, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%old_fd, %new_fd, "Replay journal - FdRenumber"); + self.real_fd.insert(new_fd); + if old_fd != new_fd { + self.stdout_fds.remove(&new_fd); + self.stderr_fds.remove(&new_fd); + } + if self.stdout_fds.remove(&old_fd) { + self.stdout_fds.insert(new_fd); + } + if self.stderr_fds.remove(&old_fd) { + self.stderr_fds.insert(new_fd); + } + JournalEffector::apply_fd_renumber(&mut self.ctx, old_fd, new_fd) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_seek.rs b/lib/wasix/src/syscalls/journal/actions/fd_seek.rs new file mode 100644 index 00000000000..c57b10dd3ec --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_seek.rs @@ -0,0 +1,16 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_seek( + &mut self, + fd: u32, + offset: i64, + whence: Whence, + ) -> Result<(), WasiRuntimeError> { + 
tracing::trace!(%fd, %offset, ?whence, "Replay journal - FdSeek"); + JournalEffector::apply_fd_seek(&mut self.ctx, fd, offset, whence) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_set_flags.rs b/lib/wasix/src/syscalls/journal/actions/fd_set_flags.rs new file mode 100644 index 00000000000..db2511cf9b6 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_set_flags.rs @@ -0,0 +1,15 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_set_flags( + &mut self, + fd: Fd, + flags: Fdflags, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, ?flags, "Replay journal - FdSetFlags"); + JournalEffector::apply_fd_set_flags(&mut self.ctx, fd, flags) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_set_rights.rs b/lib/wasix/src/syscalls/journal/actions/fd_set_rights.rs new file mode 100644 index 00000000000..479fe235241 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_set_rights.rs @@ -0,0 +1,21 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_set_rights( + &mut self, + fd: Fd, + fs_rights_base: Rights, + fs_rights_inheriting: Rights, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, "Replay journal - FdSetRights"); + JournalEffector::apply_fd_set_rights( + &mut self.ctx, + fd, + fs_rights_base, + fs_rights_inheriting, + ) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_set_size.rs b/lib/wasix/src/syscalls/journal/actions/fd_set_size.rs new file mode 100644 index 00000000000..12084fc969e --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_set_size.rs @@ -0,0 +1,15 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_set_size( + &mut self, + fd: Fd, + st_size: Filesize, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, %st_size, "Replay journal - FdSetSize"); + JournalEffector::apply_fd_set_size(&mut self.ctx, fd, st_size) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_set_times.rs b/lib/wasix/src/syscalls/journal/actions/fd_set_times.rs new file mode 100644 index 00000000000..7c615e9e23b --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_set_times.rs @@ -0,0 +1,17 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_set_times( + &mut self, + fd: Fd, + st_atim: Timestamp, + st_mtim: Timestamp, + fst_flags: Fstflags, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, %st_atim, %st_mtim, ?fst_flags, "Replay journal - FdSetTimes"); + JournalEffector::apply_fd_set_times(&mut self.ctx, fd, st_atim, st_mtim, fst_flags) + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/fd_write.rs b/lib/wasix/src/syscalls/journal/actions/fd_write.rs new file mode 100644 index 00000000000..06e7249d85e --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/fd_write.rs @@ -0,0 +1,30 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_fd_write( + &mut self, + fd: u32, + offset: u64, + data: Cow<'a, [u8]>, + is_64bit: bool, + ) -> 
Result<(), WasiRuntimeError> { + tracing::trace!(%fd, %offset, "Replay journal - FdWrite"); + if self.stdout_fds.contains(&fd) { + self.stdout.push((offset, data, is_64bit)); + return Ok(()); + } + if self.stderr_fds.contains(&fd) { + self.stderr.push((offset, data, is_64bit)); + return Ok(()); + } + + if is_64bit { + JournalEffector::apply_fd_write::(&self.ctx, fd, offset, data) + } else { + JournalEffector::apply_fd_write::(&self.ctx, fd, offset, data) + } + .map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/init_module.rs b/lib/wasix/src/syscalls/journal/actions/init_module.rs new file mode 100644 index 00000000000..2e0dfe59a15 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/init_module.rs @@ -0,0 +1,16 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_init_module( + &mut self, + wasm_hash: [u8; 8], + differ_ethereal: Option<&mut Vec>>, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!("Replay journal - InitModule {:?}", wasm_hash); + self.clear_ethereal(differ_ethereal); + self.differ_memory.clear(); + self.journal_module_hash.replace(wasm_hash); + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/mod.rs b/lib/wasix/src/syscalls/journal/actions/mod.rs new file mode 100644 index 00000000000..49a5d1d6a42 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/mod.rs @@ -0,0 +1,46 @@ +mod close_thread; +mod fd_advise; +mod fd_allocate; +mod fd_close; +mod fd_dup; +mod fd_open; +mod fd_renumber; +mod fd_seek; +mod fd_set_flags; +mod fd_set_rights; +mod fd_set_size; +mod fd_set_times; +mod fd_write; +mod init_module; +mod path_set_times; +mod process_exit; +mod set_thread; +mod snapshot; +mod tty_set; +mod update_memory; + +use crate::journal::JournalEffector; +use crate::syscalls::anyhow_err_to_runtime_err; +use crate::syscalls::JournalSyscallPlayer; +use crate::RewindState; +use crate::WasiRuntimeError; +use crate::WasiThreadId; +use std::borrow::Cow; +use std::ops::Range; +use std::time::SystemTime; +use wasmer::RuntimeError; +use wasmer_journal::JournalEntry; +use wasmer_journal::SnapshotTrigger; +use wasmer_types::Memory32; +use wasmer_types::Memory64; +use wasmer_wasix_types::wasi::Advice; +use wasmer_wasix_types::wasi::ExitCode; +use wasmer_wasix_types::wasi::Fd; +use wasmer_wasix_types::wasi::Fdflags; +use wasmer_wasix_types::wasi::Filesize; +use wasmer_wasix_types::wasi::Tty; +use wasmer_wasix_types::wasi::Whence; +use wasmer_wasix_types::wasi::{Fstflags, LookupFlags, Timestamp}; +use wasmer_wasix_types::wasi::{Oflags, Rights}; +use wasmer_wasix_types::wasix::ThreadStartType; +use wasmer_wasix_types::wasix::WasiMemoryLayout; diff --git a/lib/wasix/src/syscalls/journal/actions/path_set_times.rs b/lib/wasix/src/syscalls/journal/actions/path_set_times.rs new file mode 100644 index 00000000000..c91fe3c21ee --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/path_set_times.rs @@ -0,0 +1,27 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_path_set_times( + &mut self, + fd: Fd, + flags: LookupFlags, + path: Cow<'_, str>, + st_atim: Timestamp, + st_mtim: Timestamp, + fst_flags: Fstflags, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(%fd, "Replay journal - PathSetTimes"); + JournalEffector::apply_path_set_times( + &mut self.ctx, + fd, + flags, + &path, + st_atim, + st_mtim, + fst_flags, + ) + 
.map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/process_exit.rs b/lib/wasix/src/syscalls/journal/actions/process_exit.rs new file mode 100644 index 00000000000..6228c88658e --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/process_exit.rs @@ -0,0 +1,21 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_process_exit( + &mut self, + exit_code: Option, + differ_ethereal: Option<&mut Vec>>, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!(?exit_code, "Replay journal - ProcessExit"); + if self.bootstrapping { + self.clear_ethereal(differ_ethereal); + self.differ_memory.clear(); + self.rewind = None; + } else { + JournalEffector::apply_process_exit(&mut self.ctx, exit_code) + .map_err(anyhow_err_to_runtime_err)?; + } + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/set_thread.rs b/lib/wasix/src/syscalls/journal/actions/set_thread.rs new file mode 100644 index 00000000000..2d21e1de6bf --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/set_thread.rs @@ -0,0 +1,57 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_set_thread( + &mut self, + id: u32, + call_stack: Cow<'a, [u8]>, + memory_stack: Cow<'a, [u8]>, + store_data: Cow<'a, [u8]>, + is_64bit: bool, + start: ThreadStartType, + layout: WasiMemoryLayout, + differ_ethereal: Option<&mut Vec>>, + ) -> Result<(), WasiRuntimeError> { + if Some(self.cur_module_hash) != self.journal_module_hash { + tracing::trace!(%id, "Skipping journal entry - SetThread call_stack={} bytes memory_stack={} bytes store_data={} bytes", call_stack.len(), memory_stack.len(), store_data.len()); + return Ok(()); + } + + let state = RewindState { + memory_stack: memory_stack.to_vec().into(), + rewind_stack: call_stack.to_vec().into(), + store_data: store_data.to_vec().into(), + start, + layout: layout.clone(), + is_64bit, + }; + + if Into::::into(id) == self.ctx.data().tid() { + tracing::trace!(%id, "Differ(end) journal - SetThread(main) call_stack={} bytes memory_stack={} bytes store_data={} bytes", call_stack.len(), memory_stack.len(), store_data.len()); + self.rewind.replace(state); + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%id, "Differ(ether) journal - SetThread call_stack={} bytes memory_stack={} bytes store_data={} bytes", call_stack.len(), memory_stack.len(), store_data.len()); + differ_ethereal.push(JournalEntry::SetThreadV1 { + id, + call_stack, + memory_stack, + store_data, + start, + layout, + is_64bit, + }); + } else if self.bootstrapping { + tracing::trace!(%id, "Differ(end) journal - SetThread({id}) call_stack={} bytes memory_stack={} bytes store_data={} bytes", call_stack.len(), memory_stack.len(), store_data.len()); + self.spawn_threads.insert(id.into(), state); + } else { + return Err(WasiRuntimeError::Runtime(RuntimeError::user( + anyhow::format_err!( + "Snapshot restoration does not currently support live updates of running threads." 
+ ) + .into(), + ))); + } + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/snapshot.rs b/lib/wasix/src/syscalls/journal/actions/snapshot.rs new file mode 100644 index 00000000000..3b6e5439afc --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/snapshot.rs @@ -0,0 +1,50 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_snapshot( + &mut self, + when: SystemTime, + trigger: SnapshotTrigger, + differ_ethereal: Option<&mut Vec>>, + ) -> Result<(), WasiRuntimeError> { + // If we are not in the same module then we fire off an exit + // that simulates closing the process (hence keeps everything + // in a clean state) + let mut clear_ethereal = false; + if self.journal_module_hash.is_some() + && Some(self.cur_module_hash) != self.journal_module_hash + { + tracing::error!( + "The WASM module hash does not match the journal module hash (journal_hash={:x?} vs module_hash{:x?}) - forcing a restart", + self.journal_module_hash.unwrap(), + self.cur_module_hash + ); + self.clear_ethereal(differ_ethereal); + return Ok(()); + } + + tracing::trace!("Replay journal - Snapshot (trigger={:?})", trigger); + + // Execute all the ethereal events + if let Some(ethereal_events) = differ_ethereal { + for next in ethereal_events.drain(..) { + tracing::trace!("Replay(ether) snapshot event - {next:?}"); + if let Err(err) = self.play_event(next, None) { + tracing::warn!("failed to replay event - {}", err); + return Err(err); + } + } + for (region, data) in self.staged_differ_memory.drain(..) { + tracing::trace!( + "Differ(end) memory event - {region:?} data.len={}", + data.len() + ); + self.differ_memory.push((region, data)); + } + } + + self.ctx.data_mut().pop_snapshot_trigger(trigger); + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/tty_set.rs b/lib/wasix/src/syscalls/journal/actions/tty_set.rs new file mode 100644 index 00000000000..2d0206e3942 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/tty_set.rs @@ -0,0 +1,27 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_tty_set( + &mut self, + tty: Tty, + line_feeds: bool, + ) -> Result<(), WasiRuntimeError> { + tracing::trace!("Replay journal - TtySet"); + let state = crate::WasiTtyState { + cols: tty.cols, + rows: tty.rows, + width: tty.width, + height: tty.height, + stdin_tty: tty.stdin_tty, + stdout_tty: tty.stdout_tty, + stderr_tty: tty.stderr_tty, + echo: tty.echo, + line_buffered: tty.line_buffered, + line_feeds, + }; + + JournalEffector::apply_tty_set(&mut self.ctx, state).map_err(anyhow_err_to_runtime_err)?; + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/actions/update_memory.rs b/lib/wasix/src/syscalls/journal/actions/update_memory.rs new file mode 100644 index 00000000000..746e5355e7a --- /dev/null +++ b/lib/wasix/src/syscalls/journal/actions/update_memory.rs @@ -0,0 +1,29 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(crate) unsafe fn action_update_memory( + &mut self, + region: Range, + data: Cow<'a, [u8]>, + differ_ethereal: Option<&mut Vec>>, + ) -> Result<(), WasiRuntimeError> { + if Some(self.cur_module_hash) != self.journal_module_hash { + tracing::trace!("Ignored journal - UpdateMemory"); + return Ok(()); + } + + if self.bootstrapping { + tracing::trace!("Differ(stage) journal - UpdateMemory"); + self.staged_differ_memory.push((region, data)); + } else if let 
Some(differ_ethereal) = differ_ethereal { + tracing::trace!("Differ(ether) journal - UpdateMemory"); + differ_ethereal.push(JournalEntry::UpdateMemoryRegionV1 { region, data }); + } else { + tracing::trace!("Replay journal - UpdateMemory"); + JournalEffector::apply_memory(&mut self.ctx, region, &data) + .map_err(anyhow_err_to_runtime_err)?; + } + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/clear_ethereal.rs b/lib/wasix/src/syscalls/journal/clear_ethereal.rs new file mode 100644 index 00000000000..2ce4fc9b769 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/clear_ethereal.rs @@ -0,0 +1,19 @@ +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + pub(super) fn clear_ethereal( + &mut self, + mut differ_ethereal: Option<&mut Vec>>, + ) { + tracing::trace!("Replay journal - ClearEthereal"); + self.spawn_threads.clear(); + self.stdout.clear(); + self.stderr.clear(); + self.stdout_fds.clear(); + self.stderr_fds.clear(); + self.stdout_fds.insert(1 as WasiFd); + self.stderr_fds.insert(2 as WasiFd); + differ_ethereal.iter_mut().for_each(|e| e.clear()); + self.staged_differ_memory.clear(); + } +} diff --git a/lib/wasix/src/syscalls/journal/do_checkpoint_from_outside.rs b/lib/wasix/src/syscalls/journal/do_checkpoint_from_outside.rs new file mode 100644 index 00000000000..36603d3904d --- /dev/null +++ b/lib/wasix/src/syscalls/journal/do_checkpoint_from_outside.rs @@ -0,0 +1,24 @@ +use crate::os::task::process::LockableWasiProcessInner; + +use super::WasiProcessCheckpoint; + +pub(crate) fn do_checkpoint_from_outside( + process: &LockableWasiProcessInner, + checkpoint: WasiProcessCheckpoint, +) { + let mut guard = process.0.lock().unwrap(); + + // Initiate the checksum (if one already exists we must wait for it to end + // before we start the next checksum) + + // TODO: Disabled as this blocks the async runtime + //while !matches!(guard.checkpoint, WasiProcessCheckpoint::Execute) { + // guard = process.1.wait(guard).unwrap(); + //} + + guard.checkpoint = checkpoint; + for waker in guard.wakers.drain(..) 
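do_checkpoint_from_outside flips the process-wide checkpoint state and then, continuing just below, wakes any parked async pollers and notifies the condvar so blocked threads re-check that state. A self-contained sketch of the wake-up handshake with simplified local types (the real code guards WasiProcessInner behind the same Mutex/Condvar pair):

use std::sync::{Arc, Condvar, Mutex};
use std::task::Waker;

// Simplified stand-in for the process state guarded by (Mutex, Condvar).
#[derive(Debug, PartialEq)]
enum Checkpoint {
    Execute,
    Snapshot,
}

struct Inner {
    checkpoint: Checkpoint,
    wakers: Vec<Waker>,
}

type Lockable = Arc<(Mutex<Inner>, Condvar)>;

// Flip the checkpoint flag from outside the process, then wake both the
// async pollers (via their stored wakers) and anyone blocked on the condvar.
fn checkpoint_from_outside(process: &Lockable, checkpoint: Checkpoint) {
    let mut guard = process.0.lock().unwrap();
    guard.checkpoint = checkpoint;
    for waker in guard.wakers.drain(..) {
        waker.wake();
    }
    process.1.notify_all();
}

fn main() {
    let process: Lockable = Arc::new((
        Mutex::new(Inner { checkpoint: Checkpoint::Execute, wakers: Vec::new() }),
        Condvar::new(),
    ));
    checkpoint_from_outside(&process, Checkpoint::Snapshot);
    assert_eq!(process.0.lock().unwrap().checkpoint, Checkpoint::Snapshot);
}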
{ + waker.wake(); + } + process.1.notify_all(); +} diff --git a/lib/wasix/src/syscalls/journal/maybe_snapshot.rs b/lib/wasix/src/syscalls/journal/maybe_snapshot.rs new file mode 100644 index 00000000000..83fd7faf34a --- /dev/null +++ b/lib/wasix/src/syscalls/journal/maybe_snapshot.rs @@ -0,0 +1,30 @@ +use super::*; + +#[allow(clippy::extra_unused_type_parameters)] +#[cfg(not(feature = "journal"))] +pub fn maybe_snapshot( + ctx: FunctionEnvMut<'_, WasiEnv>, +) -> WasiResult> { + Ok(Ok(ctx)) +} + +#[cfg(feature = "journal")] +pub fn maybe_snapshot( + mut ctx: FunctionEnvMut<'_, WasiEnv>, +) -> WasiResult> { + use crate::os::task::process::{WasiProcessCheckpoint, WasiProcessInner}; + + if !ctx.data().enable_journal { + return Ok(Ok(ctx)); + } + + let inner = ctx.data().process.inner.clone(); + let res = wasi_try_ok_ok!(WasiProcessInner::maybe_checkpoint::(inner, ctx)?); + match res { + MaybeCheckpointResult::Unwinding => return Ok(Err(Errno::Success)), + MaybeCheckpointResult::NotThisTime(c) => { + ctx = c; + } + } + Ok(Ok(ctx)) +} diff --git a/lib/wasix/src/syscalls/journal/maybe_snapshot_many.rs b/lib/wasix/src/syscalls/journal/maybe_snapshot_many.rs new file mode 100644 index 00000000000..866d8999d91 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/maybe_snapshot_many.rs @@ -0,0 +1,44 @@ +use super::*; + +#[allow(clippy::extra_unused_type_parameters)] +#[cfg(not(feature = "journal"))] +pub fn maybe_snapshot_many( + ctx: FunctionEnvMut<'_, WasiEnv>, + _trigger: crate::journal::SnapshotTrigger, +) -> WasiResult> { + Ok(Ok(ctx)) +} + +#[cfg(feature = "journal")] +pub fn maybe_snapshot_many( + mut ctx: FunctionEnvMut<'_, WasiEnv>, + trigger: crate::journal::SnapshotTrigger, +) -> WasiResult> { + use crate::os::task::process::{WasiProcessCheckpoint, WasiProcessInner}; + + if unsafe { handle_rewind_ext_with_default::(&mut ctx, HandleRewindType::ResultLess) } + .is_some() + { + return Ok(Ok(ctx)); + } + + if !ctx.data().enable_journal { + return Ok(Ok(ctx)); + } + + if ctx.data_mut().has_snapshot_trigger(trigger) { + let inner = ctx.data().process.inner.clone(); + let res = wasi_try_ok_ok!(WasiProcessInner::checkpoint::( + inner, + ctx, + WasiProcessCheckpoint::Snapshot { trigger }, + )?); + match res { + MaybeCheckpointResult::Unwinding => return Ok(Err(Errno::Success)), + MaybeCheckpointResult::NotThisTime(c) => { + ctx = c; + } + } + } + Ok(Ok(ctx)) +} diff --git a/lib/wasix/src/syscalls/journal/maybe_snapshot_once.rs b/lib/wasix/src/syscalls/journal/maybe_snapshot_once.rs new file mode 100644 index 00000000000..8938f63de3e --- /dev/null +++ b/lib/wasix/src/syscalls/journal/maybe_snapshot_once.rs @@ -0,0 +1,44 @@ +use super::*; + +#[allow(clippy::extra_unused_type_parameters)] +#[cfg(not(feature = "journal"))] +pub fn maybe_snapshot_once( + ctx: FunctionEnvMut<'_, WasiEnv>, + _trigger: crate::journal::SnapshotTrigger, +) -> WasiResult> { + Ok(Ok(ctx)) +} + +#[cfg(feature = "journal")] +pub fn maybe_snapshot_once( + mut ctx: FunctionEnvMut<'_, WasiEnv>, + trigger: crate::journal::SnapshotTrigger, +) -> WasiResult> { + use crate::os::task::process::{WasiProcessCheckpoint, WasiProcessInner}; + + if unsafe { handle_rewind_ext_with_default::(&mut ctx, HandleRewindType::ResultLess) } + .is_some() + { + return Ok(Ok(ctx)); + } + + if !ctx.data().enable_journal { + return Ok(Ok(ctx)); + } + + if ctx.data_mut().pop_snapshot_trigger(trigger) { + let inner = ctx.data().process.inner.clone(); + let res = wasi_try_ok_ok!(WasiProcessInner::checkpoint::( + inner, + ctx, + WasiProcessCheckpoint::Snapshot { 
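The maybe_snapshot helpers share one shape: hand the context straight back when journaling is disabled, otherwise attempt a checkpoint and either bail out because the stack is unwinding or continue with the returned context. A simplified stand-in sketch of that control flow (local types only, not the real WasiResult / FunctionEnvMut signatures):

// Simplified stand-ins for the checkpoint outcome and the syscall context.
struct Ctx {
    enable_journal: bool,
}

enum MaybeCheckpointResult {
    // The stack is being unwound for a snapshot; the caller must return early.
    Unwinding,
    // No checkpoint happened this time; execution continues with the context.
    NotThisTime(Ctx),
}

fn try_checkpoint(ctx: Ctx) -> MaybeCheckpointResult {
    // Placeholder decision; the real code asks WasiProcessInner whether a
    // checkpoint is pending.
    MaybeCheckpointResult::NotThisTime(ctx)
}

fn maybe_snapshot(ctx: Ctx) -> Result<Ctx, &'static str> {
    if !ctx.enable_journal {
        return Ok(ctx);
    }
    match try_checkpoint(ctx) {
        MaybeCheckpointResult::Unwinding => Err("unwinding for snapshot"),
        MaybeCheckpointResult::NotThisTime(ctx) => Ok(ctx),
    }
}

fn main() {
    match maybe_snapshot(Ctx { enable_journal: true }) {
        Ok(_) => println!("syscall continues with the context"),
        Err(why) => println!("syscall returns early: {why}"),
    }
}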
trigger }, + )?); + match res { + MaybeCheckpointResult::Unwinding => return Ok(Err(Errno::Success)), + MaybeCheckpointResult::NotThisTime(c) => { + ctx = c; + } + } + } + Ok(Ok(ctx)) +} diff --git a/lib/wasix/src/syscalls/journal/mod.rs b/lib/wasix/src/syscalls/journal/mod.rs new file mode 100644 index 00000000000..984c9b5b686 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/mod.rs @@ -0,0 +1,78 @@ +#[cfg(feature = "journal")] +mod actions; +mod clear_ethereal; +mod do_checkpoint_from_outside; +mod maybe_snapshot; +mod maybe_snapshot_many; +mod maybe_snapshot_once; +#[cfg(feature = "journal")] +mod play_event; +mod restore_snapshot; +mod wait_for_snapshot; + +#[cfg(feature = "journal")] +use actions::*; +use clear_ethereal::*; +use wasmer_journal::JournalEntry; + +pub use do_checkpoint_from_outside::*; +pub use maybe_snapshot::*; +pub use maybe_snapshot_many::*; +pub use maybe_snapshot_once::*; +pub use restore_snapshot::*; +pub use wait_for_snapshot::*; + +use crate::os::task::process::MemorySnapshotRegion; +use std::{collections::BTreeMap, ops::Range}; + +use super::*; + +pub struct JournalSyscallPlayer<'a, 'c> { + pub ctx: FunctionEnvMut<'c, WasiEnv>, + pub bootstrapping: bool, + + pub journal_module_hash: Option<[u8; 8]>, + pub rewind: Option, + pub cur_module_hash: [u8; 8], + pub real_fd: HashSet, + + // We delay the spawning of threads until the end as its + // possible that the threads will be cancelled before all the + // events finished the streaming process + pub spawn_threads: BTreeMap, + pub staged_differ_memory: Vec<(Range, Cow<'a, [u8]>)>, + pub differ_memory: Vec<(Range, Cow<'a, [u8]>)>, + + // We capture the stdout and stderr while we replay + pub stdout: Vec<(u64, Cow<'a, [u8]>, bool)>, + pub stderr: Vec<(u64, Cow<'a, [u8]>, bool)>, + pub stdout_fds: HashSet, + pub stderr_fds: HashSet, +} + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + pub fn new(mut ctx: FunctionEnvMut<'c, WasiEnv>, bootstrapping: bool) -> Self { + let cur_module_hash: [u8; 8] = ctx.data().process.module_hash.as_bytes(); + let mut ret = JournalSyscallPlayer { + ctx, + bootstrapping, + cur_module_hash, + journal_module_hash: None, + rewind: None, + spawn_threads: Default::default(), + staged_differ_memory: Default::default(), + differ_memory: Default::default(), + stdout: Default::default(), + stderr: Default::default(), + stdout_fds: Default::default(), + stderr_fds: Default::default(), + real_fd: Default::default(), + }; + + // We capture the stdout and stderr while we replay + ret.stdout_fds.insert(1 as WasiFd); + ret.stderr_fds.insert(2 as WasiFd); + + ret + } +} diff --git a/lib/wasix/src/syscalls/journal/play_event.rs b/lib/wasix/src/syscalls/journal/play_event.rs new file mode 100644 index 00000000000..be62674723f --- /dev/null +++ b/lib/wasix/src/syscalls/journal/play_event.rs @@ -0,0 +1,749 @@ +use std::ops::Range; + +use super::*; + +impl<'a, 'c> JournalSyscallPlayer<'a, 'c> { + #[allow(clippy::result_large_err)] + pub(super) unsafe fn play_event( + &mut self, + next: JournalEntry<'a>, + differ_ethereal: Option<&mut Vec>>, + ) -> Result<(), WasiRuntimeError> { + match next { + JournalEntry::InitModuleV1 { wasm_hash } => { + self.action_init_module(wasm_hash, differ_ethereal)?; + } + JournalEntry::ClearEtherealV1 => { + self.clear_ethereal(differ_ethereal); + } + JournalEntry::ProcessExitV1 { exit_code } => { + self.action_process_exit(exit_code, differ_ethereal)?; + } + JournalEntry::FileDescriptorWriteV1 { + fd, + offset, + data, + is_64bit, + } => { + if self.real_fd.contains(&fd) { + 
self.action_fd_write(fd, offset, data, is_64bit)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, %offset, "Differ(ether) journal - FdWrite"); + differ_ethereal.push(JournalEntry::FileDescriptorWriteV1 { + fd, + offset, + data, + is_64bit, + }); + } else { + self.action_fd_write(fd, offset, data, is_64bit)?; + } + } + JournalEntry::FileDescriptorSeekV1 { fd, offset, whence } => { + if self.real_fd.contains(&fd) { + self.action_fd_seek(fd, offset, whence)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, %offset, ?whence, "Differ(ether) journal - FdSeek"); + differ_ethereal.push(JournalEntry::FileDescriptorSeekV1 { fd, offset, whence }); + } else { + self.action_fd_seek(fd, offset, whence)?; + } + } + JournalEntry::UpdateMemoryRegionV1 { region, data } => { + self.action_update_memory(region, data, differ_ethereal)?; + } + JournalEntry::CloseThreadV1 { id, exit_code } => { + self.action_close_thread(id, exit_code, differ_ethereal)?; + } + JournalEntry::SetThreadV1 { + id, + call_stack, + memory_stack, + store_data, + is_64bit, + start, + layout, + } => { + self.action_set_thread( + id, + call_stack, + memory_stack, + store_data, + is_64bit, + start, + layout, + differ_ethereal, + )?; + } + JournalEntry::CloseFileDescriptorV1 { fd } => { + if self.real_fd.contains(&fd) { + self.action_fd_close(fd)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, "Differ(ether) journal - FdClose"); + differ_ethereal.push(JournalEntry::CloseFileDescriptorV1 { fd }); + } else { + self.action_fd_close(fd)?; + } + } + JournalEntry::OpenFileDescriptorV1 { + fd, + dirfd, + dirflags, + path, + o_flags, + fs_rights_base, + fs_rights_inheriting, + fs_flags, + } => { + self.real_fd.insert(fd); + self.action_fd_open( + fd, + dirfd, + dirflags, + path, + o_flags, + fs_rights_base, + fs_rights_inheriting, + fs_flags, + )?; + } + JournalEntry::RemoveDirectoryV1 { fd, path } => { + tracing::trace!("Replay journal - RemoveDirectory {}", path); + JournalEffector::apply_path_remove_directory(&mut self.ctx, fd, &path) + .map_err(anyhow_err_to_runtime_err)?; + } + JournalEntry::UnlinkFileV1 { fd, path } => { + tracing::trace!("Replay journal - UnlinkFile {}", path); + JournalEffector::apply_path_unlink(&mut self.ctx, fd, &path) + .map_err(anyhow_err_to_runtime_err)?; + } + JournalEntry::PathRenameV1 { + old_fd, + old_path, + new_fd, + new_path, + } => { + tracing::trace!("Replay journal - PathRename {}->{}", old_path, new_path); + JournalEffector::apply_path_rename( + &mut self.ctx, + old_fd, + &old_path, + new_fd, + &new_path, + ) + .map_err(anyhow_err_to_runtime_err)?; + } + JournalEntry::SnapshotV1 { when, trigger } => { + self.action_snapshot(when, trigger, differ_ethereal)?; + } + JournalEntry::SetClockTimeV1 { clock_id, time } => { + tracing::trace!(?clock_id, %time, "Replay journal - ClockTimeSet"); + JournalEffector::apply_clock_time_set(&mut self.ctx, clock_id, time) + .map_err(anyhow_err_to_runtime_err)?; + } + JournalEntry::RenumberFileDescriptorV1 { old_fd, new_fd } => { + if self.real_fd.remove(&old_fd) { + self.action_fd_renumber(old_fd, new_fd)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%old_fd, %new_fd, "Differ(ether) journal - FdRenumber"); + differ_ethereal.push(JournalEntry::RenumberFileDescriptorV1 { old_fd, new_fd }); + } else { + self.action_fd_renumber(old_fd, new_fd)?; + } + } + JournalEntry::DuplicateFileDescriptorV1 { + original_fd, + copied_fd, + } => { + if 
self.real_fd.contains(&original_fd) { + self.action_fd_dup(original_fd, copied_fd)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%original_fd, %copied_fd, "Differ(ether) journal - FdDuplicate"); + differ_ethereal.push(JournalEntry::DuplicateFileDescriptorV1 { + original_fd, + copied_fd, + }); + } else { + self.action_fd_dup(original_fd, copied_fd)?; + } + } + JournalEntry::CreateDirectoryV1 { fd, path } => { + tracing::trace!(%fd, %path, "Replay journal - CreateDirectory"); + JournalEffector::apply_path_create_directory(&mut self.ctx, fd, &path) + .map_err(anyhow_err_to_runtime_err)?; + } + JournalEntry::PathSetTimesV1 { + fd, + flags, + path, + st_atim, + st_mtim, + fst_flags, + } => { + if self.real_fd.contains(&fd) { + self.action_path_set_times(fd, flags, path, st_atim, st_mtim, fst_flags)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, "Differ(ether) journal - PathSetTimes"); + differ_ethereal.push(JournalEntry::PathSetTimesV1 { + fd, + flags, + path, + st_atim, + st_mtim, + fst_flags, + }); + } else { + self.action_path_set_times(fd, flags, path, st_atim, st_mtim, fst_flags)?; + } + } + JournalEntry::FileDescriptorSetTimesV1 { + fd, + st_atim, + st_mtim, + fst_flags, + } => { + if self.real_fd.contains(&fd) { + self.action_fd_set_times(fd, st_atim, st_mtim, fst_flags)? + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, %st_atim, %st_mtim, ?fst_flags, "Differ(ether) journal - FdSetTimes"); + differ_ethereal.push(JournalEntry::FileDescriptorSetTimesV1 { + fd, + st_atim, + st_mtim, + fst_flags, + }); + } else { + self.action_fd_set_times(fd, st_atim, st_mtim, fst_flags)? + } + } + JournalEntry::FileDescriptorSetSizeV1 { fd, st_size } => { + if self.real_fd.contains(&fd) { + self.action_fd_set_size(fd, st_size)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, %st_size, "Differ(ether) journal - FdSetSize"); + differ_ethereal.push(JournalEntry::FileDescriptorSetSizeV1 { fd, st_size }); + } else { + self.action_fd_set_size(fd, st_size)?; + } + } + JournalEntry::FileDescriptorSetFlagsV1 { fd, flags } => { + if self.real_fd.contains(&fd) { + self.action_fd_set_flags(fd, flags)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?flags, "Differ(ether) journal - FdSetFlags"); + differ_ethereal.push(JournalEntry::FileDescriptorSetFlagsV1 { fd, flags }); + } else { + self.action_fd_set_flags(fd, flags)?; + } + } + JournalEntry::FileDescriptorSetRightsV1 { + fd, + fs_rights_base, + fs_rights_inheriting, + } => { + if self.real_fd.contains(&fd) { + self.action_fd_set_rights(fd, fs_rights_base, fs_rights_inheriting)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, "Differ(ether) journal - FdSetRights"); + differ_ethereal.push(JournalEntry::FileDescriptorSetRightsV1 { + fd, + fs_rights_base, + fs_rights_inheriting, + }); + } else { + self.action_fd_set_rights(fd, fs_rights_base, fs_rights_inheriting)?; + } + } + JournalEntry::FileDescriptorAdviseV1 { + fd, + offset, + len, + advice, + } => { + if self.real_fd.contains(&fd) { + self.action_fd_advise(fd, offset, len, advice)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, %offset, %len, ?advice, "Differ(ether) journal - FdAdvise"); + differ_ethereal.push(JournalEntry::FileDescriptorAdviseV1 { + fd, + offset, + len, + advice, + }); + } else { + self.action_fd_advise(fd, offset, len, advice)?; + } + } + 
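Most per-fd arms of play_event route an entry one of three ways: apply it immediately if the descriptor was opened during this replay (tracked in real_fd), queue it in the ethereal buffer to be committed at the next snapshot, or, with no buffer present, apply it directly. A sketch of that routing as a hypothetical helper (the patch repeats the pattern inline per entry type; the types below are local stand-ins):

use std::collections::HashSet;

#[derive(Debug)]
enum Entry {
    SetSize { fd: u32, size: u64 },
}

fn apply_now(entry: &Entry) {
    println!("replaying immediately: {entry:?}");
}

// Hypothetical helper capturing the routing used by the per-fd match arms.
fn route_entry(
    real_fds: &HashSet<u32>,
    ethereal: Option<&mut Vec<Entry>>,
    fd: u32,
    entry: Entry,
) {
    if real_fds.contains(&fd) {
        // The fd was opened during this replay, so the effect is applied now.
        apply_now(&entry);
    } else if let Some(buffer) = ethereal {
        // Otherwise the entry is buffered and only committed at the next
        // snapshot (or reported as orphaned if no snapshot follows).
        buffer.push(entry);
    } else {
        apply_now(&entry);
    }
}

fn main() {
    let mut real_fds = HashSet::new();
    real_fds.insert(7);
    let mut ethereal = Vec::new();
    route_entry(&real_fds, Some(&mut ethereal), 7, Entry::SetSize { fd: 7, size: 1 });
    route_entry(&real_fds, Some(&mut ethereal), 9, Entry::SetSize { fd: 9, size: 2 });
    assert_eq!(ethereal.len(), 1); // only the non-"real" fd entry was deferred
}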
JournalEntry::FileDescriptorAllocateV1 { fd, offset, len } => { + if self.real_fd.contains(&fd) { + self.action_fd_allocate(fd, offset, len)?; + } else if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, %offset, %len, "Differ(ether) journal - FdAllocate"); + differ_ethereal.push(JournalEntry::FileDescriptorAllocateV1 { + fd, + offset, + len, + }); + } else { + self.action_fd_allocate(fd, offset, len)?; + } + } + JournalEntry::CreateHardLinkV1 { + old_fd, + old_path, + old_flags, + new_fd, + new_path, + } => { + tracing::trace!("Replay journal - PathLink {}->{}", old_path, new_path); + JournalEffector::apply_path_link( + &mut self.ctx, + old_fd, + old_flags, + &old_path, + new_fd, + &new_path, + ) + .map_err(anyhow_err_to_runtime_err)?; + } + JournalEntry::CreateSymbolicLinkV1 { + old_path, + fd, + new_path, + } => { + tracing::trace!("Replay journal - PathSymlink {}->{}", old_path, new_path); + JournalEffector::apply_path_symlink(&mut self.ctx, &old_path, fd, &new_path) + .map_err(anyhow_err_to_runtime_err)?; + } + JournalEntry::ChangeDirectoryV1 { path } => { + tracing::trace!("Replay journal - ChangeDirection {}", path); + JournalEffector::apply_chdir(&mut self.ctx, &path) + .map_err(anyhow_err_to_runtime_err)?; + } + JournalEntry::CreatePipeV1 { fd1, fd2 } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd1, %fd2, "Differ(ether) journal - CreatePipe"); + differ_ethereal.push(JournalEntry::CreatePipeV1 { fd1, fd2 }); + } else { + tracing::trace!(%fd1, %fd2, "Replay journal - CreatePipe"); + JournalEffector::apply_fd_pipe(&mut self.ctx, fd1, fd2) + .map_err(anyhow_err_to_runtime_err)?; + } + } + JournalEntry::EpollCreateV1 { fd } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, "Differ(ether) journal - EpollCreate"); + differ_ethereal.push(JournalEntry::EpollCreateV1 { fd }); + } else { + tracing::trace!(%fd, "Replay journal - EpollCreate"); + JournalEffector::apply_epoll_create(&mut self.ctx, fd) + .map_err(anyhow_err_to_runtime_err)?; + } + } + JournalEntry::EpollCtlV1 { + epfd, + op, + fd, + event, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%epfd, %fd, ?op, "Differ(ether) journal - EpollCtl"); + differ_ethereal.push(JournalEntry::EpollCtlV1 { + epfd, + op, + fd, + event, + }); + } else { + tracing::trace!(%epfd, %fd, ?op, "Replay journal - EpollCtl"); + JournalEffector::apply_epoll_ctl(&mut self.ctx, epfd, op, fd, event) + .map_err(anyhow_err_to_runtime_err)?; + } + } + JournalEntry::TtySetV1 { tty, line_feeds } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!("Differ(ether) journal - TtySet"); + differ_ethereal.push(JournalEntry::TtySetV1 { tty, line_feeds }); + } else { + self.action_tty_set(tty, line_feeds)?; + } + } + JournalEntry::PortAddAddrV1 { cidr } => { + tracing::trace!(?cidr, "Replay journal - PortAddAddr"); + JournalEffector::apply_port_addr_add(&mut self.ctx, cidr) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::PortDelAddrV1 { addr } => { + tracing::trace!(?addr, "Replay journal - PortDelAddr"); + JournalEffector::apply_port_addr_remove(&mut self.ctx, addr) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::PortAddrClearV1 => { + tracing::trace!("Replay journal - PortAddrClear"); + JournalEffector::apply_port_addr_clear(&mut self.ctx) + .map_err(anyhow_err_to_runtime_err)? 
+ } + JournalEntry::PortBridgeV1 { + network, + token, + security, + } => { + tracing::trace!("Replay journal - PortBridge"); + JournalEffector::apply_port_bridge(&mut self.ctx, &network, &token, security) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::PortUnbridgeV1 => { + tracing::trace!("Replay journal - PortUnBridge"); + JournalEffector::apply_port_unbridge(&mut self.ctx) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::PortDhcpAcquireV1 => { + tracing::trace!("Replay journal - PortDhcpAcquire"); + JournalEffector::apply_port_dhcp_acquire(&mut self.ctx) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::PortGatewaySetV1 { ip } => { + tracing::trace!(?ip, "Replay journal - PortGatewaySet"); + JournalEffector::apply_port_gateway_set(&mut self.ctx, ip) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::PortRouteAddV1 { + cidr, + via_router, + preferred_until, + expires_at, + } => { + tracing::trace!(?cidr, "Replay journal - PortRouteAdd"); + JournalEffector::apply_port_route_add( + &mut self.ctx, + cidr, + via_router, + preferred_until, + expires_at, + ) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::PortRouteClearV1 => { + tracing::trace!("Replay journal - PortRouteClear"); + JournalEffector::apply_port_route_clear(&mut self.ctx) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::PortRouteDelV1 { ip } => { + tracing::trace!(?ip, "Replay journal - PortRouteDel"); + JournalEffector::apply_port_route_remove(&mut self.ctx, ip) + .map_err(anyhow_err_to_runtime_err)? + } + JournalEntry::SocketOpenV1 { af, ty, pt, fd } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(?af, ?ty, ?pt, %fd, "Differ(ether) journal - SocketOpen"); + differ_ethereal.push(JournalEntry::SocketOpenV1 { af, ty, pt, fd }); + } else { + tracing::trace!(?af, ?ty, ?pt, %fd, "Replay journal - SocketOpen"); + JournalEffector::apply_sock_open(&mut self.ctx, af, ty, pt, fd) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketListenV1 { fd, backlog } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, "Differ(ether) journal - SocketListen"); + differ_ethereal.push(JournalEntry::SocketListenV1 { fd, backlog }); + } else { + tracing::trace!(%fd, "Replay journal - SocketListen"); + JournalEffector::apply_sock_listen(&mut self.ctx, fd, backlog as usize) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketBindV1 { fd, addr } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?addr, "Differ(ether) journal - SocketBind"); + differ_ethereal.push(JournalEntry::SocketBindV1 { fd, addr }); + } else { + tracing::trace!(%fd, ?addr, "Replay journal - SocketBind"); + JournalEffector::apply_sock_bind(&mut self.ctx, fd, addr) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketConnectedV1 { + fd, + local_addr, + peer_addr, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?peer_addr, "Differ(ether) journal - SockConnect"); + differ_ethereal.push(JournalEntry::SocketConnectedV1 { + fd, + local_addr, + peer_addr, + }); + } else { + tracing::trace!(%fd, ?peer_addr, "Replay journal - SockConnect"); + JournalEffector::apply_sock_connect(&mut self.ctx, fd, local_addr, peer_addr) + .map_err(anyhow_err_to_runtime_err)? 
+ } + } + JournalEntry::SocketAcceptedV1 { + listen_fd, + fd, + local_addr: addr, + peer_addr, + fd_flags, + non_blocking: nonblocking, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%listen_fd, %fd, ?peer_addr, "Differ(ether) journal - SocketAccept"); + differ_ethereal.push(JournalEntry::SocketAcceptedV1 { + listen_fd, + fd, + local_addr: addr, + peer_addr, + fd_flags, + non_blocking: nonblocking, + }); + } else { + tracing::trace!(%listen_fd, %fd, ?peer_addr, "Replay journal - SocketAccept"); + JournalEffector::apply_sock_accepted( + &mut self.ctx, + listen_fd, + fd, + addr, + peer_addr, + fd_flags, + nonblocking, + ) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketJoinIpv4MulticastV1 { + fd, + multiaddr, + iface, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?multiaddr, "Differ(ether) journal - JoinIpv4Multicast"); + differ_ethereal.push(JournalEntry::SocketJoinIpv4MulticastV1 { + fd, + multiaddr, + iface, + }); + } else { + tracing::trace!(%fd, ?multiaddr, "Replay journal - JoinIpv4Multicast"); + JournalEffector::apply_sock_join_ipv4_multicast( + &mut self.ctx, + fd, + multiaddr, + iface, + ) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketJoinIpv6MulticastV1 { + fd, + multi_addr: multiaddr, + iface, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?multiaddr, "Differ(ether) journal - JoinIpv6Multicast"); + differ_ethereal.push(JournalEntry::SocketJoinIpv6MulticastV1 { + fd, + multi_addr: multiaddr, + iface, + }); + } else { + tracing::trace!(%fd, ?multiaddr, "Replay journal - JoinIpv6Multicast"); + JournalEffector::apply_sock_join_ipv6_multicast( + &mut self.ctx, + fd, + multiaddr, + iface, + ) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketLeaveIpv4MulticastV1 { + fd, + multi_addr: multiaddr, + iface, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?multiaddr, "Differ(ether) journal - LeaveIpv4Multicast"); + differ_ethereal.push(JournalEntry::SocketLeaveIpv4MulticastV1 { + fd, + multi_addr: multiaddr, + iface, + }); + } else { + tracing::trace!(%fd, ?multiaddr, "Replay journal - LeaveIpv4Multicast"); + JournalEffector::apply_sock_leave_ipv4_multicast( + &mut self.ctx, + fd, + multiaddr, + iface, + ) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketLeaveIpv6MulticastV1 { + fd, + multi_addr: multiaddr, + iface, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?multiaddr, "Differ(ether) journal - LeaveIpv6Multicast"); + differ_ethereal.push(JournalEntry::SocketLeaveIpv6MulticastV1 { + fd, + multi_addr: multiaddr, + iface, + }); + } else { + tracing::trace!(%fd, ?multiaddr, "Replay journal - LeaveIpv6Multicast"); + JournalEffector::apply_sock_leave_ipv6_multicast( + &mut self.ctx, + fd, + multiaddr, + iface, + ) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketSendFileV1 { + socket_fd, + file_fd, + offset, + count, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%socket_fd, %file_fd, %offset, %count, "Differ(ether) journal - SockSendFile"); + differ_ethereal.push(JournalEntry::SocketSendFileV1 { + socket_fd, + file_fd, + offset, + count, + }); + } else { + tracing::trace!(%socket_fd, %file_fd, %offset, %count, "Replay journal - SockSendFile"); + JournalEffector::apply_sock_send_file( + &mut self.ctx, + socket_fd, + file_fd, + offset, + count, + ) + .map_err(anyhow_err_to_runtime_err)? 
+ } + } + JournalEntry::SocketSendToV1 { + fd, + data, + flags, + addr, + is_64bit, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, "Differ(ether) journal - SocketSendTo data={} bytes", data.len()); + differ_ethereal.push(JournalEntry::SocketSendToV1 { + fd, + data, + flags, + addr, + is_64bit, + }); + } else { + tracing::trace!(%fd, "Replay journal - SocketSendTo data={} bytes", data.len()); + if is_64bit { + JournalEffector::apply_sock_send_to::( + &self.ctx, fd, data, flags, addr, + ) + } else { + JournalEffector::apply_sock_send_to::( + &self.ctx, fd, data, flags, addr, + ) + } + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketSendV1 { + fd, + data, + flags, + is_64bit, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, "Differ(ether) journal - SocketSend data={} bytes", data.len()); + differ_ethereal.push(JournalEntry::SocketSendV1 { + fd, + data, + flags, + is_64bit, + }); + } else { + tracing::trace!(%fd, "Replay journal - SocketSend data={} bytes", data.len()); + if is_64bit { + JournalEffector::apply_sock_send::(&self.ctx, fd, data, flags) + } else { + JournalEffector::apply_sock_send::(&self.ctx, fd, data, flags) + } + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketSetOptFlagV1 { fd, opt, flag } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?opt, %flag, "Differ(ether) journal - SocketSetOptFlag"); + differ_ethereal.push(JournalEntry::SocketSetOptFlagV1 { fd, opt, flag }); + } else { + tracing::trace!(%fd, ?opt, %flag, "Replay journal - SocketSetOptFlag"); + JournalEffector::apply_sock_set_opt_flag(&mut self.ctx, fd, opt, flag) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketSetOptSizeV1 { fd, opt, size } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?opt, %size, "Differ(ether) journal - SocketSetOptSize"); + differ_ethereal.push(JournalEntry::SocketSetOptSizeV1 { fd, opt, size }); + } else { + tracing::trace!(%fd, ?opt, %size, "Replay journal - SocketSetOptSize"); + JournalEffector::apply_sock_set_opt_size(&mut self.ctx, fd, opt, size) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketSetOptTimeV1 { fd, ty, time } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?ty, ?time, "Differ(ether) journal - SocketSetOptTime"); + differ_ethereal.push(JournalEntry::SocketSetOptTimeV1 { fd, ty, time }); + } else { + tracing::trace!(%fd, ?ty, ?time, "Replay journal - SocketSetOptTime"); + JournalEffector::apply_sock_set_opt_time(&mut self.ctx, fd, ty.into(), time) + .map_err(anyhow_err_to_runtime_err)? + } + } + JournalEntry::SocketShutdownV1 { fd, how } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, ?how, "Differ(ether) journal - SocketShutdown"); + differ_ethereal.push(JournalEntry::SocketShutdownV1 { fd, how }); + } else { + tracing::trace!(%fd, ?how, "Replay journal - SocketShutdown"); + JournalEffector::apply_sock_shutdown(&mut self.ctx, fd, how.into()) + .map_err(anyhow_err_to_runtime_err)? 
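Entries that carry an is_64bit flag (FileDescriptorWrite, SocketSend, SocketSendTo) are replayed through the effector instantiated for the matching guest memory model; the generic arguments are elided in the text above, but given the Memory32/Memory64 imports in actions/mod.rs the dispatch is presumably between those two types. A stand-in sketch of the same dispatch (local trait and functions, not the wasmer types):

// `MemorySize`, `Memory32`, `Memory64` and `apply_send` are local stand-ins.
trait MemorySize {
    const NAME: &'static str;
}
struct Memory32;
struct Memory64;
impl MemorySize for Memory32 {
    const NAME: &'static str = "wasm32";
}
impl MemorySize for Memory64 {
    const NAME: &'static str = "wasm64";
}

fn apply_send<M: MemorySize>(data: &[u8]) {
    // Pointer widths and guest offsets would differ per memory model here.
    println!("replaying {} bytes with {} layout", data.len(), M::NAME);
}

fn replay_send(is_64bit: bool, data: &[u8]) {
    if is_64bit {
        apply_send::<Memory64>(data)
    } else {
        apply_send::<Memory32>(data)
    }
}

fn main() {
    replay_send(false, b"abc");
    replay_send(true, b"abcdef");
}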
+ } + } + JournalEntry::CreateEventV1 { + initial_val, + flags, + fd, + } => { + if let Some(differ_ethereal) = differ_ethereal { + tracing::trace!(%fd, %flags, "Differ(ether) journal - CreateEvent"); + differ_ethereal.push(JournalEntry::CreateEventV1 { + initial_val, + flags, + fd, + }); + } else { + tracing::trace!(%fd, %flags, "Replay journal - CreateEvent"); + JournalEffector::apply_fd_event(&mut self.ctx, initial_val, flags, fd) + .map_err(anyhow_err_to_runtime_err)? + } + } + } + Ok(()) + } +} diff --git a/lib/wasix/src/syscalls/journal/restore_snapshot.rs b/lib/wasix/src/syscalls/journal/restore_snapshot.rs new file mode 100644 index 00000000000..6535ebf4353 --- /dev/null +++ b/lib/wasix/src/syscalls/journal/restore_snapshot.rs @@ -0,0 +1,89 @@ +use super::*; + +/// Safety: This function manipulates the memory of the process and thus must +/// be executed by the WASM process thread itself. +/// +#[allow(clippy::result_large_err)] +#[cfg(feature = "journal")] +pub unsafe fn restore_snapshot( + mut ctx: FunctionEnvMut<'_, WasiEnv>, + journal: Arc, + bootstrapping: bool, +) -> Result, WasiRuntimeError> { + use std::{collections::BTreeMap, ops::Range}; + + use crate::{journal::Journal, os::task::process::MemorySnapshotRegion}; + + // Create the journal replay runner + let mut runner = JournalSyscallPlayer::new(ctx, bootstrapping); + + // We read all the logs from the journal into the state machine + let mut ethereal_events = Vec::new(); + while let Some(next) = journal.read().map_err(anyhow_err_to_runtime_err)? { + runner.play_event(next.into_inner(), Some(&mut ethereal_events)); + } + + // Check for events that are orphaned + for evt in ethereal_events { + tracing::debug!("Orphaned ethereal events - {:?}", evt); + } + + // Now output the stdout and stderr + for (offset, data, is_64bit) in runner.stdout { + if is_64bit { + JournalEffector::apply_fd_write::(&runner.ctx, 1, offset, data) + } else { + JournalEffector::apply_fd_write::(&runner.ctx, 1, offset, data) + } + .map_err(anyhow_err_to_runtime_err)?; + } + + for (offset, data, is_64bit) in runner.stderr { + if is_64bit { + JournalEffector::apply_fd_write::(&runner.ctx, 2, offset, data) + } else { + JournalEffector::apply_fd_write::(&runner.ctx, 2, offset, data) + } + .map_err(anyhow_err_to_runtime_err)?; + } + + // Apply the memory changes (if this is in bootstrapping mode we differed them) + for (region, data) in runner.differ_memory { + tracing::trace!( + "Replay journal - UpdateMemory - region:{:?}, data.len={}", + region, + data.len() + ); + JournalEffector::apply_memory(&mut runner.ctx, region, &data) + .map_err(anyhow_err_to_runtime_err)?; + } + + // Spawn all the threads + for (thread_id, thread_state) in runner.spawn_threads { + if thread_state.is_64bit { + JournalEffector::apply_thread_state::( + &mut runner.ctx, + thread_id, + thread_state.memory_stack, + thread_state.rewind_stack, + thread_state.store_data, + thread_state.start, + thread_state.layout, + ) + .map_err(anyhow_err_to_runtime_err)?; + } else { + JournalEffector::apply_thread_state::( + &mut runner.ctx, + thread_id, + thread_state.memory_stack, + thread_state.rewind_stack, + thread_state.store_data, + thread_state.start, + thread_state.layout, + ) + .map_err(anyhow_err_to_runtime_err)?; + } + } + + Ok(runner.rewind) +} diff --git a/lib/wasix/src/syscalls/journal/wait_for_snapshot.rs b/lib/wasix/src/syscalls/journal/wait_for_snapshot.rs new file mode 100644 index 00000000000..bead2155f7f --- /dev/null +++ b/lib/wasix/src/syscalls/journal/wait_for_snapshot.rs @@ 
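restore_snapshot drives the whole replay: it streams every journal record through play_event (buffering ethereal entries), reports anything left in that buffer as orphaned, then flushes the captured stdout/stderr, applies the deferred memory regions, respawns the recorded threads, and finally returns the main thread's rewind state. A stand-in outline of that finishing order (simplified local types, not the real API):

use std::ops::Range;

#[derive(Default)]
struct Replay {
    captured_stdout: Vec<Vec<u8>>,
    deferred_memory: Vec<(Range<u64>, Vec<u8>)>,
    pending_threads: Vec<u32>,
}

fn finish_restore(replay: Replay) {
    // 1. Emit the output captured while replaying FileDescriptorWrite events.
    for chunk in replay.captured_stdout {
        print!("{}", String::from_utf8_lossy(&chunk));
    }
    // 2. Apply the memory regions that were deferred while bootstrapping.
    for (region, data) in replay.deferred_memory {
        println!("apply {} bytes at {:?}", data.len(), region);
    }
    // 3. Respawn the recorded threads last, after memory has been applied.
    for tid in replay.pending_threads {
        println!("respawn thread {tid}");
    }
}

fn main() {
    finish_restore(Replay::default());
}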
-0,0 +1,31 @@ +use super::*; + +#[cfg(not(feature = "journal"))] +pub fn wait_for_snapshot(_env: &WasiEnv) -> Pin + Send + Sync>> { + Box::pin(std::future::pending()) +} + +#[cfg(feature = "journal")] +pub fn wait_for_snapshot(env: &WasiEnv) -> Pin + Send + Sync>> { + use crate::os::task::process::{LockableWasiProcessInner, WasiProcessCheckpoint}; + + struct Poller { + inner: LockableWasiProcessInner, + } + impl Future for Poller { + type Output = (); + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let mut guard = self.inner.0.lock().unwrap(); + if !matches!(guard.checkpoint, WasiProcessCheckpoint::Execute) { + return Poll::Ready(()); + } + if !guard.wakers.iter().any(|w| w.will_wake(cx.waker())) { + guard.wakers.push(cx.waker().clone()); + } + Poll::Pending + } + } + Box::pin(Poller { + inner: env.process.inner.clone(), + }) +} diff --git a/lib/wasix/src/syscalls/mod.rs b/lib/wasix/src/syscalls/mod.rs index 9a2e2e066ec..0374b9d8b8a 100644 --- a/lib/wasix/src/syscalls/mod.rs +++ b/lib/wasix/src/syscalls/mod.rs @@ -28,6 +28,8 @@ use futures::{ use tracing::instrument; pub use wasi::*; pub use wasix::*; +use wasmer_journal::SnapshotTrigger; +use wasmer_wasix_types::wasix::ThreadStartType; pub mod legacy; @@ -120,9 +122,12 @@ use crate::{ MAX_SYMLINKS, }, journal::{DynJournal, JournalEffector}, - os::task::{process::MaybeCheckpointResult, thread::RewindResult}, + os::task::{ + process::{MaybeCheckpointResult, WasiProcessCheckpoint}, + thread::{RewindResult, RewindResultType}, + }, runtime::task_manager::InlineWaker, - utils::store::InstanceSnapshot, + utils::store::StoreSnapshot, DeepSleepWork, RewindPostProcess, RewindState, RewindStateOption, SpawnError, WasiInodes, WasiResult, WasiRuntimeError, }; @@ -310,14 +315,14 @@ where } // This poller will process any signals when the main working function is idle - struct Poller<'a, 'b, Fut, T> + struct SignalPoller<'a, 'b, Fut, T> where Fut: Future>, { ctx: &'a mut FunctionEnvMut<'b, WasiEnv>, pinned_work: Pin>, } - impl<'a, 'b, Fut, T> Future for Poller<'a, 'b, Fut, T> + impl<'a, 'b, Fut, T> Future for SignalPoller<'a, 'b, Fut, T> where Fut: Future>, { @@ -339,7 +344,7 @@ where // Block on the work let mut pinned_work = Box::pin(work); let tasks = env.tasks().clone(); - let poller = Poller { ctx, pinned_work }; + let poller = SignalPoller { ctx, pinned_work }; block_on_with_timeout(&tasks, timeout, poller) } @@ -426,7 +431,7 @@ pub(crate) fn maybe_backoff( if let Some(backoff) = env.process.acquire_cpu_backoff_token(env.tasks()) { tracing::trace!("exponential CPU backoff {:?}", backoff.backoff_time()); if let AsyncifyAction::Finish(mut ctx, _) = - __asyncify_with_deep_sleep::(ctx, Duration::from_millis(50), backoff)? + __asyncify_with_deep_sleep::(ctx, backoff)? 
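The journal-enabled wait_for_snapshot is a small hand-rolled Future: it completes once the shared checkpoint state leaves Execute and otherwise parks itself by pushing the task's waker into the process state, which do_checkpoint_from_outside later drains. A self-contained sketch of the same waker-registration pattern with simplified types:

use std::future::Future;
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll, Wake, Waker};

// Simplified stand-in for the shared process state the poller watches.
struct Shared {
    snapshot_requested: bool,
    wakers: Vec<Waker>,
}

struct WaitForSnapshot {
    shared: Arc<Mutex<Shared>>,
}

impl Future for WaitForSnapshot {
    type Output = ();
    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
        let mut guard = self.shared.lock().unwrap();
        if guard.snapshot_requested {
            return Poll::Ready(());
        }
        // Register our waker (once) so a checkpoint from outside can wake us.
        if !guard.wakers.iter().any(|w| w.will_wake(cx.waker())) {
            guard.wakers.push(cx.waker().clone());
        }
        Poll::Pending
    }
}

struct NoopWake;
impl Wake for NoopWake {
    fn wake(self: Arc<Self>) {}
}

fn main() {
    let shared = Arc::new(Mutex::new(Shared { snapshot_requested: false, wakers: Vec::new() }));
    let mut fut = WaitForSnapshot { shared: shared.clone() };
    let waker = Waker::from(Arc::new(NoopWake));
    let mut cx = Context::from_waker(&waker);

    // First poll: nothing requested yet, so the waker is parked in `wakers`.
    assert!(Pin::new(&mut fut).poll(&mut cx).is_pending());
    // Simulate do_checkpoint_from_outside flipping the flag and waking.
    {
        let mut guard = shared.lock().unwrap();
        guard.snapshot_requested = true;
        for w in guard.wakers.drain(..) {
            w.wake();
        }
    }
    assert!(Pin::new(&mut fut).poll(&mut cx).is_ready());
}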
{ Ok(Ok(ctx)) } else { @@ -448,13 +453,18 @@ pub(crate) fn maybe_backoff( /// pub(crate) fn __asyncify_with_deep_sleep( mut ctx: FunctionEnvMut<'_, WasiEnv>, - deep_sleep_time: Duration, work: Fut, ) -> Result, WasiError> where T: serde::Serialize + serde::de::DeserializeOwned, Fut: Future + Send + Sync + 'static, { + // Determine the deep sleep time + let deep_sleep_time = match ctx.data().enable_journal { + true => Duration::from_micros(100), + false => Duration::from_millis(50), + }; + // Determine if we should process signals or now let process_signals = ctx .data() @@ -476,6 +486,7 @@ where } else { None }; + let deep_sleep_wait = async { if let Some(tasks) = tasks_for_deep_sleep { tasks.sleep_now(deep_sleep_time).await @@ -498,9 +509,20 @@ where _ = deep_sleep_wait => { let pid = ctx.data().pid(); let tid = ctx.data().tid(); + + // We put thread into a deep sleeping state and + // notify anyone who is waiting for that + let thread = ctx.data().thread.clone(); + thread.set_deep_sleeping(true); + ctx.data().process.inner.1.notify_one(); + tracing::trace!(%pid, %tid, "thread entering deep sleep"); deep_sleep::(ctx, Box::pin(async move { + // After this wakes the background work or waking + // event has triggered and its time to result let result = trigger.await; + tracing::trace!(%pid, %tid, "thread leaving deep sleep"); + thread.set_deep_sleeping(false); bincode::serialize(&result).unwrap().into() }))?; AsyncifyAction::Unwind @@ -526,6 +548,8 @@ where T: 'static, Fut: Future>, { + let snapshot_wait = wait_for_snapshot(env); + // This poller will process any signals when the main working function is idle struct Poller<'a, Fut, T> where @@ -533,6 +557,7 @@ where { env: &'a WasiEnv, pinned_work: Pin>, + pinned_snapshot: Pin>>, } impl<'a, Fut, T> Future for Poller<'a, Fut, T> where @@ -543,6 +568,9 @@ where if let Poll::Ready(res) = Pin::new(&mut self.pinned_work).poll(cx) { return Poll::Ready(Ok(res)); } + if let Poll::Ready(()) = Pin::new(&mut self.pinned_snapshot).poll(cx) { + return Poll::Ready(Ok(Err(Errno::Intr))); + } if let Some(exit_code) = self.env.should_exit() { return Poll::Ready(Err(WasiError::Exit(exit_code))); } @@ -989,21 +1017,87 @@ pub(crate) fn deep_sleep( trigger: Pin>, ) -> Result<(), WasiError> { // Grab all the globals and serialize them - let store_data = crate::utils::store::capture_instance_snapshot(&mut ctx.as_store_mut()) + let store_data = crate::utils::store::capture_store_snapshot(&mut ctx.as_store_mut()) .serialize() .unwrap(); let store_data = Bytes::from(store_data); + let thread_start = ctx.data().thread.thread_start_type(); // Perform the unwind action let tasks = ctx.data().tasks().clone(); - let res = unwind::(ctx, move |_ctx, memory_stack, rewind_stack| { + let res = unwind::(ctx, move |mut ctx, memory_stack, rewind_stack| { + let memory_stack = memory_stack.freeze(); + let rewind_stack = rewind_stack.freeze(); + let thread_layout = ctx.data().thread.memory_layout().clone(); + + // If journal'ing is enabled then we dump the stack into the journal + if ctx.data().enable_journal { + // Grab all the globals and serialize them + let store_data = crate::utils::store::capture_store_snapshot(&mut ctx.as_store_mut()) + .serialize() + .unwrap(); + let store_data = Bytes::from(store_data); + + tracing::debug!( + "stack snapshot unwind (memory_stack={}, rewind_stack={}, store_data={})", + memory_stack.len(), + rewind_stack.len(), + store_data.len(), + ); + + #[cfg(feature = "journal")] + { + // Write our thread state to the snapshot + let tid = 
ctx.data().thread.tid(); + let thread_start = ctx.data().thread.thread_start_type(); + if let Err(err) = JournalEffector::save_thread_state::( + &mut ctx, + tid, + memory_stack.clone(), + rewind_stack.clone(), + store_data.clone(), + thread_start, + thread_layout.clone(), + ) { + return wasmer_types::OnCalledAction::Trap(err.into()); + } + } + + // If all the threads are now in a deep sleep state + // then we can trigger the idle snapshot event + let inner = ctx.data().process.inner.clone(); + let is_idle = { + let mut guard = inner.0.lock().unwrap(); + guard.threads.values().all(WasiThread::is_deep_sleeping) + }; + + // When we idle the journal functionality may be set + // will take a snapshot of the memory and threads so + // that it can resumed. + #[cfg(feature = "journal")] + { + if is_idle && ctx.data_mut().has_snapshot_trigger(SnapshotTrigger::Idle) { + let mut guard = inner.0.lock().unwrap(); + if let Err(err) = JournalEffector::save_memory_and_snapshot( + &mut ctx, + &mut guard, + SnapshotTrigger::Idle, + ) { + return wasmer_types::OnCalledAction::Trap(err.into()); + } + } + } + } + // Schedule the process on the stack so that it can be resumed OnCalledAction::Trap(Box::new(WasiError::DeepSleep(DeepSleepWork { trigger, rewind: RewindState { - memory_stack: memory_stack.freeze(), - rewind_stack: rewind_stack.freeze(), + memory_stack, + rewind_stack, store_data, + start: thread_start, + layout: thread_layout, is_64bit: M::is_64bit(), }, }))) @@ -1165,7 +1259,7 @@ where Some(memory_stack), rewind_stack, store_data, - Some(rewind_result), + RewindResultType::RewindWithResult(rewind_result), ) } @@ -1176,7 +1270,7 @@ pub fn rewind_ext( memory_stack: Option, rewind_stack: Bytes, store_data: Bytes, - rewind_result: Option, + rewind_result: RewindResultType, ) -> Errno { // Store the memory stack so that it can be restored later ctx.data_mut().thread.set_rewind(RewindResult { @@ -1185,14 +1279,14 @@ pub fn rewind_ext( }); // Deserialize the store data back into a snapshot - let store_snapshot = match InstanceSnapshot::deserialize(&store_data[..]) { + let store_snapshot = match StoreSnapshot::deserialize(&store_data[..]) { Ok(a) => a, Err(err) => { warn!("snapshot restore failed - the store snapshot could not be deserialized"); return Errno::Unknown; } }; - crate::utils::store::restore_instance_snapshot(ctx, &store_snapshot); + crate::utils::store::restore_store_snapshot(ctx, &store_snapshot); let env = ctx.data(); let memory = match env.try_memory_view(&ctx) { Some(v) => v, @@ -1300,29 +1394,43 @@ pub(crate) unsafe fn handle_rewind( where T: serde::de::DeserializeOwned, { - handle_rewind_ext::(ctx, HandleRewindType::ResultDriven) + handle_rewind_ext::(ctx, HandleRewindType::ResultDriven).flatten() } pub(crate) enum HandleRewindType { /// Handle rewind types that have a result to be processed ResultDriven, - /// Handle rewind types that are resultless (generally these + /// Handle rewind types that are result-less (generally these /// are caused by snapshot events) - Resultless, + ResultLess, } -pub(crate) unsafe fn handle_rewind_ext( +pub(crate) unsafe fn handle_rewind_ext_with_default( ctx: &mut FunctionEnvMut<'_, WasiEnv>, - _type: HandleRewindType, + type_: HandleRewindType, ) -> Option +where + T: serde::de::DeserializeOwned + Default, +{ + let ret = handle_rewind_ext::(ctx, type_); + ret.unwrap_or_default() +} + +pub(crate) unsafe fn handle_rewind_ext( + ctx: &mut FunctionEnvMut<'_, WasiEnv>, + type_: HandleRewindType, +) -> Option> where T: serde::de::DeserializeOwned, { - if 
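When journaling is enabled, a thread entering deep sleep records its stack in the journal, and once every thread of the process is deep-sleeping the armed Idle trigger captures memory and a snapshot. A small stand-in sketch of the gating check (local types only):

use std::collections::BTreeMap;

#[derive(Default)]
struct Thread {
    deep_sleeping: bool,
}

fn idle_snapshot_due(threads: &BTreeMap<u32, Thread>, idle_trigger_armed: bool) -> bool {
    let all_asleep = threads.values().all(|t| t.deep_sleeping);
    all_asleep && idle_trigger_armed
}

fn main() {
    let mut threads = BTreeMap::new();
    threads.insert(1, Thread { deep_sleeping: true });
    threads.insert(2, Thread { deep_sleeping: false });
    assert!(!idle_snapshot_due(&threads, true));

    threads.get_mut(&2).unwrap().deep_sleeping = true;
    assert!(idle_snapshot_due(&threads, true)); // now the snapshot would be taken
}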
!ctx.data().thread.has_rewind_of_type(_type) { + let env = ctx.data(); + if !env.thread.has_rewind_of_type(type_) { return None; }; // If the stack has been restored + let tid = env.tid(); + let pid = env.pid(); if let Some(result) = ctx.data_mut().thread.take_rewind() { // Deserialize the result let memory_stack = result.memory_stack; @@ -1333,7 +1441,7 @@ where asyncify_stop_rewind.call(ctx); } else { warn!("failed to handle rewind because the asyncify_start_rewind export is missing or inaccessible"); - return None; + return Some(None); } // Restore the memory stack @@ -1342,15 +1450,25 @@ where set_memory_stack::(env, &mut store, memory_stack); } - if let Some(rewind_result) = result.rewind_result { - let ret = bincode::deserialize(&rewind_result) - .expect("failed to deserialize the rewind result"); - Some(ret) - } else { - None + match result.rewind_result { + RewindResultType::RewindRestart => { + debug!(%pid, %tid, "rewind for syscall restart"); + None + } + RewindResultType::RewindWithoutResult => { + debug!(%pid, %tid, "rewind with no result"); + Some(None) + } + RewindResultType::RewindWithResult(rewind_result) => { + debug!(%pid, %tid, "rewind with result (data={})", rewind_result.len()); + let ret = bincode::deserialize(&rewind_result) + .expect("failed to deserialize the rewind result"); + Some(Some(ret)) + } } } else { - None + debug!(%pid, %tid, "rewind miss"); + Some(None) } } diff --git a/lib/wasix/src/syscalls/wasi/path_create_directory.rs b/lib/wasix/src/syscalls/wasi/path_create_directory.rs index 8e514f43697..b7263ff21c5 100644 --- a/lib/wasix/src/syscalls/wasi/path_create_directory.rs +++ b/lib/wasix/src/syscalls/wasi/path_create_directory.rs @@ -60,10 +60,12 @@ pub(crate) fn path_create_directory_internal( { let guard = working_dir.inode.read(); if let Kind::Root { .. } = guard.deref() { + trace!("root has no rights to create a directories"); return Err(Errno::Access); } } if !working_dir.rights.contains(Rights::PATH_CREATE_DIRECTORY) { + trace!("working directory (fd={fd}) has no rights to create a directory"); return Err(Errno::Access); } @@ -78,6 +80,7 @@ pub(crate) fn path_create_directory_internal( }) .collect::, Errno>>()?; if path_vec.is_empty() { + trace!("path vector is inva;id (its empty)"); return Err(Errno::Inval); } @@ -118,6 +121,7 @@ pub(crate) fn path_create_directory_internal( &adjusted_path.to_string_lossy(), ) { if adjusted_path_stat.st_filetype != Filetype::Directory { + trace!("path is not a directory"); return Err(Errno::Notdir); } } else { @@ -145,8 +149,14 @@ pub(crate) fn path_create_directory_internal( cur_dir_inode = new_inode; } } - Kind::Root { .. } => return Err(Errno::Access), - _ => return Err(Errno::Notdir), + Kind::Root { .. 
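handle_rewind_ext now distinguishes three outcomes through Option<Option<T>>: no pending rewind of the requested kind (or a restart) gives None, a result-less rewind gives Some(None), and a rewind carrying serialized data gives Some(Some(value)); handle_rewind keeps its old shape by flattening. A sketch of that contract with plain stand-in types:

// Stand-in illustration of the Option<Option<T>> contract (simplified; the
// real functions operate on the WASI thread's rewind state).
enum RewindResultType {
    RewindRestart,
    RewindWithoutResult,
    RewindWithResult(Vec<u8>),
}

fn handle_rewind_ext(pending: Option<RewindResultType>) -> Option<Option<u64>> {
    match pending? {
        // A restart behaves as if no rewind was consumed at all.
        RewindResultType::RewindRestart => None,
        // A rewind happened but carries no payload.
        RewindResultType::RewindWithoutResult => Some(None),
        // A rewind with a payload: decode it (just a length here, for brevity).
        RewindResultType::RewindWithResult(bytes) => Some(Some(bytes.len() as u64)),
    }
}

fn handle_rewind(pending: Option<RewindResultType>) -> Option<u64> {
    // The result-driven wrapper collapses the two layers, as in the patch.
    handle_rewind_ext(pending).flatten()
}

fn main() {
    assert_eq!(handle_rewind(None), None);
    assert_eq!(handle_rewind(Some(RewindResultType::RewindWithoutResult)), None);
    assert_eq!(
        handle_rewind(Some(RewindResultType::RewindWithResult(vec![1, 2, 3]))),
        Some(3)
    );
}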
} => { + trace!("the root node can no create a directory"); + return Err(Errno::Access); + } + _ => { + trace!("path is not a directory"); + return Err(Errno::Notdir); + } } } diff --git a/lib/wasix/src/syscalls/wasi/poll_oneoff.rs b/lib/wasix/src/syscalls/wasi/poll_oneoff.rs index 0fb60b088d0..d9d6e0f942f 100644 --- a/lib/wasix/src/syscalls/wasi/poll_oneoff.rs +++ b/lib/wasix/src/syscalls/wasi/poll_oneoff.rs @@ -442,7 +442,6 @@ where // We use asyncify with a deep sleep to wait on new IO events let res = __asyncify_with_deep_sleep::, Errno>, _>( ctx, - Duration::from_millis(50), Box::pin(trigger), )?; if let AsyncifyAction::Finish(mut ctx, events) = res { diff --git a/lib/wasix/src/syscalls/wasi/thread_spawn.rs b/lib/wasix/src/syscalls/wasi/thread_spawn.rs index 9009aea6010..be8e9de02a3 100644 --- a/lib/wasix/src/syscalls/wasi/thread_spawn.rs +++ b/lib/wasix/src/syscalls/wasi/thread_spawn.rs @@ -21,7 +21,7 @@ pub fn thread_spawn( mut ctx: FunctionEnvMut<'_, WasiEnv>, start_ptr: WasmPtr, M>, ) -> i32 { - thread_spawn_internal(&mut ctx, start_ptr) + thread_spawn_internal_from_wasi(&mut ctx, start_ptr) .map(|tid| tid as i32) .map_err(|errno| errno as i32) .unwrap_or_else(|err| -err) diff --git a/lib/wasix/src/syscalls/wasix/epoll_wait.rs b/lib/wasix/src/syscalls/wasix/epoll_wait.rs index 5642bbb0302..e67675f0be1 100644 --- a/lib/wasix/src/syscalls/wasix/epoll_wait.rs +++ b/lib/wasix/src/syscalls/wasix/epoll_wait.rs @@ -204,7 +204,6 @@ pub fn epoll_wait<'a, M: MemorySize + 'static>( // We use asyncify with a deep sleep to wait on new IO events let res = __asyncify_with_deep_sleep::, Errno>, _>( ctx, - Duration::from_millis(50), Box::pin(trigger), )?; if let AsyncifyAction::Finish(mut ctx, events) = res { diff --git a/lib/wasix/src/syscalls/wasix/futex_wait.rs b/lib/wasix/src/syscalls/wasix/futex_wait.rs index 81711dd3479..69b46f43b27 100644 --- a/lib/wasix/src/syscalls/wasix/futex_wait.rs +++ b/lib/wasix/src/syscalls/wasix/futex_wait.rs @@ -162,8 +162,7 @@ pub(super) fn futex_wait_internal( // We use asyncify on the poller and potentially go into deep sleep tracing::trace!("wait on {futex_idx}"); - let res = - __asyncify_with_deep_sleep::(ctx, Duration::from_millis(50), Box::pin(poller))?; + let res = __asyncify_with_deep_sleep::(ctx, Box::pin(poller))?; if let AsyncifyAction::Finish(ctx, res) = res { let mut env = ctx.data(); let memory = unsafe { env.memory_view(&ctx) }; diff --git a/lib/wasix/src/syscalls/wasix/proc_exec.rs b/lib/wasix/src/syscalls/wasix/proc_exec.rs index 5e3b1d93215..8e2e71f0c51 100644 --- a/lib/wasix/src/syscalls/wasix/proc_exec.rs +++ b/lib/wasix/src/syscalls/wasix/proc_exec.rs @@ -224,17 +224,13 @@ pub fn proc_exec( let thread = env.thread.clone(); // The poller will wait for the process to actually finish - let res = __asyncify_with_deep_sleep::( - ctx, - Duration::from_millis(50), - async move { - process - .wait_finished() - .await - .unwrap_or_else(|_| Errno::Child.into()) - .to_native() - }, - )?; + let res = __asyncify_with_deep_sleep::(ctx, async move { + process + .wait_finished() + .await + .unwrap_or_else(|_| Errno::Child.into()) + .to_native() + })?; match res { AsyncifyAction::Finish(mut ctx, result) => { // When we arrive here the process should already be terminated diff --git a/lib/wasix/src/syscalls/wasix/proc_fork.rs b/lib/wasix/src/syscalls/wasix/proc_fork.rs index 45955b37cff..31092c86e54 100644 --- a/lib/wasix/src/syscalls/wasix/proc_fork.rs +++ b/lib/wasix/src/syscalls/wasix/proc_fork.rs @@ -1,6 +1,6 @@ use super::*; use crate::{ - 
capture_instance_snapshot, + capture_store_snapshot, os::task::OwnedTaskStatus, runtime::task_manager::{TaskWasm, TaskWasmRunProperties}, syscalls::*, @@ -84,10 +84,9 @@ pub fn proc_fork( // Perform the unwind action return unwind::(ctx, move |mut ctx, mut memory_stack, rewind_stack| { // Grab all the globals and serialize them - let store_data = - crate::utils::store::capture_instance_snapshot(&mut ctx.as_store_mut()) - .serialize() - .unwrap(); + let store_data = crate::utils::store::capture_store_snapshot(&mut ctx.as_store_mut()) + .serialize() + .unwrap(); let store_data = Bytes::from(store_data); // We first fork the environment and replace the current environment @@ -130,7 +129,7 @@ pub fn proc_fork( let bin_factory = env.bin_factory.clone(); // Perform the unwind action - let snapshot = capture_instance_snapshot(&mut ctx.as_store_mut()); + let snapshot = capture_store_snapshot(&mut ctx.as_store_mut()); unwind::(ctx, move |mut ctx, mut memory_stack, rewind_stack| { let tasks = ctx.data().tasks().clone(); let span = debug_span!( @@ -201,7 +200,7 @@ pub fn proc_fork( tasks_outer .task_wasm( TaskWasm::new(Box::new(run), child_env, module, false) - .with_snapshot(&snapshot) + .with_globals(&snapshot) .with_memory(spawn_type), ) .map_err(|err| { @@ -238,7 +237,7 @@ fn run( ctx: WasiFunctionEnv, mut store: Store, child_handle: WasiThreadHandle, - rewind_state: Option<(RewindState, Bytes)>, + rewind_state: Option<(RewindState, RewindResultType)>, ) -> ExitCode { let env = ctx.data(&store); let tasks = env.tasks().clone(); @@ -253,7 +252,7 @@ fn run( Some(rewind_state.memory_stack), rewind_state.rewind_stack, rewind_state.store_data, - Some(rewind_result), + rewind_result, ); if res != Errno::Success { return res.into(); @@ -290,7 +289,10 @@ fn run( ctx, store, child_handle, - Some((rewind_state, rewind_result)), + Some(( + rewind_state, + RewindResultType::RewindWithResult(rewind_result), + )), ); } }; diff --git a/lib/wasix/src/syscalls/wasix/proc_join.rs b/lib/wasix/src/syscalls/wasix/proc_join.rs index e7e34662521..3bc85e2cce2 100644 --- a/lib/wasix/src/syscalls/wasix/proc_join.rs +++ b/lib/wasix/src/syscalls/wasix/proc_join.rs @@ -119,28 +119,24 @@ pub(super) fn proc_join_internal( // We wait for any process to exit (if it takes too long // then we go into a deep sleep) - let res = __asyncify_with_deep_sleep::( - ctx, - Duration::from_millis(50), - async move { - let child_exit = process.join_any_child().await; - match child_exit { - Ok(Some((pid, exit_code))) => { - tracing::trace!(%pid, %exit_code, "triggered child join"); - trace!(ret_id = pid.raw(), exit_code = exit_code.raw()); - JoinStatusResult::ExitNormal(pid, exit_code) - } - Ok(None) => { - tracing::trace!("triggered child join (no child)"); - JoinStatusResult::Err(Errno::Child) - } - Err(err) => { - tracing::trace!(%err, "error triggered on child join"); - JoinStatusResult::Err(err) - } + let res = __asyncify_with_deep_sleep::(ctx, async move { + let child_exit = process.join_any_child().await; + match child_exit { + Ok(Some((pid, exit_code))) => { + tracing::trace!(%pid, %exit_code, "triggered child join"); + trace!(ret_id = pid.raw(), exit_code = exit_code.raw()); + JoinStatusResult::ExitNormal(pid, exit_code) } - }, - )?; + Ok(None) => { + tracing::trace!("triggered child join (no child)"); + JoinStatusResult::Err(Errno::Child) + } + Err(err) => { + tracing::trace!(%err, "error triggered on child join"); + JoinStatusResult::Err(err) + } + } + })?; return match res { AsyncifyAction::Finish(ctx, result) => ret_result(ctx, 
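The Duration::from_millis(50) argument disappears from every __asyncify_with_deep_sleep call site because the helper now derives the deep-sleep threshold itself: 100 microseconds when journaling is enabled, 50 milliseconds otherwise. A stand-in sketch of the new call shape (local types; the race-and-unwind machinery is elided):

use std::time::Duration;

struct Env {
    enable_journal: bool,
}

fn deep_sleep_threshold(env: &Env) -> Duration {
    if env.enable_journal {
        Duration::from_micros(100)
    } else {
        Duration::from_millis(50)
    }
}

fn asyncify_with_deep_sleep(env: &Env, work: impl FnOnce() -> u32) -> u32 {
    let threshold = deep_sleep_threshold(env);
    // The real helper races `work` against a sleep of `threshold` and unwinds
    // into a deep sleep if the work has not finished by then; this sketch only
    // shows where the threshold now comes from.
    let _ = threshold;
    work()
}

fn main() {
    let env = Env { enable_journal: true };
    // New call shape: no Duration argument at the call site.
    let exit = asyncify_with_deep_sleep(&env, || 0);
    assert_eq!(exit, 0);
}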
result), AsyncifyAction::Unwind => Ok(Errno::Success), @@ -192,15 +188,11 @@ pub(super) fn proc_join_internal( } else { // Wait for the process to finish let process2 = process.clone(); - let res = __asyncify_with_deep_sleep::( - ctx, - Duration::from_millis(50), - async move { - let exit_code = process.join().await.unwrap_or_else(|_| Errno::Child.into()); - tracing::trace!(%exit_code, "triggered child join"); - JoinStatusResult::ExitNormal(pid, exit_code) - }, - )?; + let res = __asyncify_with_deep_sleep::(ctx, async move { + let exit_code = process.join().await.unwrap_or_else(|_| Errno::Child.into()); + tracing::trace!(%exit_code, "triggered child join"); + JoinStatusResult::ExitNormal(pid, exit_code) + })?; match res { AsyncifyAction::Finish(ctx, result) => ret_result(ctx, result), AsyncifyAction::Unwind => Ok(Errno::Success), diff --git a/lib/wasix/src/syscalls/wasix/sock_accept.rs b/lib/wasix/src/syscalls/wasix/sock_accept.rs index 9ce6b75a63c..727bf85bde3 100644 --- a/lib/wasix/src/syscalls/wasix/sock_accept.rs +++ b/lib/wasix/src/syscalls/wasix/sock_accept.rs @@ -31,7 +31,7 @@ pub fn sock_accept( let nonblocking = fd_flags.contains(Fdflags::NONBLOCK); - let (fd, addr) = wasi_try_ok!(sock_accept_internal(env, sock, fd_flags, nonblocking)?); + let (fd, _, _) = wasi_try_ok!(sock_accept_internal(env, sock, fd_flags, nonblocking)?); wasi_try_mem_ok!(ro_fd.write(&memory, fd)); @@ -66,15 +66,24 @@ pub fn sock_accept_v2( let nonblocking = fd_flags.contains(Fdflags::NONBLOCK); - let (fd, addr) = wasi_try_ok!(sock_accept_internal(env, sock, fd_flags, nonblocking)?); + let (fd, local_addr, peer_addr) = + wasi_try_ok!(sock_accept_internal(env, sock, fd_flags, nonblocking)?); #[cfg(feature = "journal")] if ctx.data().enable_journal { - JournalEffector::save_sock_accepted(&mut ctx, sock, fd, addr, fd_flags, nonblocking) - .map_err(|err| { - tracing::error!("failed to save sock_accepted event - {}", err); - WasiError::Exit(ExitCode::Errno(Errno::Fault)) - })?; + JournalEffector::save_sock_accepted( + &mut ctx, + sock, + fd, + local_addr, + peer_addr, + fd_flags, + nonblocking, + ) + .map_err(|err| { + tracing::error!("failed to save sock_accepted event - {}", err); + WasiError::Exit(ExitCode::Errno(Errno::Fault)) + })?; } let env = ctx.data(); @@ -83,8 +92,8 @@ pub fn sock_accept_v2( wasi_try_ok!(crate::net::write_ip_port( &memory, ro_addr, - addr.ip(), - addr.port() + peer_addr.ip(), + peer_addr.port() )); Ok(Errno::Success) @@ -95,12 +104,12 @@ pub(crate) fn sock_accept_internal( sock: WasiFd, mut fd_flags: Fdflags, mut nonblocking: bool, -) -> Result, WasiError> { +) -> Result, WasiError> { let state = env.state(); let inodes = &state.inodes; let tasks = env.tasks().clone(); - let (child, addr, fd_flags) = wasi_try_ok_ok!(__sock_asyncify( + let (child, local_addr, peer_addr, fd_flags) = wasi_try_ok_ok!(__sock_asyncify( env, sock, Rights::SOCK_ACCEPT, @@ -114,10 +123,11 @@ pub(crate) fn sock_accept_internal( .ok() .flatten() .unwrap_or(Duration::from_secs(30)); + let local_addr = socket.addr_local()?; socket .accept(tasks.deref(), nonblocking, Some(timeout)) .await - .map(|a| (a.0, a.1, fd_flags)) + .map(|a| (a.0, local_addr, a.1, fd_flags)) }, )); @@ -146,5 +156,5 @@ pub(crate) fn sock_accept_internal( let fd = wasi_try_ok_ok!(state.fs.create_fd(rights, rights, new_flags, 0, inode)); Span::current().record("fd", fd); - Ok(Ok((fd, addr))) + Ok(Ok((fd, local_addr, peer_addr))) } diff --git a/lib/wasix/src/syscalls/wasix/sock_connect.rs b/lib/wasix/src/syscalls/wasix/sock_connect.rs index 
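sock_accept_internal now reports the accepted descriptor together with both the local and the peer address, so the SocketAcceptedV1 journal record can keep the full connection tuple, while sock_accept_v2 still writes only the peer address back to guest memory. A sketch of the widened return with stand-in types (simplified; the real code returns WASI fds and uses the socket's addr_local()):

use std::net::{IpAddr, Ipv4Addr, SocketAddr};

struct Accepted {
    fd: u32,
    local_addr: SocketAddr,
    peer_addr: SocketAddr,
}

fn sock_accept_internal() -> Accepted {
    // Placeholder values standing in for the accepted connection.
    Accepted {
        fd: 10,
        local_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 8080),
        peer_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(192, 0, 2, 7)), 51000),
    }
}

fn main() {
    let Accepted { fd, local_addr, peer_addr } = sock_accept_internal();
    // The journal record keeps both ends; the guest only sees fd + peer addr.
    println!("journal: accepted fd={fd} local={local_addr} peer={peer_addr}");
    println!("guest sees: fd={fd}, peer={}:{}", peer_addr.ip(), peer_addr.port());
}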
03e51ec28b4..e935bec8627 100644 --- a/lib/wasix/src/syscalls/wasix/sock_connect.rs +++ b/lib/wasix/src/syscalls/wasix/sock_connect.rs @@ -22,17 +22,25 @@ pub fn sock_connect( let env = ctx.data(); let memory = unsafe { env.memory_view(&ctx) }; let addr = wasi_try_ok!(crate::net::read_ip_port(&memory, addr)); - let addr = SocketAddr::new(addr.0, addr.1); - Span::current().record("addr", &format!("{:?}", addr)); + let peer_addr = SocketAddr::new(addr.0, addr.1); + Span::current().record("addr", &format!("{:?}", peer_addr)); - wasi_try_ok!(sock_connect_internal(&mut ctx, sock, addr)?); + wasi_try_ok!(sock_connect_internal(&mut ctx, sock, peer_addr)?); #[cfg(feature = "journal")] if ctx.data().enable_journal { - JournalEffector::save_sock_connect(&mut ctx, sock, addr).map_err(|err| { - tracing::error!("failed to save sock_connected event - {}", err); - WasiError::Exit(ExitCode::Errno(Errno::Fault)) - })?; + let local_addr = wasi_try_ok!(__sock_actor( + &mut ctx, + sock, + Rights::empty(), + |socket, _| socket.addr_local() + )); + JournalEffector::save_sock_connect(&mut ctx, sock, local_addr, peer_addr).map_err( + |err| { + tracing::error!("failed to save sock_connected event - {}", err); + WasiError::Exit(ExitCode::Errno(Errno::Fault)) + }, + )?; } Ok(Errno::Success) diff --git a/lib/wasix/src/syscalls/wasix/sock_open.rs b/lib/wasix/src/syscalls/wasix/sock_open.rs index dfd0678d8fd..02eb651c8de 100644 --- a/lib/wasix/src/syscalls/wasix/sock_open.rs +++ b/lib/wasix/src/syscalls/wasix/sock_open.rs @@ -1,5 +1,5 @@ use super::*; -use crate::syscalls::*; +use crate::{net::socket::SocketProperties, syscalls::*}; /// ### `sock_open()` /// Create an endpoint for communication. @@ -72,23 +72,25 @@ pub(crate) fn sock_open_internal( let kind = match ty { Socktype::Stream | Socktype::Dgram => Kind::Socket { socket: InodeSocket::new(InodeSocketKind::PreSocket { - family: af, - ty, - pt, + props: SocketProperties { + family: af, + ty, + pt, + only_v6: false, + reuse_port: false, + reuse_addr: false, + no_delay: None, + keep_alive: None, + dont_route: None, + send_buf_size: None, + recv_buf_size: None, + write_timeout: None, + read_timeout: None, + accept_timeout: None, + connect_timeout: None, + handler: None, + }, addr: None, - only_v6: false, - reuse_port: false, - reuse_addr: false, - no_delay: None, - keep_alive: None, - dont_route: None, - send_buf_size: None, - recv_buf_size: None, - write_timeout: None, - read_timeout: None, - accept_timeout: None, - connect_timeout: None, - handler: None, }), }, _ => return Ok(Err(Errno::Notsup)), diff --git a/lib/wasix/src/syscalls/wasix/stack_checkpoint.rs b/lib/wasix/src/syscalls/wasix/stack_checkpoint.rs index 84fefad10ba..7100ce79c13 100644 --- a/lib/wasix/src/syscalls/wasix/stack_checkpoint.rs +++ b/lib/wasix/src/syscalls/wasix/stack_checkpoint.rs @@ -45,7 +45,7 @@ pub fn stack_checkpoint( // Perform the unwind action unwind::(ctx, move |mut ctx, mut memory_stack, rewind_stack| { // Grab all the globals and serialize them - let store_data = crate::utils::store::capture_instance_snapshot(&mut ctx.as_store_mut()) + let store_data = crate::utils::store::capture_store_snapshot(&mut ctx.as_store_mut()) .serialize() .unwrap(); let env = ctx.data(); diff --git a/lib/wasix/src/syscalls/wasix/stack_restore.rs b/lib/wasix/src/syscalls/wasix/stack_restore.rs index 66bcce7c144..b601d58f229 100644 --- a/lib/wasix/src/syscalls/wasix/stack_restore.rs +++ b/lib/wasix/src/syscalls/wasix/stack_restore.rs @@ -49,7 +49,7 @@ pub fn stack_restore( None, // we do not restore the 
thread memory as `longjmp`` is not meant to do this rewind_stack, store_data, - Some(rewind_result), + RewindResultType::RewindWithResult(rewind_result), ); match ret { Errno::Success => OnCalledAction::InvokeAgain, diff --git a/lib/wasix/src/syscalls/wasix/thread_join.rs b/lib/wasix/src/syscalls/wasix/thread_join.rs index 2be67a229a3..9b9c87816b1 100644 --- a/lib/wasix/src/syscalls/wasix/thread_join.rs +++ b/lib/wasix/src/syscalls/wasix/thread_join.rs @@ -33,18 +33,17 @@ pub(super) fn thread_join_internal( let tid: WasiThreadId = join_tid.into(); let other_thread = env.process.get_thread(&tid); if let Some(other_thread) = other_thread { - let res = - __asyncify_with_deep_sleep::(ctx, Duration::from_millis(50), async move { - other_thread - .join() - .await - .map_err(|err| { - err.as_exit_code() - .unwrap_or(ExitCode::Errno(Errno::Unknown)) - }) - .unwrap_or_else(|a| a) - .raw() - })?; + let res = __asyncify_with_deep_sleep::(ctx, async move { + other_thread + .join() + .await + .map_err(|err| { + err.as_exit_code() + .unwrap_or(ExitCode::Errno(Errno::Unknown)) + }) + .unwrap_or_else(|a| a) + .raw() + })?; Ok(Errno::Success) } else { Ok(Errno::Success) diff --git a/lib/wasix/src/syscalls/wasix/thread_sleep.rs b/lib/wasix/src/syscalls/wasix/thread_sleep.rs index c061bbaa669..4925d4d0c79 100644 --- a/lib/wasix/src/syscalls/wasix/thread_sleep.rs +++ b/lib/wasix/src/syscalls/wasix/thread_sleep.rs @@ -40,10 +40,9 @@ pub(crate) fn thread_sleep_internal( if duration > 0 { let duration = Duration::from_nanos(duration); let tasks = env.tasks().clone(); - let res = - __asyncify_with_deep_sleep::(ctx, Duration::from_millis(50), async move { - tasks.sleep_now(duration).await; - })?; + let res = __asyncify_with_deep_sleep::(ctx, async move { + tasks.sleep_now(duration).await; + })?; } Ok(Errno::Success) } diff --git a/lib/wasix/src/syscalls/wasix/thread_spawn.rs b/lib/wasix/src/syscalls/wasix/thread_spawn.rs index d2249089e33..f30d2d772fa 100644 --- a/lib/wasix/src/syscalls/wasix/thread_spawn.rs +++ b/lib/wasix/src/syscalls/wasix/thread_spawn.rs @@ -4,7 +4,7 @@ use super::*; #[cfg(feature = "journal")] use crate::journal::JournalEffector; use crate::{ - capture_instance_snapshot, + capture_store_snapshot, os::task::thread::WasiMemoryLayout, runtime::{ task_manager::{TaskWasm, TaskWasmRunProperties}, @@ -37,7 +37,7 @@ pub fn thread_spawn_v2( ret_tid: WasmPtr, ) -> Errno { // Create the thread - let tid = wasi_try!(thread_spawn_internal(&mut ctx, start_ptr)); + let tid = wasi_try!(thread_spawn_internal_from_wasi(&mut ctx, start_ptr)); // Success let memory = unsafe { ctx.data().memory_view(&ctx) }; @@ -45,7 +45,7 @@ pub fn thread_spawn_v2( Errno::Success } -pub(crate) fn thread_spawn_internal( +pub fn thread_spawn_internal_from_wasi( ctx: &mut FunctionEnvMut<'_, WasiEnv>, start_ptr: WasmPtr, M>, ) -> Result { @@ -56,9 +56,6 @@ pub(crate) fn thread_spawn_internal( let tasks = env.tasks().clone(); let start_ptr_offset = start_ptr.offset(); - // We extract the memory which will be passed to the thread - let thread_memory = unsafe { env.inner() }.memory_clone(); - // Read the properties about the stack which we will use for asyncify let layout = { let start: ThreadStart = start_ptr.read(&memory).map_err(mem_error_to_wasi)?; @@ -78,7 +75,10 @@ pub(crate) fn thread_spawn_internal( tracing::trace!("spawn with layout {:?}", layout); // Create the handle that represents this thread - let mut thread_handle = match env.process.new_thread(layout.clone()) { + let thread_start = ThreadStartType::ThreadSpawn { + 
start_ptr: start_ptr_offset.into(), + }; + let mut thread_handle = match env.process.new_thread(layout.clone(), thread_start) { Ok(h) => Arc::new(h), Err(err) => { error!( @@ -92,6 +92,25 @@ pub(crate) fn thread_spawn_internal( let thread_id: Tid = thread_handle.id().into(); Span::current().record("tid", thread_id); + // Spawn the thread + thread_spawn_internal_using_layout::(ctx, thread_handle, layout, start_ptr_offset, None)?; + + // Success + Ok(thread_id) +} + +pub fn thread_spawn_internal_using_layout( + ctx: &mut FunctionEnvMut<'_, WasiEnv>, + thread_handle: Arc, + layout: WasiMemoryLayout, + start_ptr_offset: M::Offset, + rewind_state: Option<(RewindState, RewindResultType)>, +) -> Result<(), Errno> { + // We extract the memory which will be passed to the thread + let env = ctx.data(); + let tasks = env.tasks().clone(); + let thread_memory = unsafe { env.inner() }.memory_clone(); + // We capture some local variables let state = env.state.clone(); let mut thread_env = env.clone(); @@ -99,7 +118,7 @@ pub(crate) fn thread_spawn_internal( thread_env.layout = layout; // TODO: Currently asynchronous threading does not work with multi - // threading but it does work for the main thread. This will + // threading in JS but it does work for the main thread. This will // require more work to find out why. thread_env.enable_deep_sleep = if cfg!(feature = "js") { false @@ -113,7 +132,7 @@ pub(crate) fn thread_spawn_internal( let thread_handle = thread_handle; move |ctx: WasiFunctionEnv, mut store: Store| { // Call the thread - call_module::(ctx, store, start_ptr_offset, thread_handle, None) + call_module::(ctx, store, start_ptr_offset, thread_handle, rewind_state) } }; @@ -124,7 +143,7 @@ pub(crate) fn thread_spawn_internal( return Err(Errno::Notcapable); } let thread_module = unsafe { env.inner() }.module_clone(); - let snapshot = capture_instance_snapshot(&mut ctx.as_store_mut()); + let globals = capture_store_snapshot(&mut ctx.as_store_mut()); let spawn_type = crate::runtime::SpawnMemoryType::ShareMemory(thread_memory, ctx.as_store_ref()); @@ -136,13 +155,13 @@ pub(crate) fn thread_spawn_internal( tasks .task_wasm( TaskWasm::new(Box::new(run), thread_env, thread_module, false) - .with_snapshot(&snapshot) + .with_globals(&globals) .with_memory(spawn_type), ) .map_err(Into::::into)?; // Success - Ok(thread_id) + Ok(()) } /// Calls the module @@ -151,7 +170,7 @@ fn call_module( mut store: Store, start_ptr_offset: M::Offset, thread_handle: Arc, - rewind_state: Option<(RewindState, Bytes)>, + rewind_state: Option<(RewindState, RewindResultType)>, ) -> Result { let env = ctx.data(&store); let tasks = env.tasks().clone(); @@ -223,7 +242,7 @@ fn call_module( Some(rewind_state.memory_stack), rewind_state.rewind_stack, rewind_state.store_data, - Some(rewind_result), + rewind_result, ); if res != Errno::Success { return Err(res); @@ -252,7 +271,7 @@ fn call_module( store, start_ptr_offset, thread_handle, - Some((rewind, trigger_res)), + Some((rewind, RewindResultType::RewindWithResult(trigger_res))), ); } }; diff --git a/lib/wasix/src/utils/store.rs b/lib/wasix/src/utils/store.rs index 1fac581ba90..fdfb6ae2fb1 100644 --- a/lib/wasix/src/utils/store.rs +++ b/lib/wasix/src/utils/store.rs @@ -1,11 +1,11 @@ /// A snapshot that captures the runtime state of an instance. #[derive(Default, serde::Serialize, serde::Deserialize, Clone, Debug)] -pub struct InstanceSnapshot { +pub struct StoreSnapshot { /// Values of all globals, indexed by the same index used in Webassembly. 
     pub globals: Vec,
 }

-impl InstanceSnapshot {
+impl StoreSnapshot {
     pub fn serialize(&self) -> Result, bincode::Error> {
         bincode::serialize(self)
     }
@@ -15,13 +15,13 @@ impl InstanceSnapshot {
     }
 }

-pub fn capture_instance_snapshot(store: &mut impl wasmer::AsStoreMut) -> InstanceSnapshot {
+pub fn capture_store_snapshot(store: &mut impl wasmer::AsStoreMut) -> StoreSnapshot {
     let objs = store.objects_mut();
     let globals = objs.as_u128_globals();
-    InstanceSnapshot { globals }
+    StoreSnapshot { globals }
 }

-pub fn restore_instance_snapshot(store: &mut impl wasmer::AsStoreMut, snapshot: &InstanceSnapshot) {
+pub fn restore_store_snapshot(store: &mut impl wasmer::AsStoreMut, snapshot: &StoreSnapshot) {
     let objs = store.objects_mut();
     for (index, value) in snapshot.globals.iter().enumerate() {
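// Editor's note (illustrative sketch, not part of the patch): the renamed
// capture_store_snapshot / restore_store_snapshot helpers shown in this file are
// what proc_fork, stack_checkpoint and thread_spawn use above to carry mutable
// globals across an unwind/rewind. A minimal round trip might look like the
// following; the WAT module and the crate-root import paths are assumptions
// made for the example, not something this diff guarantees.
use wasmer::{imports, Instance, Module, Store, Value};
use wasmer_wasix::{capture_store_snapshot, restore_store_snapshot};

fn snapshot_roundtrip() -> Result<(), Box<dyn std::error::Error>> {
    // A trivial module with one mutable, exported global we can snapshot.
    let mut store = Store::default();
    let module = Module::new(
        &store,
        r#"(module (global $g (export "g") (mut i32) (i32.const 0)))"#,
    )?;
    let instance = Instance::new(&mut store, &module, &imports! {})?;
    let g = instance.exports.get_global("g")?;

    // Mutate the global, then capture every global currently in the store.
    g.set(&mut store, Value::I32(42))?;
    let snapshot = capture_store_snapshot(&mut store);

    // The snapshot serializes via bincode; this is how the fork/checkpoint
    // syscalls ship the store data across the unwind boundary.
    let _bytes = snapshot.serialize()?;

    // Clobber the global and then write the captured values back.
    g.set(&mut store, Value::I32(0))?;
    restore_store_snapshot(&mut store, &snapshot);
    match g.get(&mut store) {
        Value::I32(v) => assert_eq!(v, 42),
        other => panic!("unexpected global value: {:?}", other),
    }
    Ok(())
}
// End of editor's note; the patch continues below.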