From 1ee2af00982f2d7d3dc815434e1641446a9599c6 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 27 Sep 2021 12:27:19 -0500 Subject: [PATCH] Remove the lightbeam backend (#3390) This commit removes the Lightbeam backend from Wasmtime as per [RFC 14]. This backend hasn't received maintenance in quite some time, and as [RFC 14] indicates this doesn't meet the threshold for keeping the code in-tree, so this commit removes it. A fast "baseline" compiler may still be added in the future. The addition of such a backend should be in line with [RFC 14], though, with the principles we now have for stable releases of Wasmtime. I'll close out Lightbeam-related issues once this is merged. [RFC 14]: https://github.com/bytecodealliance/rfcs/pull/14 --- .github/workflows/main.yml | 13 +- Cargo.lock | 145 - Cargo.toml | 4 +- README.md | 4 +- build.rs | 14 +- ci/run-tests.sh | 1 - .../codegen/src/isa/x64/inst/emit_tests.rs | 4 +- crates/c-api/Cargo.toml | 1 - crates/c-api/include/wasmtime/config.h | 12 +- crates/c-api/src/config.rs | 2 - crates/fuzzing/src/generators.rs | 12 +- crates/fuzzing/src/oracles.rs | 2 +- crates/lightbeam/Cargo.toml | 36 - crates/lightbeam/LICENSE | 220 - crates/lightbeam/README.md | 168 - crates/lightbeam/examples/test.rs | 16 - crates/lightbeam/src/backend.rs | 6134 ----------------- crates/lightbeam/src/benches.rs | 112 - crates/lightbeam/src/disassemble.rs | 57 - crates/lightbeam/src/error.rs | 29 - crates/lightbeam/src/function_body.rs | 902 --- crates/lightbeam/src/lib.rs | 24 - crates/lightbeam/src/microwasm.rs | 2386 ------- crates/lightbeam/src/module.rs | 570 -- crates/lightbeam/src/translate_sections.rs | 124 - crates/lightbeam/tests/quickchecks.rs | 654 -- crates/lightbeam/tests/wrongs.rs | 45 - crates/lightbeam/wasmtime/Cargo.toml | 22 - crates/lightbeam/wasmtime/LICENSE | 220 - crates/lightbeam/wasmtime/README.md | 4 - crates/lightbeam/wasmtime/src/lib.rs | 370 - crates/wasmtime/Cargo.toml | 3 - crates/wasmtime/src/config.rs | 15 +- crates/wasmtime/src/module/serialization.rs | 20 - crates/wast/src/wast.rs | 4 - docs/contributing-architecture.md | 8 +- fuzz/fuzz_targets/compile.rs | 5 - scripts/publish.rs | 5 +- src/lib.rs | 25 +- 39 files changed, 20 insertions(+), 12372 deletions(-) delete mode 100644 crates/lightbeam/Cargo.toml delete mode 100644 crates/lightbeam/LICENSE delete mode 100644 crates/lightbeam/README.md delete mode 100644 crates/lightbeam/examples/test.rs delete mode 100644 crates/lightbeam/src/backend.rs delete mode 100644 crates/lightbeam/src/benches.rs delete mode 100644 crates/lightbeam/src/disassemble.rs delete mode 100644 crates/lightbeam/src/error.rs delete mode 100644 crates/lightbeam/src/function_body.rs delete mode 100644 crates/lightbeam/src/lib.rs delete mode 100644 crates/lightbeam/src/microwasm.rs delete mode 100644 crates/lightbeam/src/module.rs delete mode 100644 crates/lightbeam/src/translate_sections.rs delete mode 100644 crates/lightbeam/tests/quickchecks.rs delete mode 100644 crates/lightbeam/tests/wrongs.rs delete mode 100644 crates/lightbeam/wasmtime/Cargo.toml delete mode 100644 crates/lightbeam/wasmtime/LICENSE delete mode 100644 crates/lightbeam/wasmtime/README.md delete mode 100644 crates/lightbeam/wasmtime/src/lib.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e5a0ea5ca372..500c743f978f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -131,7 +131,6 @@ jobs: # Check some feature combinations of the `wasmtime` crate - run: cargo check -p wasmtime --no-default-features - run: cargo check -p wasmtime --no-default-features --features wat - - run: cargo check -p wasmtime --no-default-features --features lightbeam - run: cargo check -p wasmtime --no-default-features --features jitdump - run: cargo check -p wasmtime --no-default-features --features vtune - run: cargo check -p wasmtime --no-default-features --features cache @@ -307,7 +306,7 @@ jobs: RUST_BACKTRACE: 1 if: matrix.target == '' - # Build and test all features except for lightbeam + # Build and test all features - run: ./ci/run-tests.sh --locked env: RUST_BACKTRACE: 1 @@ -329,16 +328,6 @@ jobs: env: RUST_BACKTRACE: 1 - # Build and test lightbeam. Note that - # Lightbeam tests fail right now, but we don't want to block on that. - - run: cargo build --package lightbeam - if: matrix.target != 'aarch64-unknown-linux-gnu' && matrix.target != 's390x-unknown-linux-gnu' - - run: cargo test --package lightbeam - if: matrix.target != 'aarch64-unknown-linux-gnu' && matrix.target != 's390x-unknown-linux-gnu' - continue-on-error: true - env: - RUST_BACKTRACE: 1 - # Perform all tests (debug mode) for `wasmtime` with the old x86 backend. test_x86: name: Test old x86 backend diff --git a/Cargo.lock b/Cargo.lock index 6d7b99926c89..a2e80da3ea63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,12 +125,6 @@ dependencies = [ "derive_arbitrary", ] -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - [[package]] name = "async-trait" version = "0.1.50" @@ -526,12 +520,6 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f6b64db6932c7e49332728e3a6bd82c6b7e16016607d20923b537c3bc4c0d5f" -[[package]] -name = "convert_case" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" - [[package]] name = "cpp_demangle" version = "0.3.2" @@ -990,29 +978,6 @@ dependencies = [ "syn", ] -[[package]] -name = "derive_more" -version = "0.99.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc7b9cef1e351660e5443924e4f43ab25fbbed3e9a5f052df3677deb4d6b320" -dependencies = [ - "convert_case", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "derive_utils" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "532b4c15dccee12c7044f1fcad956e98410860b22231e44a3b827464797ca7bf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "digest" version = "0.9.0" @@ -1059,32 +1024,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" -[[package]] -name = "dynasm" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdc2d9a5e44da60059bd38db2d05cbb478619541b8c79890547861ec1e3194f0" -dependencies = [ - "bitflags", - "byteorder", - "lazy_static", - "proc-macro-error", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "dynasmrt" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42276e3f205fe63887cca255aa9a65a63fb72764c30b9a6252a7c7e46994f689" -dependencies = [ - "byteorder", - "dynasm", - "memmap2", -] - [[package]] name = "ecdsa" version = "0.10.2" @@ -1494,17 +1433,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135" -[[package]] -name = "iter-enum" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f947f0d9df7e69c4df60a950c0a83741455bb9ebd8fd9b5a87994dda4dbb005" -dependencies = [ - "derive_utils", - "quote", - "syn", -] - [[package]] name = "itertools" version = "0.9.0" @@ -1622,30 +1550,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7d73b3f436185384286bd8098d17ec07c9a7d2388a6599f824d8502b529702a" -[[package]] -name = "lightbeam" -version = "0.30.0" -dependencies = [ - "anyhow", - "arrayvec", - "capstone", - "cranelift-codegen", - "derive_more", - "dynasm", - "dynasmrt", - "iter-enum", - "itertools 0.10.0", - "lazy_static", - "memoffset", - "more-asserts", - "quickcheck", - "smallvec", - "thiserror", - "typemap", - "wasmparser", - "wat", -] - [[package]] name = "linux-raw-sys" version = "0.0.24" @@ -2342,17 +2246,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" -[[package]] -name = "quickcheck" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "env_logger 0.8.3", - "log", - "rand 0.8.3", -] - [[package]] name = "quote" version = "1.0.9" @@ -3178,21 +3071,6 @@ dependencies = [ "tracing-serde", ] -[[package]] -name = "traitobject" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd1f82c56340fdf16f2a953d7bda4f8fdffba13d93b00844c25572110b26079" - -[[package]] -name = "typemap" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "653be63c80a3296da5551e1bfd2cca35227e13cdd08c6668903ae2f4f77aa1f6" -dependencies = [ - "unsafe-any", -] - [[package]] name = "typenum" version = "1.13.0" @@ -3227,15 +3105,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "unsafe-any" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30360d7979f5e9c6e6cea48af192ea8fab4afb3cf72597154b8f08935bc9c7f" -dependencies = [ - "traitobject", -] - [[package]] name = "unsafe-io" version = "0.9.1" @@ -3797,20 +3666,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "wasmtime-lightbeam" -version = "0.30.0" -dependencies = [ - "anyhow", - "cranelift-codegen", - "gimli", - "lightbeam", - "object", - "target-lexicon", - "wasmparser", - "wasmtime-environ", -] - [[package]] name = "wasmtime-runtime" version = "0.30.0" diff --git a/Cargo.toml b/Cargo.toml index bb7806a973c7..08e0c0eb7201 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,7 +75,6 @@ members = [ "cranelift", "crates/bench-api", "crates/c-api", - "crates/lightbeam/wasmtime", "crates/misc/run-examples", "examples/fib-debug/wasm", "examples/wasi/wasm", @@ -83,13 +82,12 @@ members = [ "fuzz", ] exclude = [ - 'crates/wasi-common/WASI/tools/witx-cli', + 'crates/wasi-common/WASI/tools/witx-cli', 'docs/rust_wasi_markdown_parser' ] [features] default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation", "wasi-nn"] -lightbeam = ["wasmtime/lightbeam"] jitdump = ["wasmtime/jitdump"] vtune = ["wasmtime/vtune"] wasi-crypto = ["wasmtime-wasi-crypto"] diff --git a/README.md b/README.md index 302883cd263b..f5151c2acd12 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,8 @@ Hello, world! quickly generate high-quality machine code at runtime. * **Configurable**. Whether you need to precompile your wasm ahead of time, - generate code blazingly fast with Lightbeam, or interpret it at runtime, - Wasmtime has you covered for all your wasm-executing needs. + or interpret it at runtime, Wasmtime has you covered for all your + wasm-executing needs. * **WASI**. Wasmtime supports a rich set of APIs for interacting with the host environment through the [WASI standard](https://wasi.dev). diff --git a/build.rs b/build.rs index b3dc1fd8ea86..06f0669cdf0a 100644 --- a/build.rs +++ b/build.rs @@ -17,11 +17,7 @@ fn main() -> anyhow::Result<()> { ); let mut out = String::new(); - for strategy in &[ - "Cranelift", - #[cfg(feature = "lightbeam")] - "Lightbeam", - ] { + for strategy in &["Cranelift"] { writeln!(out, "#[cfg(test)]")?; writeln!(out, "#[allow(non_snake_case)]")?; writeln!(out, "mod {} {{", strategy)?; @@ -185,14 +181,6 @@ fn write_testsuite_tests( /// Ignore tests that aren't supported yet. fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { match strategy { - #[cfg(feature = "lightbeam")] - "Lightbeam" => match (testsuite, testname) { - ("simd", _) => return true, - ("multi_value", _) => return true, - ("reference_types", _) => return true, - ("bulk_memory_operations", _) => return true, - _ => (), - }, "Cranelift" => match (testsuite, testname) { // Skip all reference types tests on the old backend. The modern // implementation of reference types uses atomic instructions diff --git a/ci/run-tests.sh b/ci/run-tests.sh index 15b2cabc0379..7bd661ffe564 100755 --- a/ci/run-tests.sh +++ b/ci/run-tests.sh @@ -3,7 +3,6 @@ cargo test \ --features "test-programs/test_programs" \ --workspace \ - --exclude '*lightbeam*' \ --exclude 'wasmtime-wasi-*' \ --exclude 'peepmatic*' \ --exclude wasi-crypto \ diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index c02593eab25c..1a811911414f 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -8,8 +8,8 @@ //! //! cd to the top of your wasmtime tree, then: //! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \ -//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \ -//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \ +//! --features experimental_x64 --all --exclude peepmatic \ +//! --exclude peepmatic-automata --exclude peepmatic-fuzzing \ //! --exclude peepmatic-macro --exclude wasmtime-wasi-nn -- isa::x64::inst::emit_tests::test_x64_emit use super::*; diff --git a/crates/c-api/Cargo.toml b/crates/c-api/Cargo.toml index 39e99eb1837e..97a0d759ae13 100644 --- a/crates/c-api/Cargo.toml +++ b/crates/c-api/Cargo.toml @@ -34,7 +34,6 @@ cap-std = { version = "0.19.1", optional = true } [features] default = ['jitdump', 'wat', 'wasi', 'cache'] -lightbeam = ["wasmtime/lightbeam"] jitdump = ["wasmtime/jitdump"] cache = ["wasmtime/cache"] wasi = ['wasi-common', 'wasi-cap-std-sync', 'wasmtime-wasi', 'cap-std'] diff --git a/crates/c-api/include/wasmtime/config.h b/crates/c-api/include/wasmtime/config.h index 0150ee01a59a..9f90059b0959 100644 --- a/crates/c-api/include/wasmtime/config.h +++ b/crates/c-api/include/wasmtime/config.h @@ -26,19 +26,13 @@ typedef uint8_t wasmtime_strategy_t; * The default value is #WASMTIME_STRATEGY_AUTO. */ enum wasmtime_strategy_enum { // Strategy - /// Wasmtime will automatically determine whether to use Cranelift or - /// Lightbeam, and currently it will always pick Cranelift. This default may - /// change over time though. + /// Automatically picks the compilation backend, currently always defaulting + /// to Cranelift. WASMTIME_STRATEGY_AUTO, - /// Indicates that Cranelift will unconditionally use Cranelift to compile + /// Indicates that Wasmtime will unconditionally use Cranelift to compile /// WebAssembly code. WASMTIME_STRATEGY_CRANELIFT, - - /// Indicates that Cranelift will unconditionally use Lightbeam to compile - /// WebAssembly code. Note that Lightbeam isn't always enabled at compile - /// time, and if that's the case an error will be returned. - WASMTIME_STRATEGY_LIGHTBEAM, }; /** diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index 6386ca6c9116..64c3a881317b 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -20,7 +20,6 @@ wasmtime_c_api_macros::declare_own!(wasm_config_t); pub enum wasmtime_strategy_t { WASMTIME_STRATEGY_AUTO, WASMTIME_STRATEGY_CRANELIFT, - WASMTIME_STRATEGY_LIGHTBEAM, } #[repr(u8)] @@ -114,7 +113,6 @@ pub extern "C" fn wasmtime_config_strategy_set( let result = c.config.strategy(match strategy { WASMTIME_STRATEGY_AUTO => Strategy::Auto, WASMTIME_STRATEGY_CRANELIFT => Strategy::Cranelift, - WASMTIME_STRATEGY_LIGHTBEAM => Strategy::Lightbeam, }); handle_result(result, |_cfg| {}) } diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 242a5bc3bb34..10eaa86844ae 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -18,7 +18,6 @@ use arbitrary::{Arbitrary, Unstructured}; /// testing between. #[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] pub struct DifferentialConfig { - strategy: DifferentialStrategy, opt_level: OptLevel, force_jump_veneers: bool, } @@ -26,10 +25,7 @@ pub struct DifferentialConfig { impl DifferentialConfig { /// Convert this differential fuzzing config into a `wasmtime::Config`. pub fn to_wasmtime_config(&self) -> anyhow::Result { - let mut config = crate::fuzz_default_config(match self.strategy { - DifferentialStrategy::Cranelift => wasmtime::Strategy::Cranelift, - DifferentialStrategy::Lightbeam => wasmtime::Strategy::Lightbeam, - })?; + let mut config = crate::fuzz_default_config(wasmtime::Strategy::Cranelift)?; config.cranelift_opt_level(self.opt_level.to_wasmtime()); if self.force_jump_veneers { unsafe { @@ -40,12 +36,6 @@ impl DifferentialConfig { } } -#[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] -enum DifferentialStrategy { - Cranelift, - Lightbeam, -} - #[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] enum OptLevel { None, diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs index 467aa6955ee7..ce41fd141084 100644 --- a/crates/fuzzing/src/oracles.rs +++ b/crates/fuzzing/src/oracles.rs @@ -253,7 +253,7 @@ pub fn differential_execution( let configs: Vec<_> = match configs.iter().map(|c| c.to_wasmtime_config()).collect() { Ok(cs) => cs, // If the config is trying to use something that was turned off at - // compile time, eg lightbeam, just continue to the next fuzz input. + // compile time just continue to the next fuzz input. Err(_) => return, }; diff --git a/crates/lightbeam/Cargo.toml b/crates/lightbeam/Cargo.toml deleted file mode 100644 index a974df55651a..000000000000 --- a/crates/lightbeam/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -[package] -name = "lightbeam" -version = "0.30.0" -authors = ["The Lightbeam Project Developers"] -description = "An optimising one-pass streaming compiler for WebAssembly" -license = "Apache-2.0 WITH LLVM-exception" -repository = "https://github.com/bytecodealliance/wasmtime" -readme = "README.md" -categories = ["wasm"] -keywords = ["webassembly", "wasm", "compile", "compiler", "jit"] -edition = "2018" - -[dependencies] -arrayvec = "0.5" -capstone = "0.9.0" -cranelift-codegen = { path = "../../cranelift/codegen", version = "0.77.0" } -derive_more = "0.99" -dynasm = "1.0.0" -dynasmrt = "1.0.0" -iter-enum = "1" -itertools = "0.10.0" -memoffset = "0.6.0" -more-asserts = "0.2.1" -smallvec = "1.6.1" -thiserror = "1.0.9" -typemap = "0.3" -wasmparser = "0.80" - -[dev-dependencies] -lazy_static = "1.2" -wat = "1.0.37" -quickcheck = "1.0.0" -anyhow = "1.0" - -[badges] -maintenance = { status = "experimental" } diff --git a/crates/lightbeam/LICENSE b/crates/lightbeam/LICENSE deleted file mode 100644 index f9d81955f4bc..000000000000 --- a/crates/lightbeam/LICENSE +++ /dev/null @@ -1,220 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ---- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - diff --git a/crates/lightbeam/README.md b/crates/lightbeam/README.md deleted file mode 100644 index 85d787481a24..000000000000 --- a/crates/lightbeam/README.md +++ /dev/null @@ -1,168 +0,0 @@ -# Lightbeam - -Lightbeam is an optimising one-pass streaming compiler for WebAssembly, intended for use in [Wasmtime][wasmtime]. - -[wasmtime]: https://github.com/bytecodealliance/wasmtime - -## Quality of output - -Already - with a very small number of relatively simple optimisation rules - Lightbeam produces surprisingly high-quality output considering how restricted it is. It even produces better code than Cranelift, Firefox or both for some workloads. Here's a very simple example, this recursive fibonacci function in Rust: - -```rust -fn fib(n: i32) -> i32 { - if n == 0 || n == 1 { - 1 - } else { - fib(n - 1) + fib(n - 2) - } -} -``` - -When compiled with optimisations enabled, rustc will produce the following WebAssembly: - -```rust -(module - (func $fib (param $p0 i32) (result i32) - (local $l1 i32) - (set_local $l1 - (i32.const 1)) - (block $B0 - (br_if $B0 - (i32.lt_u - (get_local $p0) - (i32.const 2))) - (set_local $l1 - (i32.const 1)) - (loop $L1 - (set_local $l1 - (i32.add - (call $fib - (i32.add - (get_local $p0) - (i32.const -1))) - (get_local $l1))) - (br_if $L1 - (i32.gt_u - (tee_local $p0 - (i32.add - (get_local $p0) - (i32.const -2))) - (i32.const 1))))) - (get_local $l1))) -``` - -Firefox's optimising compiler produces the following assembly (labels cleaned up somewhat): - -```asm -fib: - sub rsp, 0x18 - cmp qword ptr [r14 + 0x28], rsp - jae stack_overflow - mov dword ptr [rsp + 0xc], edi - cmp edi, 2 - jae .Lelse - mov eax, 1 - mov dword ptr [rsp + 8], eax - jmp .Lreturn -.Lelse: - mov dword ptr [rsp + 0xc], edi - mov eax, 1 - mov dword ptr [rsp + 8], eax -.Lloop: - mov edi, dword ptr [rsp + 0xc] - add edi, -1 - call 0 - mov ecx, dword ptr [rsp + 8] - add ecx, eax - mov dword ptr [rsp + 8], ecx - mov ecx, dword ptr [rsp + 0xc] - add ecx, -2 - mov dword ptr [rsp + 0xc], ecx - cmp ecx, 1 - ja .Lloop -.Lreturn: - mov eax, dword ptr [rsp + 8] - nop - add rsp, 0x18 - ret -``` - -Cranelift with optimisations enabled produces similar: - -```asm -fib: - push rbp - mov rbp, rsp - sub rsp, 0x20 - mov qword ptr [rsp + 0x10], rdi - mov dword ptr [rsp + 0x1c], esi - mov eax, 1 - mov dword ptr [rsp + 0x18], eax - mov eax, dword ptr [rsp + 0x1c] - cmp eax, 2 - jb .Lreturn - movabs rax, 0 - mov qword ptr [rsp + 8], rax -.Lloop: - mov eax, dword ptr [rsp + 0x1c] - add eax, -1 - mov rcx, qword ptr [rsp + 8] - mov rdx, qword ptr [rsp + 0x10] - mov rdi, rdx - mov esi, eax - call rcx - mov ecx, dword ptr [rsp + 0x18] - add eax, ecx - mov dword ptr [rsp + 0x18], eax - mov eax, dword ptr [rsp + 0x1c] - add eax, -2 - mov dword ptr [rsp + 0x1c], eax - mov eax, dword ptr [rsp + 0x1c] - cmp eax, 1 - ja .Lloop -.Lreturn: - mov eax, dword ptr [rsp + 0x18] - add rsp, 0x20 - pop rbp - ret -``` - -Whereas Lightbeam produces smaller code with far fewer memory accesses than both (and fewer blocks than Firefox's output): - -```asm -fib: - cmp esi, 2 - mov eax, 1 - jb .Lreturn - mov eax, 1 -.Lloop: - mov rcx, rsi - add ecx, 0xffffffff - push rsi - push rax - push rax - mov rsi, rcx - call fib - add eax, [rsp + 8] - mov rcx, [rsp + 0x10] - add ecx, 0xfffffffe - cmp ecx, 1 - mov rsi, rcx - lea rsp, [rsp + 0x18] - ja .Lloop -.Lreturn: - ret -``` - -Now obviously I'm not advocating for replacing Firefox's optimising compiler with Lightbeam since the latter can only really produce better code when receiving optimised WebAssembly (and so debug-mode or hand-written WebAssembly may produce much worse output). However, this shows that even with the restrictions of a streaming compiler it's absolutely possible to produce high-quality assembly output. For the assembly above, the Lightbeam output runs within 15% of native speed. This is paramount for one of Lightbeam's intended usecases for real-time systems that want good runtime performance but cannot tolerate compiler bombs. - -## Specification compliance - -Lightbeam passes 100% of the specification test suite, but that doesn't necessarily mean that it's 100% specification-compliant. Hopefully as we run a fuzzer against it we can find any issues and get Lightbeam to a state where it can be used in production. - -## Getting involved - -You can file issues in the [Wasmtime issue tracker][Wasmtime issue tracker]. If you want to get involved jump into the [Bytecode Alliance Zulip][bytecodealliance-zulip] and someone can direct you to the right place. I wish I could say "the most useful thing you can do is play with it and open issues where you find problems" but until it passes the spec suite that won't be very helpful. - -[bytecodealliance-zulip]: https://bytecodealliance.zulipchat.com/ -[Wasmtime issue tracker]: https://github.com/bytecodealliance/wasmtime/issues diff --git a/crates/lightbeam/examples/test.rs b/crates/lightbeam/examples/test.rs deleted file mode 100644 index 12d54193ae6c..000000000000 --- a/crates/lightbeam/examples/test.rs +++ /dev/null @@ -1,16 +0,0 @@ -use lightbeam::translate; - -const WAT: &str = r#" -(module - (func (param i32) (param i32) (result i32) (i32.add (get_local 0) (get_local 1))) -) -"#; - -fn main() -> anyhow::Result<()> { - let data = wat::parse_str(WAT)?; - let translated = translate(&data)?; - let result: u32 = translated.execute_func(0, (5u32, 3u32))?; - println!("f(5, 3) = {}", result); - - Ok(()) -} diff --git a/crates/lightbeam/src/backend.rs b/crates/lightbeam/src/backend.rs deleted file mode 100644 index 240364320b9d..000000000000 --- a/crates/lightbeam/src/backend.rs +++ /dev/null @@ -1,6134 +0,0 @@ -#![allow(clippy::float_cmp)] - -use self::registers::*; -use crate::error::Error; -use crate::microwasm::{BrTarget, Ieee32, Ieee64, SignlessType, Type, Value, F32, F64, I32, I64}; -use crate::module::ModuleContext; -use cranelift_codegen::{ - binemit, - ir::{self, SourceLoc, TrapCode}, -}; -use dynasm::dynasm; -use dynasmrt::x64::Assembler; -use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer}; -use std::{ - cmp::Ordering, - convert::{TryFrom, TryInto}, - fmt::Display, - hash::Hash, - iter, mem, - ops::{Deref, RangeInclusive}, -}; -// use wasmtime_environ::BuiltinFunctionIndex; - -mod magic { - /// An index type for builtin functions. - pub struct BuiltinFunctionIndex(u32); - - impl BuiltinFunctionIndex { - /// Returns an index for wasm's `memory.grow` builtin function. - pub const fn get_memory32_grow_index() -> Self { - Self(0) - } - /// Returns an index for wasm's imported `memory.grow` builtin function. - pub const fn get_imported_memory32_grow_index() -> Self { - Self(1) - } - /// Returns an index for wasm's `memory.size` builtin function. - pub const fn get_memory32_size_index() -> Self { - Self(2) - } - /// Returns an index for wasm's imported `memory.size` builtin function. - pub const fn get_imported_memory32_size_index() -> Self { - Self(3) - } - - /// Return the index as an u32 number. - pub const fn index(&self) -> u32 { - self.0 - } - } -} - -use magic::BuiltinFunctionIndex; - -/// Size of a pointer on the target in bytes. -const WORD_SIZE: u32 = 8; - -type RegId = u8; - -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] -pub enum GPR { - Rq(RegId), - Rx(RegId), -} - -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] -pub enum GPRType { - Rq, - Rx, -} - -impl From for GPRType { - fn from(other: SignlessType) -> GPRType { - match other { - I32 | I64 => GPRType::Rq, - F32 | F64 => GPRType::Rx, - } - } -} - -impl From for Option { - fn from(other: SignlessType) -> Self { - Some(other.into()) - } -} - -impl GPR { - fn type_(self) -> GPRType { - match self { - GPR::Rq(_) => GPRType::Rq, - GPR::Rx(_) => GPRType::Rx, - } - } - - fn rq(self) -> Option { - match self { - GPR::Rq(r) => Some(r), - GPR::Rx(_) => None, - } - } - - fn rx(self) -> Option { - match self { - GPR::Rx(r) => Some(r), - GPR::Rq(_) => None, - } - } -} - -fn arg_locs>( - types: I, -) -> impl ExactSizeIterator + DoubleEndedIterator + Clone -where - I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, -{ - // TODO: VmCtx is in the first register - let mut int_gpr_iter = INTEGER_ARGS_IN_GPRS.iter(); - let mut float_gpr_iter = FLOAT_ARGS_IN_GPRS.iter(); - let mut stack_idx = 0; - - types - .into_iter() - .map(move |ty| { - match ty { - I32 | I64 => int_gpr_iter.next(), - F32 | F64 => float_gpr_iter.next(), - } - .map(|&r| CCLoc::Reg(r)) - .unwrap_or_else(|| { - let out = CCLoc::Stack(stack_idx); - stack_idx += 1; - out - }) - }) - // Since we only advance the iterators based on the values in `types`, - // we can't do this lazily. - .collect::>() - .into_iter() -} - -fn arg_locs_skip_caller_vmctx>( - types: I, -) -> impl ExactSizeIterator + DoubleEndedIterator + Clone -where - I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, -{ - #[derive(Debug, Clone)] - struct WithInt { - caller_vmctx_ty: Option, - iter: I, - } - - impl Iterator for WithInt - where - I: Iterator, - { - type Item = SignlessType; - - fn next(&mut self) -> Option { - self.caller_vmctx_ty.take().or_else(|| self.iter.next()) - } - - fn size_hint(&self) -> (usize, Option) { - let ty_len = if self.caller_vmctx_ty.is_some() { 1 } else { 0 }; - let (lower, upper) = self.iter.size_hint(); - - (lower + ty_len, upper.map(|u| u + ty_len)) - } - } - - impl DoubleEndedIterator for WithInt - where - I: DoubleEndedIterator, - { - fn next_back(&mut self) -> Option { - self.iter - .next_back() - .or_else(|| self.caller_vmctx_ty.take()) - } - } - - impl ExactSizeIterator for WithInt where I: ExactSizeIterator {} - - arg_locs(WithInt { - caller_vmctx_ty: Some(I32), - iter: types.into_iter(), - }) - .skip(1) -} - -pub fn ret_locs(types: impl IntoIterator) -> Result, Error> { - let types = types.into_iter(); - let mut out = Vec::with_capacity(types.size_hint().0); - // TODO: VmCtx is in the first register - let mut int_gpr_iter = INTEGER_RETURN_GPRS.iter(); - let mut float_gpr_iter = FLOAT_RETURN_GPRS.iter(); - - for ty in types { - match ty { - I32 | I64 => match int_gpr_iter.next() { - None => { - return Err(Error::Microwasm( - "We don't support stack returns yet".to_string(), - )) - } - Some(val) => out.push(CCLoc::Reg(*val)), - }, - F32 | F64 => match float_gpr_iter.next() { - None => { - return Err(Error::Microwasm( - "We don't support stack returns yet".to_string(), - )) - } - Some(val) => out.push(CCLoc::Reg(*val)), - }, - } - } - - Ok(out) -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -struct GPRs { - bits: u16, -} - -impl GPRs { - fn new() -> Self { - Self { bits: 0 } - } -} - -#[allow(dead_code)] -pub mod registers { - use super::{RegId, GPR}; - - pub mod rq { - use super::RegId; - - pub const RAX: RegId = 0; - pub const RCX: RegId = 1; - pub const RDX: RegId = 2; - pub const RBX: RegId = 3; - pub const RSP: RegId = 4; - pub const RBP: RegId = 5; - pub const RSI: RegId = 6; - pub const RDI: RegId = 7; - pub const R8: RegId = 8; - pub const R9: RegId = 9; - pub const R10: RegId = 10; - pub const R11: RegId = 11; - pub const R12: RegId = 12; - pub const R13: RegId = 13; - pub const R14: RegId = 14; - pub const R15: RegId = 15; - } - - pub const RAX: GPR = GPR::Rq(self::rq::RAX); - pub const RCX: GPR = GPR::Rq(self::rq::RCX); - pub const RDX: GPR = GPR::Rq(self::rq::RDX); - pub const RBX: GPR = GPR::Rq(self::rq::RBX); - pub const RSP: GPR = GPR::Rq(self::rq::RSP); - pub const RBP: GPR = GPR::Rq(self::rq::RBP); - pub const RSI: GPR = GPR::Rq(self::rq::RSI); - pub const RDI: GPR = GPR::Rq(self::rq::RDI); - pub const R8: GPR = GPR::Rq(self::rq::R8); - pub const R9: GPR = GPR::Rq(self::rq::R9); - pub const R10: GPR = GPR::Rq(self::rq::R10); - pub const R11: GPR = GPR::Rq(self::rq::R11); - pub const R12: GPR = GPR::Rq(self::rq::R12); - pub const R13: GPR = GPR::Rq(self::rq::R13); - pub const R14: GPR = GPR::Rq(self::rq::R14); - pub const R15: GPR = GPR::Rq(self::rq::R15); - - pub const XMM0: GPR = GPR::Rx(0); - pub const XMM1: GPR = GPR::Rx(1); - pub const XMM2: GPR = GPR::Rx(2); - pub const XMM3: GPR = GPR::Rx(3); - pub const XMM4: GPR = GPR::Rx(4); - pub const XMM5: GPR = GPR::Rx(5); - pub const XMM6: GPR = GPR::Rx(6); - pub const XMM7: GPR = GPR::Rx(7); - pub const XMM8: GPR = GPR::Rx(8); - pub const XMM9: GPR = GPR::Rx(9); - pub const XMM10: GPR = GPR::Rx(10); - pub const XMM11: GPR = GPR::Rx(11); - pub const XMM12: GPR = GPR::Rx(12); - pub const XMM13: GPR = GPR::Rx(13); - pub const XMM14: GPR = GPR::Rx(14); - pub const XMM15: GPR = GPR::Rx(15); - - pub const NUM_GPRS: u8 = 16; -} - -const SIGN_MASK_F64: u64 = 0x8000_0000_0000_0000; -const REST_MASK_F64: u64 = !SIGN_MASK_F64; -const SIGN_MASK_F32: u32 = 0x8000_0000; -const REST_MASK_F32: u32 = !SIGN_MASK_F32; - -impl GPRs { - fn take(&mut self) -> Option { - let lz = self.bits.trailing_zeros(); - if lz < 16 { - let gpr = lz as RegId; - self.mark_used(gpr); - Some(gpr) - } else { - None - } - } - - fn mark_used(&mut self, gpr: RegId) { - self.bits &= !(1 << gpr as u16); - } - - fn release(&mut self, gpr: RegId) { - debug_assert!( - !self.is_free(gpr), - "released register {} was already free", - gpr - ); - self.bits |= 1 << gpr; - } - - fn is_free(self, gpr: RegId) -> bool { - (self.bits & (1 << gpr)) != 0 - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct Registers { - /// Registers at 64 bits and below (al/ah/ax/eax/rax, for example) - scratch_64: (GPRs, [u8; NUM_GPRS as usize]), - /// Registers at 128 bits (xmm0, for example) - scratch_128: (GPRs, [u8; NUM_GPRS as usize]), -} - -impl Default for Registers { - fn default() -> Self { - Self::new() - } -} - -impl Registers { - pub fn new() -> Self { - Self { - scratch_64: (GPRs::new(), [1; NUM_GPRS as _]), - scratch_128: (GPRs::new(), [1; NUM_GPRS as _]), - } - } - - pub fn release_scratch_register(&mut self) -> Result<(), Error> { - // Give ourselves a few scratch registers to work with, for now. - for &scratch in SCRATCH_REGS { - self.release(scratch)?; - } - Ok(()) - } - - fn scratch_counts_mut(&mut self, gpr: GPR) -> (u8, &mut (GPRs, [u8; NUM_GPRS as usize])) { - match gpr { - GPR::Rq(r) => (r, &mut self.scratch_64), - GPR::Rx(r) => (r, &mut self.scratch_128), - } - } - - fn scratch_counts(&self, gpr: GPR) -> (u8, &(GPRs, [u8; NUM_GPRS as usize])) { - match gpr { - GPR::Rq(r) => (r, &self.scratch_64), - GPR::Rx(r) => (r, &self.scratch_128), - } - } - - pub fn mark_used(&mut self, gpr: GPR) { - let (gpr, scratch_counts) = self.scratch_counts_mut(gpr); - scratch_counts.0.mark_used(gpr); - scratch_counts.1[gpr as usize] += 1; - } - - pub fn num_usages(&self, gpr: GPR) -> u8 { - let (gpr, scratch_counts) = self.scratch_counts(gpr); - scratch_counts.1[gpr as usize] - } - - pub fn take(&mut self, ty: impl Into) -> Option { - let (mk_gpr, scratch_counts) = match ty.into() { - GPRType::Rq => (GPR::Rq as fn(_) -> _, &mut self.scratch_64), - GPRType::Rx => (GPR::Rx as fn(_) -> _, &mut self.scratch_128), - }; - - let out = scratch_counts.0.take()?; - scratch_counts.1[out as usize] += 1; - Some(mk_gpr(out)) - } - - pub fn release(&mut self, gpr: GPR) -> Result<(), Error> { - let (gpr, scratch_counts) = self.scratch_counts_mut(gpr); - let c = &mut scratch_counts.1[gpr as usize]; - *c = match c.checked_sub(1) { - Some(e) => e, - None => return Err(Error::Microwasm(format!("Double-freed register: {}", gpr))), - }; - if *c == 0 { - scratch_counts.0.release(gpr); - } - Ok(()) - } - - pub fn is_free(&self, gpr: GPR) -> bool { - let (gpr, scratch_counts) = self.scratch_counts(gpr); - scratch_counts.0.is_free(gpr) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct BlockCallingConvention> { - pub stack_depth: StackDepth, - pub arguments: I, -} - -impl BlockCallingConvention { - pub fn function_start(arguments: I) -> Self { - BlockCallingConvention { - // We start and return the function with stack depth 1 since we must - // allow space for the saved return address. - stack_depth: StackDepth(1), - arguments, - } - } -} - -impl BlockCallingConvention -where - for<'a> &'a I::Target: IntoIterator, -{ - pub fn as_ref(&self) -> BlockCallingConvention + '_> { - BlockCallingConvention { - // We start and return the function with stack depth 1 since we must - // allow space for the saved return address. - stack_depth: self.stack_depth.clone(), - arguments: self.arguments.into_iter().copied(), - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub enum FunctionDefLocation { - SameModule, - PossiblyExternal, -} - -// TODO: Combine this with `ValueLocation`? -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum CCLoc { - /// Value exists in a register. - Reg(GPR), - /// Value exists on the stack. - Stack(i32), -} - -impl CCLoc { - fn try_from(other: ValueLocation) -> Option { - match other { - ValueLocation::Reg(reg) => Some(CCLoc::Reg(reg)), - ValueLocation::Stack(offset) => Some(CCLoc::Stack(offset)), - ValueLocation::Cond(_) | ValueLocation::Immediate(_) => None, - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum CondCode { - CF0, - CF1, - ZF0, - ZF1, - CF0AndZF0, - CF1OrZF1, - ZF0AndSFEqOF, - ZF1OrSFNeOF, - SFEqOF, - SFNeOF, -} - -mod cc { - use super::CondCode; - - pub const EQUAL: CondCode = CondCode::ZF0; - pub const NOT_EQUAL: CondCode = CondCode::ZF1; - pub const GE_U: CondCode = CondCode::CF0; - pub const LT_U: CondCode = CondCode::CF1; - pub const GT_U: CondCode = CondCode::CF0AndZF0; - pub const LE_U: CondCode = CondCode::CF1OrZF1; - pub const GE_S: CondCode = CondCode::SFEqOF; - pub const LT_S: CondCode = CondCode::SFNeOF; - pub const GT_S: CondCode = CondCode::ZF0AndSFEqOF; - pub const LE_S: CondCode = CondCode::ZF1OrSFNeOF; -} - -impl std::ops::Not for CondCode { - type Output = Self; - - fn not(self) -> Self { - use CondCode::*; - - match self { - CF0 => CF1, - CF1 => CF0, - ZF0 => ZF1, - ZF1 => ZF0, - CF0AndZF0 => CF1OrZF1, - CF1OrZF1 => CF0AndZF0, - ZF0AndSFEqOF => ZF1OrSFNeOF, - ZF1OrSFNeOF => ZF0AndSFEqOF, - SFEqOF => SFNeOF, - SFNeOF => SFEqOF, - } - } -} - -/// Describes location of a value. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ValueLocation { - /// Value exists in a register. - Reg(GPR), - /// Value exists on the stack. Note that this offset is from the rsp as it - /// was when we entered the function. - Stack(i32), - /// Value is a literal - Immediate(Value), - /// Value is a set condition code - Cond(CondCode), -} - -impl From for ValueLocation { - fn from(other: CCLoc) -> Self { - match other { - CCLoc::Reg(r) => ValueLocation::Reg(r), - CCLoc::Stack(o) => ValueLocation::Stack(o), - } - } -} - -impl ValueLocation { - fn stack(self) -> Option { - match self { - ValueLocation::Stack(o) => Some(o), - _ => None, - } - } - - fn reg(self) -> Option { - match self { - ValueLocation::Reg(r) => Some(r), - _ => None, - } - } - - fn immediate(self) -> Option { - match self { - ValueLocation::Immediate(i) => Some(i), - _ => None, - } - } - - fn imm_i32(self) -> Option { - self.immediate().and_then(Value::as_i32) - } - - fn imm_i64(self) -> Option { - self.immediate().and_then(Value::as_i64) - } - - fn imm_f32(self) -> Option { - self.immediate().and_then(Value::as_f32) - } - - fn imm_f64(self) -> Option { - self.immediate().and_then(Value::as_f64) - } -} - -// TODO: This assumes only system-v calling convention. -// In system-v calling convention the first 6 arguments are passed via registers. -// All rest arguments are passed on the stack. -// Usually system-v uses rdi and rsi, but rdi is used for the vmctx and rsi is used for the _caller_ vmctx -const INTEGER_ARGS_IN_GPRS: &[GPR] = &[GPR::Rq(CALLER_VMCTX), RDX, RCX, R8, R9]; -const INTEGER_RETURN_GPRS: &[GPR] = &[RAX, RDX]; -const FLOAT_ARGS_IN_GPRS: &[GPR] = &[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]; -const FLOAT_RETURN_GPRS: &[GPR] = &[XMM0, XMM1]; -// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI -const SCRATCH_REGS: &[GPR] = &[ - RDX, RCX, R8, R9, RAX, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, - XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, -]; -const VMCTX: RegId = rq::RDI; -const CALLER_VMCTX: RegId = rq::RSI; - -pub struct CodeGenSession<'module, M> { - assembler: Assembler, - pub module_context: &'module M, - pub op_offset_map: Vec<(AssemblyOffset, Box)>, - func_starts: Vec<(Option, DynamicLabel)>, - pointer_type: SignlessType, -} - -impl<'module, M> CodeGenSession<'module, M> { - pub fn new(func_count: u32, module_context: &'module M, pointer_type: SignlessType) -> Self { - let mut assembler = Assembler::new().unwrap(); - let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label())) - .take(func_count as usize) - .collect::>(); - - CodeGenSession { - assembler, - op_offset_map: Default::default(), - func_starts, - module_context, - pointer_type, - } - } - - pub fn offset(&self) -> usize { - self.assembler.offset().0 - } - - pub fn pointer_type(&self) -> SignlessType { - self.pointer_type - } - - pub fn new_context<'this>( - &'this mut self, - func_idx: u32, - reloc_sink: &'this mut dyn binemit::RelocSink, - ) -> Context<'this, M> { - { - let func_start = &mut self.func_starts[func_idx as usize]; - - // At this point we know the exact start address of this function. Save it - // and define dynamic label at this location. - func_start.0 = Some(self.assembler.offset()); - self.assembler.dynamic_label(func_start.1); - } - - Context { - asm: &mut self.assembler, - current_function: func_idx, - reloc_sink, - pointer_type: self.pointer_type, - source_loc: Default::default(), - func_starts: &self.func_starts, - block_state: Default::default(), - module_context: self.module_context, - labels: Default::default(), - } - } - - pub fn into_translated_code_section(self) -> Result { - let exec_buf = self - .assembler - .finalize() - .map_err(|_asm| Error::Assembler("assembler error".to_owned()))?; - let func_starts = self - .func_starts - .iter() - .map(|(offset, _)| offset.unwrap()) - .collect::>(); - Ok(TranslatedCodeSection { - exec_buf, - func_starts, - op_offset_map: self.op_offset_map, - // TODO - relocatable_accesses: vec![], - }) - } -} - -#[derive(Debug)] -struct RelocateAddress { - reg: Option, - imm: usize, -} - -#[derive(Debug)] -struct RelocateAccess { - position: AssemblyOffset, - dst_reg: GPR, - address: RelocateAddress, -} - -pub struct TranslatedCodeSection { - exec_buf: ExecutableBuffer, - func_starts: Vec, - #[allow(dead_code)] - relocatable_accesses: Vec, - op_offset_map: Vec<(AssemblyOffset, Box)>, -} - -impl TranslatedCodeSection { - pub fn func_start(&self, idx: usize) -> *const u8 { - let offset = self.func_starts[idx]; - self.exec_buf.ptr(offset) - } - - pub fn func_range(&self, idx: usize) -> std::ops::Range { - let end = self - .func_starts - .get(idx + 1) - .map(|i| i.0) - .unwrap_or_else(|| self.exec_buf.len()); - - self.func_starts[idx].0..end - } - - pub fn funcs<'a>(&'a self) -> impl Iterator> + 'a { - (0..self.func_starts.len()).map(move |i| self.func_range(i)) - } - - pub fn buffer(&self) -> &[u8] { - &*self.exec_buf - } - - pub fn disassemble(&self) { - crate::disassemble::disassemble(&*self.exec_buf, &self.op_offset_map).unwrap(); - } -} - -#[derive(Debug, Default, Clone)] -pub struct BlockState { - pub stack: Stack, - pub depth: StackDepth, - pub regs: Registers, -} - -type Stack = Vec; - -mod labels { - use super::Label; - use std::collections::HashMap; - - pub struct LabelInfo { - pub label: Label, - pub align: u32, - pub inner: LabelValue, - } - - #[derive(Copy, Clone, PartialEq, Eq, Hash)] - pub enum LabelValue { - Ret, - I32(i32), - I64(i64), - } - - #[derive(Default)] - pub struct Labels { - map: HashMap, - } - - impl Labels { - pub fn drain(&mut self) -> impl Iterator + '_ { - self.map.drain().map(|(_, info)| info) - } - - pub fn insert( - &mut self, - l: impl FnOnce() -> Label, - align: u32, - label: LabelValue, - ) -> Label { - let val = self.map.entry(label).or_insert_with(move || LabelInfo { - label: l(), - align, - inner: label, - }); - - val.align = val.align.max(align); - - val.label - } - } -} - -use labels::{LabelInfo, LabelValue, Labels}; - -pub struct Context<'this, M> { - pub asm: &'this mut Assembler, - pointer_type: SignlessType, - #[allow(dead_code)] - source_loc: SourceLoc, - reloc_sink: &'this mut dyn binemit::RelocSink, - module_context: &'this M, - current_function: u32, - func_starts: &'this Vec<(Option, DynamicLabel)>, - /// Each push and pop on the value stack increments or decrements this value by 1 respectively. - pub block_state: BlockState, - labels: Labels, -} - -/// Label in code. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct Label(DynamicLabel); - -/// Offset from starting value of SP counted in words. -#[derive(Default, Debug, Clone, PartialEq, Eq)] -pub struct StackDepth(u32); - -impl StackDepth { - pub fn reserve(&mut self, slots: u32) { - self.0 = self.0.checked_add(slots).unwrap(); - } - - pub fn free(&mut self, slots: u32) { - self.0 = self.0.checked_sub(slots).unwrap(); - } -} - -macro_rules! int_div { - ($full_div_s:ident, $full_div_u:ident, $div_u:ident, $div_s:ident, $rem_u:ident, $rem_s:ident, $imm_fn:ident, $signed_ty:ty, $unsigned_ty:ty, $reg_ty:tt, $pointer_ty:tt) => { - // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when - // emitting Wasm. - pub fn $div_u(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - <$unsigned_ty>::wrapping_div(dividend as _, divisor as _).into(), - ))?; - } - - return Ok(()) - } - - let (div, rem, saved) = self.$full_div_u(divisor, dividend)?; - - self.free_value(rem)?; - - let div = match div { - ValueLocation::Reg(div) => { - if saved.clone().any(|dst| dst == div) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap()) - ); - self.block_state.regs.release(div)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(div) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => div, - }; - - self.cleanup_gprs(saved); - - self.push(div)?; - Ok(()) - } - - // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when - // emitting Wasm. - pub fn $div_s(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - <$signed_ty>::wrapping_div(dividend, divisor).into(), - ))?; - } - - return Ok(()) - } - - let (div, rem, saved) = self.$full_div_s(divisor, dividend)?; - - self.free_value(rem)?; - - let div = match div { - ValueLocation::Reg(div) => { - if saved.clone().any(|dst| dst == div) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap()) - ); - self.block_state.regs.release(div)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(div) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => div, - }; - - self.cleanup_gprs(saved); - - self.push(div)?; - Ok(()) - } - - pub fn $rem_u(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - (dividend as $unsigned_ty % divisor as $unsigned_ty).into(), - ))?; - } - return Ok(()); - } - - let (div, rem, saved) = self.$full_div_u(divisor, dividend)?; - - self.free_value(div)?; - - let rem = match rem { - ValueLocation::Reg(rem) => { - if saved.clone().any(|dst| dst == rem) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap()) - ); - self.block_state.regs.release(rem)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(rem) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => rem, - }; - - self.cleanup_gprs(saved); - - self.push(rem)?; - Ok(()) - } - - pub fn $rem_s(&mut self) -> Result<(), Error>{ - let mut divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?; - } else { - self.push(ValueLocation::Immediate((dividend % divisor).into()))?; - } - return Ok(()); - } - - let is_neg1 = self.create_label(); - - let current_depth = self.block_state.depth.clone(); - - // TODO: This could cause segfaults because of implicit push/pop - let gen_neg1_case = match divisor { - ValueLocation::Immediate(_) => { - if divisor.$imm_fn().unwrap() == -1 { - self.push(ValueLocation::Immediate((-1 as $signed_ty).into()))?; - self.free_value(dividend)?; - return Ok(()); - } - - false - } - ValueLocation::Reg(_) => { - let reg = self.put_into_register(GPRType::Rq, &mut divisor)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp $reg_ty(reg.rq().unwrap()), -1 - ); - // TODO: We could choose `current_depth` as the depth here instead but we currently - // don't for simplicity - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; je =>is_neg1.0 - ); - - true - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp $pointer_ty [rsp + offset], -1 - ); - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; je =>is_neg1.0 - ); - - true - } - ValueLocation::Cond(_) => { - // `cc` can never be `-1`, only `0` and `1` - false - } - }; - - let (div, rem, saved) = self.$full_div_s(divisor, dividend)?; - - self.free_value(div)?; - - let rem = match rem { - ValueLocation::Reg(rem) => { - if saved.clone().any(|dst| dst == rem) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap()) - ); - self.block_state.regs.release(rem)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(rem) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => rem, - }; - - self.cleanup_gprs(saved); - - if gen_neg1_case { - let ret = self.create_label(); - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; jmp =>ret.0 - ); - self.define_label(is_neg1); - - let dst_ccloc = match CCLoc::try_from(rem) { - None => { - return Err(Error::Microwasm( - "$rem_s Programmer error".to_string(), - )) - } - Some(o) => o, - }; - - self.copy_value( - ValueLocation::Immediate((0 as $signed_ty).into()), - dst_ccloc - )?; - - self.set_stack_depth(current_depth.clone())?; - self.define_label(ret); - } - - self.push(rem)?; - Ok(()) - } - } -} - -macro_rules! unop { - ($name:ident, $instr:ident, $reg_ty:tt, $typ:ty, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => - ValueLocation::Immediate( - ($const_fallback(imm.as_int().unwrap() as $typ) as $typ).into() - ), - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(Type::for_::<$typ>()).unwrap(); - dynasm!(self.asm - ; $instr $reg_ty(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = self.put_into_register(GPRType::Rq, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let temp = self.take_reg(Type::for_::<$typ>()).unwrap(); - dynasm!(self.asm - ; $instr $reg_ty(temp.rq().unwrap()), $reg_ty(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - } -} - -macro_rules! conversion { - ( - $name:ident, - $instr:ident, - $in_reg_ty:tt, - $in_reg_fn:ident, - $out_reg_ty:tt, - $out_reg_fn:ident, - $in_typ:ty, - $out_typ:ty, - $const_ty_fn:ident, - $const_fallback:expr - ) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => - ValueLocation::Immediate( - $const_fallback(imm.$const_ty_fn().unwrap()).into() - ), - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap(); - dynasm!(self.asm - ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), [rsp + offset] - ); - - ValueLocation::Reg(temp) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = self.put_into_register(Type::for_::<$in_typ>(), &mut val)? .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap(); - - dynasm!(self.asm - ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), $in_reg_ty(reg.$in_reg_fn().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - } -} - -// TODO: Support immediate `count` parameters -macro_rules! shift { - ($name:ident, $reg_ty:tt, $instr:ident, $const_fallback:expr, $ty:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut count = self.pop()?; - let mut val = self.pop()?; - - if let Some(imm) = count.immediate() { - if let Some(imm) = imm.as_int() { - if let Ok(imm) = i8::try_from(imm) { - let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr $reg_ty(reg.rq().unwrap()), imm - ); - self.push(ValueLocation::Reg(reg))?; - return Ok(()); - } - } - } - - if val == ValueLocation::Reg(RCX) { - let new = self.take_reg($ty).unwrap(); - self.copy_value(val, CCLoc::Reg(new))?; - self.free_value(val)?; - val = ValueLocation::Reg(new); - } - - // TODO: Maybe allocate `RCX`, write `count` to it and then free `count`. - // Once we've implemented refcounting this will do the right thing - // for free. - let temp_rcx = match count { - ValueLocation::Reg(RCX) => {None} - other => { - let out = if self.block_state.regs.is_free(RCX) { - None - } else { - let new_reg = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new_reg.rq().unwrap()), rcx - ); - Some(new_reg) - }; - - match other { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let gpr = self.put_into_register(I32, &mut count)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - dynasm!(self.asm - ; mov cl, Rb(gpr.rq().unwrap()) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; mov cl, [rsp + offset] - ); - } - ValueLocation::Immediate(imm) => { - dynasm!(self.asm - ; mov cl, imm.as_int().unwrap() as i8 - ); - } - } - - out - } - }; - - self.free_value(count)?; - self.block_state.regs.mark_used(RCX); - count = ValueLocation::Reg(RCX); - - let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr $reg_ty(reg.rq().unwrap()), cl - ); - - self.free_value(count)?; - - if let Some(gpr) = temp_rcx { - dynasm!(self.asm - ; mov rcx, Rq(gpr.rq().unwrap()) - ); - self.block_state.regs.release(gpr)?; - } - - self.push(val)?; - Ok(()) - } - } -} - -macro_rules! cmp_i32 { - ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let Some(i) = left.imm_i32() { - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; cmp DWORD [rsp + offset], i - ); - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - dynasm!(self.asm - ; cmp Rd(rreg.rq().unwrap()), i - ); - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Immediate(right) => { - ValueLocation::Immediate( - (if $const_fallback(i, right.as_i32().unwrap()) { - 1i32 - } else { - 0i32 - }).into() - ) - } - } - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap()) - ); - } - ValueLocation::Immediate(i) => { - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), i.as_i32().unwrap() - ); - } - } - - ValueLocation::Cond($flags) - }; - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - } -} - -macro_rules! cmp_i64 { - ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let Some(i) = left.imm_i64() { - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp QWORD [rsp + offset], i - ); - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp QWORD [rsp + offset], Rq(lreg.rq().unwrap()) - ); - } - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp Rq(rreg.rq().unwrap()), i - ); - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(rreg.rq().unwrap()), Rq(lreg.rq().unwrap()) - ); - } - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Immediate(right) => { - ValueLocation::Immediate( - (if $const_fallback(i, right.as_i64().unwrap()) { - 1i32 - } else { - 0i32 - }).into() - ) - } - } - } else { - let lreg = self.put_into_register(I64, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - } - ValueLocation::Immediate(i) => { - let i = i.as_i64().unwrap(); - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), i - ); - } else { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - } - } - } - - ValueLocation::Cond($flags) - }; - - self.free_value(left)?; - self.free_value(right)?; - self.push(out)?; - Ok(()) - } - } -} - -macro_rules! cmp_f32 { - ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - cmp_float!( - comiss, - f32, - imm_f32, - $name, - $reverse_name, - $instr, - $const_fallback - ); - }; -} - -macro_rules! eq_float { - ($name:ident, $instr:ident, $imm_fn:ident, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - if $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()) { - 1u32 - } else { - 0 - }.into() - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right), - _ => (right, left) - }; - - let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; movd Rd(out.rq().unwrap()), Rx(lreg.rx().unwrap()) - ; and Rd(out.rq().unwrap()), 1 - ); - - self.push(ValueLocation::Reg(out))?; - self.free_value(left)?; - self.free_value(right)?; - Ok(()) - } - - } -} - -macro_rules! minmax_float { - ( - $name:ident, - $instr:ident, - $cmpinstr:ident, - $addinstr:ident, - $combineinstr:ident, - $imm_fn:ident, - $const_fallback:expr - ) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()).into() - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right), - _ => (right, left) - }; - - let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $cmpinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; je >equal - ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; jmp >ret - ; equal: - ; jnp >equal_but_not_parity - ; $addinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; jmp >ret - ; equal_but_not_parity: - ; $combineinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; ret: - ); - - self.push(left)?; - self.free_value(right)?; - Ok(()) - } - - } -} - -macro_rules! cmp_f64 { - ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - cmp_float!( - comisd, - f64, - imm_f64, - $name, - $reverse_name, - $instr, - $const_fallback - ); - }; -} - -macro_rules! cmp_float { - (@helper $cmp_instr:ident, $ty:ty, $imm_fn:ident, $self:expr, $left:expr, $right:expr, $instr:ident, $const_fallback:expr) => {{ - let (left, right, this) = ($left, $right, $self); - if let (Some(left), Some(right)) = (left.$imm_fn(), right.$imm_fn()) { - if $const_fallback(<$ty>::from_bits(left.to_bits()), <$ty>::from_bits(right.to_bits())) { - ValueLocation::Immediate(1i32.into()) - } else { - ValueLocation::Immediate(0i32.into()) - } - } else { - let lreg = this.put_into_register(GPRType::Rx, left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let result = this.take_reg(I32).unwrap(); - - match right { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - - dynasm!(this.asm - ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap()) - ; $cmp_instr Rx(lreg.rx().unwrap()), [rsp + offset] - ; $instr Rb(result.rq().unwrap()) - ); - } - right => { - let rreg = this.put_into_register(GPRType::Rx, right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(this.asm - ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap()) - ; $cmp_instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; $instr Rb(result.rq().unwrap()) - ); - } - } - - ValueLocation::Reg(result) - } - }}; - ($cmp_instr:ident, $ty:ty, $imm_fn:ident, $name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = cmp_float!(@helper - $cmp_instr, - $ty, - $imm_fn, - &mut *self, - &mut left, - &mut right, - $instr, - $const_fallback - ); - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - - pub fn $reverse_name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = cmp_float!(@helper - $cmp_instr, - $ty, - $imm_fn, - &mut *self, - &mut right, - &mut left, - $instr, - $const_fallback - ); - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - }; -} - -macro_rules! binop_i32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - $const_fallback, - Rd, - rq, - I32, - imm_i32, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rd(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! commutative_binop_i32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - $const_fallback, - Rd, - rq, - I32, - imm_i32, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rd(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! binop_i64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - $const_fallback, - Rq, - rq, - I64, - imm_i64, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rq(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! commutative_binop_i64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - $const_fallback, - Rq, - rq, - I64, - imm_i64, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rq(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! binop_f32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F32, - imm_f32, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! commutative_binop_f32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F32, - imm_f32, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! binop_f64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F64, - imm_f64, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! commutative_binop_f64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F64, - imm_f64, - |_, _, _: i32| unreachable!() - ); - }; -} -macro_rules! commutative_binop { - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => { - binop!( - $name, - $instr, - $const_fallback, - $reg_ty, - $reg_fn, - $ty, - $imm_fn, - $direct_imm, - |op1: ValueLocation, op0: ValueLocation| match op1 { - ValueLocation::Reg(_) => (op1, op0), - _ => { - if op0.immediate().is_some() { - (op1, op0) - } else { - (op0, op1) - } - } - } - ); - }; -} - -macro_rules! binop { - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => { - binop!($name, $instr, $const_fallback, $reg_ty, $reg_fn, $ty, $imm_fn, $direct_imm, |a, b| (a, b)); - }; - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr, $map_op:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(i1) = left.$imm_fn() { - if let Some(i0) = right.$imm_fn() { - self.block_state.stack.push(ValueLocation::Immediate($const_fallback(i1, i0).into())); - return Ok(()); - } - } - - let (mut left, mut right) = $map_op(left, right); - let lreg = self.put_into_temp_register($ty, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - // This handles the case where we (for example) have a float in an `Rq` reg - let right_reg = self.put_into_register($ty, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(right_reg.$reg_fn().unwrap()) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - if let Some(i) = i.as_int().and_then(|i| i.try_into().ok()) { - $direct_imm(&mut *self, lreg, i); - } else { - let scratch = self.take_reg($ty).unwrap(); - self.immediate_to_reg(scratch, i)?; - - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(scratch.$reg_fn().unwrap()) - ); - - self.block_state.regs.release(scratch)?; - } - } - } - - self.free_value(right)?; - self.push(left)?; - Ok(()) - } - } -} - -macro_rules! load { - (@inner $name:ident, $rtype:expr, $reg_ty:tt, $emit_fn:expr) => { - pub fn $name(&mut self, offset: u32) -> Result<(), Error> { - fn load_to_reg<_M: ModuleContext>( - ctx: &mut Context<_M>, - dst: GPR, - (offset, runtime_offset): (i32, Result) - ) -> Result<(), Error> { - let mem_index = 0; - let reg_offset = ctx.module_context - .defined_memory_index(mem_index) - .map(|index| ( - None, - ctx.module_context.vmctx_vmmemory_definition(index) as i32 - )); - let (reg, mem_offset) = reg_offset.unwrap_or_else(|| { - let reg = ctx.take_reg(I64).unwrap(); - - dynasm!(ctx.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let vmctx = GPR::Rq(VMCTX); - - if ctx.module_context.emit_memory_bounds_check() { - let addr_reg = match runtime_offset { - Ok(imm) => { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64 - ); - addr_reg - } - Err(gpr) => { - if offset == 0 { - ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } else if offset > 0 { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset] - ); - addr_reg - } else { - let addr_reg = ctx.take_reg(I64).unwrap(); - let offset_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rd(offset_reg.rq().unwrap()), offset - ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap()) - ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap()) - ); - ctx.block_state.regs.release(offset_reg)?; - addr_reg - } - } - }; - dynasm!(ctx.asm - ; cmp Rq(addr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_current_length() as i32 - ] - ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds) - ); - ctx.block_state.regs.release(addr_reg)?; - } - - let mem_ptr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(mem_ptr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_base() as i32 - ] - ); - if let Some(reg) = reg { - ctx.block_state.regs.release(reg)?; - } - $emit_fn(ctx, dst, mem_ptr_reg, runtime_offset, offset)?; - ctx.block_state.regs.release(mem_ptr_reg)?; - Ok(()) - } - - let base = self.pop()?; - - let temp = self.take_reg($rtype).unwrap(); - - match base { - ValueLocation::Immediate(i) => { - load_to_reg(self, temp, (offset as _, Ok(i.as_i32().unwrap())))?; - } - mut base => { - let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - load_to_reg(self, temp, (offset as _, Err(gpr)))?; - self.free_value(base)?; - } - } - - self.push(ValueLocation::Reg(temp))?; - Ok(()) - } - }; - ($name:ident, $rtype:expr, $reg_ty:tt, NONE, $rq_instr:ident, $ty:ident) => { - load!(@inner - $name, - $rtype, - $reg_ty, - |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| -> Result<(), Error> { - match runtime_offset { - Ok(imm) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] - ); - Ok(()) - } - Err(offset_reg) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - } - } - ); - }; - ($name:ident, $rtype:expr, $reg_ty:tt, $xmm_instr:ident, $rq_instr:ident, $ty:ident) => { - load!(@inner - $name, - $rtype, - $reg_ty, - |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| -> Result<(), Error> { - match (dst, runtime_offset) { - (GPR::Rq(r), Ok(imm)) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] - ); - Ok(()) - } - (GPR::Rx(r), Ok(imm)) => { - if let Some(combined) = offset.checked_add(imm) { - dynasm!(ctx.asm - ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + combined] - ); - Ok(()) - } else { - let offset_reg = ctx.take_reg(GPRType::Rq).unwrap(); - dynasm!(ctx.asm - ; mov Rq(offset_reg.rq().unwrap()), offset - ; $xmm_instr Rx(r), $ty [ - Rq(mem_ptr_reg.rq().unwrap()) + - Rq(offset_reg.rq().unwrap()) + - imm - ] - ); - ctx.block_state.regs.release(offset_reg)?; - Ok(()) - } - } - (GPR::Rq(r), Err(offset_reg)) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - (GPR::Rx(r), Err(offset_reg)) => { - dynasm!(ctx.asm - ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - } - } - ); - }; -} - -macro_rules! store { - (@inner $name:ident, $int_reg_ty:tt, $match_offset:expr, $size:ident) => { - pub fn $name(&mut self, offset: u32) -> Result<(), Error>{ - fn store_from_reg<_M: ModuleContext>( - ctx: &mut Context<_M>, - src: GPR, - (offset, runtime_offset): (i32, Result) - ) -> Result<(), Error> { - let mem_index = 0; - let reg_offset = ctx.module_context - .defined_memory_index(mem_index) - .map(|index| ( - None, - ctx.module_context.vmctx_vmmemory_definition(index) as i32 - )); - let (reg, mem_offset) = reg_offset.unwrap_or_else(|| { - let reg = ctx.take_reg(I64).unwrap(); - - dynasm!(ctx.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let vmctx = GPR::Rq(VMCTX); - - if ctx.module_context.emit_memory_bounds_check() { - let addr_reg = match runtime_offset { - Ok(imm) => { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64 - ); - addr_reg - } - Err(gpr) => { - if offset == 0 { - ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - - } else if offset > 0 { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset] - ); - addr_reg - } else { - let addr_reg = ctx.take_reg(I64).unwrap(); - let offset_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rd(offset_reg.rq().unwrap()), offset - ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap()) - ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap()) - ); - ctx.block_state.regs.release(offset_reg)?; - addr_reg - } - } - }; - dynasm!(ctx.asm - ; cmp Rq(addr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_current_length() as i32 - ] - ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds) - ); - ctx.block_state.regs.release(addr_reg)?; - } - - let mem_ptr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(mem_ptr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_base() as i32 - ] - ); - if let Some(reg) = reg { - ctx.block_state.regs.release(reg)?; - } - let src = $match_offset(ctx, mem_ptr_reg, runtime_offset, offset, src)?; - ctx.block_state.regs.release(mem_ptr_reg)?; - ctx.block_state.regs.release(src)?; - Ok(()) - } - - if !(offset <= i32::max_value() as u32) { - return Err(Error::Microwasm(format!("store: offset value too big {}", offset))) - } - - let mut src = self.pop()?; - let base = self.pop()?; - - // `store_from_reg` frees `src` - // TODO: Would it be better to free it outside `store_from_reg`? - let src_reg = self.put_into_register(None, &mut src)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - - match base { - ValueLocation::Immediate(i) => { - store_from_reg(self, src_reg, (offset as i32, Ok(i.as_i32().unwrap())))? - } - mut base => { - let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - store_from_reg(self, src_reg, (offset as i32, Err(gpr)))?; - self.free_value(base)?; - } - } - Ok(()) - } - }; - ($name:ident, $int_reg_ty:tt, NONE, $size:ident) => { - store!(@inner - $name, - $int_reg_ty, - |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32, src| -> Result { - let src_reg = ctx.put_into_temp_register(GPRType::Rq, &mut ValueLocation::Reg(src))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match runtime_offset { - Ok(imm) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(src_reg.rq().unwrap()) - ); - } - Err(offset_reg) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(src_reg.rq().unwrap()) - ); - } - } - - Ok(src_reg) - }, - $size - ); - }; - ($name:ident, $int_reg_ty:tt, $xmm_instr:ident, $size:ident) => { - store!(@inner - $name, - $int_reg_ty, - |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32, src| -> Result { - match (runtime_offset, src) { - (Ok(imm), GPR::Rq(r)) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(r) - ); - } - (Ok(imm), GPR::Rx(r)) => { - dynasm!(ctx.asm - ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], Rx(r) - ); - } - (Err(offset_reg), GPR::Rq(r)) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(r) - ); - } - (Err(offset_reg), GPR::Rx(r)) => { - dynasm!(ctx.asm - ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], Rx(r) - ); - } - } - - Ok(src) - }, - $size - ); - }; -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct VirtualCallingConvention { - pub stack: Stack, - pub depth: StackDepth, -} - -impl<'this, M: ModuleContext> Context<'this, M> { - fn free_reg(&mut self, type_: GPRType) -> Result { - let pos = if let Some(pos) = self - .block_state - .stack - .iter() - .position(|r| r.reg().map(|reg| reg.type_() == type_).unwrap_or(false)) - { - pos - } else { - return Ok(false); - }; - - let old_loc = self.block_state.stack[pos]; - let new_loc = self.push_physical(old_loc)?; - self.block_state.stack[pos] = new_loc; - - let reg = old_loc.reg().unwrap(); - - for elem in &mut self.block_state.stack[pos + 1..] { - if *elem == old_loc { - *elem = new_loc; - self.block_state.regs.release(reg)?; - } - } - - Ok(true) - } - - fn take_reg(&mut self, r: impl Into) -> Option { - let r = r.into(); - loop { - if let Some(gpr) = self.block_state.regs.take(r) { - break Some(gpr); - } - - if self.free_reg(r) == Ok(false) { - break None; - } - } - } - - pub fn set_source_loc(&mut self, loc: SourceLoc) { - self.source_loc = loc; - } - - pub fn virtual_calling_convention(&self) -> VirtualCallingConvention { - VirtualCallingConvention { - stack: self.block_state.stack.clone(), - depth: self.block_state.depth.clone(), - } - } - - /// Create a new undefined label. - pub fn create_label(&mut self) -> Label { - Label(self.asm.new_dynamic_label()) - } - - fn adjusted_offset(&self, offset: i32) -> i32 { - (self.block_state.depth.0 as i32 + offset) * WORD_SIZE as i32 - } - - cmp_i32!(i32_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b); - cmp_i32!(i32_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b); - // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous - cmp_i32!(i32_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u32) < (b as u32)); - cmp_i32!(i32_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u32) - <= (b as u32)); - cmp_i32!(i32_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u32) > (b as u32)); - cmp_i32!(i32_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u32) - >= (b as u32)); - cmp_i32!(i32_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b); - cmp_i32!(i32_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b); - cmp_i32!(i32_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b); - cmp_i32!(i32_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b); - - cmp_i64!(i64_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b); - cmp_i64!(i64_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b); - // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous - cmp_i64!(i64_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u64) < (b as u64)); - cmp_i64!(i64_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u64) - <= (b as u64)); - cmp_i64!(i64_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u64) > (b as u64)); - cmp_i64!(i64_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u64) - >= (b as u64)); - cmp_i64!(i64_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b); - cmp_i64!(i64_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b); - cmp_i64!(i64_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b); - cmp_i64!(i64_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b); - - cmp_f32!(f32_gt, f32_lt, seta, |a, b| a > b); - cmp_f32!(f32_ge, f32_le, setnc, |a, b| a >= b); - eq_float!( - f32_eq, - cmpeqss, - as_f32, - |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) == f32::from_bits(b.to_bits()) - ); - eq_float!( - f32_ne, - cmpneqss, - as_f32, - |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) != f32::from_bits(b.to_bits()) - ); - - cmp_f64!(f64_gt, f64_lt, seta, |a, b| a > b); - cmp_f64!(f64_ge, f64_le, setnc, |a, b| a >= b); - eq_float!( - f64_eq, - cmpeqsd, - as_f64, - |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) == f64::from_bits(b.to_bits()) - ); - eq_float!( - f64_ne, - cmpneqsd, - as_f64, - |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) != f64::from_bits(b.to_bits()) - ); - - // TODO: Should we do this logic in `eq` and just have this delegate to `eq`? - // That would mean that `eqz` and `eq` with a const 0 argument don't - // result in different code. It would also allow us to generate better - // code for `neq` and `gt_u` with const 0 operand - pub fn i32_eqz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - if let ValueLocation::Immediate(Value::I32(i)) = val { - self.push(ValueLocation::Immediate( - (if i == 0 { 1i32 } else { 0 }).into(), - ))?; - return Ok(()); - } - - if let ValueLocation::Cond(loc) = val { - self.push(ValueLocation::Cond(!loc))?; - return Ok(()); - } - - let reg = self - .put_into_register(I32, &mut val)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap()) - ; test Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; setz Rb(out.rq().unwrap()) - ); - - self.free_value(val)?; - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - pub fn i64_eqz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - if let ValueLocation::Immediate(Value::I64(i)) = val { - self.push(ValueLocation::Immediate( - (if i == 0 { 1i32 } else { 0 }).into(), - ))?; - return Ok(()); - } - - if let ValueLocation::Cond(loc) = val { - self.push(ValueLocation::Cond(!loc))?; - return Ok(()); - } - - let reg = self - .put_into_register(I64, &mut val)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap()) - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; setz Rb(out.rq().unwrap()) - ); - - self.free_value(val)?; - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - fn br_on_cond_code(&mut self, label: Label, cond: CondCode) { - match cond { - cc::EQUAL => dynasm!(self.asm - ; je =>label.0 - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; jne =>label.0 - ), - cc::GT_U => dynasm!(self.asm - ; ja =>label.0 - ), - cc::GE_U => dynasm!(self.asm - ; jae =>label.0 - ), - cc::LT_U => dynasm!(self.asm - ; jb =>label.0 - ), - cc::LE_U => dynasm!(self.asm - ; jbe =>label.0 - ), - cc::GT_S => dynasm!(self.asm - ; jg =>label.0 - ), - cc::GE_S => dynasm!(self.asm - ; jge =>label.0 - ), - cc::LT_S => dynasm!(self.asm - ; jl =>label.0 - ), - cc::LE_S => dynasm!(self.asm - ; jle =>label.0 - ), - } - } - - /// Pops i32 predicate and branches to the specified label - /// if the predicate is equal to zero. - pub fn br_if_false( - &mut self, - target: impl Into>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> { - let mut val = self.pop()?; - let label = self.target_to_label(target.into()); - - let cond = match val { - ValueLocation::Cond(cc) => !cc, - _ => { - let predicate = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap()) - ); - - CondCode::ZF0 - } - }; - - self.free_value(val)?; - - pass_args(self)?; - - self.br_on_cond_code(label, cond); - - Ok(()) - } - - /// Pops i32 predicate and branches to the specified label - /// if the predicate is not equal to zero. - pub fn br_if_true( - &mut self, - target: impl Into>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> { - let mut val = self.pop()?; - let label = self.target_to_label(target.into()); - - let cond = match val { - ValueLocation::Cond(cc) => cc, - _ => { - let predicate = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap()) - ); - - CondCode::ZF1 - } - }; - - self.free_value(val)?; - - pass_args(self)?; - - self.br_on_cond_code(label, cond); - - Ok(()) - } - - /// Branch unconditionally to the specified label. - pub fn br(&mut self, label: impl Into>) { - match label.into() { - BrTarget::Return => self.ret(), - BrTarget::Label(label) => dynasm!(self.asm - ; jmp =>label.0 - ), - } - } - - /// If `default` is `None` then the default is just continuing execution - pub fn br_table( - &mut self, - targets: I, - default: Option>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> - where - I: IntoIterator>>, - I::IntoIter: ExactSizeIterator + DoubleEndedIterator, - { - let mut targets = targets.into_iter(); - let count = targets.len(); - - let mut selector = self.pop()?; - - pass_args(self)?; - - if let Some(imm) = selector.imm_i32() { - if let Some(target) = targets.nth(imm as _).or(Some(default)).and_then(|a| a) { - match target { - BrTarget::Label(label) => self.br(label), - BrTarget::Return => { - dynasm!(self.asm - ; ret - ); - } - } - } - } else { - let end_label = self.create_label(); - - if count > 0 { - let temp = match self.put_into_temp_register(GPRType::Rq, &mut selector) { - Err(e) => return Err(e), - Ok(o) => match o { - Some(r) => Ok((r, false)), - None => { - self.push_physical(ValueLocation::Reg(RAX))?; - self.block_state.regs.mark_used(RAX); - Ok((RAX, true)) - } - }, - }; - - let (selector_reg, pop_selector) = match temp { - Err(e) => return Err(e), - Ok(a) => a, - }; - - let (tmp, pop_tmp) = if let Some(reg) = self.take_reg(I64) { - (reg, false) - } else { - let out_reg = if selector_reg == RAX { RCX } else { RAX }; - - self.push_physical(ValueLocation::Reg(out_reg))?; - self.block_state.regs.mark_used(out_reg); - - (out_reg, true) - }; - - self.immediate_to_reg(tmp, (count as u32).into())?; - dynasm!(self.asm - ; cmp Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ; cmova Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ; lea Rq(tmp.rq().unwrap()), [>start_label] - ; lea Rq(selector_reg.rq().unwrap()), [ - Rq(selector_reg.rq().unwrap()) * 5 - ] - ; add Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ); - - if pop_tmp { - dynasm!(self.asm - ; pop Rq(tmp.rq().unwrap()) - ); - } else { - self.block_state.regs.release(tmp)?; - } - - if pop_selector { - dynasm!(self.asm - ; pop Rq(selector_reg.rq().unwrap()) - ); - } - - dynasm!(self.asm - ; jmp Rq(selector_reg.rq().unwrap()) - ; start_label: - ); - - for target in targets { - let label = target - .map(|target| self.target_to_label(target)) - .unwrap_or(end_label); - dynasm!(self.asm - ; jmp =>label.0 - ); - } - } - - if let Some(def) = default { - self.br(def); - } - - self.define_label(end_label); - } - - self.free_value(selector)?; - Ok(()) - } - - fn set_stack_depth(&mut self, depth: StackDepth) -> Result<(), Error> { - if self.block_state.depth.0 != depth.0 { - let diff = depth.0 as i32 - self.block_state.depth.0 as i32; - let emit_lea = if diff.abs() != 1 { - true - } else { - match self.block_state.depth.0.cmp(&depth.0) { - Ordering::Less => { - for _ in 0..diff { - dynasm!(self.asm - ; push rax - ); - } - false - } - Ordering::Greater => { - if let Some(trash) = self.take_reg(I64) { - for _ in 0..self.block_state.depth.0 - depth.0 { - dynasm!(self.asm - ; pop Rq(trash.rq().unwrap()) - ); - } - self.block_state.regs.release(trash)?; - false - } else { - true - } - } - Ordering::Equal => false, - } - }; - if emit_lea { - dynasm!(self.asm - ; lea rsp, [rsp + (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32] - ); - } - self.block_state.depth = depth; - } - Ok(()) - } - - fn do_pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> { - let args = &cc.arguments; - for &dst in args.iter().rev().take(self.block_state.stack.len()) { - if let CCLoc::Reg(r) = dst { - if !self.block_state.regs.is_free(r) - && *self.block_state.stack.last().unwrap() != ValueLocation::Reg(r) - { - // TODO: This would be made simpler and more efficient with a proper SSE - // representation. - self.save_regs(std::iter::once(r))?; - } - - self.block_state.regs.mark_used(r); - } - self.pop_into(dst)?; - } - Ok(()) - } - - pub fn pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> { - self.do_pass_block_args(cc)?; - self.set_stack_depth(cc.stack_depth.clone())?; - Ok(()) - } - - pub fn serialize_block_args( - &mut self, - cc: &BlockCallingConvention, - params: u32, - ) -> Result { - self.do_pass_block_args(cc)?; - - let mut out_args = cc.arguments.clone(); - - out_args.reverse(); - - while out_args.len() < params as usize { - let mut val = self.pop()?; - - // TODO: We can use stack slots for values already on the stack but we - // don't refcount stack slots right now - let ccloc = self.put_into_temp_location(None, &mut val)?; - out_args.push(ccloc); - } - - out_args.reverse(); - - self.set_stack_depth(cc.stack_depth.clone())?; - - Ok(BlockCallingConvention { - stack_depth: cc.stack_depth.clone(), - arguments: out_args, - }) - } - - /// Puts all stack values into "real" locations so that they can i.e. be set to different - /// values on different iterations of a loop - pub fn serialize_args(&mut self, count: u32) -> Result { - let mut out = Vec::with_capacity(count as _); - - // TODO: We can make this more efficient now that `pop` isn't so complicated - for _ in 0..count { - let mut val = self.pop()?; - // TODO: We can use stack slots for values already on the stack but we - // don't refcount stack slots right now - let loc = self.put_into_temp_location(None, &mut val)?; - - out.push(loc); - } - - out.reverse(); - - Ok(BlockCallingConvention { - stack_depth: self.block_state.depth.clone(), - arguments: out, - }) - } - - pub fn get_global(&mut self, global_idx: u32) -> Result<(), Error> { - let (reg, offset) = self - .module_context - .defined_global_index(global_idx) - .map(|defined_global_index| { - ( - None, - self.module_context - .vmctx_vmglobal_definition(defined_global_index), - ) - }) - .unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context.vmctx_vmglobal_import_from(global_idx) as i32 - ] - ); - - (Some(reg), 0) - }); - - let out = self.take_reg(GPRType::Rq).unwrap(); - let vmctx = GPR::Rq(VMCTX); - - // TODO: Are globals necessarily aligned to 128 bits? We can load directly to an XMM reg if so - dynasm!(self.asm - ; mov Rq(out.rq().unwrap()), [Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32] - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - pub fn set_global(&mut self, global_idx: u32) -> Result<(), Error> { - let mut val = self.pop()?; - let (reg, offset) = self - .module_context - .defined_global_index(global_idx) - .map(|defined_global_index| { - ( - None, - self.module_context - .vmctx_vmglobal_definition(defined_global_index), - ) - }) - .unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context.vmctx_vmglobal_import_from(global_idx) as i32 - ] - ); - - (Some(reg), 0) - }); - - let val_reg = self - .put_into_register(GPRType::Rq, &mut val)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let vmctx = GPR::Rq(VMCTX); - - // We always use `Rq` (even for floats) since the globals are not necessarily aligned to 128 bits - dynasm!(self.asm - ; mov [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32 - ], Rq(val_reg.rq().unwrap()) - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - self.free_value(val)?; - Ok(()) - } - - fn immediate_to_reg(&mut self, reg: GPR, val: Value) -> Result<(), Error> { - match reg { - GPR::Rq(r) => { - let val = val.as_bytes(); - if (val as u64) <= u32::max_value() as u64 { - dynasm!(self.asm - ; mov Rd(r), val as i32 - ); - } else { - dynasm!(self.asm - ; mov Rq(r), QWORD val - ); - } - } - reg @ GPR::Rx(_) => { - let tmp = self - .take_reg(GPRType::Rq) - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - self.immediate_to_reg(tmp, val)?; - let tmp = ValueLocation::Reg(tmp); - self.copy_value(tmp, CCLoc::Reg(reg))?; - self.free_value(tmp)?; - } - } - - Ok(()) - } - - // The `&` and `&mut` aren't necessary (`ValueLocation` is copy) but it ensures that we don't get - // the arguments the wrong way around. In the future we want to have a `ReadLocation` and `WriteLocation` - // so we statically can't write to a literal so this will become a non-issue. - fn copy_value(&mut self, src: ValueLocation, dst: CCLoc) -> Result<(), Error> { - match (src, dst) { - (ValueLocation::Cond(cond), CCLoc::Stack(o)) => { - let offset = self.adjusted_offset(o); - - self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?; - - match cond { - cc::EQUAL => dynasm!(self.asm - ; sete [rsp + offset] - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; setne [rsp + offset] - ), - cc::GT_U => dynasm!(self.asm - ; seta [rsp + offset] - ), - cc::GE_U => dynasm!(self.asm - ; setae [rsp + offset] - ), - cc::LT_U => dynasm!(self.asm - ; setb [rsp + offset] - ), - cc::LE_U => dynasm!(self.asm - ; setbe [rsp + offset] - ), - cc::GT_S => dynasm!(self.asm - ; setg [rsp + offset] - ), - cc::GE_S => dynasm!(self.asm - ; setge [rsp + offset] - ), - cc::LT_S => dynasm!(self.asm - ; setl [rsp + offset] - ), - cc::LE_S => dynasm!(self.asm - ; setle [rsp + offset] - ), - } - } - (ValueLocation::Cond(cond), CCLoc::Reg(reg)) => match reg { - GPR::Rq(r) => { - self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?; - - match cond { - cc::EQUAL => dynasm!(self.asm - ; sete Rb(r) - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; setne Rb(r) - ), - cc::GT_U => dynasm!(self.asm - ; seta Rb(r) - ), - cc::GE_U => dynasm!(self.asm - ; setae Rb(r) - ), - cc::LT_U => dynasm!(self.asm - ; setb Rb(r) - ), - cc::LE_U => dynasm!(self.asm - ; setbe Rb(r) - ), - cc::GT_S => dynasm!(self.asm - ; setg Rb(r) - ), - cc::GE_S => dynasm!(self.asm - ; setge Rb(r) - ), - cc::LT_S => dynasm!(self.asm - ; setl Rb(r) - ), - cc::LE_S => dynasm!(self.asm - ; setle Rb(r) - ), - } - } - GPR::Rx(_) => { - let temp = CCLoc::Reg(self.take_reg(I32).unwrap()); - self.copy_value(src, temp)?; - let temp = temp.into(); - self.copy_value(temp, dst)?; - self.free_value(temp)?; - } - }, - (ValueLocation::Stack(in_offset), CCLoc::Stack(out_offset)) => { - let in_offset = self.adjusted_offset(in_offset); - let out_offset = self.adjusted_offset(out_offset); - if in_offset != out_offset { - if let Some(gpr) = self.take_reg(I64) { - dynasm!(self.asm - ; mov Rq(gpr.rq().unwrap()), [rsp + in_offset] - ; mov [rsp + out_offset], Rq(gpr.rq().unwrap()) - ); - self.block_state.regs.release(gpr)?; - } else { - dynasm!(self.asm - ; push rax - ; mov rax, [rsp + in_offset + WORD_SIZE as i32] - ; mov [rsp + out_offset + WORD_SIZE as i32], rax - ; pop rax - ); - } - } - } - // TODO: XMM registers - (ValueLocation::Reg(in_reg), CCLoc::Stack(out_offset)) => { - let out_offset = self.adjusted_offset(out_offset); - match in_reg { - GPR::Rq(in_reg) => { - // We can always use `Rq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; mov [rsp + out_offset], Rq(in_reg) - ); - } - GPR::Rx(in_reg) => { - // We can always use `movq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; movq [rsp + out_offset], Rx(in_reg) - ); - } - } - } - (ValueLocation::Immediate(i), CCLoc::Stack(out_offset)) => { - // TODO: Floats - let i = i.as_bytes(); - let out_offset = self.adjusted_offset(out_offset); - if let Some(scratch) = self.take_reg(I64) { - dynasm!(self.asm - ; mov Rq(scratch.rq().unwrap()), QWORD i - ; mov [rsp + out_offset], Rq(scratch.rq().unwrap()) - ); - - self.block_state.regs.release(scratch)?; - } else { - dynasm!(self.asm - ; push rax - ; mov rax, QWORD i - ; mov [rsp + out_offset + WORD_SIZE as i32], rax - ; pop rax - ); - } - } - (ValueLocation::Stack(in_offset), CCLoc::Reg(out_reg)) => { - let in_offset = self.adjusted_offset(in_offset); - match out_reg { - GPR::Rq(out_reg) => { - // We can always use `Rq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; mov Rq(out_reg), [rsp + in_offset] - ); - } - GPR::Rx(out_reg) => { - // We can always use `movq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; movq Rx(out_reg), [rsp + in_offset] - ); - } - } - } - (ValueLocation::Reg(in_reg), CCLoc::Reg(out_reg)) => { - if in_reg != out_reg { - match (in_reg, out_reg) { - (GPR::Rq(in_reg), GPR::Rq(out_reg)) => { - dynasm!(self.asm - ; mov Rq(out_reg), Rq(in_reg) - ); - } - (GPR::Rx(in_reg), GPR::Rq(out_reg)) => { - dynasm!(self.asm - ; movq Rq(out_reg), Rx(in_reg) - ); - } - (GPR::Rq(in_reg), GPR::Rx(out_reg)) => { - dynasm!(self.asm - ; movq Rx(out_reg), Rq(in_reg) - ); - } - (GPR::Rx(in_reg), GPR::Rx(out_reg)) => { - dynasm!(self.asm - ; movapd Rx(out_reg), Rx(in_reg) - ); - } - } - } - } - (ValueLocation::Immediate(i), CCLoc::Reg(out_reg)) => { - // TODO: Floats - self.immediate_to_reg(out_reg, i)?; - } - } - Ok(()) - } - - /// Define the given label at the current position. - /// - /// Multiple labels can be defined at the same position. However, a label - /// can be defined only once. - pub fn define_label(&mut self, label: Label) { - self.asm.dynamic_label(label.0); - } - - pub fn set_state(&mut self, state: VirtualCallingConvention) -> Result<(), Error> { - self.block_state.regs = Registers::new(); - self.block_state.regs.release_scratch_register()?; - for elem in &state.stack { - if let ValueLocation::Reg(r) = elem { - self.block_state.regs.mark_used(*r); - } - } - self.block_state.stack = state.stack; - self.block_state.depth = state.depth; - Ok(()) - } - - pub fn apply_cc( - &mut self, - cc: BlockCallingConvention>, - ) -> Result<(), Error> { - let stack = cc.arguments.into_iter(); - - self.block_state.stack = Vec::with_capacity(stack.size_hint().0); - self.block_state.regs = Registers::new(); - self.block_state.regs.release_scratch_register()?; - - for elem in stack { - if let CCLoc::Reg(r) = elem { - self.block_state.regs.mark_used(r); - } - - self.block_state.stack.push(elem.into()); - } - - self.block_state.depth = cc.stack_depth; - Ok(()) - } - - load!(i32_load, GPRType::Rq, Rd, movd, mov, DWORD); - load!(i64_load, GPRType::Rq, Rq, movq, mov, QWORD); - load!(f32_load, GPRType::Rx, Rd, movd, mov, DWORD); - load!(f64_load, GPRType::Rx, Rq, movq, mov, QWORD); - - load!(i32_load8_u, GPRType::Rq, Rd, NONE, movzx, BYTE); - load!(i32_load8_s, GPRType::Rq, Rd, NONE, movsx, BYTE); - load!(i32_load16_u, GPRType::Rq, Rd, NONE, movzx, WORD); - load!(i32_load16_s, GPRType::Rq, Rd, NONE, movsx, WORD); - - load!(i64_load8_u, GPRType::Rq, Rq, NONE, movzx, BYTE); - load!(i64_load8_s, GPRType::Rq, Rq, NONE, movsx, BYTE); - load!(i64_load16_u, GPRType::Rq, Rq, NONE, movzx, WORD); - load!(i64_load16_s, GPRType::Rq, Rq, NONE, movsx, WORD); - load!(i64_load32_u, GPRType::Rq, Rd, movd, mov, DWORD); - load!(i64_load32_s, GPRType::Rq, Rq, NONE, movsxd, DWORD); - - store!(store8, Rb, NONE, DWORD); - store!(store16, Rw, NONE, QWORD); - store!(store32, Rd, movd, DWORD); - store!(store64, Rq, movq, QWORD); - - fn push_physical(&mut self, mut value: ValueLocation) -> Result { - let out_offset = -(self.block_state.depth.0 as i32 + 1); - match value { - ValueLocation::Reg(_) | ValueLocation::Immediate(_) | ValueLocation::Cond(_) => { - if let Some(gpr) = self.put_into_register(GPRType::Rq, &mut value)? { - dynasm!(self.asm - ; push Rq(gpr.rq().unwrap()) - ); - } else { - dynasm!(self.asm - ; push rax - ); - - self.copy_value(value, CCLoc::Stack(out_offset))?; - } - - self.free_value(value)?; - } - ValueLocation::Stack(o) => { - let offset = self.adjusted_offset(o); - dynasm!(self.asm - ; push QWORD [rsp + offset] - ); - } - } - - self.block_state.depth.reserve(1); - - Ok(ValueLocation::Stack(out_offset)) - } - - fn push(&mut self, value: ValueLocation) -> Result<(), Error> { - if let Some(mut top) = self.block_state.stack.pop() { - if let ValueLocation::Cond(_) = top { - match self.put_into_register(I32, &mut top) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - } - - self.block_state.stack.push(top); - } - - self.block_state.stack.push(value); - Ok(()) - } - - fn pop(&mut self) -> Result { - match self.block_state.stack.pop() { - Some(v) => Ok(v), - None => Err(Error::Microwasm( - "Stack is empty - pop impossible".to_string(), - )), - } - } - - pub fn drop(&mut self, range: RangeInclusive) -> Result<(), Error> { - let mut repush = Vec::with_capacity(*range.start() as _); - - for _ in 0..*range.start() { - let v = self.pop()?; - repush.push(v); - } - - for _ in range { - let val = self.pop()?; - self.free_value(val)?; - } - - for v in repush.into_iter().rev() { - self.push(v)?; - } - Ok(()) - } - - fn pop_into(&mut self, dst: CCLoc) -> Result<(), Error> { - let val = self.pop()?; - self.copy_value(val, dst)?; - self.free_value(val)?; - Ok(()) - } - - fn free_value(&mut self, val: ValueLocation) -> Result<(), Error> { - if let ValueLocation::Reg(r) = val { - self.block_state.regs.release(r)?; - } - Ok(()) - } - - /// Puts this value into a register so that it can be efficiently read - fn put_into_register( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result, Error> { - if let Some(out) = self.clone_to_register(ty, *val)? { - self.free_value(*val)?; - *val = ValueLocation::Reg(out); - Ok(Some(out)) - } else { - Ok(None) - } - } - - /// Clones this value into a register so that it can be efficiently read - fn clone_to_register( - &mut self, - ty: impl Into>, - val: ValueLocation, - ) -> Result, Error> { - let ty = ty.into(); - match val { - ValueLocation::Reg(r) if ty.map(|t| t == r.type_()).unwrap_or(true) => { - self.block_state.regs.mark_used(r); - Ok(Some(r)) - } - val => match self.take_reg(ty.unwrap_or(GPRType::Rq)) { - Some(scratch) => { - self.copy_value(val, CCLoc::Reg(scratch))?; - Ok(Some(scratch)) - } - None => Ok(None), - }, - } - } - - /// Puts this value into a temporary register so that operations - /// on that register don't write to a local. - fn put_into_temp_register( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result, Error> { - let out = self.clone_to_temp_register(ty, *val)?; - if let Some(o) = out { - self.free_value(*val)?; - *val = ValueLocation::Reg(o); - Ok(Some(o)) - } else { - Ok(None) - } - } - - fn put_into_temp_location( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result { - if let Some(gpr) = self.put_into_temp_register(ty, val)? { - Ok(CCLoc::Reg(gpr)) - } else { - let out = CCLoc::Stack(self.push_physical(*val)?.stack().unwrap()); - *val = out.into(); - Ok(out) - } - } - - /// Clones this value into a temporary register so that operations - /// on that register don't write to a local. - - fn clone_to_temp_register( - &mut self, - ty: impl Into>, - val: ValueLocation, - ) -> Result, Error> { - // If we have `None` as the type then it always matches (`.unwrap_or(true)`) - match val { - ValueLocation::Reg(r) => { - let ty = ty.into(); - let type_matches = ty.map(|t| t == r.type_()).unwrap_or(true); - - if self.block_state.regs.num_usages(r) <= 1 && type_matches { - self.block_state.regs.mark_used(r); - Ok(Some(r)) - } else if let Some(scratch) = self.take_reg(ty.unwrap_or(GPRType::Rq)) { - self.copy_value(val, CCLoc::Reg(scratch))?; - Ok(Some(scratch)) - } else { - Ok(None) - } - } - val => self.clone_to_register(ty, val), - } - } - - pub fn f32_neg(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits((-f32::from_bits(i.to_bits())).to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let const_label = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32)); - - dynasm!(self.asm - ; xorps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_neg(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits((-f64::from_bits(i.to_bits())).to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let const_label = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - - dynasm!(self.asm - ; xorpd Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_abs(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits(f32::from_bits(i.to_bits()).abs().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let const_label = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32)); - - dynasm!(self.asm - ; andps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_abs(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits(f64::from_bits(i.to_bits()).abs().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let const_label = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64)); - - dynasm!(self.asm - ; andps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_sqrt(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits(f32::from_bits(i.to_bits()).sqrt().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; sqrtss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_sqrt(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits(f64::from_bits(i.to_bits()).sqrt().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; sqrtsd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ); - - ValueLocation::Reg(reg) - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_copysign(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let (Some(left), Some(right)) = (left.imm_f32(), right.imm_f32()) { - ValueLocation::Immediate( - Ieee32::from_bits( - (left.to_bits() & REST_MASK_F32) | (right.to_bits() & SIGN_MASK_F32), - ) - .into(), - ) - } else { - let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let rreg = match self.put_into_register(GPRType::Rx, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let sign_mask = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32)); - let rest_mask = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32)); - - dynasm!(self.asm - ; andps Rx(rreg.rx().unwrap()), [=>sign_mask.0] - ; andps Rx(lreg.rx().unwrap()), [=>rest_mask.0] - ; orps Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ); - - self.free_value(right)?; - - left - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_copysign(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let (Some(left), Some(right)) = (left.imm_f64(), right.imm_f64()) { - ValueLocation::Immediate( - Ieee64::from_bits( - (left.to_bits() & REST_MASK_F64) | (right.to_bits() & SIGN_MASK_F64), - ) - .into(), - ) - } else { - let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let rreg = match self.put_into_register(GPRType::Rx, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let rest_mask = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64)); - - dynasm!(self.asm - ; andpd Rx(rreg.rx().unwrap()), [=>sign_mask.0] - ; andpd Rx(lreg.rx().unwrap()), [=>rest_mask.0] - ; orpd Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ); - - self.free_value(right)?; - - left - }; - - self.push(out)?; - Ok(()) - } - - pub fn i32_clz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_i32().unwrap().leading_zeros().into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rd(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_2 = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; bsr Rd(temp.rq().unwrap()), [rsp + offset] - ; mov Rd(temp_2.rq().unwrap()), DWORD 0x3fu64 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap()) - ; mov Rd(temp_2.rq().unwrap()), DWORD 0x1fu64 as _ - ; xor Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_2))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsr Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x3fu64 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x1fu64 as _ - ; xor Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i64_clz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap().leading_zeros() as u64).into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rq(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_2 = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsr Rq(temp.rq().unwrap()), [rsp + offset] - ; mov Rq(temp_2.rq().unwrap()), QWORD 0x7fu64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap()) - ; mov Rq(temp_2.rq().unwrap()), QWORD 0x3fu64 as _ - ; xor Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_2))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsr Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x7fu64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x3fu64 as _ - ; xor Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i32_ctz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_i32().unwrap().trailing_zeros().into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rd(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_zero_val = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; bsf Rd(temp.rq().unwrap()), [rsp + offset] - ; mov Rd(temp_zero_val.rq().unwrap()), DWORD 0x20u32 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(temp_zero_val.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_zero_val))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsf Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x20u32 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i64_ctz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap().trailing_zeros() as u64).into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rq(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_zero_val = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsf Rq(temp.rq().unwrap()), [rsp + offset] - ; mov Rq(temp_zero_val.rq().unwrap()), QWORD 0x40u64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(temp_zero_val.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_zero_val))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsf Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x40u64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i32_extend_u(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = if let ValueLocation::Immediate(imm) = val { - ValueLocation::Immediate((imm.as_i32().unwrap() as u32 as u64).into()) - } else { - let new_reg = self.take_reg(I64).unwrap(); - - // TODO: Track set-ness of bits - we can make this a no-op in most cases - // but we have to make this unconditional just in case this value - // came from a truncate. - match val { - ValueLocation::Reg(GPR::Rx(rxreg)) => { - dynasm!(self.asm - ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) - ); - } - ValueLocation::Reg(GPR::Rq(rqreg)) => { - dynasm!(self.asm - ; mov Rd(new_reg.rq().unwrap()), Rd(rqreg) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; mov Rd(new_reg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Cond(_) => self.copy_value(val, CCLoc::Reg(new_reg))?, - ValueLocation::Immediate(_) => { - return Err(Error::Microwasm( - "i32_extend_u unreachable code".to_string(), - )) - } - } - - ValueLocation::Reg(new_reg) - }; - - self.free_value(val)?; - - self.push(out)?; - Ok(()) - } - - pub fn i32_extend_s(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - self.free_value(val)?; - let new_reg = self.take_reg(I64).unwrap(); - - let out = match val { - ValueLocation::Reg(GPR::Rx(rxreg)) => { - dynasm!(self.asm - ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) - ; movsxd Rq(new_reg.rq().unwrap()), Rd(new_reg.rq().unwrap()) - ); - - ValueLocation::Reg(new_reg) - } - ValueLocation::Reg(GPR::Rq(rqreg)) => { - dynasm!(self.asm - ; movsxd Rq(new_reg.rq().unwrap()), Rd(rqreg) - ); - - ValueLocation::Reg(new_reg) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; movsxd Rq(new_reg.rq().unwrap()), DWORD [rsp + offset] - ); - - ValueLocation::Reg(new_reg) - } - // `CondCode` can only be 0 or 1, so sign-extension is always the same as - // zero-extension - val @ ValueLocation::Cond(_) => { - self.copy_value(val, CCLoc::Reg(new_reg))?; - - ValueLocation::Reg(new_reg) - } - ValueLocation::Immediate(imm) => { - self.block_state.regs.release(new_reg)?; - - ValueLocation::Immediate((imm.as_i32().unwrap() as i64).into()) - } - }; - - self.push(out)?; - Ok(()) - } - - unop!(i32_popcnt, popcnt, Rd, u32, u32::count_ones); - conversion!( - f64_from_f32, - cvtss2sd, - Rx, - rx, - Rx, - rx, - f32, - f64, - as_f32, - |a: Ieee32| Ieee64::from_bits((f32::from_bits(a.to_bits()) as f64).to_bits()) - ); - conversion!( - f32_from_f64, - cvtsd2ss, - Rx, - rx, - Rx, - rx, - f64, - f32, - as_f64, - |a: Ieee64| Ieee32::from_bits((f64::from_bits(a.to_bits()) as f32).to_bits()) - ); - - pub fn i32_truncate_f32_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0xCF00_0000_u32 as i32)); - let zero = self.aligned_label(16, LabelValue::I32(0)); - - dynasm!(self.asm - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0x4F00_0000_u32 as i32)); - - dynasm!(self.asm - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jae >else_ - ; jp >trap - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; jmp >ret - ; else_: - ; subss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f64_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0xC1E0_0000_0020_0000_u64 as i64)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jna >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u32).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0x41E0_0000_0000_0000_u64 as i64)); - - dynasm!(self.asm - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jae >else_ - ; jp >trap - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; jmp >ret - ; else_: - ; subsd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - conversion!( - f32_convert_from_i32_s, - cvtsi2ss, - Rd, - rq, - Rx, - rx, - i32, - f32, - as_i32, - |a| Ieee32::from_bits((a as f32).to_bits()) - ); - conversion!( - f64_convert_from_i32_s, - cvtsi2sd, - Rd, - rq, - Rx, - rx, - i32, - f64, - as_i32, - |a| Ieee64::from_bits((a as f64).to_bits()) - ); - conversion!( - f32_convert_from_i64_s, - cvtsi2ss, - Rq, - rq, - Rx, - rx, - i64, - f32, - as_i64, - |a| Ieee32::from_bits((a as f32).to_bits()) - ); - conversion!( - f64_convert_from_i64_s, - cvtsi2sd, - Rq, - rq, - Rx, - rx, - i64, - f64, - as_i64, - |a| Ieee64::from_bits((a as f64).to_bits()) - ); - - pub fn i64_truncate_f32_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i64).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0xDF00_0000_u32 as i32)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f64_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i64).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(8, LabelValue::I64(SIGN_MASK_F64 as i64)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0xC3E0_0000_0000_0000_u64 as i64)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as u64).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(I64).unwrap(); - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let u64_trunc_f32_const = self.aligned_label(16, LabelValue::I32(0x5F00_0000_i32)); - - dynasm!(self.asm - ; comiss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0] - ; jae >large - ; jp >trap - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap()) - ; js >trap - ; jmp >cont - ; large: - ; subss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0] - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >cont - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; cont: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u64).into(), - ), - _ => { - let reg = match self.put_into_register(F64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(I64).unwrap(); - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let u64_trunc_f64_const = - self.aligned_label(16, LabelValue::I64(0x43E0_0000_0000_0000_i64)); - - dynasm!(self.asm - ; comisd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0] - ; jnb >large - ; jp >trap - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), 0 - ; jl >trap - ; jmp >cont - ; large: - ; subsd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0] - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), 0 - ; jnge >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >cont - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; cont: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f32_convert_from_i32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee32::from_bits((imm.as_i32().unwrap() as u32 as f32).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(F32).unwrap(); - - dynasm!(self.asm - ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; cvtsi2ss Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f64_convert_from_i32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee64::from_bits((imm.as_i32().unwrap() as u32 as f64).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(F64).unwrap(); - - dynasm!(self.asm - ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; cvtsi2sd Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f32_convert_from_i64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee32::from_bits((imm.as_i64().unwrap() as u64 as f32).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let out = self.take_reg(F32).unwrap(); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; js >negative - ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; jmp >ret - ; negative: - ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; shr Rq(temp.rq().unwrap()), 1 - ; and Rq(reg.rq().unwrap()), 1 - ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap()) - ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; addss Rx(out.rx().unwrap()), Rx(out.rx().unwrap()) - ; ret: - ); - - self.free_value(ValueLocation::Reg(temp))?; - - ValueLocation::Reg(out) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f64_convert_from_i64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee64::from_bits((imm.as_i64().unwrap() as u64 as f64).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let out = self.take_reg(F32).unwrap(); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; js >negative - ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; jmp >ret - ; negative: - ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; shr Rq(temp.rq().unwrap()), 1 - ; and Rq(reg.rq().unwrap()), 1 - ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap()) - ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; addsd Rx(out.rx().unwrap()), Rx(out.rx().unwrap()) - ; ret: - ); - - self.free_value(ValueLocation::Reg(temp))?; - - ValueLocation::Reg(out) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_wrap_from_i64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap() as u64 as u32).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn i32_reinterpret_from_f32(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_f32().unwrap().to_bits().into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn i64_reinterpret_from_f64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_f64().unwrap().to_bits().into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_reinterpret_from_i32(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(Ieee32::from_bits(imm.as_i32().unwrap() as _).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_reinterpret_from_i64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(Ieee64::from_bits(imm.as_i64().unwrap() as _).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - unop!(i64_popcnt, popcnt, Rq, u64, |a: u64| a.count_ones() as u64); - - // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands - // are in registers. - commutative_binop_i32!(i32_add, add, i32::wrapping_add); - commutative_binop_i32!(i32_and, and, |a, b| a & b); - commutative_binop_i32!(i32_or, or, |a, b| a | b); - commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b); - binop_i32!(i32_sub, sub, i32::wrapping_sub); - - commutative_binop_i64!(i64_add, add, i64::wrapping_add); - commutative_binop_i64!(i64_and, and, |a, b| a & b); - commutative_binop_i64!(i64_or, or, |a, b| a | b); - commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b); - binop_i64!(i64_sub, sub, i64::wrapping_sub); - - commutative_binop_f32!(f32_add, addss, |a, b| a + b); - commutative_binop_f32!(f32_mul, mulss, |a, b| a * b); - minmax_float!( - f32_min, - minss, - ucomiss, - addss, - orps, - as_f32, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - f32::from_bits(a.to_bits()) - .min(f32::from_bits(b.to_bits())) - .to_bits() - ) - ); - minmax_float!( - f32_max, - maxss, - ucomiss, - addss, - andps, - as_f32, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - f32::from_bits(a.to_bits()) - .max(f32::from_bits(b.to_bits())) - .to_bits() - ) - ); - binop_f32!(f32_sub, subss, |a, b| a - b); - binop_f32!(f32_div, divss, |a, b| a / b); - - pub fn f32_ceil(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::CeilF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f32_floor(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::FloorF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f32_nearest(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::NearestF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f32_trunc(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::TruncF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - commutative_binop_f64!(f64_add, addsd, |a, b| a + b); - commutative_binop_f64!(f64_mul, mulsd, |a, b| a * b); - minmax_float!( - f64_min, - minsd, - ucomisd, - addsd, - orpd, - as_f64, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - f64::from_bits(a.to_bits()) - .min(f64::from_bits(b.to_bits())) - .to_bits() - ) - ); - minmax_float!( - f64_max, - maxsd, - ucomisd, - addsd, - andpd, - as_f64, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - f64::from_bits(a.to_bits()) - .max(f64::from_bits(b.to_bits())) - .to_bits() - ) - ); - binop_f64!(f64_sub, subsd, |a, b| a - b); - binop_f64!(f64_div, divsd, |a, b| a / b); - - pub fn f64_ceil(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::CeilF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_floor(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::FloorF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_nearest(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::NearestF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_trunc(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::TruncF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - shift!( - i32_shl, - Rd, - shl, - |a, b| (a as i32).wrapping_shl(b as _), - I32 - ); - shift!( - i32_shr_s, - Rd, - sar, - |a, b| (a as i32).wrapping_shr(b as _), - I32 - ); - shift!( - i32_shr_u, - Rd, - shr, - |a, b| (a as u32).wrapping_shr(b as _), - I32 - ); - shift!( - i32_rotl, - Rd, - rol, - |a, b| (a as u32).rotate_left(b as _), - I32 - ); - shift!( - i32_rotr, - Rd, - ror, - |a, b| (a as u32).rotate_right(b as _), - I32 - ); - - shift!( - i64_shl, - Rq, - shl, - |a, b| (a as i64).wrapping_shl(b as _), - I64 - ); - shift!( - i64_shr_s, - Rq, - sar, - |a, b| (a as i64).wrapping_shr(b as _), - I64 - ); - shift!( - i64_shr_u, - Rq, - shr, - |a, b| (a as u64).wrapping_shr(b as _), - I64 - ); - shift!( - i64_rotl, - Rq, - rol, - |a, b| (a as u64).rotate_left(b as _), - I64 - ); - shift!( - i64_rotr, - Rq, - ror, - |a, b| (a as u64).rotate_right(b as _), - I64 - ); - - // TODO: Do this without emitting `mov` - fn cleanup_gprs(&mut self, gprs: impl Iterator) { - for gpr in gprs { - dynasm!(self.asm - ; pop Rq(gpr.rq().unwrap()) - ); - self.block_state.depth.free(1); - // DON'T MARK IT USED HERE! See comment in `full_div` - } - } - - int_div!( - i32_full_div_s, - i32_full_div_u, - i32_div_u, - i32_div_s, - i32_rem_u, - i32_rem_s, - imm_i32, - i32, - u32, - Rd, - DWORD - ); - int_div!( - i64_full_div_s, - i64_full_div_u, - i64_div_u, - i64_div_s, - i64_rem_u, - i64_rem_s, - imm_i64, - i64, - u64, - Rq, - QWORD - ); - - // TODO: With a proper SSE-like "Value" system we could do this way better (we wouldn't have - // to move `RAX`/`RDX` back afterwards). - fn full_div( - &mut self, - mut divisor: ValueLocation, - dividend: ValueLocation, - do_div: impl FnOnce(&mut Self, &mut ValueLocation) -> Result<(), Error>, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - // To stop `take_reg` from allocating either of these necessary registers - self.block_state.regs.mark_used(RAX); - self.block_state.regs.mark_used(RDX); - if divisor == ValueLocation::Reg(RAX) || divisor == ValueLocation::Reg(RDX) { - let new_reg = self.take_reg(GPRType::Rq).unwrap(); - self.copy_value(divisor, CCLoc::Reg(new_reg))?; - self.free_value(divisor)?; - - divisor = ValueLocation::Reg(new_reg); - } - self.block_state.regs.release(RAX)?; - self.block_state.regs.release(RDX)?; - - let saved_rax = if self.block_state.regs.is_free(RAX) { - None - } else { - dynasm!(self.asm - ; push rax - ); - self.block_state.depth.reserve(1); - // DON'T FREE THIS REGISTER HERE - since we don't - // remove it from the stack freeing the register - // here will cause `take_reg` to allocate it. - Some(()) - }; - - let saved_rdx = if self.block_state.regs.is_free(RDX) { - None - } else { - dynasm!(self.asm - ; push rdx - ); - self.block_state.depth.reserve(1); - // DON'T FREE THIS REGISTER HERE - since we don't - // remove it from the stack freeing the register - // here will cause `take_reg` to allocate it. - Some(()) - }; - - let saved = saved_rdx - .map(|_| RDX) - .into_iter() - .chain(saved_rax.map(|_| RAX)); - - self.copy_value(dividend, CCLoc::Reg(RAX))?; - self.block_state.regs.mark_used(RAX); - - self.free_value(dividend)?; - // To stop `take_reg` from allocating either of these necessary registers - self.block_state.regs.mark_used(RDX); - - do_div(self, &mut divisor)?; - self.free_value(divisor)?; - - if self.block_state.regs.is_free(RAX) { - return Err(Error::Microwasm("full_div: RAX is not free".to_string())); - } - if self.block_state.regs.is_free(RDX) { - return Err(Error::Microwasm("full_div: RDX is not free".to_string())); - } - - Ok((ValueLocation::Reg(RAX), ValueLocation::Reg(RDX), saved)) - } - - fn i32_full_div_u( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; xor edx, edx - ; div DWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I32, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(this.asm - ; xor edx, edx - ; div Rd(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i32_full_div_s( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; cdq - ; idiv DWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I32, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(this.asm - ; cdq - ; idiv Rd(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i64_full_div_u( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; xor rdx, rdx - ; div QWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I64, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - dynasm!(this.asm - ; xor rdx, rdx - ; div Rq(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i64_full_div_s( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; cqo - ; idiv QWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I64, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(this.asm - ; cqo - ; idiv Rq(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - // `i32_mul` needs to be separate because the immediate form of the instruction - // has a different syntax to the immediate form of the other instructions. - pub fn i32_mul(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - i32::wrapping_mul(right.as_i32().unwrap(), left.as_i32().unwrap()).into(), - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(_) => (left, right), - _ => { - if right.immediate().is_some() { - (left, right) - } else { - (right, left) - } - } - }; - - let out = match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = match self.put_into_register(I32, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let lreg = match self.put_into_temp_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap()) - ); - left - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - let lreg = match self.put_into_temp_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rd(lreg.rq().unwrap()), [rsp + offset] - ); - left - } - ValueLocation::Immediate(i) => { - let lreg = match self.put_into_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let new_reg = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; imul Rd(new_reg.rq().unwrap()), Rd(lreg.rq().unwrap()), i.as_i32().unwrap() - ); - self.free_value(left)?; - ValueLocation::Reg(new_reg) - } - }; - - self.push(out)?; - self.free_value(right)?; - Ok(()) - } - - // `i64_mul` needs to be separate because the immediate form of the instruction - // has a different syntax to the immediate form of the other instructions. - pub fn i64_mul(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - i64::wrapping_mul(right.as_i64().unwrap(), left.as_i64().unwrap()).into(), - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(_) => (left, right), - _ => { - if right.immediate().is_some() { - (left, right) - } else { - (right, left) - } - } - }; - - let out = match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = match self.put_into_register(I64, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let lreg = match self.put_into_temp_register(I64, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - left - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - let lreg = match self.put_into_temp_register(I64, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), [rsp + offset] - ); - left - } - ValueLocation::Immediate(i) => { - let i = i.as_i64().unwrap(); - if let Ok(i) = i.try_into() { - let new_reg = self.take_reg(I64).unwrap(); - - let lreg = self - .put_into_register(I64, &mut left)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; imul Rq(new_reg.rq().unwrap()), Rq(lreg.rq().unwrap()), i - ); - - self.free_value(left)?; - - ValueLocation::Reg(new_reg) - } else { - let rreg = self - .put_into_register(I64, &mut right)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let lreg = self - .put_into_temp_register(I64, &mut left)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - left - } - } - }; - - self.push(out)?; - self.free_value(right)?; - Ok(()) - } - - fn cmov(&mut self, cond_code: CondCode, dst: GPR, src: CCLoc) { - match src { - CCLoc::Reg(reg) => match cond_code { - cc::EQUAL => { - dynasm!(self.asm - ; cmove Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::NOT_EQUAL => { - dynasm!(self.asm - ; cmovne Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GE_U => { - dynasm!(self.asm - ; cmovae Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LT_U => { - dynasm!(self.asm - ; cmovb Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GT_U => { - dynasm!(self.asm - ; cmova Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LE_U => { - dynasm!(self.asm - ; cmovbe Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GE_S => { - dynasm!(self.asm - ; cmovge Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LT_S => { - dynasm!(self.asm - ; cmovl Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GT_S => { - dynasm!(self.asm - ; cmovg Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LE_S => { - dynasm!(self.asm - ; cmovle Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - }, - CCLoc::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - match cond_code { - cc::EQUAL => { - dynasm!(self.asm - ; cmove Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::NOT_EQUAL => { - dynasm!(self.asm - ; cmovne Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GE_U => { - dynasm!(self.asm - ; cmovae Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LT_U => { - dynasm!(self.asm - ; cmovb Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GT_U => { - dynasm!(self.asm - ; cmova Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LE_U => { - dynasm!(self.asm - ; cmovbe Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GE_S => { - dynasm!(self.asm - ; cmovge Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LT_S => { - dynasm!(self.asm - ; cmovl Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GT_S => { - dynasm!(self.asm - ; cmovg Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LE_S => { - dynasm!(self.asm - ; cmovle Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - } - } - } - } - - pub fn select(&mut self) -> Result<(), Error> { - let mut cond = self.pop()?; - let mut else_ = self.pop()?; - let mut then = self.pop()?; - - if let ValueLocation::Immediate(i) = cond { - if i.as_i32().unwrap() == 0 { - self.free_value(then)?; - self.push(else_)?; - } else { - self.free_value(else_)?; - self.push(then)?; - } - - return Ok(()); - } - - let cond_code = match cond { - ValueLocation::Cond(cc) => cc, - _ => { - let cond_reg = match self.put_into_register(I32, &mut cond) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - dynasm!(self.asm - ; test Rd(cond_reg.rq().unwrap()), Rd(cond_reg.rq().unwrap()) - ); - self.free_value(cond)?; - - cc::NOT_EQUAL - } - }; - - let else_ = if let ValueLocation::Stack(offset) = else_ { - CCLoc::Stack(offset) - } else { - let gpr = match self.put_into_register(I32, &mut else_) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - CCLoc::Reg(gpr) - }; - - let then = if let ValueLocation::Stack(offset) = then { - CCLoc::Stack(offset) - } else { - let gpr = match self.put_into_register(I32, &mut then) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - CCLoc::Reg(gpr) - }; - - let out_gpr = match (then, else_) { - (CCLoc::Reg(then_reg), else_) if self.block_state.regs.num_usages(then_reg) <= 1 => { - self.cmov(!cond_code, then_reg, else_); - self.free_value(else_.into())?; - - then_reg - } - (then, CCLoc::Reg(else_reg)) if self.block_state.regs.num_usages(else_reg) <= 1 => { - self.cmov(cond_code, else_reg, then); - self.free_value(then.into())?; - - else_reg - } - (then, else_) => { - let out = self.take_reg(GPRType::Rq).unwrap(); - self.copy_value(else_.into(), CCLoc::Reg(out))?; - self.cmov(cond_code, out, then); - - self.free_value(then.into())?; - self.free_value(else_.into())?; - - out - } - }; - - self.push(ValueLocation::Reg(out_gpr))?; - Ok(()) - } - - pub fn pick(&mut self, depth: u32) { - let idx = self.block_state.stack.len() - 1 - depth as usize; - let v = self.block_state.stack[idx]; - if let ValueLocation::Reg(r) = v { - self.block_state.regs.mark_used(r); - } - self.block_state.stack.push(v); - } - - pub fn const_(&mut self, imm: Value) -> Result<(), Error> { - self.push(ValueLocation::Immediate(imm))?; - Ok(()) - } - - fn relocated_function_call< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - name: &cranelift_codegen::ir::ExternalName, - args: A, - rets: R, - func_def_loc: FunctionDefLocation, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(args); - - let saved_vmctx = if func_def_loc == FunctionDefLocation::PossiblyExternal { - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - self.block_state.regs.mark_used(GPR::Rq(VMCTX)); - Some(self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?) - } else { - None - }; - - self.save_volatile()?; - - self.pass_outgoing_args(&locs)?; - - // 2 bytes for the 64-bit `mov` opcode + register ident, the rest is the immediate - self.reloc_sink.reloc_external( - (self.asm.offset().0 - - self.func_starts[self.current_function as usize] - .0 - .unwrap() - .0) as u32 - + 2, - // Passing a default location here, since until proven otherwise, it's not used. - ir::SourceLoc::default(), - binemit::Reloc::Abs8, - name, - 0, - ); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(temp.rq().unwrap()), QWORD 0xDEAD_BEEF_DEAD_BEEF_u64 as i64 - ; call Rq(temp.rq().unwrap()) - ); - self.block_state.regs.release(temp)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(rets)?; - - if func_def_loc == FunctionDefLocation::PossiblyExternal { - let saved_vmctx = saved_vmctx.unwrap(); - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?; - self.free_value(saved_vmctx)?; - } - - Ok(()) - } - - fn builtin_function_call< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - i: BuiltinFunctionIndex, - args: A, - rets: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs(args); - - dynasm!(self.asm - ; push Rq(VMCTX) - ); - self.block_state.depth.reserve(1); - let depth = self.block_state.depth.clone(); - - self.save_volatile()?; - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.pass_outgoing_args(&locs)?; - - let temp = self.take_reg(I64).unwrap(); - dynasm!(self.asm - ; mov Rq(temp.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_builtin_function(i.index()) as i32 - ] - ; call Rq(temp.rq().unwrap()) - ); - - self.block_state.regs.release(temp)?; - - for i in locs { - self.free_value(i.into())?; - } - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - - self.push_function_returns(rets)?; - - self.set_stack_depth(depth)?; - dynasm!(self.asm - ; pop Rq(VMCTX) - ); - self.block_state.depth.free(1); - - Ok(()) - } - - // TODO: Other memory indices - pub fn memory_size(&mut self) -> Result<(), Error> { - let memory_index = 0; - if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) { - self.push(ValueLocation::Immediate(defined_memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_memory32_size_index(), - [self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } else { - self.push(ValueLocation::Immediate(memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_imported_memory32_size_index(), - [self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } - Ok(()) - } - - // TODO: Other memory indices - pub fn memory_grow(&mut self) -> Result<(), Error> { - let memory_index = 0; - if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) { - self.push(ValueLocation::Immediate(defined_memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_memory32_grow_index(), - [self.pointer_type, self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } else { - self.push(ValueLocation::Immediate(memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_imported_memory32_grow_index(), - [self.pointer_type, self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } - Ok(()) - } - - // TODO: Use `ArrayVec`? - // TODO: This inefficiently duplicates registers but it's not really possible - // to double up stack space right now. - /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used. - fn save_volatile(&mut self) -> Result<(), Error> { - self.save_regs(SCRATCH_REGS.iter().copied())?; - Ok(()) - } - - fn save_regs(&mut self, to_save: I) -> Result<(), Error> - where - I: IntoIterator, - I::IntoIter: Clone, - { - // TODO: We can filter out registers that are already marked free, but just to ensure - // that this doesn't fail when confronted with the `memory_grow`/`memory_size` - // weirdness. - let to_save = to_save.into_iter(); - if to_save.clone().count() == 0 { - return Ok(()); - } - - let mut stack = mem::replace(&mut self.block_state.stack, vec![]); - let mut slice = &mut stack[..]; - - while let Some((first, rest)) = slice.split_first_mut() { - if let ValueLocation::Reg(vreg) = *first { - if to_save.clone().any(|r| r == vreg) { - let old = *first; - *first = self.push_physical(old)?; - for val in &mut *rest { - if *val == old { - self.free_value(*val)?; - *val = *first; - } - } - } - } - - slice = rest; - } - - self.block_state.stack = stack; - - Ok(()) - } - - /// Write the arguments to the callee to the registers and the stack using the SystemV - /// calling convention. - fn pass_outgoing_args( - &mut self, - out_locs: &(impl ExactSizeIterator + DoubleEndedIterator + Clone), - ) -> Result<(), Error> { - let total_stack_space = out_locs - .clone() - .flat_map(|l| { - if let CCLoc::Stack(offset) = l { - if offset >= 0 { - Some(offset as u32 + 1) - } else { - None - } - } else { - None - } - }) - .max() - .unwrap_or(0); - let original_depth = self.block_state.depth.clone(); - let mut needed_depth = original_depth.clone(); - needed_depth.reserve(total_stack_space); - - if needed_depth.0 & 1 != 0 { - needed_depth.reserve(1); - } - - self.set_stack_depth(needed_depth.clone())?; - - let mut pending = Vec::<(ValueLocation, CCLoc)>::with_capacity(out_locs.len()); - - for loc in out_locs.clone().rev() { - let val = self.pop()?; - - pending.push((val, loc)); - } - - while !pending.is_empty() { - let start_len = pending.len(); - - for (src, dst) in mem::replace(&mut pending, vec![]) { - if src != ValueLocation::from(dst) { - let dst = match dst { - CCLoc::Reg(r) => { - if !self.block_state.regs.is_free(r) { - pending.push((src, dst)); - continue; - } - - self.block_state.regs.mark_used(r); - - dst - } - CCLoc::Stack(offset) => CCLoc::Stack(offset - needed_depth.0 as i32), - }; - - self.copy_value(src, dst)?; - self.free_value(src)?; - } - } - - if pending.len() == start_len { - let src = match pending - .iter() - .filter_map(|(src, _)| { - if let ValueLocation::Reg(reg) = src { - Some(reg) - } else { - None - } - }) - .next() - { - None => { - return Err(Error::Microwasm( - "Programmer error: We shouldn't need to push \ - intermediate args if we don't have any argument sources in registers" - .to_string(), - )); - } - Some(val) => *val, - }; - let new_src = self.push_physical(ValueLocation::Reg(src))?; - for (old_src, _) in pending.iter_mut() { - if *old_src == ValueLocation::Reg(src) { - *old_src = new_src; - } - } - } - } - - // We do this a second time just in case we had to use `push_physical` to resolve cycles in - // `pending` - self.set_stack_depth(needed_depth)?; - - Ok(()) - } - - fn push_function_returns( - &mut self, - returns: impl IntoIterator, - ) -> Result<(), Error> { - for loc in ret_locs(returns)? { - if let CCLoc::Reg(reg) = loc { - self.block_state.regs.mark_used(reg); - } - - self.push(loc.into())?; - } - Ok(()) - } - - fn trap_if(&mut self, ccode: CondCode, trap_code: TrapCode) { - let label = self.create_label(); - self.br_on_cond_code(label, !ccode); - self.trap(trap_code); - self.define_label(label); - } - - pub fn call_indirect< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - type_id: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - dynasm!(self.asm - ; push Rq(VMCTX) - ); - self.block_state.depth.reserve(1); - let depth = self.block_state.depth.clone(); - - let locs = arg_locs_skip_caller_vmctx(arg_types); - - for loc in locs.clone() { - if let CCLoc::Reg(r) = loc { - self.block_state.regs.mark_used(r); - } - } - - let mut callee = self.pop()?; - let callee_reg = self - .put_into_temp_register(I32, &mut callee)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - self.save_volatile()?; - - for loc in locs.clone() { - if let CCLoc::Reg(r) = loc { - self.block_state.regs.release(r)?; - } - } - - self.pass_outgoing_args(&locs)?; - - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - - let table_index = 0; - let reg_offset = self - .module_context - .defined_table_index(table_index) - .map(|index| { - ( - None, - self.module_context.vmctx_vmtable_definition(index) as i32, - ) - }); - - let vmctx = GPR::Rq(VMCTX); - let (reg, offset) = reg_offset.unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_vmtable_import_from(table_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let temp0 = self.take_reg(I64).unwrap(); - dynasm!(self.asm - ; cmp Rd(callee_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - offset + - self.module_context.vmtable_definition_current_elements() as i32 - ] - ;; self.trap_if(cc::GE_U, TrapCode::TableOutOfBounds) - ; imul - Rd(callee_reg.rq().unwrap()), - Rd(callee_reg.rq().unwrap()), - self.module_context.size_of_vmcaller_checked_anyfunc() as i32 - ; mov Rq(temp0.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - offset + - self.module_context.vmtable_definition_base() as i32 - ] - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - let temp1 = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rd(temp1.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context - .vmctx_vmshared_signature_id(type_id) as i32 - ] - ; cmp DWORD [ - Rq(temp0.rq().unwrap()) + - Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_type_index() as i32 - ], Rd(temp1.rq().unwrap()) - ;; self.trap_if(cc::NOT_EQUAL, TrapCode::BadSignature) - ; mov Rq(VMCTX), [ - Rq(temp0.rq().unwrap()) + - Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_vmctx() as i32 - ] - ; call QWORD [ - Rq(temp0.rq().unwrap()) + - Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_func_ptr() as i32 - ] - ); - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.block_state.regs.release(temp0)?; - self.block_state.regs.release(temp1)?; - self.free_value(callee)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - - self.set_stack_depth(depth)?; - dynasm!(self.asm - ; pop Rq(VMCTX) - ); - self.block_state.depth.free(1); - - Ok(()) - } - - pub fn swap(&mut self, depth: u32) { - let last = self.block_state.stack.len() - 1; - self.block_state.stack.swap(last, last - depth as usize); - } - - /// Call a function with the given index - pub fn call_direct, R: IntoIterator>( - &mut self, - index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - self.relocated_function_call( - &ir::ExternalName::user(0, index), - arg_types, - return_types, - FunctionDefLocation::SameModule, - )?; - Ok(()) - } - - /// Recursively call the same function again - pub fn call_direct_self< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - defined_index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(arg_types); - - self.save_volatile()?; - - let (_, label) = self.func_starts[defined_index as usize]; - - self.pass_outgoing_args(&locs)?; - dynasm!(self.asm - ; call =>label - ); - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - Ok(()) - } - - /// Call a function with the given index - pub fn call_direct_imported< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(arg_types); - - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - self.block_state.regs.mark_used(GPR::Rq(VMCTX)); - let saved_vmctx = self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?; - - self.save_volatile()?; - self.pass_outgoing_args(&locs)?; - - let callee = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(callee.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_body(index) as i32 - ] - ; mov Rq(VMCTX), [ - Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_vmctx(index) as i32 - ] - ; call Rq(callee.rq().unwrap()) - ); - - self.block_state.regs.release(callee)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?; - self.free_value(saved_vmctx)?; - - Ok(()) - } - - // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them - // as scratch registers - /// Writes the function prologue and stores the arguments as locals - pub fn start_function>( - &mut self, - params: P, - ) -> Result<(), Error> - where - P::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - self.apply_cc(BlockCallingConvention::function_start( - arg_locs_skip_caller_vmctx(params), - ))?; - Ok(()) - } - - pub fn ret(&mut self) { - dynasm!(self.asm - ; ret - ); - } - - pub fn epilogue(&mut self) { - for LabelInfo { - label, - align, - inner, - } in self.labels.drain() - { - match inner { - LabelValue::I32(val) => { - dynasm!(self.asm - ; .align align as usize - ;; self.asm.dynamic_label(label.0) - ; .dword val - ); - } - LabelValue::I64(val) => { - dynasm!(self.asm - ; .align align as usize - ;; self.asm.dynamic_label(label.0) - ; .qword val - ); - } - LabelValue::Ret => { - dynasm!(self.asm - ; .align align as usize - ;; self.asm.dynamic_label(label.0) - ; ret - ); - } - } - } - } - - pub fn trap(&mut self, _trap_id: TrapCode) { - // TODO: Emit trap info by writing the trap ID and current source location to a - // `binemit::TrapSink`. - dynasm!(self.asm - ; ud2 - ); - } - - pub fn ret_label(&mut self) -> Label { - #[derive(Copy, Clone, Hash)] - struct RetLabel; - - self.label(LabelValue::Ret) - } - - fn label(&mut self, label: LabelValue) -> Label { - self.aligned_label(1, label) - } - - fn aligned_label(&mut self, align: u32, label: LabelValue) -> Label { - let asm = &mut self.asm; - self.labels - .insert(|| Label(asm.new_dynamic_label()), align, label) - } - - fn target_to_label(&mut self, target: BrTarget