From e832b1096f7cba2154f6d02a2801be88bdd92577 Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald
Date: Tue, 7 Feb 2023 13:36:00 -0800
Subject: [PATCH] wasmtime: Overhaul trampolines

This commit splits `VMCallerCheckedFuncRef::func_ptr` into three new function pointers: `VMCallerCheckedFuncRef::{wasm,array,native}_call`. Each one has a dedicated calling convention, so callers just choose the version that works for them. This is as opposed to the previous behavior, where we would chain together many trampolines that converted between calling conventions, sometimes up to four on the way into Wasm and four more on the way back out. See [0] for details.

[0] https://github.com/bytecodealliance/rfcs/blob/main/accepted/tail-calls.md#a-review-of-our-existing-trampolines-calling-conventions-and-call-paths

Thanks to @bjorn3 for the initial idea of having multiple function pointers for different calling conventions.

This is generally a nice ~5-10% speed-up to our call benchmarks across the board: both Wasm-to-host and host-to-Wasm. The one exception is typed calls from Wasm to the host, which see a minor regression. We hypothesize that this is because the old hand-written assembly trampolines did not maintain a call frame and did a tail call, while the new Cranelift-generated trampolines maintain a call frame and do a regular call. The regression is only a couple of nanoseconds, which seems well explained by these differences, and ultimately is not a big deal.

However, this does lead to a ~5% code size regression for compiled modules. Before, we compiled one trampoline per escaping function's signature and deduplicated these trampolines by signature. Now we compile two trampolines per escaping function: one for when the host calls via the array calling convention and one for when the host calls via the native calling convention. Additionally, we compile a trampoline for every type in the module, in case there is a native-calling-convention function from the host that we `call_indirect` with that type. Much of this regression lives in the `.eh_frame` section of the compiled module, because each of our trampolines needs an entry there. Note that the `.eh_frame` section is not required for Wasmtime's correctness, and you can disable its generation to shrink compiled module code size; we just emit it to play nice with external unwinders and profilers. We believe there are code size gains available in follow-up work to offset this regression in the future.

Backing up a bit: the reason each Wasm module needs to provide these Wasm-to-native trampolines is that `wasmtime::Func::wrap` and friends allow embedders to create functions even when there is no compiler available, so they cannot bring their own trampolines; instead, the Wasm module has to supply them. This in turn means that we need to look up and patch in these Wasm-to-native trampolines at roughly instantiation time. But instantiation is super hot, and we don't want to add more passes over imports or any extra work on this path, so we integrate with `wasmtime::InstancePre` to patch these trampolines in ahead of time.
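To make the new shape concrete, here is a minimal sketch of the funcref layout and of the one-size-fits-all array calling convention described above. This is illustration only, not the authoritative definitions from `crates/runtime/src/vmcontext.rs`: the field names follow this message, while the `ValRaw` and `VMOpaqueContext` stand-ins, the untyped `*const u8` entry points, and the field ordering are simplifying assumptions.

```rust
// Hedged sketch only: names mirror the commit message above; the concrete
// types and layout are simplified and are not wasmtime's real definitions.

/// Stand-in for wasmtime's 16-byte `ValRaw` value union (simplified).
#[repr(C)]
#[derive(Clone, Copy)]
pub struct ValRaw {
    bytes: [u8; 16],
}

/// Opaque stand-in for the `VMOpaqueContext` that vmctx pointers refer to.
#[repr(C)]
pub struct VMOpaqueContext {
    _private: [u8; 0],
}

/// The "array" calling convention: arguments and results travel through a
/// single pointer/length pair of `ValRaw` slots, so one signature covers
/// every Wasm function type. This is what lets `Func::new`-style host
/// functions exist without a compiler available.
pub type VMArrayCallFunction = unsafe extern "C" fn(
    callee_vmctx: *mut VMOpaqueContext,
    caller_vmctx: *mut VMOpaqueContext,
    args_and_results: *mut ValRaw,
    args_and_results_len: usize,
);

/// Instead of a single `func_ptr` reached through chained adapter
/// trampolines, a funcref now carries one entry point per calling
/// convention and every caller picks the one matching its own ABI.
#[repr(C)]
pub struct VMCallerCheckedFuncRef {
    /// Entry point for Wasm-to-Wasm calls (internal Wasm ABI). The real
    /// signature depends on the function's Wasm type, hence untyped here.
    pub wasm_call: *const u8,
    /// Entry point for typed host-to-Wasm calls via the "native" ABI,
    /// likewise type-dependent and therefore untyped in this sketch.
    pub native_call: *const u8,
    /// Entry point for untyped host calls via the array ABI above.
    pub array_call: VMArrayCallFunction,
    // ...signature index, vmctx pointer, etc. omitted from this sketch...
}
```

The payoff is that an untyped host caller can always go through `array_call` with one fixed signature, while Wasm callers and typed host callers each get a direct, type-specific entry point with no chain of adapters in between.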
Co-Authored-By: Jamey Sharp Co-Authored-By: Alex Crichton prtest:full --- Cargo.lock | 5 +- RELEASES.md | 12 + benches/call.rs | 2 +- benches/instantiation.rs | 8 + .../codegen/meta/src/shared/instructions.rs | 0 crates/c-api/include/wasmtime/func.h | 3 + crates/c-api/src/func.rs | 3 +- crates/cranelift-shared/src/obj.rs | 2 +- crates/cranelift/src/compiler.rs | 710 ++++++++++++++---- crates/cranelift/src/compiler/component.rs | 272 ++++--- crates/cranelift/src/func_environ.rs | 11 +- crates/cranelift/src/lib.rs | 141 ++-- crates/environ/src/compilation.rs | 82 +- crates/environ/src/component/compiler.rs | 34 +- crates/environ/src/component/info.rs | 25 + crates/environ/src/fact/transcode.rs | 22 + crates/environ/src/trap_encoding.rs | 1 + crates/environ/src/vmoffsets.rs | 105 ++- crates/jit/src/code_memory.rs | 16 +- crates/jit/src/instantiate.rs | 178 +++-- crates/jit/src/lib.rs | 3 +- crates/jit/src/profiling/jitdump_linux.rs | 26 +- crates/jit/src/profiling/perfmap_linux.rs | 25 +- crates/jit/src/profiling/vtune.rs | 35 +- crates/runtime/src/component.rs | 97 ++- crates/runtime/src/instance.rs | 40 +- .../runtime/src/instance/allocator/pooling.rs | 25 +- crates/runtime/src/lib.rs | 35 +- crates/runtime/src/trampolines.rs | 43 +- crates/runtime/src/trampolines/aarch64.rs | 110 +-- crates/runtime/src/trampolines/riscv64.rs | 107 +-- crates/runtime/src/trampolines/s390x.S | 51 -- crates/runtime/src/trampolines/s390x.rs | 37 +- crates/runtime/src/trampolines/x86_64.rs | 118 +-- crates/runtime/src/traphandlers/backtrace.rs | 141 ++-- .../src/traphandlers/backtrace/aarch64.rs | 6 - .../src/traphandlers/backtrace/riscv64.rs | 6 - .../src/traphandlers/backtrace/s390x.rs | 7 - .../src/traphandlers/backtrace/x86_64.rs | 17 +- crates/runtime/src/vmcontext.rs | 199 +++-- .../src/vmcontext/vm_host_func_context.rs | 136 +++- crates/wasmtime/Cargo.toml | 2 +- crates/wasmtime/src/component/component.rs | 371 +++++---- crates/wasmtime/src/component/func.rs | 17 +- crates/wasmtime/src/component/instance.rs | 61 +- crates/wasmtime/src/externals.rs | 6 +- crates/wasmtime/src/func.rs | 350 ++++++--- crates/wasmtime/src/func/typed.rs | 30 +- crates/wasmtime/src/instance.rs | 133 +++- crates/wasmtime/src/linker.rs | 9 - crates/wasmtime/src/module.rs | 402 +++++++--- crates/wasmtime/src/module/registry.rs | 30 +- crates/wasmtime/src/signatures.rs | 59 +- crates/wasmtime/src/store.rs | 123 +-- crates/wasmtime/src/store/data.rs | 4 - crates/wasmtime/src/store/func_refs.rs | 111 +++ crates/wasmtime/src/trampoline.rs | 19 +- crates/wasmtime/src/trampoline/func.rs | 61 +- crates/wasmtime/src/trampoline/global.rs | 131 ++-- crates/wasmtime/src/values.rs | 2 +- crates/winch/src/compiler.rs | 45 +- supply-chain/imports.lock | 7 + tests/all/async_functions.rs | 1 + tests/all/call_hook.rs | 2 +- tests/all/func.rs | 364 +++++++++ tests/all/traps.rs | 103 +++ tests/all/winch.rs | 2 + winch/codegen/src/trampoline.rs | 12 +- 68 files changed, 3517 insertions(+), 1836 deletions(-) mode change 100755 => 100644 cranelift/codegen/meta/src/shared/instructions.rs create mode 100644 crates/wasmtime/src/store/func_refs.rs diff --git a/Cargo.lock b/Cargo.lock index 12fa99a74487..282745d89c2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -224,9 +224,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = 
"0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" [[package]] name = "byteorder" @@ -3674,6 +3674,7 @@ dependencies = [ "anyhow", "async-trait", "bincode", + "bumpalo", "cfg-if", "encoding_rs", "indexmap", diff --git a/RELEASES.md b/RELEASES.md index 14449c90bafa..7ba57914418d 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -8,6 +8,18 @@ Unreleased. ### Changed +* Overhauled the way that Wasmtime calls into Wasm and Wasm calls back out to + the host. Instead of chaining together trampolines to convert between calling + conventions, we now represent `funcref`s with multiple function pointers, one + per calling convention. This paves the way for supporting Wasm tail calls and + also results in ~10% speed ups to a variety of function call benchmarks, + however there are some slight compiled Wasm module code size regressions + (which can be alleviated by disabling optional `.eh_frame` + generation). Additionally, in the C API the `wasmtime_func_call_unchecked` + function gained one more parameter, which is the capacity of the + args-and-results + buffer. [#6262](https://github.com/bytecodealliance/wasmtime/pull/6262) + -------------------------------------------------------------------------------- ## 8.0.0 diff --git a/benches/call.rs b/benches/call.rs index 9ec51d35d24e..8c1b221422bd 100644 --- a/benches/call.rs +++ b/benches/call.rs @@ -166,7 +166,7 @@ fn bench_host_to_wasm( space[i] = param.to_raw(&mut *store); } untyped - .call_unchecked(&mut *store, space.as_mut_ptr()) + .call_unchecked(&mut *store, space.as_mut_ptr(), space.len()) .unwrap(); for (i, expected) in results.iter().enumerate() { assert_vals_eq( diff --git a/benches/instantiation.rs b/benches/instantiation.rs index 33a1e2f2ba47..48048cb89eaa 100644 --- a/benches/instantiation.rs +++ b/benches/instantiation.rs @@ -44,6 +44,10 @@ fn bench_sequential(c: &mut Criterion, path: &Path) { panic!("failed to load benchmark `{}`: {:?}", path.display(), e) }); let mut linker = Linker::new(&engine); + // Add these imports so we can benchmark instantiation of Sightglass + // benchmark programs. + linker.func_wrap("bench", "start", || {}).unwrap(); + linker.func_wrap("bench", "end", || {}).unwrap(); wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap(); let pre = linker .instantiate_pre(&module) @@ -74,6 +78,10 @@ fn bench_parallel(c: &mut Criterion, path: &Path) { let module = Module::from_file(&engine, path).expect("failed to load WASI example module"); let mut linker = Linker::new(&engine); + // Add these imports so we can benchmark instantiation of Sightglass + // benchmark programs. + linker.func_wrap("bench", "start", || {}).unwrap(); + linker.func_wrap("bench", "end", || {}).unwrap(); wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap(); let pre = Arc::new( linker diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs old mode 100755 new mode 100644 diff --git a/crates/c-api/include/wasmtime/func.h b/crates/c-api/include/wasmtime/func.h index e65d002c6631..0c86506f55c7 100644 --- a/crates/c-api/include/wasmtime/func.h +++ b/crates/c-api/include/wasmtime/func.h @@ -233,6 +233,8 @@ WASM_API_EXTERN wasmtime_error_t *wasmtime_func_call( * * * The `args_and_results` pointer has enough space to hold all the parameters * and all the results (but not at the same time). + * * The `args_and_results_len` contains the length of the `args_and_results` + * buffer. * * Parameters must all be configured as if they were the correct type. 
* * Values such as `externref` and `funcref` are valid within the store being * called. @@ -245,6 +247,7 @@ WASM_API_EXTERN wasmtime_error_t *wasmtime_func_call_unchecked( wasmtime_context_t *store, const wasmtime_func_t *func, wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len, wasm_trap_t **trap ); diff --git a/crates/c-api/src/func.rs b/crates/c-api/src/func.rs index fe2c27ed0acf..eb01f8bbea51 100644 --- a/crates/c-api/src/func.rs +++ b/crates/c-api/src/func.rs @@ -365,9 +365,10 @@ pub unsafe extern "C" fn wasmtime_func_call_unchecked( store: CStoreContextMut<'_>, func: &Func, args_and_results: *mut ValRaw, + args_and_results_len: usize, trap_ret: &mut *mut wasm_trap_t, ) -> Option> { - match func.call_unchecked(store, args_and_results) { + match func.call_unchecked(store, args_and_results, args_and_results_len) { Ok(()) => None, Err(trap) => store_err(trap, trap_ret), } diff --git a/crates/cranelift-shared/src/obj.rs b/crates/cranelift-shared/src/obj.rs index 25b32c8520a8..bcd111343541 100644 --- a/crates/cranelift-shared/src/obj.rs +++ b/crates/cranelift-shared/src/obj.rs @@ -161,7 +161,7 @@ impl<'a> ModuleTextBuilder<'a> { // loop could also be updated to forward the relocation to // the final object file as well. panic!( - "unresolved relocation could not be procesed against \ + "unresolved relocation could not be processed against \ {index:?}: {r:?}" ); } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 7594a341430a..53e26241ca16 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -1,8 +1,8 @@ use crate::debug::{DwarfSectionRelocTarget, ModuleMemoryOffset}; use crate::func_environ::FuncEnvironment; +use crate::{array_call_signature, native_call_signature, DEBUG_ASSERT_TRAP_CODE}; use crate::{ - blank_sig, builder::LinkOptions, func_signature, indirect_signature, value_type, - wasmtime_call_conv, CompiledFunction, FunctionAddressMap, + builder::LinkOptions, value_type, wasm_call_signature, CompiledFunction, FunctionAddressMap, }; use anyhow::{Context as _, Result}; use cranelift_codegen::ir::{ @@ -17,6 +17,7 @@ use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_frontend::FunctionBuilder; use cranelift_wasm::{ DefinedFuncIndex, FuncIndex, FuncTranslator, MemoryIndex, OwnedMemoryIndex, WasmFuncType, + WasmType, }; use object::write::{Object, StandardSegment, SymbolId}; use object::{RelocationEncoding, RelocationKind, SectionKind}; @@ -191,6 +192,8 @@ impl wasmtime_environ::Compiler for Compiler { let isa = &*self.isa; let module = &translation.module; let func_index = module.func_index(func_index); + let sig = translation.module.functions[func_index].signature; + let wasm_func_ty = &types[sig]; let CompilerContext { mut func_translator, @@ -199,7 +202,7 @@ impl wasmtime_environ::Compiler for Compiler { validator_allocations, } = self.take_context(); - context.func.signature = func_signature(isa, translation, types, func_index); + context.func.signature = wasm_call_signature(isa, wasm_func_ty); context.func.name = UserFuncName::User(UserExternalName { namespace: 0, index: func_index.as_u32(), @@ -290,7 +293,7 @@ impl wasmtime_environ::Compiler for Compiler { .buffer .traps() .into_iter() - .map(mach_trap_to_trap) + .filter_map(mach_trap_to_trap) .collect(); let stack_maps = mach_stack_maps_to_stack_maps(compiled_code.buffer.stack_maps()); @@ -345,12 +348,235 @@ impl wasmtime_environ::Compiler for Compiler { )) } - fn compile_host_to_wasm_trampoline( + fn 
compile_array_to_wasm_trampoline( &self, - ty: &WasmFuncType, + translation: &ModuleTranslation<'_>, + types: &ModuleTypes, + def_func_index: DefinedFuncIndex, + ) -> Result, CompileError> { + let func_index = translation.module.func_index(def_func_index); + let sig = translation.module.functions[func_index].signature; + let wasm_func_ty = &types[sig]; + + let isa = &*self.isa; + let pointer_type = isa.pointer_type(); + let wasm_call_sig = wasm_call_signature(isa, wasm_func_ty); + let array_call_sig = array_call_signature(isa); + + let CompilerContext { + mut func_translator, + codegen_context: mut context, + incremental_cache_ctx: mut cache_ctx, + validator_allocations, + } = self.take_context(); + + context.func = ir::Function::with_name_signature(Default::default(), array_call_sig); + + let mut builder = FunctionBuilder::new(&mut context.func, func_translator.context()); + let block0 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + builder.seal_block(block0); + + let (vmctx, caller_vmctx, values_vec_ptr, values_vec_len) = { + let params = builder.func.dfg.block_params(block0); + (params[0], params[1], params[2], params[3]) + }; + + // First load the actual arguments out of the array. + let mut args = self.load_values_from_array( + wasm_func_ty.params(), + &mut builder, + values_vec_ptr, + values_vec_len, + ); + args.insert(0, caller_vmctx); + args.insert(0, vmctx); + + // Just before we enter Wasm, save our stack pointer. + // + // Assert that we were really given a core Wasm vmctx, since that's + // what we are assuming with our offsets below. + debug_assert_vmctx_kind(isa, &mut builder, vmctx, wasmtime_environ::VMCONTEXT_MAGIC); + let offsets = VMOffsets::new(isa.pointer_bytes(), &translation.module); + let vm_runtime_limits_offset = offsets.vmctx_runtime_limits(); + save_last_wasm_entry_sp( + &mut builder, + pointer_type, + &offsets.ptr, + vm_runtime_limits_offset, + vmctx, + ); + + // Then call the Wasm function with those arguments. + let call = declare_and_call(&mut builder, wasm_call_sig, func_index.as_u32(), &args); + let results = builder.func.dfg.inst_results(call).to_vec(); + + // Then store the results back into the array. 
+ self.store_values_to_array( + &mut builder, + wasm_func_ty.returns(), + &results, + values_vec_ptr, + values_vec_len, + ); + + builder.ins().return_(&[]); + builder.finalize(); + + let func = self.finish_trampoline(&mut context, cache_ctx.as_mut(), isa)?; + self.save_context(CompilerContext { + func_translator, + codegen_context: context, + incremental_cache_ctx: cache_ctx, + validator_allocations, + }); + Ok(Box::new(func) as _) + } + + fn compile_native_to_wasm_trampoline( + &self, + translation: &ModuleTranslation<'_>, + types: &ModuleTypes, + def_func_index: DefinedFuncIndex, ) -> Result, CompileError> { - self.host_to_wasm_trampoline(ty) - .map(|x| Box::new(x) as Box<_>) + let func_index = translation.module.func_index(def_func_index); + let sig = translation.module.functions[func_index].signature; + let wasm_func_ty = &types[sig]; + + let isa = &*self.isa; + let pointer_type = isa.pointer_type(); + let func_index = translation.module.func_index(def_func_index); + let wasm_call_sig = wasm_call_signature(isa, wasm_func_ty); + let native_call_sig = native_call_signature(isa, wasm_func_ty); + + let CompilerContext { + mut func_translator, + codegen_context: mut context, + incremental_cache_ctx: mut cache_ctx, + validator_allocations, + } = self.take_context(); + + context.func = ir::Function::with_name_signature(Default::default(), native_call_sig); + + let mut builder = FunctionBuilder::new(&mut context.func, func_translator.context()); + let block0 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + builder.seal_block(block0); + + let args = builder.func.dfg.block_params(block0).to_vec(); + let vmctx = args[0]; + + // Since we are entering Wasm, save our SP. + // + // Assert that we were really given a core Wasm vmctx, since that's + // what we are assuming with our offsets below. + debug_assert_vmctx_kind(isa, &mut builder, vmctx, wasmtime_environ::VMCONTEXT_MAGIC); + let offsets = VMOffsets::new(isa.pointer_bytes(), &translation.module); + let vm_runtime_limits_offset = offsets.vmctx_runtime_limits(); + save_last_wasm_entry_sp( + &mut builder, + pointer_type, + &offsets.ptr, + vm_runtime_limits_offset, + vmctx, + ); + + // Then call into Wasm. + let call = declare_and_call(&mut builder, wasm_call_sig, func_index.as_u32(), &args); + + // Forward the results along. 
+ let results = builder.func.dfg.inst_results(call).to_vec(); + builder.ins().return_(&results); + builder.finalize(); + + let func = self.finish_trampoline(&mut context, cache_ctx.as_mut(), isa)?; + self.save_context(CompilerContext { + func_translator, + codegen_context: context, + incremental_cache_ctx: cache_ctx, + validator_allocations, + }); + Ok(Box::new(func) as _) + } + + fn compile_wasm_to_native_trampoline( + &self, + translation: &ModuleTranslation, + wasm_func_ty: &WasmFuncType, + ) -> Result, CompileError> { + let isa = &*self.isa; + let pointer_type = isa.pointer_type(); + let wasm_call_sig = wasm_call_signature(isa, wasm_func_ty); + let native_call_sig = native_call_signature(isa, wasm_func_ty); + + let CompilerContext { + mut func_translator, + codegen_context: mut context, + incremental_cache_ctx: mut cache_ctx, + validator_allocations, + } = self.take_context(); + + context.func = ir::Function::with_name_signature(Default::default(), wasm_call_sig); + + let mut builder = FunctionBuilder::new(&mut context.func, func_translator.context()); + let block0 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + builder.seal_block(block0); + + let args = builder.func.dfg.block_params(block0).to_vec(); + let callee_vmctx = args[0]; + let caller_vmctx = args[1]; + + // We are exiting Wasm, so save our PC and FP. + // + // Assert that the caller vmctx really is a core Wasm vmctx, since + // that's what we are assuming with our offsets below. + debug_assert_vmctx_kind( + isa, + &mut builder, + caller_vmctx, + wasmtime_environ::VMCONTEXT_MAGIC, + ); + let offsets = VMOffsets::new(isa.pointer_bytes(), &translation.module); + let limits = builder.ins().load( + pointer_type, + MemFlags::trusted(), + caller_vmctx, + i32::try_from(offsets.vmctx_runtime_limits()).unwrap(), + ); + save_last_wasm_exit_fp_and_pc(&mut builder, pointer_type, &offsets.ptr, limits); + + // Load the actual callee out of the + // `VMNativeCallHostFuncContext::host_func`. + let ptr_size = isa.pointer_bytes(); + let callee = builder.ins().load( + pointer_type, + MemFlags::trusted(), + callee_vmctx, + ptr_size.vmnative_call_host_func_context_funcref() + + ptr_size.vmcaller_checked_func_ref_native_call(), + ); + + // Do an indirect call to the callee. + let callee_signature = builder.func.import_signature(native_call_sig); + let call = builder.ins().call_indirect(callee_signature, callee, &args); + + // Forward the results back to the caller. 
+ let results = builder.func.dfg.inst_results(call).to_vec(); + builder.ins().return_(&results); + builder.finalize(); + + let func = self.finish_trampoline(&mut context, cache_ctx.as_mut(), isa)?; + self.save_context(CompilerContext { + func_translator, + codegen_context: context, + incremental_cache_ctx: cache_ctx, + validator_allocations, + }); + Ok(Box::new(func) as _) } fn append_code( @@ -401,41 +627,45 @@ impl wasmtime_environ::Compiler for Compiler { Ok(ret) } - fn emit_trampoline_obj( + fn emit_trampolines_for_array_call_host_func( &self, ty: &WasmFuncType, host_fn: usize, obj: &mut Object<'static>, ) -> Result<(FunctionLoc, FunctionLoc)> { - let host_to_wasm = self.host_to_wasm_trampoline(ty)?; - let wasm_to_host = self.wasm_to_host_trampoline(ty, host_fn)?; + let wasm_to_array = self.wasm_to_array_trampoline(ty, host_fn)?; + let native_to_array = self.native_to_array_trampoline(ty, host_fn)?; + let mut builder = ModuleTextBuilder::new(obj, self, self.isa.text_section_builder(2)); - let (_, a) = builder.append_func( - "host_to_wasm", - &host_to_wasm.body, - host_to_wasm.alignment, - host_to_wasm.unwind_info.as_ref(), - &host_to_wasm.relocations, + + let (_, wasm_to_array) = builder.append_func( + "wasm_to_array", + &wasm_to_array.body, + wasm_to_array.alignment, + wasm_to_array.unwind_info.as_ref(), + &wasm_to_array.relocations, |_| unreachable!(), ); - let (_, b) = builder.append_func( - "wasm_to_host", - &wasm_to_host.body, - wasm_to_host.alignment, - wasm_to_host.unwind_info.as_ref(), - &wasm_to_host.relocations, + let (_, native_to_array) = builder.append_func( + "native_to_array", + &native_to_array.body, + native_to_array.alignment, + native_to_array.unwind_info.as_ref(), + &native_to_array.relocations, |_| unreachable!(), ); - let a = FunctionLoc { - start: u32::try_from(a.start).unwrap(), - length: u32::try_from(a.end - a.start).unwrap(), + + let wasm_to_array = FunctionLoc { + start: u32::try_from(wasm_to_array.start).unwrap(), + length: u32::try_from(wasm_to_array.end - wasm_to_array.start).unwrap(), }; - let b = FunctionLoc { - start: u32::try_from(b.start).unwrap(), - length: u32::try_from(b.end - b.start).unwrap(), + let native_to_array = FunctionLoc { + start: u32::try_from(native_to_array.start).unwrap(), + length: u32::try_from(native_to_array.end - native_to_array.start).unwrap(), }; + builder.finish(); - Ok((a, b)) + Ok((wasm_to_array, native_to_array)) } fn triple(&self) -> &target_lexicon::Triple { @@ -618,20 +848,39 @@ fn compile_uncached<'a>( } impl Compiler { - fn host_to_wasm_trampoline(&self, ty: &WasmFuncType) -> Result { + /// Creates a trampoline for calling a host function callee defined with the + /// "array" calling convention from a native calling convention caller. + /// + /// This style of trampoline is used with `Func::new`-style callees and + /// `TypedFunc::call`-style callers. + /// + /// Both callee and caller are on the host side, so there is no host/Wasm + /// transition and associated entry/exit state to maintain. + /// + /// The `host_fn` is a function pointer in this process with the following + /// signature: + /// + /// ```ignore + /// unsafe extern "C" fn(*mut VMContext, *mut VMContext, *mut ValRaw, usize) + /// ``` + /// + /// where the first two arguments are forwarded from the trampoline + /// generated here itself, and the second two arguments are a pointer/length + /// into stack-space of this trampoline with storage for both the arguments + /// to the function and the results. 
+ /// + /// Note that `host_fn` is an immediate which is an actual function pointer + /// in this process. As such this compiled trampoline is not suitable for + /// serialization. + fn native_to_array_trampoline( + &self, + ty: &WasmFuncType, + host_fn: usize, + ) -> Result { let isa = &*self.isa; - let value_size = mem::size_of::(); let pointer_type = isa.pointer_type(); - - // The wasm signature we're calling in this trampoline has the actual - // ABI of the function signature described by `ty` - let wasm_signature = indirect_signature(isa, ty); - - // The host signature has the `VMTrampoline` signature where the ABI is - // fixed. - let mut host_signature = blank_sig(isa, wasmtime_call_conv(isa)); - host_signature.params.push(ir::AbiParam::new(pointer_type)); - host_signature.params.push(ir::AbiParam::new(pointer_type)); + let native_call_sig = native_call_signature(isa, ty); + let array_call_sig = array_call_signature(isa); let CompilerContext { mut func_translator, @@ -640,65 +889,37 @@ impl Compiler { validator_allocations, } = self.take_context(); - // The name doesn't matter here. - context.func = ir::Function::with_name_signature(UserFuncName::default(), host_signature); + context.func = ir::Function::with_name_signature(Default::default(), native_call_sig); - // This trampoline will load all the parameters from the `values_vec` - // that is passed in and then call the real function (also passed - // indirectly) with the specified ABI. - // - // All the results are then stored into the same `values_vec`. let mut builder = FunctionBuilder::new(&mut context.func, func_translator.context()); let block0 = builder.create_block(); - builder.append_block_params_for_function_params(block0); builder.switch_to_block(block0); builder.seal_block(block0); - let (vmctx_ptr_val, caller_vmctx_ptr_val, callee_value, values_vec_ptr_val) = { - let params = builder.func.dfg.block_params(block0); - (params[0], params[1], params[2], params[3]) - }; + let (values_vec_ptr, values_vec_len) = + self.allocate_stack_array_and_spill_args(ty, &mut builder, block0); + let values_vec_len = builder + .ins() + .iconst(pointer_type, i64::from(values_vec_len)); - // Load the argument values out of `values_vec`. - let mut mflags = ir::MemFlags::trusted(); - mflags.set_endianness(ir::Endianness::Little); - let callee_args = wasm_signature - .params - .iter() - .enumerate() - .map(|(i, r)| { - match i { - 0 => vmctx_ptr_val, - 1 => caller_vmctx_ptr_val, - _ => - // i - 2 because vmctx and caller vmctx aren't passed through `values_vec`. - { - builder.ins().load( - r.value_type, - mflags, - values_vec_ptr_val, - ((i - 2) * value_size) as i32, - ) - } - } - }) - .collect::>(); + let block_params = builder.func.dfg.block_params(block0); + let callee_args = [ + block_params[0], + block_params[1], + values_vec_ptr, + values_vec_len, + ]; - // Call the indirect function pointer we were given - let new_sig = builder.import_signature(wasm_signature); - let call = builder + let new_sig = builder.import_signature(array_call_sig); + let callee_value = builder.ins().iconst(pointer_type, host_fn as i64); + builder .ins() .call_indirect(new_sig, callee_value, &callee_args); - let results = builder.func.dfg.inst_results(call).to_vec(); - // Store the return values into `values_vec`. 
- for (i, r) in results.iter().enumerate() { - builder - .ins() - .store(mflags, *r, values_vec_ptr_val, (i * value_size) as i32); - } - builder.ins().return_(&[]); + let results = + self.load_values_from_array(ty.returns(), &mut builder, values_vec_ptr, values_vec_len); + builder.ins().return_(&results); builder.finalize(); let func = self.finish_trampoline(&mut context, cache_ctx.as_mut(), isa)?; @@ -711,9 +932,9 @@ impl Compiler { Ok(func) } - /// Creates a trampoline for WebAssembly calling into the host where all the - /// arguments are spilled to the stack and results are loaded from the - /// stack. + /// Creates a trampoline for WebAssembly to call a host function defined + /// with the "array" calling convention: where all the arguments are spilled + /// to an array on the stack and results are loaded from the stack array. /// /// This style of trampoline is currently only used with the /// `Func::new`-style created functions in the Wasmtime embedding API. The @@ -722,7 +943,7 @@ impl Compiler { /// type signature of: /// /// ```ignore - /// extern "C" fn(*mut VMContext, *mut VMContext, *mut ValRaw, usize) + /// unsafe extern "C" fn(*mut VMContext, *mut VMContext, *mut ValRaw, usize) /// ``` /// /// where the first two arguments are forwarded from the trampoline @@ -731,22 +952,18 @@ impl Compiler { /// to the function and the results. /// /// Note that `host_fn` is an immediate which is an actual function pointer - /// in this process. As such this compiled trampoline is not suitable for - /// serialization. - fn wasm_to_host_trampoline( + /// in this process, and `limits` is a pointer to `VMRuntimeLimits`. As such + /// this compiled trampoline is not suitable for serialization, and only + /// valid for a particular store. + fn wasm_to_array_trampoline( &self, ty: &WasmFuncType, host_fn: usize, ) -> Result { let isa = &*self.isa; let pointer_type = isa.pointer_type(); - let wasm_signature = indirect_signature(isa, ty); - let mut host_signature = blank_sig(isa, wasmtime_call_conv(isa)); - // The host signature has an added parameter for the `values_vec` - // input/output buffer in addition to the size of the buffer, in units - // of `ValRaw`. - host_signature.params.push(ir::AbiParam::new(pointer_type)); - host_signature.params.push(ir::AbiParam::new(pointer_type)); + let wasm_call_sig = wasm_call_signature(isa, ty); + let array_call_sig = array_call_signature(isa); let CompilerContext { mut func_translator, @@ -755,32 +972,57 @@ impl Compiler { validator_allocations, } = self.take_context(); - // The name doesn't matter here. - context.func = ir::Function::with_name_signature(Default::default(), wasm_signature); + context.func = ir::Function::with_name_signature(Default::default(), wasm_call_sig); let mut builder = FunctionBuilder::new(&mut context.func, func_translator.context()); let block0 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + builder.seal_block(block0); - let (values_vec_ptr_val, values_vec_len) = - self.wasm_to_host_spill_args(ty, &mut builder, block0); + let caller_vmctx = builder.func.dfg.block_params(block0)[1]; + + // Assert that we were really given a core Wasm vmctx, since that's + // what we are assuming with our offsets below. 
+ debug_assert_vmctx_kind( + isa, + &mut builder, + caller_vmctx, + wasmtime_environ::VMCONTEXT_MAGIC, + ); + let ptr_size = isa.pointer_bytes(); + let limits = builder.ins().load( + pointer_type, + MemFlags::trusted(), + caller_vmctx, + ptr_size.vmcontext_runtime_limits(), + ); + save_last_wasm_exit_fp_and_pc(&mut builder, pointer_type, &ptr_size, limits); + + let (values_vec_ptr, values_vec_len) = + self.allocate_stack_array_and_spill_args(ty, &mut builder, block0); + let values_vec_len = builder + .ins() + .iconst(pointer_type, i64::from(values_vec_len)); let block_params = builder.func.dfg.block_params(block0); let callee_args = [ block_params[0], block_params[1], - values_vec_ptr_val, - builder - .ins() - .iconst(pointer_type, i64::from(values_vec_len)), + values_vec_ptr, + values_vec_len, ]; - let new_sig = builder.import_signature(host_signature); + let new_sig = builder.import_signature(array_call_sig); let callee_value = builder.ins().iconst(pointer_type, host_fn as i64); builder .ins() .call_indirect(new_sig, callee_value, &callee_args); - self.wasm_to_host_load_results(ty, builder, values_vec_ptr_val); + let results = + self.load_values_from_array(ty.returns(), &mut builder, values_vec_ptr, values_vec_len); + builder.ins().return_(&results); + builder.finalize(); let func = self.finish_trampoline(&mut context, cache_ctx.as_mut(), isa)?; self.save_context(CompilerContext { @@ -792,17 +1034,16 @@ impl Compiler { Ok(func) } - /// Used for spilling arguments in wasm-to-host trampolines into the stack - /// of the function of `builder` specified. + /// This function will allocate a stack slot suitable for storing both the + /// arguments and return values of the function, and then the arguments will + /// all be stored in this block. /// - /// The `block0` is the entry block of the function and `ty` is the wasm - /// signature of the trampoline generated. This function will allocate a - /// stack slot suitable for storing both the arguments and return values of - /// the function, and then the arguments will all be stored in this block. + /// `block0` must be the entry block of the function and `ty` must be the + /// Wasm function type of the trampoline. /// /// The stack slot pointer is returned in addition to the size, in units of /// `ValRaw`, of the stack slot. - fn wasm_to_host_spill_args( + fn allocate_stack_array_and_spill_args( &self, ty: &WasmFuncType, builder: &mut FunctionBuilder, @@ -817,14 +1058,40 @@ impl Compiler { let values_vec_byte_size = u32::try_from(value_size * values_vec_len).unwrap(); let values_vec_len = u32::try_from(values_vec_len).unwrap(); - let ss = builder.func.create_sized_stack_slot(ir::StackSlotData::new( + let slot = builder.func.create_sized_stack_slot(ir::StackSlotData::new( ir::StackSlotKind::ExplicitSlot, values_vec_byte_size, )); + let values_vec_ptr = builder.ins().stack_addr(pointer_type, slot, 0); - builder.append_block_params_for_function_params(block0); - builder.switch_to_block(block0); - builder.seal_block(block0); + // NB: `2..` because the vmctx and caller vmctx don't go in the array. + let args = builder.func.dfg.block_params(block0)[2..].to_vec(); + + { + let values_vec_len = builder + .ins() + .iconst(ir::types::I32, i64::try_from(values_vec_len).unwrap()); + self.store_values_to_array(builder, ty.params(), &args, values_vec_ptr, values_vec_len); + } + + (values_vec_ptr, values_vec_len) + } + + /// Store values to an array in the array calling convention. 
+ /// + /// Used either to store arguments to the array when calling a function + /// using the array calling convention, or used to store results to the + /// array when implementing a function that exposes the array calling + /// convention. + fn store_values_to_array( + &self, + builder: &mut FunctionBuilder, + types: &[WasmType], + values: &[Value], + values_vec_ptr: Value, + values_vec_capacity: Value, + ) { + debug_assert_enough_capacity_for_length(builder, types.len(), values_vec_capacity); // Note that loads and stores are unconditionally done in the // little-endian format rather than the host's native-endianness, @@ -834,49 +1101,49 @@ impl Compiler { let mut mflags = MemFlags::trusted(); mflags.set_endianness(ir::Endianness::Little); - let values_vec_ptr_val = builder.ins().stack_addr(pointer_type, ss, 0); - for i in 0..ty.params().len() { - let val = builder.func.dfg.block_params(block0)[i + 2]; + let value_size = mem::size_of::<ValRaw>(); + for (i, val) in values.iter().copied().enumerate() { builder .ins() - .store(mflags, val, values_vec_ptr_val, (i * value_size) as i32); + .store(mflags, val, values_vec_ptr, (i * value_size) as i32); } - (values_vec_ptr_val, values_vec_len) } - /// Use for loading the results of a host call from a trampoline's stack - /// space. + /// Used for loading the values of an array-call host function's value + /// array. /// - /// This is intended to be used with the stack space allocated by - /// `wasm_to_host_spill_args` above. This is called after the function call - /// is made which will load results from the stack space and then return - /// them with the appropriate ABI (e.g. System-V). - fn wasm_to_host_load_results( + /// This can be used to load arguments out of the array if the trampoline we + /// are building exposes the array calling convention, or it can be used to + /// load results out of the array if the trampoline we are building calls a + /// function that uses the array calling convention. + fn load_values_from_array( &self, - ty: &WasmFuncType, - mut builder: FunctionBuilder, - values_vec_ptr_val: Value, - ) { + types: &[WasmType], + builder: &mut FunctionBuilder, + values_vec_ptr: Value, + values_vec_capacity: Value, + ) -> Vec<Value> { let isa = &*self.isa; let value_size = mem::size_of::<ValRaw>(); - // Note that this is little-endian like `wasm_to_host_spill_args` above, + debug_assert_enough_capacity_for_length(builder, types.len(), values_vec_capacity); + + // Note that this is little-endian like `store_values_to_array` above, // see notes there for more information.
let mut mflags = MemFlags::trusted(); mflags.set_endianness(ir::Endianness::Little); let mut results = Vec::new(); - for (i, r) in ty.returns().iter().enumerate() { + for (i, r) in types.iter().enumerate() { let load = builder.ins().load( value_type(isa, *r), mflags, - values_vec_ptr_val, + values_vec_ptr, (i * value_size) as i32, ); results.push(load); } - builder.ins().return_(&results); - builder.finalize(); + results } fn finish_trampoline( @@ -885,20 +1152,21 @@ impl Compiler { cache_ctx: Option<&mut IncrementalCacheContext>, isa: &dyn TargetIsa, ) -> Result { - let (compiled_code, code_buf) = compile_maybe_cached(context, isa, cache_ctx)?; + let (_, code_buf) = compile_maybe_cached(context, isa, cache_ctx)?; + let compiled_code = context.compiled_code().unwrap(); - // Processing relocations isn't the hardest thing in the world here but - // no trampoline should currently generate a relocation, so assert that - // they're all empty and if this ever trips in the future then handling - // will need to be added here to ensure they make their way into the - // `CompiledFunction` below. - assert!(compiled_code.buffer.relocs().is_empty()); + let relocations = compiled_code + .buffer + .relocs() + .into_iter() + .map(|item| mach_reloc_to_reloc(&context.func, item)) + .collect(); let traps = compiled_code .buffer .traps() .into_iter() - .map(mach_trap_to_trap) + .filter_map(mach_trap_to_trap) .collect(); let alignment = compiled_code.alignment; @@ -913,7 +1181,7 @@ impl Compiler { Ok(CompiledFunction { body: code_buf, unwind_info, - relocations: Default::default(), + relocations, sized_stack_slots: Default::default(), value_labels_ranges: Default::default(), address_map: Default::default(), @@ -1012,9 +1280,9 @@ fn mach_reloc_to_reloc(func: &Function, reloc: &MachReloc) -> Relocation { const ALWAYS_TRAP_CODE: u16 = 100; -fn mach_trap_to_trap(trap: &MachTrap) -> TrapInformation { +fn mach_trap_to_trap(trap: &MachTrap) -> Option { let &MachTrap { offset, code } = trap; - TrapInformation { + Some(TrapInformation { code_offset: offset, trap_code: match code { ir::TrapCode::StackOverflow => Trap::StackOverflow, @@ -1030,10 +1298,15 @@ fn mach_trap_to_trap(trap: &MachTrap) -> TrapInformation { ir::TrapCode::Interrupt => Trap::Interrupt, ir::TrapCode::User(ALWAYS_TRAP_CODE) => Trap::AlwaysTrapAdapter, + // These do not get converted to wasmtime traps, since they + // shouldn't ever be hit in theory. Instead of catching and handling + // these, we let the signal crash the process. 
+ ir::TrapCode::User(DEBUG_ASSERT_TRAP_CODE) => return None, + // these should never be emitted by wasmtime-cranelift ir::TrapCode::User(_) => unreachable!(), }, - } + }) } fn mach_stack_maps_to_stack_maps(mach_stack_maps: &[MachStackMap]) -> Vec { @@ -1059,3 +1332,126 @@ fn mach_stack_maps_to_stack_maps(mach_stack_maps: &[MachStackMap]) -> Vec ir::Inst { + let name = ir::ExternalName::User(builder.func.declare_imported_user_function( + ir::UserExternalName { + namespace: 0, + index: func_index, + }, + )); + let signature = builder.func.import_signature(signature); + let callee = builder.func.dfg.ext_funcs.push(ir::ExtFuncData { + name, + signature, + colocated: true, + }); + builder.ins().call(callee, &args) +} + +fn debug_assert_enough_capacity_for_length( + builder: &mut FunctionBuilder, + length: usize, + capacity: ir::Value, +) { + if cfg!(debug_assertions) { + let enough_capacity = builder.ins().icmp_imm( + ir::condcodes::IntCC::UnsignedGreaterThanOrEqual, + capacity, + ir::immediates::Imm64::new(length.try_into().unwrap()), + ); + builder + .ins() + .trapz(enough_capacity, ir::TrapCode::User(DEBUG_ASSERT_TRAP_CODE)); + } +} + +fn debug_assert_vmctx_kind( + isa: &dyn TargetIsa, + builder: &mut FunctionBuilder, + vmctx: ir::Value, + expected_vmctx_magic: u32, +) { + if cfg!(debug_assertions) { + let magic = builder.ins().load( + ir::types::I32, + MemFlags::trusted().with_endianness(isa.endianness()), + vmctx, + 0, + ); + let is_expected_vmctx = builder.ins().icmp_imm( + ir::condcodes::IntCC::Equal, + magic, + i64::from(expected_vmctx_magic), + ); + builder.ins().trapz( + is_expected_vmctx, + ir::TrapCode::User(DEBUG_ASSERT_TRAP_CODE), + ); + } +} + +fn save_last_wasm_entry_sp( + builder: &mut FunctionBuilder, + pointer_type: ir::Type, + ptr_size: &impl PtrSize, + vm_runtime_limits_offset: u32, + vmctx: Value, +) { + // First we need to get the `VMRuntimeLimits`. + let limits = builder.ins().load( + pointer_type, + MemFlags::trusted(), + vmctx, + i32::try_from(vm_runtime_limits_offset).unwrap(), + ); + + // Then store our current stack pointer into the appropriate slot. + let sp = builder.ins().get_stack_pointer(pointer_type); + builder.ins().store( + MemFlags::trusted(), + sp, + limits, + ptr_size.vmruntime_limits_last_wasm_entry_sp(), + ); +} + +fn save_last_wasm_exit_fp_and_pc( + builder: &mut FunctionBuilder, + pointer_type: ir::Type, + ptr: &impl PtrSize, + limits: Value, +) { + // Save the exit Wasm FP to the limits. We dereference the current FP to get + // the previous FP because the current FP is the trampoline's FP, and we + // want the Wasm function's FP, which is the caller of this trampoline. + let trampoline_fp = builder.ins().get_frame_pointer(pointer_type); + let wasm_fp = builder.ins().load( + pointer_type, + MemFlags::trusted(), + trampoline_fp, + // The FP always points to the next older FP for all supported + // targets. See assertion in + // `crates/runtime/src/traphandlers/backtrace.rs`. + 0, + ); + builder.ins().store( + MemFlags::trusted(), + wasm_fp, + limits, + ptr.vmruntime_limits_last_wasm_exit_fp(), + ); + // Finally save the Wasm return address to the limits. 
+ let wasm_pc = builder.ins().get_return_address(pointer_type); + builder.ins().store( + MemFlags::trusted(), + wasm_pc, + limits, + ptr.vmruntime_limits_last_wasm_exit_pc(), + ); +} diff --git a/crates/cranelift/src/compiler/component.rs b/crates/cranelift/src/compiler/component.rs index 207b11a79031..30048d6f1f3c 100644 --- a/crates/cranelift/src/compiler/component.rs +++ b/crates/cranelift/src/compiler/component.rs @@ -7,19 +7,26 @@ use cranelift_codegen::ir::{self, InstBuilder, MemFlags}; use cranelift_frontend::FunctionBuilder; use std::any::Any; use wasmtime_environ::component::{ - CanonicalOptions, Component, ComponentCompiler, ComponentTypes, FixedEncoding, LowerImport, - RuntimeMemoryIndex, Transcode, Transcoder, VMComponentOffsets, + AllCallFunc, CanonicalOptions, Component, ComponentCompiler, ComponentTypes, FixedEncoding, + LowerImport, RuntimeMemoryIndex, Transcode, Transcoder, VMComponentOffsets, }; use wasmtime_environ::{PtrSize, WasmFuncType}; -impl ComponentCompiler for Compiler { - fn compile_lowered_trampoline( +enum Abi { + Wasm, + Native, + Array, +} + +impl Compiler { + fn compile_lowered_trampoline_for_abi( &self, component: &Component, lowering: &LowerImport, types: &ComponentTypes, + abi: Abi, ) -> Result<Box<dyn Any + Send>> { - let ty = &types[lowering.canonical_abi]; + let wasm_func_ty = &types[lowering.canonical_abi]; let isa = &*self.isa; let pointer_type = isa.pointer_type(); let offsets = VMComponentOffsets::new(isa.pointer_bytes(), component); @@ -33,20 +40,55 @@ impl ComponentCompiler for Compiler { context.func = ir::Function::with_name_signature( ir::UserFuncName::user(0, 0), - crate::indirect_signature(isa, ty), + match abi { + Abi::Wasm => crate::wasm_call_signature(isa, wasm_func_ty), + Abi::Native => crate::native_call_signature(isa, wasm_func_ty), + Abi::Array => crate::array_call_signature(isa), + }, ); let mut builder = FunctionBuilder::new(&mut context.func, func_translator.context()); let block0 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + builder.seal_block(block0); // Start off by spilling all the wasm arguments into a stack slot to be // passed to the host function. - let (values_vec_ptr_val, values_vec_len) = - self.wasm_to_host_spill_args(ty, &mut builder, block0); + let (values_vec_ptr, values_vec_len) = match abi { + Abi::Wasm | Abi::Native => { + let (ptr, len) = + self.allocate_stack_array_and_spill_args(wasm_func_ty, &mut builder, block0); + let len = builder.ins().iconst(pointer_type, i64::from(len)); + (ptr, len) + } + Abi::Array => { + let params = builder.func.dfg.block_params(block0); + (params[2], params[3]) + } + }; let vmctx = builder.func.dfg.block_params(block0)[0]; - // Save the exit FP and return address for stack walking purposes. - self.save_last_wasm_fp_and_pc(&mut builder, &offsets, vmctx); + // If we are crossing the Wasm-to-native boundary, we need to save the + // exit FP and return address for stack walking purposes. However, we + // always debug assert that our vmctx is a component context, regardless + // of whether we are actually crossing that boundary, because it should + // always hold.
+ super::debug_assert_vmctx_kind( + isa, + &mut builder, + vmctx, + wasmtime_environ::component::VMCOMPONENT_MAGIC, + ); + if let Abi::Wasm = abi { + let limits = builder.ins().load( + pointer_type, + MemFlags::trusted(), + vmctx, + i32::try_from(offsets.limits()).unwrap(), + ); + super::save_last_wasm_exit_fp_and_pc(&mut builder, pointer_type, &offsets.ptr, limits); + } // Below this will incrementally build both the signature of the host // function we're calling as well as the list of arguments since the @@ -122,15 +164,11 @@ impl ComponentCompiler for Compiler { // storage: *mut ValRaw host_sig.params.push(ir::AbiParam::new(pointer_type)); - callee_args.push(values_vec_ptr_val); + callee_args.push(values_vec_ptr); // storage_len: usize host_sig.params.push(ir::AbiParam::new(pointer_type)); - callee_args.push( - builder - .ins() - .iconst(pointer_type, i64::from(values_vec_len)), - ); + callee_args.push(values_vec_len); // Load host function pointer from the vmcontext and then call that // indirect function pointer with the list of arguments. @@ -143,9 +181,23 @@ impl ComponentCompiler for Compiler { let host_sig = builder.import_signature(host_sig); builder.ins().call_indirect(host_sig, host_fn, &callee_args); - // After the host function has returned the results are loaded from - // `values_vec_ptr_val` and then returned. - self.wasm_to_host_load_results(ty, builder, values_vec_ptr_val); + match abi { + Abi::Wasm | Abi::Native => { + // After the host function has returned the results are loaded from + // `values_vec_ptr` and then returned. + let results = self.load_values_from_array( + wasm_func_ty.returns(), + &mut builder, + values_vec_ptr, + values_vec_len, + ); + builder.ins().return_(&results); + } + Abi::Array => { + builder.ins().return_(&[]); + } + } + builder.finalize(); let func: CompiledFunction = self.finish_trampoline(&mut context, incremental_cache_ctx.as_mut(), isa)?; @@ -158,7 +210,11 @@ impl ComponentCompiler for Compiler { Ok(Box::new(func)) } - fn compile_always_trap(&self, ty: &WasmFuncType) -> Result> { + fn compile_always_trap_for_abi( + &self, + ty: &WasmFuncType, + abi: Abi, + ) -> Result> { let isa = &*self.isa; let CompilerContext { mut func_translator, @@ -168,7 +224,11 @@ impl ComponentCompiler for Compiler { } = self.take_context(); context.func = ir::Function::with_name_signature( ir::UserFuncName::user(0, 0), - crate::indirect_signature(isa, ty), + match abi { + Abi::Wasm => crate::wasm_call_signature(isa, ty), + Abi::Native => crate::native_call_signature(isa, ty), + Abi::Array => crate::array_call_signature(isa), + }, ); let mut builder = FunctionBuilder::new(&mut context.func, func_translator.context()); let block0 = builder.create_block(); @@ -191,11 +251,12 @@ impl ComponentCompiler for Compiler { Ok(Box::new(func)) } - fn compile_transcoder( + fn compile_transcoder_for_abi( &self, component: &Component, transcoder: &Transcoder, types: &ComponentTypes, + abi: Abi, ) -> Result> { let ty = &types[transcoder.signature]; let isa = &*self.isa; @@ -210,7 +271,11 @@ impl ComponentCompiler for Compiler { context.func = ir::Function::with_name_signature( ir::UserFuncName::user(0, 0), - crate::indirect_signature(isa, ty), + match abi { + Abi::Wasm => crate::wasm_call_signature(isa, ty), + Abi::Native => crate::native_call_signature(isa, ty), + Abi::Array => crate::array_call_signature(isa), + }, ); let mut builder = FunctionBuilder::new(&mut context.func, func_translator.context()); @@ -219,8 +284,20 @@ impl ComponentCompiler for Compiler { 
builder.switch_to_block(block0); builder.seal_block(block0); - self.translate_transcode(builder, &offsets, transcoder, block0); + match abi { + Abi::Wasm => { + self.translate_transcode(&mut builder, &offsets, transcoder, block0); + } + // Transcoders can only actually be called by Wasm, so let's assert + // that here. + Abi::Native | Abi::Array => { + builder + .ins() + .trap(ir::TrapCode::User(crate::DEBUG_ASSERT_TRAP_CODE)); + } + } + builder.finalize(); let func: CompiledFunction = self.finish_trampoline(&mut context, incremental_cache_ctx.as_mut(), isa)?; self.save_context(CompilerContext { @@ -233,54 +310,71 @@ impl ComponentCompiler for Compiler { } } -impl Compiler { - fn save_last_wasm_fp_and_pc( +impl ComponentCompiler for Compiler { + fn compile_lowered_trampoline( &self, - builder: &mut FunctionBuilder<'_>, - offsets: &VMComponentOffsets, - vmctx: ir::Value, - ) { - let pointer_type = self.isa.pointer_type(); - // First we need to get the `VMRuntimeLimits`. - let limits = builder.ins().load( - pointer_type, - MemFlags::trusted(), - vmctx, - i32::try_from(offsets.limits()).unwrap(), - ); - // Then save the exit Wasm FP to the limits. We dereference the current - // FP to get the previous FP because the current FP is the trampoline's - // FP, and we want the Wasm function's FP, which is the caller of this - // trampoline. - let trampoline_fp = builder.ins().get_frame_pointer(pointer_type); - let wasm_fp = builder.ins().load( - pointer_type, - MemFlags::trusted(), - trampoline_fp, - // The FP always points to the next older FP for all supported - // targets. See assertion in - // `crates/runtime/src/traphandlers/backtrace.rs`. - 0, - ); - builder.ins().store( - MemFlags::trusted(), - wasm_fp, - limits, - offsets.ptr.vmruntime_limits_last_wasm_exit_fp(), - ); - // Finally save the Wasm return address to the limits. - let wasm_pc = builder.ins().get_return_address(pointer_type); - builder.ins().store( - MemFlags::trusted(), - wasm_pc, - limits, - offsets.ptr.vmruntime_limits_last_wasm_exit_pc(), - ); + component: &Component, + lowering: &LowerImport, + types: &ComponentTypes, + ) -> Result>> { + Ok(AllCallFunc { + wasm_call: self.compile_lowered_trampoline_for_abi( + component, + lowering, + types, + Abi::Wasm, + )?, + array_call: self.compile_lowered_trampoline_for_abi( + component, + lowering, + types, + Abi::Array, + )?, + native_call: self.compile_lowered_trampoline_for_abi( + component, + lowering, + types, + Abi::Native, + )?, + }) } + fn compile_always_trap(&self, ty: &WasmFuncType) -> Result>> { + Ok(AllCallFunc { + wasm_call: self.compile_always_trap_for_abi(ty, Abi::Wasm)?, + array_call: self.compile_always_trap_for_abi(ty, Abi::Array)?, + native_call: self.compile_always_trap_for_abi(ty, Abi::Native)?, + }) + } + + fn compile_transcoder( + &self, + component: &Component, + transcoder: &Transcoder, + types: &ComponentTypes, + ) -> Result>> { + Ok(AllCallFunc { + wasm_call: self.compile_transcoder_for_abi(component, transcoder, types, Abi::Wasm)?, + array_call: self.compile_transcoder_for_abi( + component, + transcoder, + types, + Abi::Array, + )?, + native_call: self.compile_transcoder_for_abi( + component, + transcoder, + types, + Abi::Native, + )?, + }) + } +} + +impl Compiler { fn translate_transcode( &self, - mut builder: FunctionBuilder<'_>, + builder: &mut FunctionBuilder<'_>, offsets: &VMComponentOffsets, transcoder: &Transcoder, block: ir::Block, @@ -290,7 +384,13 @@ impl Compiler { // Save the exit FP and return address for stack walking purposes. 
This // is used when an invalid encoding is encountered and a trap is raised. - self.save_last_wasm_fp_and_pc(&mut builder, &offsets, vmctx); + let limits = builder.ins().load( + pointer_type, + MemFlags::trusted(), + vmctx, + i32::try_from(offsets.limits()).unwrap(), + ); + super::save_last_wasm_exit_fp_and_pc(builder, pointer_type, &offsets.ptr, limits); // Determine the static signature of the host libcall for this transcode // operation and additionally calculate the static offset within the @@ -329,9 +429,8 @@ impl Compiler { ); // Load the base pointers for the from/to linear memories. - let from_base = - self.load_runtime_memory_base(&mut builder, vmctx, offsets, transcoder.from); - let to_base = self.load_runtime_memory_base(&mut builder, vmctx, offsets, transcoder.to); + let from_base = self.load_runtime_memory_base(builder, vmctx, offsets, transcoder.from); + let to_base = self.load_runtime_memory_base(builder, vmctx, offsets, transcoder.to); // Helper function to cast a core wasm input to a host pointer type // which will go into the host libcall. @@ -379,24 +478,24 @@ impl Compiler { | Transcode::Utf8ToLatin1 | Transcode::Utf16ToLatin1 | Transcode::Utf8ToUtf16 => { - args.push(ptr_param(&mut builder, 0, from64, from_base)); - args.push(len_param(&mut builder, 1, from64)); - args.push(ptr_param(&mut builder, 2, to64, to_base)); + args.push(ptr_param(builder, 0, from64, from_base)); + args.push(len_param(builder, 1, from64)); + args.push(ptr_param(builder, 2, to64, to_base)); } Transcode::Utf16ToUtf8 | Transcode::Latin1ToUtf8 => { - args.push(ptr_param(&mut builder, 0, from64, from_base)); - args.push(len_param(&mut builder, 1, from64)); - args.push(ptr_param(&mut builder, 2, to64, to_base)); - args.push(len_param(&mut builder, 3, to64)); + args.push(ptr_param(builder, 0, from64, from_base)); + args.push(len_param(builder, 1, from64)); + args.push(ptr_param(builder, 2, to64, to_base)); + args.push(len_param(builder, 3, to64)); } Transcode::Utf8ToCompactUtf16 | Transcode::Utf16ToCompactUtf16 => { - args.push(ptr_param(&mut builder, 0, from64, from_base)); - args.push(len_param(&mut builder, 1, from64)); - args.push(ptr_param(&mut builder, 2, to64, to_base)); - args.push(len_param(&mut builder, 3, to64)); - args.push(len_param(&mut builder, 4, to64)); + args.push(ptr_param(builder, 0, from64, from_base)); + args.push(len_param(builder, 1, from64)); + args.push(ptr_param(builder, 2, to64, to_base)); + args.push(len_param(builder, 3, to64)); + args.push(len_param(builder, 4, to64)); } }; let call = builder.ins().call_indirect(sig, transcode_libcall, &args); @@ -426,20 +525,19 @@ impl Compiler { | Transcode::Utf16ToCompactProbablyUtf16 | Transcode::Utf8ToCompactUtf16 | Transcode::Utf16ToCompactUtf16 => { - raw_results.push(cast_from_pointer(&mut builder, results[0], to64)); + raw_results.push(cast_from_pointer(builder, results[0], to64)); } Transcode::Latin1ToUtf8 | Transcode::Utf16ToUtf8 | Transcode::Utf8ToLatin1 | Transcode::Utf16ToLatin1 => { - raw_results.push(cast_from_pointer(&mut builder, results[0], from64)); - raw_results.push(cast_from_pointer(&mut builder, results[1], to64)); + raw_results.push(cast_from_pointer(builder, results[0], from64)); + raw_results.push(cast_from_pointer(builder, results[1], to64)); } }; builder.ins().return_(&raw_results); - builder.finalize(); } fn load_runtime_memory_base( diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index bb3fc608f31f..60e40405e7a7 100644 --- a/crates/cranelift/src/func_environ.rs +++ 
b/crates/cranelift/src/func_environ.rs @@ -107,7 +107,6 @@ wasmtime_environ::foreach_builtin_function!(declare_function_signatures); pub struct FuncEnvironment<'module_environment> { isa: &'module_environment (dyn TargetIsa + 'module_environment), module: &'module_environment Module, - translation: &'module_environment ModuleTranslation<'module_environment>, types: &'module_environment ModuleTypes, /// Heaps implementing WebAssembly linear memories. @@ -171,7 +170,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> { Self { isa, module: &translation.module, - translation, types, heaps: PrimaryMap::default(), vmctx: None, @@ -1536,7 +1534,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m index: TypeIndex, ) -> WasmResult { let index = self.module.types[index].unwrap_function(); - let sig = crate::indirect_signature(self.isa, &self.types[index]); + let sig = crate::wasm_call_signature(self.isa, &self.types[index]); Ok(func.import_signature(sig)) } @@ -1545,7 +1543,8 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m func: &mut ir::Function, index: FuncIndex, ) -> WasmResult { - let sig = crate::func_signature(self.isa, self.translation, self.types, index); + let sig = self.module.functions[index].signature; + let sig = crate::wasm_call_signature(self.isa, &self.types[sig]); let signature = func.import_signature(sig); let name = ir::ExternalName::User(func.declare_imported_user_function(ir::UserExternalName { @@ -1600,7 +1599,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m pointer_type, mem_flags, funcref_ptr, - i32::from(self.offsets.ptr.vmcaller_checked_func_ref_func_ptr()), + i32::from(self.offsets.ptr.vmcaller_checked_func_ref_wasm_call()), ); // If necessary, check the signature. @@ -1707,7 +1706,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m // Load the callee address. let body_offset = - i32::try_from(self.offsets.vmctx_vmfunction_import_body(callee_index)).unwrap(); + i32::try_from(self.offsets.vmctx_vmfunction_import_wasm_call(callee_index)).unwrap(); let func_addr = pos.ins().load(pointer_type, mem_flags, base, body_offset); // First append the callee vmctx address. diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index aed73950edf2..63efbc79eb56 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -6,12 +6,10 @@ use cranelift_codegen::ir; use cranelift_codegen::isa::{unwind::UnwindInfo, CallConv, TargetIsa}; use cranelift_entity::PrimaryMap; -use cranelift_wasm::{DefinedFuncIndex, FuncIndex, WasmFuncType, WasmType}; +use cranelift_wasm::{DefinedFuncIndex, WasmFuncType, WasmType}; use target_lexicon::{Architecture, CallingConvention}; use wasmtime_cranelift_shared::Relocation; -use wasmtime_environ::{ - FilePos, InstructionAddressMap, ModuleTranslation, ModuleTypes, TrapInformation, -}; +use wasmtime_environ::{FilePos, InstructionAddressMap, TrapInformation}; pub use builder::builder; @@ -22,6 +20,9 @@ mod func_environ; type CompiledFunctions<'a> = PrimaryMap; +/// Trap code used for debug assertions we emit in our JIT code. +const DEBUG_ASSERT_TRAP_CODE: u16 = u16::MAX; + /// Compiled function: machine code body, jump table offsets, and unwind information. 
#[derive(Default)]
 pub struct CompiledFunction {
@@ -122,66 +123,94 @@ fn value_type(isa: &dyn TargetIsa, ty: WasmType) -> ir::types::Type {
     }
 }
 
-/// Returns a cranelift signature suitable to indirectly call the wasm signature
-/// specified by `wasm`.
+/// Get the Cranelift signature with the native calling convention for the given
+/// Wasm function type.
+///
+/// The parameters will start with the callee and caller VM contexts, followed
+/// by the translation of each of the Wasm parameter types to native types. The
+/// results are the Wasm result types translated to native types.
+///
+/// The signature uses the wasmtime variant of the target's default calling
+/// convention. The only difference from the default calling convention is how
+/// multiple results are handled.
+///
+/// When there is only a single result, or zero results, these signatures are
+/// suitable for calling from the host via
+///
+/// ```ignore
+/// unsafe extern "C" fn(
+///     callee_vmctx: *mut VMOpaqueContext,
+///     caller_vmctx: *mut VMOpaqueContext,
+///     // ...wasm parameter types...
+/// ) -> // ...wasm result type...
+/// ```
+///
+/// When there is more than one result, these signatures are suitable for
+/// calling from the host via
+///
+/// ```ignore
+/// unsafe extern "C" fn(
+///     callee_vmctx: *mut VMOpaqueContext,
+///     caller_vmctx: *mut VMOpaqueContext,
+///     // ...wasm parameter types...
+///     retptr: *mut (),
+/// ) -> // ...wasm result type 0...
+/// ```
 ///
-/// This will implicitly use the default calling convention for `isa` since to
-/// indirectly call a wasm function it must be possibly exported somehow (e.g.
-/// this assumes the function target to call doesn't use the "fast" calling
-/// convention).
-fn indirect_signature(isa: &dyn TargetIsa, wasm: &WasmFuncType) -> ir::Signature {
+/// where the first result is returned directly and the rest via the return
+/// pointer.
+fn native_call_signature(isa: &dyn TargetIsa, wasm: &WasmFuncType) -> ir::Signature {
     let mut sig = blank_sig(isa, wasmtime_call_conv(isa));
     push_types(isa, &mut sig, wasm);
-    return sig;
+    sig
 }
 
-/// Returns the cranelift function signature of the function specified.
+/// Get the Cranelift signature for all array-call functions, that is:
+///
+/// ```ignore
+/// unsafe extern "C" fn(
+///     callee_vmctx: *mut VMOpaqueContext,
+///     caller_vmctx: *mut VMOpaqueContext,
+///     values_ptr: *mut ValRaw,
+///     values_len: usize,
+/// )
+/// ```
 ///
-/// Note that this will determine the calling convention for the function, and
-/// namely includes an optimization where functions never exported from a module
-/// use a custom theoretically faster calling convention instead of the default.
-fn func_signature(
-    isa: &dyn TargetIsa,
-    translation: &ModuleTranslation,
-    types: &ModuleTypes,
-    index: FuncIndex,
-) -> ir::Signature {
-    let func = &translation.module.functions[index];
-    let call_conv = match translation.module.defined_func_index(index) {
-        // If this is a defined function in the module and it doesn't escape
-        // then we can optimize this function to use the fastest calling
-        // convention since it's purely an internal implementation detail of
-        // the module itself.
- Some(_idx) if !func.is_escaping() => { - let on_apple_aarch64 = isa - .triple() - .default_calling_convention() - .unwrap_or(CallingConvention::SystemV) - == CallingConvention::AppleAarch64; - - if on_apple_aarch64 { - // FIXME: We need an Apple-specific calling convention, so that - // Cranelift's ABI implementation generates unwinding directives - // about pointer authentication usage, so we can't just use - // `CallConv::Fast`. - CallConv::WasmtimeAppleAarch64 - } else if isa.triple().architecture == Architecture::S390x { - // On S390x we need a Wasmtime calling convention to ensure - // we're using little-endian vector lane order. - wasmtime_call_conv(isa) - } else { - CallConv::Fast - } - } - - // ... otherwise if it's an imported function or if it's a possibly - // exported function then we use the default ABI wasmtime would - // otherwise select. - _ => wasmtime_call_conv(isa), +/// This signature uses the target's default calling convention. +/// +/// Note that regardless of the Wasm function type, the array-call calling +/// convention always uses that same signature. +fn array_call_signature(isa: &dyn TargetIsa) -> ir::Signature { + let mut sig = blank_sig(isa, CallConv::triple_default(isa.triple())); + // The array-call signature has an added parameter for the `values_vec` + // input/output buffer in addition to the size of the buffer, in units + // of `ValRaw`. + sig.params.push(ir::AbiParam::new(isa.pointer_type())); + sig.params.push(ir::AbiParam::new(isa.pointer_type())); + sig +} + +/// Get the internal Wasm calling convention signature for the given type. +fn wasm_call_signature(isa: &dyn TargetIsa, wasm_func_ty: &WasmFuncType) -> ir::Signature { + let call_conv = if isa.triple().default_calling_convention().ok() + == Some(CallingConvention::AppleAarch64) + { + // FIXME: We need an Apple-specific calling convention, so that + // Cranelift's ABI implementation generates unwinding directives + // about pointer authentication usage, so we can't just use + // `CallConv::Fast`. + CallConv::WasmtimeAppleAarch64 + } else if isa.triple().architecture == Architecture::S390x { + // On S390x we need a Wasmtime calling convention to ensure + // we're using little-endian vector lane order. + wasmtime_call_conv(isa) + } else { + CallConv::Fast }; + let mut sig = blank_sig(isa, call_conv); - push_types(isa, &mut sig, &types[func.signature]); - return sig; + push_types(isa, &mut sig, wasm_func_ty); + sig } /// Returns the reference type to use for the provided wasm type. diff --git a/crates/environ/src/compilation.rs b/crates/environ/src/compilation.rs index 0b27e92d33ec..d77b31b51e3b 100644 --- a/crates/environ/src/compilation.rs +++ b/crates/environ/src/compilation.rs @@ -172,11 +172,39 @@ pub trait Compiler: Send + Sync { types: &ModuleTypes, ) -> Result<(WasmFunctionInfo, Box), CompileError>; - /// Creates a function of type `VMTrampoline` which will then call the - /// function pointer argument which has the `ty` type provided. - fn compile_host_to_wasm_trampoline( + /// Compile a trampoline for an array-call host function caller calling the + /// `index`th Wasm function. + /// + /// The trampoline should save the necessary state to record the + /// host-to-Wasm transition (e.g. registers used for fast stack walking). 
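To see how the native and array conventions above differ for one concrete Wasm type, `(param i32 i64) (result f32)`, here is a rough host-side sketch. `Vmctx` and `Slot` are illustrative stand-ins for `*mut VMOpaqueContext` and `ValRaw`; this is not Wasmtime's real API.

```rust
// Stand-in for `*mut VMOpaqueContext`; illustrative only.
type Vmctx = *mut u8;

// Native calling convention for `(param i32 i64) (result f32)`: params and
// the single result travel in registers per the platform's default ABI.
#[allow(dead_code)]
type NativeCall = unsafe extern "C" fn(Vmctx, Vmctx, i32, i64) -> f32;

// Array calling convention, identical for every Wasm type: all values go
// through one in/out buffer of `ValRaw`-sized slots.
#[derive(Clone, Copy)]
#[repr(C)]
union Slot {
    i32_: i32,
    i64_: i64,
    f32_: u32, // f32 stored as raw bits
}
type ArrayCall = unsafe extern "C" fn(Vmctx, Vmctx, *mut Slot, usize);

// Calling `(param i32 i64) (result f32)` via the array convention: spill the
// two params into the buffer, call, then read the result back out of slot 0.
unsafe fn call_array(f: ArrayCall, callee: Vmctx, caller: Vmctx) -> f32 {
    let mut buf = [Slot { i64_: 0 }; 2]; // max(param count, result count)
    buf[0] = Slot { i32_: 1 };
    buf[1] = Slot { i64_: 2 };
    f(callee, caller, buf.as_mut_ptr(), buf.len());
    f32::from_bits(buf[0].f32_)
}

// A toy "host function" with the array calling convention: reads two params,
// writes one result back into slot 0.
unsafe extern "C" fn toy_host_fn(_callee: Vmctx, _caller: Vmctx, args: *mut Slot, _len: usize) {
    let a = (*args).i32_;
    let b = (*args.add(1)).i64_;
    (*args).f32_ = ((a as i64 + b) as f32).to_bits();
}

fn main() {
    let result = unsafe { call_array(toy_host_fn, std::ptr::null_mut(), std::ptr::null_mut()) };
    assert_eq!(result, 3.0);
}
```

The trade-off is the usual one: the native convention is fast but per-signature, while the array convention is uniform over all signatures, which is what lets `Func::new`-style host functions exist without a compiler.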
+    fn compile_array_to_wasm_trampoline(
         &self,
-        ty: &WasmFuncType,
+        translation: &ModuleTranslation<'_>,
+        types: &ModuleTypes,
+        index: DefinedFuncIndex,
+    ) -> Result<Box<dyn Any + Send>, CompileError>;
+
+    /// Compile a trampoline for a native-call host function caller calling the
+    /// `index`th Wasm function.
+    ///
+    /// The trampoline should save the necessary state to record the
+    /// host-to-Wasm transition (e.g. registers used for fast stack walking).
+    fn compile_native_to_wasm_trampoline(
+        &self,
+        translation: &ModuleTranslation<'_>,
+        types: &ModuleTypes,
+        index: DefinedFuncIndex,
+    ) -> Result<Box<dyn Any + Send>, CompileError>;
+
+    /// Compile a trampoline for a Wasm caller calling a native callee with the
+    /// given signature.
+    ///
+    /// The trampoline should save the necessary state to record the
+    /// Wasm-to-host transition (e.g. registers used for fast stack walking).
+    fn compile_wasm_to_native_trampoline(
+        &self,
+        translation: &ModuleTranslation<'_>,
+        wasm_func_ty: &WasmFuncType,
+    ) -> Result<Box<dyn Any + Send>, CompileError>;
 
     /// Appends a list of compiled functions to an in-memory object.
@@ -197,10 +225,15 @@ pub trait Compiler: Send + Sync {
     ///
     /// The `resolve_reloc` argument is intended to resolving relocations
     /// between function, chiefly resolving intra-module calls within one core
-    /// wasm module. The closure here takes two arguments: first the index
-    /// within `funcs` that is being resolved and next the `FuncIndex` which is
-    /// the relocation target to resolve. The return value is an index within
-    /// `funcs` that the relocation points to.
+    /// wasm module. The closure here takes two arguments:
+    ///
+    /// 1. First, the index within `funcs` that is being resolved,
+    ///
+    /// 2. and next the `FuncIndex` which is the relocation target to
+    ///    resolve.
+    ///
+    /// The return value is an index within `funcs` that the relocation points
+    /// to.
     fn append_code(
         &self,
         obj: &mut Object<'static>,
@@ -209,16 +242,35 @@ pub trait Compiler: Send + Sync {
         resolve_reloc: &dyn Fn(usize, FuncIndex) -> usize,
     ) -> Result>;
 
-    /// Inserts two functions for host-to-wasm and wasm-to-host trampolines into
-    /// the `obj` provided.
+    /// Inserts two trampolines into `obj` for an array-call host function:
+    ///
+    /// 1. A wasm-call trampoline: A trampoline that takes arguments in their
+    ///    wasm-call locations, moves them to their array-call locations, calls
+    ///    the array-call host function, and finally moves the return values
+    ///    from the array-call locations to the wasm-call return
+    ///    locations. Additionally, this trampoline manages the wasm-to-host
+    ///    state transition for the runtime.
+    ///
+    /// 2. A native-call trampoline: A trampoline that takes arguments in their
+    ///    native-call locations, moves them to their array-call locations,
+    ///    calls the array-call host function, and finally moves the return
+    ///    values from the array-call locations to the native-call return
+    ///    locations. Does not need to manage any wasm/host state transitions,
+    ///    since both caller and callee are on the host side.
+    ///
+    /// This will configure the same sections as `append_code`, but will likely
+    /// be much smaller.
+    ///
+    /// The two returned `FunctionLoc` structures describe where to find these
+    /// trampolines in the text section, respectively.
     ///
-    /// This will configure the same sections as `emit_obj`, but will likely be
-    /// much smaller. The two returned `Trampoline` structures describe where to
-    /// find the host-to-wasm and wasm-to-host trampolines in the text section,
-    /// respectively.
-    fn emit_trampoline_obj(
+    /// These trampolines are only valid for in-process JIT usage. They bake in
+    /// the function pointer to the host code.
+    fn emit_trampolines_for_array_call_host_func(
         &self,
         ty: &WasmFuncType,
+        // Actually `host_fn: VMArrayCallFunction` but that type is not
+        // available in `wasmtime-environ`.
         host_fn: usize,
         obj: &mut Object<'static>,
     ) -> Result<(FunctionLoc, FunctionLoc)>;
diff --git a/crates/environ/src/component/compiler.rs b/crates/environ/src/component/compiler.rs
index a939a02c5c00..0beb0fcd63b6 100644
--- a/crates/environ/src/component/compiler.rs
+++ b/crates/environ/src/component/compiler.rs
@@ -1,8 +1,36 @@
 use crate::component::{Component, ComponentTypes, LowerImport, Transcoder};
 use crate::WasmFuncType;
 use anyhow::Result;
+use serde::{Deserialize, Serialize};
 use std::any::Any;
 
+/// A triple of related function/trampoline variants with differing calling
+/// conventions: `{wasm,array,native}_call`.
+///
+/// Generic so we can use this with either the `Box<dyn Any + Send>`s that
+/// implementations of the compiler trait return or with `FunctionLoc`s inside
+/// an object file, for example.
+#[derive(Serialize, Deserialize)]
+pub struct AllCallFunc<T> {
+    /// The function exposing the Wasm calling convention.
+    pub wasm_call: T,
+    /// The function exposing the array calling convention.
+    pub array_call: T,
+    /// The function exposing the native calling convention.
+    pub native_call: T,
+}
+
+impl<T> AllCallFunc<T> {
+    /// Map an `AllCallFunc<T>` into an `AllCallFunc<U>`.
+    pub fn map<U>(self, mut f: impl FnMut(T) -> U) -> AllCallFunc<U> {
+        AllCallFunc {
+            wasm_call: f(self.wasm_call),
+            array_call: f(self.array_call),
+            native_call: f(self.native_call),
+        }
+    }
+}
+
 /// Compilation support necessary for components.
 pub trait ComponentCompiler: Send + Sync {
     /// Creates a trampoline for a `canon.lower`'d host function.
@@ -27,14 +55,14 @@ pub trait ComponentCompiler: Send + Sync {
         component: &Component,
         lowering: &LowerImport,
         types: &ComponentTypes,
-    ) -> Result<Box<dyn Any + Send>>;
+    ) -> Result<AllCallFunc<Box<dyn Any + Send>>>;
 
     /// Creates a function which will always trap that has the `ty` specified.
     ///
     /// This will create a small trampoline whose only purpose is to generate a
     /// trap at runtime. This is used to implement the degenerate case of a
     /// `canon lift`'d function immediately being `canon lower`'d.
-    fn compile_always_trap(&self, ty: &WasmFuncType) -> Result<Box<dyn Any + Send>>;
+    fn compile_always_trap(&self, ty: &WasmFuncType) -> Result<AllCallFunc<Box<dyn Any + Send>>>;
 
     /// Compiles a trampoline to implement string transcoding from adapter
     /// modules.
@@ -52,5 +80,5 @@ pub trait ComponentCompiler: Send + Sync {
         component: &Component,
         transcoder: &Transcoder,
         types: &ComponentTypes,
-    ) -> Result<Box<dyn Any + Send>>;
+    ) -> Result<AllCallFunc<Box<dyn Any + Send>>>;
 }
diff --git a/crates/environ/src/component/info.rs b/crates/environ/src/component/info.rs
index 9e477153a438..34aad5977796 100644
--- a/crates/environ/src/component/info.rs
+++ b/crates/environ/src/component/info.rs
@@ -291,6 +291,13 @@ pub struct LowerImport {
     pub options: CanonicalOptions,
 }
 
+impl LowerImport {
+    /// Get the symbol name for this lowered import.
+    pub fn symbol_name(&self) -> String {
+        format!("wasm_component_lowering_{}", self.index.as_u32())
+    }
+}
+
 /// Description of what to initialize when a `GlobalInitializer::AlwaysTrap` is
 /// encountered.
 #[derive(Debug, Serialize, Deserialize)]
@@ -302,6 +309,13 @@ pub struct AlwaysTrap {
     pub canonical_abi: SignatureIndex,
 }
 
+impl AlwaysTrap {
+    /// Get the symbol name for this always-trap function.
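Stepping back to the `AllCallFunc<T>` helper above: `map` is what lets one triple describe both compiler output and object-file layout. A small sketch of the intended use, with a stand-in `FunctionLoc` and a local re-declaration of `AllCallFunc` so the snippet stands alone (illustrative, not the real `wasmtime_environ` types):

```rust
// Re-declared so the sketch compiles on its own; mirrors the `AllCallFunc<T>`
// above, minus the serde derives.
struct AllCallFunc<T> {
    wasm_call: T,
    array_call: T,
    native_call: T,
}

impl<T> AllCallFunc<T> {
    fn map<U>(self, mut f: impl FnMut(T) -> U) -> AllCallFunc<U> {
        AllCallFunc {
            wasm_call: f(self.wasm_call),
            array_call: f(self.array_call),
            native_call: f(self.native_call),
        }
    }
}

// Stand-in for `wasmtime_environ::FunctionLoc`: a region of the text section.
#[derive(Clone, Copy)]
struct FunctionLoc {
    start: u32,
    length: u32,
}

// Resolve a triple of text-section locations into a triple of raw entry
// points in one `map`, keeping the three variants in lockstep.
fn resolve(text: &[u8], locs: AllCallFunc<FunctionLoc>) -> AllCallFunc<*const u8> {
    locs.map(|loc| text[loc.start as usize..][..loc.length as usize].as_ptr())
}

fn main() {
    let text = vec![0u8; 48];
    let locs = AllCallFunc {
        wasm_call: FunctionLoc { start: 0, length: 16 },
        array_call: FunctionLoc { start: 16, length: 16 },
        native_call: FunctionLoc { start: 32, length: 16 },
    };
    let ptrs = resolve(&text, locs);
    assert_eq!(ptrs.wasm_call, text.as_ptr());
}
```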
+ pub fn symbol_name(&self) -> String { + format!("wasm_component_always_trap_{}", self.index.as_u32()) + } +} + /// Definition of a core wasm item and where it can come from within a /// component. /// @@ -486,4 +500,15 @@ pub struct Transcoder { pub signature: SignatureIndex, } +impl Transcoder { + /// Get the symbol name for this transcoder function. + pub fn symbol_name(&self) -> String { + let index = self.index.as_u32(); + let op = self.op.symbol_fragment(); + let from = if self.from64 { "64" } else { "32" }; + let to = if self.to64 { "64" } else { "32" }; + format!("wasm_component_transcoder_{index}_{op}_memory{from}_to_memory{to}") + } +} + pub use crate::fact::{FixedEncoding, Transcode}; diff --git a/crates/environ/src/fact/transcode.rs b/crates/environ/src/fact/transcode.rs index 7d72413050f5..62947cbfab54 100644 --- a/crates/environ/src/fact/transcode.rs +++ b/crates/environ/src/fact/transcode.rs @@ -31,6 +31,28 @@ pub enum Transcode { Utf8ToUtf16, } +impl Transcode { + /// Get this transcoding's symbol fragment. + pub fn symbol_fragment(&self) -> &'static str { + match self { + Transcode::Copy(x) => match x { + FixedEncoding::Utf8 => "copy_utf8", + FixedEncoding::Utf16 => "copy_utf16", + FixedEncoding::Latin1 => "copy_latin1", + }, + Transcode::Latin1ToUtf16 => "latin1_to_utf16", + Transcode::Latin1ToUtf8 => "latin1_to_utf8", + Transcode::Utf16ToCompactProbablyUtf16 => "utf16_to_compact_probably_utf16", + Transcode::Utf16ToCompactUtf16 => "utf16_to_compact_utf16", + Transcode::Utf16ToLatin1 => "utf16_to_latin1", + Transcode::Utf16ToUtf8 => "utf16_to_utf8", + Transcode::Utf8ToCompactUtf16 => "utf8_to_compact_utf16", + Transcode::Utf8ToLatin1 => "utf8_to_latin1", + Transcode::Utf8ToUtf16 => "utf8_to_utf16", + } + } +} + #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)] #[allow(missing_docs)] pub enum FixedEncoding { diff --git a/crates/environ/src/trap_encoding.rs b/crates/environ/src/trap_encoding.rs index a999fc9c7484..01b0c6dd9b1b 100644 --- a/crates/environ/src/trap_encoding.rs +++ b/crates/environ/src/trap_encoding.rs @@ -87,6 +87,7 @@ pub enum Trap { /// Used to indicate that a trap was raised by atomic wait operations on non shared memory. AtomicWaitNonSharedMemory, + // // if adding a variant here be sure to update the `check!` macro below } diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs index 666a7201f064..fb04b2a896e3 100644 --- a/crates/environ/src/vmoffsets.rs +++ b/crates/environ/src/vmoffsets.rs @@ -98,30 +98,53 @@ pub trait PtrSize { /// Returns the pointer size, in bytes, for the target. fn size(&self) -> u8; - /// The offset of the `func_ptr` field. + /// The offset of the `VMContext::runtime_limits` field + fn vmcontext_runtime_limits(&self) -> u8 { + u8::try_from(align( + u32::try_from(std::mem::size_of::()).unwrap(), + u32::from(self.size()), + )) + .unwrap() + } + + /// The offset of the `native_call` field. #[allow(clippy::erasing_op)] #[inline] - fn vmcaller_checked_func_ref_func_ptr(&self) -> u8 { + fn vmcaller_checked_func_ref_native_call(&self) -> u8 { 0 * self.size() } + /// The offset of the `array_call` field. + #[allow(clippy::erasing_op)] + #[inline] + fn vmcaller_checked_func_ref_array_call(&self) -> u8 { + 1 * self.size() + } + + /// The offset of the `wasm_call` field. + #[allow(clippy::erasing_op)] + #[inline] + fn vmcaller_checked_func_ref_wasm_call(&self) -> u8 { + 2 * self.size() + } + /// The offset of the `type_index` field. 
#[allow(clippy::identity_op)] #[inline] fn vmcaller_checked_func_ref_type_index(&self) -> u8 { - 1 * self.size() + 3 * self.size() } /// The offset of the `vmctx` field. #[inline] fn vmcaller_checked_func_ref_vmctx(&self) -> u8 { - 2 * self.size() + 4 * self.size() } /// Return the size of `VMCallerCheckedFuncRef`. #[inline] fn size_of_vmcaller_checked_func_ref(&self) -> u8 { - 3 * self.size() + 5 * self.size() } /// Return the size of `VMGlobalDefinition`; this is the size of the largest value type (i.e. a @@ -193,6 +216,19 @@ pub trait PtrSize { fn size_of_vmmemory_pointer(&self) -> u8 { self.size() } + + // Offsets within `VMArrayCallHostFuncContext`. + + // Offsets within `VMNativeCallHostFuncContext`. + + /// Return the offset of `VMNativeCallHostFuncContext::funcref`. + fn vmnative_call_host_func_context_funcref(&self) -> u8 { + u8::try_from(align( + u32::try_from(std::mem::size_of::()).unwrap(), + u32::from(self.size()), + )) + .unwrap() + } } /// Type representing the size of a pointer for the current compilation host @@ -444,24 +480,38 @@ impl From> for VMOffsets
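For reference, with the `PtrSize` methods above on a 64-bit target (pointer size 8), the new `VMCallerCheckedFuncRef` layout works out to the following field offsets. This is a standalone sanity check, not code from the patch:

```rust
/// Mirror of the `vmcaller_checked_func_ref_*` accessors above: each field
/// occupies one pointer-sized slot, in declaration order.
fn funcref_field_offsets(pointer_size: u8) -> [u8; 6] {
    // native_call, array_call, wasm_call, type_index, vmctx, total size.
    [0u8, 1, 2, 3, 4, 5].map(|i| i * pointer_size)
}

fn main() {
    assert_eq!(funcref_field_offsets(8), [0, 8, 16, 24, 32, 40]);
}
```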
{
}

impl<P: PtrSize> VMOffsets<P>
{ - /// The offset of the `body` field. + /// The offset of the `wasm_call` field. #[allow(clippy::erasing_op)] #[inline] - pub fn vmfunction_import_body(&self) -> u8 { + pub fn vmfunction_import_wasm_call(&self) -> u8 { 0 * self.pointer_size() } + /// The offset of the `native_call` field. + #[allow(clippy::erasing_op)] + #[inline] + pub fn vmfunction_import_native_call(&self) -> u8 { + 1 * self.pointer_size() + } + + /// The offset of the `array_call` field. + #[allow(clippy::erasing_op)] + #[inline] + pub fn vmfunction_import_array_call(&self) -> u8 { + 2 * self.pointer_size() + } + /// The offset of the `vmctx` field. #[allow(clippy::identity_op)] #[inline] pub fn vmfunction_import_vmctx(&self) -> u8 { - 1 * self.pointer_size() + 3 * self.pointer_size() } /// Return the size of `VMFunctionImport`. #[inline] pub fn size_of_vmfunction_import(&self) -> u8 { - 2 * self.pointer_size() + 4 * self.pointer_size() } } @@ -759,10 +809,22 @@ impl VMOffsets

{ + index.as_u32() * u32::from(self.ptr.size_of_vmcaller_checked_func_ref()) } - /// Return the offset to the `body` field in `*const VMFunctionBody` index `index`. + /// Return the offset to the `wasm_call` field in `*const VMFunctionBody` index `index`. #[inline] - pub fn vmctx_vmfunction_import_body(&self, index: FuncIndex) -> u32 { - self.vmctx_vmfunction_import(index) + u32::from(self.vmfunction_import_body()) + pub fn vmctx_vmfunction_import_wasm_call(&self, index: FuncIndex) -> u32 { + self.vmctx_vmfunction_import(index) + u32::from(self.vmfunction_import_wasm_call()) + } + + /// Return the offset to the `native_call` field in `*const VMFunctionBody` index `index`. + #[inline] + pub fn vmctx_vmfunction_import_native_call(&self, index: FuncIndex) -> u32 { + self.vmctx_vmfunction_import(index) + u32::from(self.vmfunction_import_native_call()) + } + + /// Return the offset to the `array_call` field in `*const VMFunctionBody` index `index`. + #[inline] + pub fn vmctx_vmfunction_import_array_call(&self, index: FuncIndex) -> u32 { + self.vmctx_vmfunction_import(index) + u32::from(self.vmfunction_import_array_call()) } /// Return the offset to the `vmctx` field in `*const VMFunctionBody` index `index`. @@ -845,11 +907,22 @@ impl VMOffsets

{ } } -/// Equivalent of `VMCONTEXT_MAGIC` except for host functions. +/// Magic value for core Wasm VM contexts. +/// +/// This is stored at the start of all `VMContext` structures. +pub const VMCONTEXT_MAGIC: u32 = u32::from_le_bytes(*b"core"); + +/// Equivalent of `VMCONTEXT_MAGIC` except for array-call host functions. +/// +/// This is stored at the start of all `VMArrayCallHostFuncContext` structures +/// and double-checked on `VMArrayCallHostFuncContext::from_opaque`. +pub const VM_ARRAY_CALL_HOST_FUNC_MAGIC: u32 = u32::from_le_bytes(*b"ACHF"); + +/// Equivalent of `VMCONTEXT_MAGIC` except for native-call host functions. /// -/// This is stored at the start of all `VMHostFuncContext` structures and -/// double-checked on `VMHostFuncContext::from_opaque`. -pub const VM_HOST_FUNC_MAGIC: u32 = u32::from_le_bytes(*b"host"); +/// This is stored at the start of all `VMNativeCallHostFuncContext` structures +/// and double-checked on `VMNativeCallHostFuncContext::from_opaque`. +pub const VM_NATIVE_CALL_HOST_FUNC_MAGIC: u32 = u32::from_le_bytes(*b"NCHF"); #[cfg(test)] mod tests { diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index b2272195f99a..cdf45d005ea4 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -5,14 +5,12 @@ use crate::unwind::UnwindRegistration; use anyhow::{anyhow, bail, Context, Result}; use object::read::{File, Object, ObjectSection}; use object::ObjectSymbol; -use std::mem; use std::mem::ManuallyDrop; use std::ops::Range; use wasmtime_environ::obj; -use wasmtime_environ::FunctionLoc; use wasmtime_jit_icache_coherence as icache_coherence; use wasmtime_runtime::libcalls; -use wasmtime_runtime::{MmapVec, VMTrampoline}; +use wasmtime_runtime::MmapVec; /// Management of executable memory within a `MmapVec` /// @@ -198,18 +196,6 @@ impl CodeMemory { &self.mmap[self.trap_data.clone()] } - /// Returns a `VMTrampoline` function pointer for the given function in the - /// text section. - /// - /// # Unsafety - /// - /// This function is unsafe as there's no guarantee that the returned - /// function pointer is valid. - pub unsafe fn vmtrampoline(&self, loc: FunctionLoc) -> VMTrampoline { - let ptr = self.text()[loc.start as usize..][..loc.length as usize].as_ptr(); - mem::transmute::<*const u8, VMTrampoline>(ptr) - } - /// Publishes the internal ELF image to be ready for execution. /// /// This method can only be called once and will panic if called twice. This diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index 00bd3263f10a..e55a3d4dc487 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -20,10 +20,36 @@ use wasmtime_environ::{ PrimaryMap, SignatureIndex, StackMapInformation, Tunables, WasmFunctionInfo, }; use wasmtime_runtime::{ - CompiledModuleId, CompiledModuleIdAllocator, GdbJitImageRegistration, MmapVec, VMTrampoline, + CompiledModuleId, CompiledModuleIdAllocator, GdbJitImageRegistration, MmapVec, }; -/// Secondary in-memory results of compilation. +/// Secondary in-memory results of function compilation. +#[derive(Serialize, Deserialize)] +pub struct CompiledFunctionInfo { + wasm_func_info: WasmFunctionInfo, + wasm_func_loc: FunctionLoc, + array_to_wasm_trampoline: Option, + native_to_wasm_trampoline: Option, +} + +impl CompiledFunctionInfo { + /// Create a new `CompiledFunctionInfo`. 
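An aside on the three context magic values defined in `vmoffsets.rs` above: each is just the little-endian bytes of a four-character ASCII tag, so a clobbered or mismatched context pointer is easy to recognize in a debugger. A standalone check of the constants:

```rust
fn main() {
    // Core Wasm `VMContext`.
    assert_eq!(u32::from_le_bytes(*b"core"), 0x6572_6f63); // VMCONTEXT_MAGIC
    // Array-call and native-call host function contexts.
    assert_eq!(u32::from_le_bytes(*b"ACHF"), 0x4648_4341); // VM_ARRAY_CALL_HOST_FUNC_MAGIC
    assert_eq!(u32::from_le_bytes(*b"NCHF"), 0x4648_434e); // VM_NATIVE_CALL_HOST_FUNC_MAGIC
}
```

The `core` value, `0x65726f63`, is the same constant the old hand-written trampolines compared against when checking whether a callee was a core `VMContext`.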
+ pub fn new( + wasm_func_info: WasmFunctionInfo, + wasm_func_loc: FunctionLoc, + array_to_wasm_trampoline: Option, + native_to_wasm_trampoline: Option, + ) -> CompiledFunctionInfo { + CompiledFunctionInfo { + wasm_func_info, + wasm_func_loc, + array_to_wasm_trampoline, + native_to_wasm_trampoline, + } + } +} + +/// Secondary in-memory results of module compilation. /// /// This opaque structure can be optionally passed back to /// `CompiledModule::from_artifacts` to avoid decoding extra information there. @@ -33,14 +59,14 @@ pub struct CompiledModuleInfo { module: Module, /// Metadata about each compiled function. - funcs: PrimaryMap, + funcs: PrimaryMap, /// Sorted list, by function index, of names we have for this module. func_names: Vec, - /// The trampolines compiled into the text section and their start/length - /// relative to the start of the text section. - pub trampolines: Vec<(SignatureIndex, FunctionLoc)>, + /// Metadata about wasm-to-native trampolines. Used when exposing a native + /// callee (e.g. `Func::wrap`) to a Wasm caller. Sorted by signature index. + wasm_to_native_trampolines: Vec<(SignatureIndex, FunctionLoc)>, /// General compilation metadata. meta: Metadata, @@ -145,17 +171,26 @@ impl<'a> ObjectBuilder<'a> { /// * `funcs` - compilation metadata about functions within the translation /// as well as where the functions are located in the text section. /// - /// * `trampolines` - list of all trampolines necessary for this module - /// and where they're located in the text section. + /// * `array_to_wasm_trampolines` - list of all trampolines necessary for + /// array callers (e.g. `Func::new`) calling Wasm callees. One for each + /// defined function that escapes. Must be sorted by `DefinedFuncIndex`. + /// + /// * `native_to_wasm_trampolines` - list of all trampolines necessary for + /// native callers (e.g. `Func::wrap`) calling Wasm callees. One for each + /// defined function that escapes. Must be sorted by `DefinedFuncIndex`. + /// + /// * `wasm_to_native_trampolines` - list of all trampolines necessary for + /// Wasm callers calling native callees (e.g. `Func::wrap`). One for each + /// function signature in the module. Must be sorted by `SignatureIndex`. /// - /// Returns the `CompiledModuleInfo` corresopnding to this core wasm module + /// Returns the `CompiledModuleInfo` corresponding to this core Wasm module /// as a result of this append operation. This is then serialized into the /// final artifact by the caller. pub fn append( &mut self, translation: ModuleTranslation<'_>, - funcs: PrimaryMap, - trampolines: Vec<(SignatureIndex, FunctionLoc)>, + funcs: PrimaryMap, + wasm_to_native_trampolines: Vec<(SignatureIndex, FunctionLoc)>, ) -> Result { let ModuleTranslation { mut module, @@ -269,7 +304,7 @@ impl<'a> ObjectBuilder<'a> { Ok(CompiledModuleInfo { module, funcs, - trampolines, + wasm_to_native_trampolines, func_names, meta: Metadata { native_debug_info_present: self.tunables.generate_native_debuginfo, @@ -387,8 +422,8 @@ impl<'a> ObjectBuilder<'a> { /// A compiled wasm module, ready to be instantiated. 
pub struct CompiledModule { module: Arc, - funcs: PrimaryMap, - trampolines: Vec<(SignatureIndex, FunctionLoc)>, + funcs: PrimaryMap, + wasm_to_native_trampolines: Vec<(SignatureIndex, FunctionLoc)>, meta: Metadata, code_memory: Arc, dbg_jit_registration: Option, @@ -423,7 +458,7 @@ impl CompiledModule { let mut ret = Self { module: Arc::new(info.module), funcs: info.funcs, - trampolines: info.trampolines, + wasm_to_native_trampolines: info.wasm_to_native_trampolines, dbg_jit_registration: None, code_memory, meta: info.meta, @@ -513,23 +548,76 @@ impl CompiledModule { /// Returns the body of the function that `index` points to. #[inline] pub fn finished_function(&self, index: DefinedFuncIndex) -> &[u8] { - let (_, loc) = &self.funcs[index]; + let loc = self.funcs[index].wasm_func_loc; &self.text()[loc.start as usize..][..loc.length as usize] } - /// Returns the per-signature trampolines for this module. - pub fn trampolines(&self) -> impl Iterator + '_ { - let text = self.text(); - self.trampolines.iter().map(move |(signature, loc)| { - ( - *signature, - unsafe { - let ptr = &text[loc.start as usize]; - std::mem::transmute::<*const u8, VMTrampoline>(ptr) - }, - loc.length as usize, - ) - }) + /// Get the array-to-Wasm trampoline for the function `index` points to. + /// + /// If the function `index` points to does not escape, then `None` is + /// returned. + /// + /// These trampolines are used for array callers (e.g. `Func::new`) + /// calling Wasm callees. + pub fn array_to_wasm_trampoline(&self, index: DefinedFuncIndex) -> Option<&[u8]> { + let loc = self.funcs[index].array_to_wasm_trampoline?; + Some(&self.text()[loc.start as usize..][..loc.length as usize]) + } + + /// Returns an iterator over all array-to-Wasm trampolines defined within + /// this module, providing both their index and their in-memory body. + pub fn array_to_wasm_trampolines( + &self, + ) -> impl ExactSizeIterator + '_ { + self.funcs + .keys() + .map(move |i| (i, self.array_to_wasm_trampoline(i).unwrap())) + } + + /// Get the native-to-Wasm trampoline for the function `index` points to. + /// + /// If the function `index` points to does not escape, then `None` is + /// returned. + /// + /// These trampolines are used for native callers (e.g. `Func::wrap`) + /// calling Wasm callees. + pub fn native_to_wasm_trampoline(&self, index: DefinedFuncIndex) -> Option<&[u8]> { + let loc = self.funcs[index].native_to_wasm_trampoline?; + Some(&self.text()[loc.start as usize..][..loc.length as usize]) + } + + /// Returns an iterator over all native-to-Wasm trampolines defined within + /// this module, providing both their index and their in-memory body. + pub fn native_to_wasm_trampolines( + &self, + ) -> impl ExactSizeIterator + '_ { + self.funcs + .keys() + .map(move |i| (i, self.native_to_wasm_trampoline(i).unwrap())) + } + + /// Get the Wasm-to-native trampoline for the given signature. + /// + /// These trampolines are used for filling in + /// `VMCallerCheckedFuncRef::wasm_call` for `Func::wrap`-style host funcrefs + /// that don't have access to a compiler when created. 
+    pub fn wasm_to_native_trampoline(&self, signature: SignatureIndex) -> &[u8] {
+        let idx = self
+            .wasm_to_native_trampolines
+            .binary_search_by_key(&signature, |entry| entry.0)
+            .expect("should have a Wasm-to-native trampoline for all signatures");
+        let (_, loc) = self.wasm_to_native_trampolines[idx];
+        &self.text()[loc.start as usize..][..loc.length as usize]
+    }
+
+    /// Returns an iterator over all Wasm-to-native trampolines defined within
+    /// this module, providing both their signature and their in-memory body.
+    pub fn wasm_to_native_trampolines(
+        &self,
+    ) -> impl ExactSizeIterator + '_ {
+        self.wasm_to_native_trampolines
+            .iter()
+            .map(move |(i, _)| (*i, self.wasm_to_native_trampoline(*i)))
     }
 
     /// Returns the stack map information for all functions defined in this
     /// module.
     ///
     /// The iterator returned iterates over the span of the compiled function in
     /// memory with the stack maps associated with those bytes.
     pub fn stack_maps(&self) -> impl Iterator {
-        self.finished_functions()
-            .map(|(_, f)| f)
-            .zip(self.funcs.values().map(|f| &f.0.stack_maps[..]))
+        self.finished_functions().map(|(_, f)| f).zip(
+            self.funcs
+                .values()
+                .map(|f| &f.wasm_func_info.stack_maps[..]),
+        )
     }
 
     /// Lookups a defined function by a program counter value.
@@ -550,13 +640,11 @@
     pub fn func_by_text_offset(&self, text_offset: usize) -> Option<(DefinedFuncIndex, u32)> {
         let text_offset = u32::try_from(text_offset).unwrap();
 
-        let index = match self
-            .funcs
-            .binary_search_values_by_key(&text_offset, |(_, loc)| {
-                debug_assert!(loc.length > 0);
-                // Return the inclusive "end" of the function
-                loc.start + loc.length - 1
-            }) {
+        let index = match self.funcs.binary_search_values_by_key(&text_offset, |e| {
+            debug_assert!(e.wasm_func_loc.length > 0);
+            // Return the inclusive "end" of the function
+            e.wasm_func_loc.start + e.wasm_func_loc.length - 1
+        }) {
             Ok(k) => {
                 // Exact match, pc is at the end of this function
                 k
@@ -569,15 +657,15 @@
             }
         };
 
-        let (_, loc) = self.funcs.get(index)?;
-        let start = loc.start;
-        let end = loc.start + loc.length;
+        let CompiledFunctionInfo { wasm_func_loc, .. } = self.funcs.get(index)?;
+        let start = wasm_func_loc.start;
+        let end = wasm_func_loc.start + wasm_func_loc.length;
 
         if text_offset < start || end < text_offset {
             return None;
         }
 
-        Some((index, text_offset - loc.start))
+        Some((index, text_offset - wasm_func_loc.start))
     }
 
     /// Gets the function location information for a given function index.
@@ -586,7 +674,7 @@
             .funcs
             .get(index)
             .expect("defined function should be present")
-            .1
+            .wasm_func_loc
     }
 
     /// Gets the function information for a given function index.
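A note on the lookup strategy above: the Wasm-to-native trampolines are kept in a `Vec` sorted by `SignatureIndex` and found by binary search rather than stored in a hash map, likely because a sorted vector is cheaper to serialize and deserialize in the compiled artifact. The same pattern in miniature, with `u32` standing in for `SignatureIndex`:

```rust
fn main() {
    // (signature index, trampoline body) pairs, sorted by signature index.
    let table: &[(u32, &str)] = &[(0, "t0"), (3, "t3"), (7, "t7")];
    let idx = table
        .binary_search_by_key(&3, |entry| entry.0)
        .expect("should have a trampoline for every signature");
    assert_eq!(table[idx].1, "t3");
}
```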
@@ -595,7 +683,7 @@ impl CompiledModule { .funcs .get(index) .expect("defined function should be present") - .0 + .wasm_func_info } /// Creates a new symbolication context which can be used to further diff --git a/crates/jit/src/lib.rs b/crates/jit/src/lib.rs index 1c7e44df58e2..11f48d7ff0b6 100644 --- a/crates/jit/src/lib.rs +++ b/crates/jit/src/lib.rs @@ -29,7 +29,8 @@ mod unwind; pub use crate::code_memory::CodeMemory; pub use crate::instantiate::{ - subslice_range, CompiledModule, CompiledModuleInfo, ObjectBuilder, SymbolizeContext, + subslice_range, CompiledFunctionInfo, CompiledModule, CompiledModuleInfo, ObjectBuilder, + SymbolizeContext, }; pub use demangling::*; pub use profiling::*; diff --git a/crates/jit/src/profiling/jitdump_linux.rs b/crates/jit/src/profiling/jitdump_linux.rs index 1878aa7f06c6..cce66016c2d9 100644 --- a/crates/jit/src/profiling/jitdump_linux.rs +++ b/crates/jit/src/profiling/jitdump_linux.rs @@ -88,10 +88,30 @@ impl ProfilingAgent for JitDumpAgent { } // Note: these are the trampolines into exported functions. - for (idx, func, len) in module.trampolines() { - let (addr, len) = (func as usize as *const u8, len); + for (name, body) in module + .array_to_wasm_trampolines() + .map(|(i, body)| { + ( + format!("wasm::array_to_wasm_trampoline[{}]", i.index()), + body, + ) + }) + .chain(module.native_to_wasm_trampolines().map(|(i, body)| { + ( + format!("wasm::native_to_wasm_trampoline[{}]", i.index()), + body, + ) + })) + .chain(module.wasm_to_native_trampolines().map(|(i, body)| { + ( + format!("wasm::wasm_to_native_trampolines[{}]", i.index()), + body, + ) + })) + { + let addr = body.as_ptr(); + let len = body.len(); let timestamp = jitdump_file.get_time_stamp(); - let name = format!("wasm::trampoline[{}]", idx.index()); if let Err(err) = jitdump_file.dump_code_load_record(&name, addr, len, timestamp, pid, tid) { diff --git a/crates/jit/src/profiling/perfmap_linux.rs b/crates/jit/src/profiling/perfmap_linux.rs index fa1e23ac9d21..b4b032574851 100644 --- a/crates/jit/src/profiling/perfmap_linux.rs +++ b/crates/jit/src/profiling/perfmap_linux.rs @@ -54,9 +54,28 @@ impl ProfilingAgent for PerfMapAgent { } // Note: these are the trampolines into exported functions. - for (idx, func, len) in module.trampolines() { - let (addr, len) = (func as usize as *const u8, len); - let name = format!("wasm::trampoline[{}]", idx.index()); + for (name, body) in module + .array_to_wasm_trampolines() + .map(|(i, body)| { + ( + format!("wasm::array_to_wasm_trampoline[{}]", i.index()), + body, + ) + }) + .chain(module.native_to_wasm_trampolines().map(|(i, body)| { + ( + format!("wasm::native_to_wasm_trampoline[{}]", i.index()), + body, + ) + })) + .chain(module.wasm_to_native_trampolines().map(|(i, body)| { + ( + format!("wasm::wasm_to_native_trampolines[{}]", i.index()), + body, + ) + })) + { + let (addr, len) = (body.as_ptr(), body.len()); if let Err(err) = Self::make_line(&mut file, &name, addr, len) { eprintln!("Error when writing export trampoline info to the perf map file: {err}"); return; diff --git a/crates/jit/src/profiling/vtune.rs b/crates/jit/src/profiling/vtune.rs index 64699ed30fc4..ca9546ca989d 100644 --- a/crates/jit/src/profiling/vtune.rs +++ b/crates/jit/src/profiling/vtune.rs @@ -106,16 +106,31 @@ impl State { } // Note: these are the trampolines into exported functions. 
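Where the profilers previously registered a single `wasm::trampoline[N]` symbol per exported signature, each of the three trampoline kinds now gets its own name. The naming scheme in miniature (the indices are made up for illustration; the format strings match the ones used in the profiling agents below):

```rust
fn main() {
    let func = 2; // e.g. a DefinedFuncIndex
    let sig = 5; // e.g. a SignatureIndex
    println!("wasm::array_to_wasm_trampoline[{func}]");
    println!("wasm::native_to_wasm_trampoline[{func}]");
    println!("wasm::wasm_to_native_trampolines[{sig}]");
}
```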
- for (idx, func, len) in module.trampolines() { - let idx = idx.index(); - let (addr, len) = (func as usize as *const u8, len); - let method_name = format!("wasm::trampoline[{}]", idx,); - log::trace!( - "new trampoline for exported signature {} @ {:?}\n", - idx, - addr - ); - self.notify_code(&module_name, &method_name, addr, len); + for (name, body) in module + .array_to_wasm_trampolines() + .map(|(i, body)| { + ( + format!("wasm::array_to_wasm_trampoline[{}]", i.index()), + body, + ) + }) + .chain(module.native_to_wasm_trampolines().map(|(i, body)| { + ( + format!("wasm::native_to_wasm_trampoline[{}]", i.index()), + body, + ) + })) + .chain(module.wasm_to_native_trampolines().map(|(i, body)| { + ( + format!("wasm::wasm_to_native_trampolines[{}]", i.index()), + body, + ) + })) + { + let addr = body.as_ptr(); + let len = body.len(); + log::trace!("new trampoline `{}` @ {:?}\n", name, addr); + self.notify_code(&module_name, &name, addr, len); } } diff --git a/crates/runtime/src/component.rs b/crates/runtime/src/component.rs index 233da3586299..dd2c24772678 100644 --- a/crates/runtime/src/component.rs +++ b/crates/runtime/src/component.rs @@ -7,8 +7,8 @@ //! cranelift-compiled adapters, will use this `VMComponentContext` as well. use crate::{ - Store, VMCallerCheckedFuncRef, VMFunctionBody, VMGlobalDefinition, VMMemoryDefinition, - VMOpaqueContext, VMSharedSignatureIndex, ValRaw, + Store, VMArrayCallFunction, VMCallerCheckedFuncRef, VMGlobalDefinition, VMMemoryDefinition, + VMNativeCallFunction, VMOpaqueContext, VMSharedSignatureIndex, VMWasmCallFunction, ValRaw, }; use memoffset::offset_of; use std::alloc::{self, Layout}; @@ -268,7 +268,10 @@ impl ComponentInstance { unsafe fn anyfunc(&self, offset: u32) -> NonNull { let ret = self.vmctx_plus_offset::(offset); - debug_assert!((*ret).func_ptr.as_ptr() as usize != INVALID_PTR); + debug_assert!( + mem::transmute::>, usize>((*ret).wasm_call) + != INVALID_PTR + ); debug_assert!((*ret).vmctx as usize != INVALID_PTR); NonNull::new(ret).unwrap() } @@ -321,18 +324,18 @@ impl ComponentInstance { /// * `idx` - the index that's being configured /// * `lowering` - the host-related closure information to get invoked when /// the lowering is called. - /// * `anyfunc_func_ptr` - the cranelift-compiled trampoline which will - /// read the `VMComponentContext` and invoke `lowering` provided. This - /// function pointer will be passed to wasm if wasm needs to instantiate - /// something. - /// * `anyfunc_type_index` - the signature index for the core wasm type + /// * `{wasm,native,array}_call` - the cranelift-compiled trampolines which will + /// read the `VMComponentContext` and invoke `lowering` provided. + /// * `type_index` - the signature index for the core wasm type /// registered within the engine already. 
pub fn set_lowering( &mut self, idx: LoweredIndex, lowering: VMLowering, - anyfunc_func_ptr: NonNull, - anyfunc_type_index: VMSharedSignatureIndex, + wasm_call: NonNull, + native_call: NonNull, + array_call: VMArrayCallFunction, + type_index: VMSharedSignatureIndex, ) { unsafe { debug_assert!( @@ -344,8 +347,10 @@ impl ComponentInstance { *self.vmctx_plus_offset(self.offsets.lowering(idx)) = lowering; self.set_anyfunc( self.offsets.lowering_anyfunc(idx), - anyfunc_func_ptr, - anyfunc_type_index, + wasm_call, + native_call, + array_call, + type_index, ); } } @@ -354,34 +359,58 @@ impl ComponentInstance { pub fn set_always_trap( &mut self, idx: RuntimeAlwaysTrapIndex, - func_ptr: NonNull, + wasm_call: NonNull, + native_call: NonNull, + array_call: VMArrayCallFunction, type_index: VMSharedSignatureIndex, ) { - unsafe { self.set_anyfunc(self.offsets.always_trap_anyfunc(idx), func_ptr, type_index) } + unsafe { + self.set_anyfunc( + self.offsets.always_trap_anyfunc(idx), + wasm_call, + native_call, + array_call, + type_index, + ); + } } /// Same as `set_lowering` but for the transcoder functions. pub fn set_transcoder( &mut self, idx: RuntimeTranscoderIndex, - func_ptr: NonNull, + wasm_call: NonNull, + native_call: NonNull, + array_call: VMArrayCallFunction, type_index: VMSharedSignatureIndex, ) { - unsafe { self.set_anyfunc(self.offsets.transcoder_anyfunc(idx), func_ptr, type_index) } + unsafe { + self.set_anyfunc( + self.offsets.transcoder_anyfunc(idx), + wasm_call, + native_call, + array_call, + type_index, + ); + } } unsafe fn set_anyfunc( &mut self, offset: u32, - func_ptr: NonNull, + wasm_call: NonNull, + native_call: NonNull, + array_call: VMArrayCallFunction, type_index: VMSharedSignatureIndex, ) { debug_assert!(*self.vmctx_plus_offset::(offset) == INVALID_PTR); - let vmctx = self.vmctx(); + let vmctx = VMOpaqueContext::from_vmcomponent(self.vmctx()); *self.vmctx_plus_offset(offset) = VMCallerCheckedFuncRef { - func_ptr, + wasm_call: Some(wasm_call), + native_call, + array_call, type_index, - vmctx: VMOpaqueContext::from_vmcomponent(vmctx), + vmctx, }; } @@ -529,12 +558,20 @@ impl OwnedComponentInstance { &mut self, idx: LoweredIndex, lowering: VMLowering, - anyfunc_func_ptr: NonNull, - anyfunc_type_index: VMSharedSignatureIndex, + wasm_call: NonNull, + native_call: NonNull, + array_call: VMArrayCallFunction, + type_index: VMSharedSignatureIndex, ) { unsafe { - self.instance_mut() - .set_lowering(idx, lowering, anyfunc_func_ptr, anyfunc_type_index) + self.instance_mut().set_lowering( + idx, + lowering, + wasm_call, + native_call, + array_call, + type_index, + ) } } @@ -542,12 +579,14 @@ impl OwnedComponentInstance { pub fn set_always_trap( &mut self, idx: RuntimeAlwaysTrapIndex, - func_ptr: NonNull, + wasm_call: NonNull, + native_call: NonNull, + array_call: VMArrayCallFunction, type_index: VMSharedSignatureIndex, ) { unsafe { self.instance_mut() - .set_always_trap(idx, func_ptr, type_index) + .set_always_trap(idx, wasm_call, native_call, array_call, type_index) } } @@ -555,12 +594,14 @@ impl OwnedComponentInstance { pub fn set_transcoder( &mut self, idx: RuntimeTranscoderIndex, - func_ptr: NonNull, + wasm_call: NonNull, + native_call: NonNull, + array_call: VMArrayCallFunction, type_index: VMSharedSignatureIndex, ) { unsafe { self.instance_mut() - .set_transcoder(idx, func_ptr, type_index) + .set_transcoder(idx, wasm_call, native_call, array_call, type_index) } } } diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index c3c6d6378a08..581d824ad81c 100644 --- 
a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -9,7 +9,7 @@ use crate::table::{Table, TableElement, TableElementType}; use crate::vmcontext::{ VMBuiltinFunctionsArray, VMCallerCheckedFuncRef, VMContext, VMFunctionImport, VMGlobalDefinition, VMGlobalImport, VMMemoryDefinition, VMMemoryImport, VMOpaqueContext, - VMRuntimeLimits, VMTableDefinition, VMTableImport, VMCONTEXT_MAGIC, + VMRuntimeLimits, VMTableDefinition, VMTableImport, }; use crate::{ ExportFunction, ExportGlobal, ExportMemory, ExportTable, Imports, ModuleRuntimeInfo, Store, @@ -31,7 +31,7 @@ use wasmtime_environ::{ packed_option::ReservedValue, DataIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, ElemIndex, EntityIndex, EntityRef, EntitySet, FuncIndex, GlobalIndex, GlobalInit, HostPtr, MemoryIndex, Module, PrimaryMap, SignatureIndex, TableIndex, - TableInitialization, Trap, VMOffsets, WasmType, + TableInitialization, Trap, VMOffsets, WasmType, VMCONTEXT_MAGIC, }; mod allocator; @@ -157,6 +157,7 @@ impl Instance { self.runtime_info.module() } + #[inline] fn offsets(&self) -> &VMOffsets { self.runtime_info.offsets() } @@ -519,24 +520,35 @@ impl Instance { *base.add(sig.index()) }; - let (func_ptr, vmctx) = if let Some(def_index) = self.module().defined_func_index(index) { - ( - self.runtime_info.function(def_index), - VMOpaqueContext::from_vmcontext(self.vmctx_ptr()), - ) + let funcref = if let Some(def_index) = self.module().defined_func_index(index) { + VMCallerCheckedFuncRef { + native_call: self + .runtime_info + .native_to_wasm_trampoline(def_index) + .expect("should have native-to-Wasm trampoline for escaping function"), + array_call: self + .runtime_info + .array_to_wasm_trampoline(def_index) + .expect("should have array-to-Wasm trampoline for escaping function"), + wasm_call: Some(self.runtime_info.function(def_index)), + vmctx: VMOpaqueContext::from_vmcontext(self.vmctx_ptr()), + type_index, + } } else { let import = self.imported_function(index); - (import.body.as_ptr(), import.vmctx) + VMCallerCheckedFuncRef { + native_call: import.native_call, + array_call: import.array_call, + wasm_call: Some(import.wasm_call), + vmctx: import.vmctx, + type_index, + } }; // Safety: we have a `&mut self`, so we have exclusive access // to this Instance. unsafe { - *into = VMCallerCheckedFuncRef { - vmctx, - type_index, - func_ptr: NonNull::new(func_ptr).expect("Non-null function pointer"), - }; + std::ptr::write(into, funcref); } } @@ -1240,7 +1252,7 @@ impl InstanceHandle { } /// Performs post-initialization of an instance after its handle has been - /// creqtaed and registered with a store. + /// created and registered with a store. 
/// /// Failure of this function means that the instance still must persist /// within the store since failure may indicate partial failure, or some diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index 73af4b04c57a..87821bb9bd76 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -940,10 +940,9 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { mod test { use super::*; use crate::{ - CompiledModuleId, Imports, MemoryImage, ModuleRuntimeInfo, StorePtr, VMFunctionBody, - VMSharedSignatureIndex, + CompiledModuleId, Imports, MemoryImage, ModuleRuntimeInfo, StorePtr, VMSharedSignatureIndex, }; - use std::sync::Arc; + use std::{ptr::NonNull, sync::Arc}; use wasmtime_environ::{DefinedFuncIndex, DefinedMemoryIndex}; pub(crate) fn empty_runtime_info( @@ -955,7 +954,25 @@ mod test { fn module(&self) -> &Arc { &self.0 } - fn function(&self, _: DefinedFuncIndex) -> *mut VMFunctionBody { + fn function(&self, _: DefinedFuncIndex) -> NonNull { + unimplemented!() + } + fn array_to_wasm_trampoline( + &self, + _: DefinedFuncIndex, + ) -> Option { + unimplemented!() + } + fn native_to_wasm_trampoline( + &self, + _: DefinedFuncIndex, + ) -> Option> { + unimplemented!() + } + fn wasm_to_native_trampoline( + &self, + _: VMSharedSignatureIndex, + ) -> Option> { unimplemented!() } fn memory_image( diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 3428a0ffb014..54970f19a2cc 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -22,6 +22,7 @@ use anyhow::Error; use std::fmt; +use std::ptr::NonNull; use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; use std::sync::Arc; use wasmtime_environ::{DefinedFuncIndex, DefinedMemoryIndex, HostPtr, VMOffsets}; @@ -65,16 +66,16 @@ pub use crate::memory::{ pub use crate::mmap::Mmap; pub use crate::mmap_vec::MmapVec; pub use crate::table::{Table, TableElement}; -pub use crate::trampolines::prepare_host_to_wasm_trampoline; pub use crate::traphandlers::{ catch_traps, init_traps, raise_lib_trap, raise_user_trap, resume_panic, tls_eager_initialize, Backtrace, SignalHandler, TlsRestore, Trap, TrapReason, }; pub use crate::vmcontext::{ - VMCallerCheckedFuncRef, VMContext, VMFunctionBody, VMFunctionImport, VMGlobalDefinition, - VMGlobalImport, VMHostFuncContext, VMInvokeArgument, VMMemoryDefinition, VMMemoryImport, + VMArrayCallFunction, VMArrayCallHostFuncContext, VMCallerCheckedFuncRef, VMContext, + VMFunctionBody, VMFunctionImport, VMGlobalDefinition, VMGlobalImport, VMInvokeArgument, + VMMemoryDefinition, VMMemoryImport, VMNativeCallFunction, VMNativeCallHostFuncContext, VMOpaqueContext, VMRuntimeLimits, VMSharedSignatureIndex, VMTableDefinition, VMTableImport, - VMTrampoline, ValRaw, + VMWasmCallFunction, ValRaw, }; mod module_id; @@ -170,7 +171,31 @@ pub trait ModuleRuntimeInfo: Send + Sync + 'static { fn module(&self) -> &Arc; /// Returns the address, in memory, that the function `index` resides at. - fn function(&self, index: DefinedFuncIndex) -> *mut VMFunctionBody; + fn function(&self, index: DefinedFuncIndex) -> NonNull; + + /// Returns the address, in memory, of the trampoline that allows the given + /// defined Wasm function to be called by the native calling convention. + /// + /// Returns `None` for Wasm functions which do not escape, and therefore are + /// not callable from outside the Wasm module itself. 
+    fn native_to_wasm_trampoline(
+        &self,
+        index: DefinedFuncIndex,
+    ) -> Option<NonNull<VMNativeCallFunction>>;
+
+    /// Returns the address, in memory, of the trampoline that allows the given
+    /// defined Wasm function to be called by the array calling convention.
+    ///
+    /// Returns `None` for Wasm functions which do not escape, and therefore are
+    /// not callable from outside the Wasm module itself.
+    fn array_to_wasm_trampoline(&self, index: DefinedFuncIndex) -> Option<VMArrayCallFunction>;
+
+    /// Returns the address, in memory, of the trampoline that allows Wasm to call
+    /// a native function of the given signature.
+    fn wasm_to_native_trampoline(
+        &self,
+        signature: VMSharedSignatureIndex,
+    ) -> Option<NonNull<VMWasmCallFunction>>;
 
     /// Returns the `MemoryImage` structure used for copy-on-write
     /// initialization of the memory, if it's applicable.
diff --git a/crates/runtime/src/trampolines.rs b/crates/runtime/src/trampolines.rs
index ee981c900a71..16741075cef5 100644
--- a/crates/runtime/src/trampolines.rs
+++ b/crates/runtime/src/trampolines.rs
@@ -1,45 +1,4 @@
-//! Trampolines for calling into Wasm from the host and calling the host from
-//! Wasm.
-
-use crate::VMContext;
-use std::mem;
-
-/// Given a Wasm function pointer and a `vmctx`, prepare the `vmctx` for calling
-/// into that Wasm function, and return the host-to-Wasm entry trampoline.
-///
-/// Callers must never call Wasm function pointers directly. Callers must
-/// instead call this function and then enter Wasm through the returned
-/// host-to-Wasm trampoline.
-///
-/// # Unsafety
-///
-/// The `vmctx` argument must be valid.
-///
-/// The generic type `T` must be a function pointer type and `func` must be a
-/// pointer to a Wasm function of that signature.
-///
-/// After calling this function, you may not mess with the vmctx or any other
-/// Wasm state until after you've called the trampoline returned by this
-/// function.
-#[inline]
-pub unsafe fn prepare_host_to_wasm_trampoline<T>(vmctx: *mut VMContext, func: T) -> T {
-    assert_eq!(mem::size_of::<T>(), mem::size_of::<usize>());
-
-    // Save the callee in the `vmctx`. The trampoline will read this function
-    // pointer and tail call to it.
-    (*vmctx)
-        .instance_mut()
-        .set_callee(Some(mem::transmute_copy(&func)));
-
-    // Give callers the trampoline, transmuted into their desired function
-    // signature (the trampoline is variadic and works with all signatures).
-    mem::transmute_copy(&(host_to_wasm_trampoline as usize))
-}
-
-extern "C" {
-    fn host_to_wasm_trampoline();
-    pub(crate) fn wasm_to_host_trampoline();
-}
+//! Wasm-to-libcall trampolines.
 
 cfg_if::cfg_if! {
     if #[cfg(target_arch = "x86_64")] {
diff --git a/crates/runtime/src/trampolines/aarch64.rs b/crates/runtime/src/trampolines/aarch64.rs
index 5716821a9024..80edd8ae5a89 100644
--- a/crates/runtime/src/trampolines/aarch64.rs
+++ b/crates/runtime/src/trampolines/aarch64.rs
@@ -1,98 +1,3 @@
-use wasmtime_asm_macros::asm_func;
-
-#[rustfmt::skip]
-asm_func!(
-    "host_to_wasm_trampoline",
-    r#"
-        .cfi_startproc
-        bti c
-
-        // Load the pointer to `VMRuntimeLimits` in `x9`.
-        ldur x9, [x1, #8]
-
-        // Check to see if callee is a core `VMContext` (MAGIC == "core"). NB:
-        // we do not support big-endian aarch64 so the magic value is always
-        // little-endian encoded.
-        ldur w10, [x0]
-        mov w11, #0x6f63
-        movk w11, #0x6572, lsl #16
-        cmp w10, w11
-
-        // Store the last Wasm SP into the `last_wasm_entry_sp` in the limits, if
-        // this was core Wasm, otherwise store an invalid sentinal value.
- mov x12, #-1 - mov x13, sp - csel x12, x13, x12, eq - stur x12, [x9, #40] - - // Tail call to the callee function pointer in the vmctx. - ldur x16, [x1, #16] - br x16 - - .cfi_endproc - "# -); - -#[cfg(test)] -mod host_to_wasm_trampoline_offsets_tests { - use wasmtime_environ::{Module, PtrSize, VMOffsets}; - - #[test] - fn test() { - let module = Module::new(); - let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); - - assert_eq!(8, offsets.vmctx_runtime_limits()); - assert_eq!(40, offsets.ptr.vmruntime_limits_last_wasm_entry_sp()); - assert_eq!(16, offsets.vmctx_callee()); - assert_eq!(0x65726f63, u32::from_le_bytes(*b"core")); - } -} - -asm_func!( - "wasm_to_host_trampoline", - " - .cfi_startproc - bti c - - // Load the pointer to `VMRuntimeLimits` in `x9`. - ldur x9, [x1, #8] - - // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. - stur fp, [x9, #24] - - // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. - stur lr, [x9, #32] - - // Tail call to the actual host function. - // - // This *must* be a tail call so that we do not push to the stack and mess - // up the offsets of stack arguments (if any). - ldur x16, [x0, #8] - br x16 - - .cfi_endproc - ", -); - -#[cfg(test)] -mod wasm_to_host_trampoline_offsets_tests { - use crate::VMHostFuncContext; - use memoffset::offset_of; - use wasmtime_environ::{Module, PtrSize, VMOffsets}; - - #[test] - fn test() { - let module = Module::new(); - let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); - - assert_eq!(8, offsets.vmctx_runtime_limits()); - assert_eq!(24, offsets.ptr.vmruntime_limits_last_wasm_exit_fp()); - assert_eq!(32, offsets.ptr.vmruntime_limits_last_wasm_exit_pc()); - assert_eq!(8, offset_of!(VMHostFuncContext, host_func)); - } -} - #[rustfmt::skip] macro_rules! wasm_to_libcall_trampoline { ($libcall:ident ; $libcall_impl:ident) => { @@ -120,3 +25,18 @@ macro_rules! wasm_to_libcall_trampoline { ); }; } + +#[cfg(test)] +mod wasm_to_libcall_trampoline_offsets_tests { + use wasmtime_environ::{Module, PtrSize, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(24, offsets.ptr.vmruntime_limits_last_wasm_exit_fp()); + assert_eq!(32, offsets.ptr.vmruntime_limits_last_wasm_exit_pc()); + } +} diff --git a/crates/runtime/src/trampolines/riscv64.rs b/crates/runtime/src/trampolines/riscv64.rs index b2af6c5d3e44..456800594367 100644 --- a/crates/runtime/src/trampolines/riscv64.rs +++ b/crates/runtime/src/trampolines/riscv64.rs @@ -1,95 +1,3 @@ -use wasmtime_asm_macros::asm_func; - -#[rustfmt::skip] -asm_func!( - "host_to_wasm_trampoline", - r#" - .cfi_startproc - - // Load the pointer to `VMRuntimeLimits` in `t0`. - ld t0, 8(a1) - - // Check to see if callee is a core `VMContext` (MAGIC == "core"). NB: - // we do not support big-endian riscv64 so the magic value is always - // little-endian encoded. - li t1,0x65726f63 - lwu t3,0(a0) - bne t3,t1,ne - mv t1,sp - j over - ne: - li t1,-1 - over: - // Store the last Wasm SP into the `last_wasm_entry_sp` in the limits, if this - // was core Wasm, otherwise store an invalid sentinal value. 
- sd t1,40(t0) - - ld t0,16(a1) - jr t0 - - .cfi_endproc - "# -); - -#[cfg(test)] -mod host_to_wasm_trampoline_offsets_tests { - use wasmtime_environ::{Module, PtrSize, VMOffsets}; - - #[test] - fn test() { - let module = Module::new(); - let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); - - assert_eq!(8, offsets.vmctx_runtime_limits()); - assert_eq!(40, offsets.ptr.vmruntime_limits_last_wasm_entry_sp()); - assert_eq!(16, offsets.vmctx_callee()); - assert_eq!(0x65726f63, u32::from_le_bytes(*b"core")); - } -} - -#[rustfmt::skip] -asm_func!( - "wasm_to_host_trampoline", - " - .cfi_startproc simple - - // Load the pointer to `VMRuntimeLimits` in `t0`. - ld t0,8(a1) - - // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. - sd fp,24(t0) - - // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. - sd ra,32(t0) - - // Tail call to the actual host function. - // - // This *must* be a tail call so that we do not push to the stack and mess - // up the offsets of stack arguments (if any). - ld t0, 8(a0) - jr t0 - .cfi_endproc - ", -); - -#[cfg(test)] -mod wasm_to_host_trampoline_offsets_tests { - use crate::VMHostFuncContext; - use memoffset::offset_of; - use wasmtime_environ::{Module, PtrSize, VMOffsets}; - - #[test] - fn test() { - let module = Module::new(); - let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); - - assert_eq!(8, offsets.vmctx_runtime_limits()); - assert_eq!(24, offsets.ptr.vmruntime_limits_last_wasm_exit_fp()); - assert_eq!(32, offsets.ptr.vmruntime_limits_last_wasm_exit_pc()); - assert_eq!(8, offset_of!(VMHostFuncContext, host_func)); - } -} - #[rustfmt::skip] macro_rules! wasm_to_libcall_trampoline { ($libcall:ident ; $libcall_impl:ident) => { @@ -118,3 +26,18 @@ macro_rules! wasm_to_libcall_trampoline { ); }; } + +#[cfg(test)] +mod wasm_to_libcall_trampoline_offsets_tests { + use wasmtime_environ::{Module, PtrSize, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(24, offsets.ptr.vmruntime_limits_last_wasm_exit_fp()); + assert_eq!(32, offsets.ptr.vmruntime_limits_last_wasm_exit_pc()); + } +} diff --git a/crates/runtime/src/trampolines/s390x.S b/crates/runtime/src/trampolines/s390x.S index 0a4f11a774ae..db93394164d5 100644 --- a/crates/runtime/src/trampolines/s390x.S +++ b/crates/runtime/src/trampolines/s390x.S @@ -9,57 +9,6 @@ .type host_to_wasm_trampoline,@function .p2align 2 -host_to_wasm_trampoline: - .cfi_startproc - - // Load the pointer to `VMRuntimeLimits` in `%r1`. - lg %r1, 8(%r3) - - // Check to see if callee is a core `VMContext` (MAGIC == "core"). - l %r0, 0(%r2) - cfi %r0, 0x65726f63 - - // Store the last Wasm SP into the `last_wasm_entry_sp` in the limits, if - // this was core Wasm, otherwise store an invalid sentinal value. - lgr %r0, %r15 - locghine %r0, -1 - stg %r0, 40(%r1) - - // Tail call to the callee function pointer in the vmctx. - lg %r1, 16(%r3) - br %r1 - - .cfi_endproc - .size host_to_wasm_trampoline,.-host_to_wasm_trampoline - - .hidden wasm_to_host_trampoline - .globl wasm_to_host_trampoline - .type wasm_to_host_trampoline,@function - .p2align 2 - -wasm_to_host_trampoline: - .cfi_startproc - - // Load the pointer to `VMRuntimeLimits` in `%r1`. - lg %r1, 8(%r3) - - // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. 
- lg %r0, 0(%r15) - stg %r0, 24(%r1) - - // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. - stg %r14, 32(%r1) - - // Tail call to the actual host function. - // - // This *must* be a tail call so that we do not push to the stack and mess - // up the offsets of stack arguments (if any). - lg %r1, 8(%r2) - br %r1 - - .cfi_endproc - .size wasm_to_host_trampoline,.-wasm_to_host_trampoline - #define LIBCALL_TRAMPOLINE(libcall, libcall_impl) \ .hidden libcall ; \ .globl libcall ; \ diff --git a/crates/runtime/src/trampolines/s390x.rs b/crates/runtime/src/trampolines/s390x.rs index c6cd6335c075..f8ca65ca4fc9 100644 --- a/crates/runtime/src/trampolines/s390x.rs +++ b/crates/runtime/src/trampolines/s390x.rs @@ -1,29 +1,16 @@ -// The host_to_wasm_trampoline implementation is in the s390x.S -// file, but we still want to have this unit test here. - -#[cfg(test)] -mod host_to_wasm_trampoline_offsets_tests { - use wasmtime_environ::{Module, PtrSize, VMOffsets}; - - #[test] - fn test() { - let module = Module::new(); - let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); +// The implementation for libcall trampolines is in the s390x.S +// file. We provide this dummy definition of wasm_to_libcall_trampoline +// here to make libcalls.rs compile on s390x. Note that this means we +// have to duplicate the list of libcalls used in the assembler file. - assert_eq!(8, offsets.vmctx_runtime_limits()); - assert_eq!(40, offsets.ptr.vmruntime_limits_last_wasm_entry_sp()); - assert_eq!(16, offsets.vmctx_callee()); - assert_eq!(0x65726f63, u32::from_le_bytes(*b"core")); - } +macro_rules! wasm_to_libcall_trampoline { + ($libcall:ident ; $libcall_impl:ident) => {}; } // The wasm_to_host_trampoline implementation is in the s390x.S // file, but we still want to have this unit test here. - #[cfg(test)] -mod wasm_to_host_trampoline_offsets_tests { - use crate::VMHostFuncContext; - use memoffset::offset_of; +mod wasm_to_libcall_trampoline_offsets_tests { use wasmtime_environ::{Module, PtrSize, VMOffsets}; #[test] @@ -34,15 +21,5 @@ mod wasm_to_host_trampoline_offsets_tests { assert_eq!(8, offsets.vmctx_runtime_limits()); assert_eq!(24, offsets.ptr.vmruntime_limits_last_wasm_exit_fp()); assert_eq!(32, offsets.ptr.vmruntime_limits_last_wasm_exit_pc()); - assert_eq!(8, offset_of!(VMHostFuncContext, host_func)); } } - -// The implementation for libcall trampolines is in the s390x.S -// file. We provide this dummy definition of wasm_to_libcall_trampoline -// here to make libcalls.rs compile on s390x. Note that this means we -// have to duplicate the list of libcalls used in the assembler file. - -macro_rules! wasm_to_libcall_trampoline { - ($libcall:ident ; $libcall_impl:ident) => {}; -} diff --git a/crates/runtime/src/trampolines/x86_64.rs b/crates/runtime/src/trampolines/x86_64.rs index 1f237c42fe83..d5af4446dc71 100644 --- a/crates/runtime/src/trampolines/x86_64.rs +++ b/crates/runtime/src/trampolines/x86_64.rs @@ -1,17 +1,15 @@ -use wasmtime_asm_macros::asm_func; - // Helper macros for getting the first and second arguments according to the // system calling convention, as well as some callee-saved scratch registers we // can safely use in the trampolines. cfg_if::cfg_if! { if #[cfg(windows)] { - macro_rules! arg0 { () => ("rcx") } - macro_rules! arg1 { () => ("rdx") } + macro_rules! callee_vmctx { () => ("rcx") } + #[allow(unused)] macro_rules! caller_vmctx { () => ("rdx") } macro_rules! scratch0 { () => ("r10") } macro_rules! 
scratch1 { () => ("r11") } } else if #[cfg(unix)] { - macro_rules! arg0 { () => ("rdi") } - macro_rules! arg1 { () => ("rsi") } + macro_rules! callee_vmctx { () => ("rdi") } + #[allow(unused)] macro_rules! caller_vmctx { () => ("rsi") } macro_rules! scratch0 { () => ("r10") } macro_rules! scratch1 { () => ("r11") } } else { @@ -19,97 +17,6 @@ cfg_if::cfg_if! { } } -#[rustfmt::skip] -asm_func!( - "host_to_wasm_trampoline", - concat!( - " - .cfi_startproc simple - .cfi_def_cfa_offset 0 - - // Load the pointer to `VMRuntimeLimits` in `scratch0`. - mov ", scratch0!(), ", 8[", arg1!(), "] - - // Check to see if this is a core `VMContext` (MAGIC == 'core'). - cmp DWORD PTR [", arg0!(), "], 0x65726f63 - - // Store the last Wasm SP into the `last_wasm_entry_sp` in the limits, if this - // was core Wasm, otherwise store an invalid sentinal value. - mov ", scratch1!(), ", -1 - cmove ", scratch1!(), ", rsp - mov 40[", scratch0!(), "], ", scratch1!(), " - - // Tail call to the callee function pointer in the vmctx. - jmp 16[", arg1!(), "] - - .cfi_endproc - ", - ), -); - -#[cfg(test)] -mod host_to_wasm_trampoline_offsets_tests { - use wasmtime_environ::{Module, PtrSize, VMOffsets}; - - #[test] - fn test() { - let module = Module::new(); - let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); - - assert_eq!(8, offsets.vmctx_runtime_limits()); - assert_eq!(40, offsets.ptr.vmruntime_limits_last_wasm_entry_sp()); - assert_eq!(16, offsets.vmctx_callee()); - assert_eq!(0x65726f63, u32::from_le_bytes(*b"core")); - } -} - -#[rustfmt::skip] -asm_func!( - "wasm_to_host_trampoline", - concat!( - " - .cfi_startproc simple - .cfi_def_cfa_offset 0 - - // Load the pointer to `VMRuntimeLimits` in `scratch0`. - mov ", scratch0!(), ", 8[", arg1!(), "] - - // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. - mov 24[", scratch0!(), "], rbp - - // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. - mov ", scratch1!(), ", [rsp] - mov 32[", scratch0!(), "], ", scratch1!(), " - - // Tail call to the actual host function. - // - // This *must* be a tail call so that we do not push to the stack and mess - // up the offsets of stack arguments (if any). - jmp 8[", arg0!(), "] - - .cfi_endproc - ", - ), -); - -#[cfg(test)] -mod wasm_to_host_trampoline_offsets_tests { - use crate::VMHostFuncContext; - use memoffset::offset_of; - use wasmtime_environ::{Module, PtrSize, VMOffsets}; - - #[test] - fn test() { - let module = Module::new(); - let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); - - assert_eq!(8, offsets.vmctx_runtime_limits()); - assert_eq!(24, offsets.ptr.vmruntime_limits_last_wasm_exit_fp()); - assert_eq!(32, offsets.ptr.vmruntime_limits_last_wasm_exit_pc()); - assert_eq!(8, offset_of!(VMHostFuncContext, host_func)); - } -} - #[rustfmt::skip] macro_rules! wasm_to_libcall_trampoline { ($libcall:ident ; $libcall_impl:ident) => { @@ -121,7 +28,7 @@ macro_rules! wasm_to_libcall_trampoline { .cfi_def_cfa_offset 0 // Load the pointer to `VMRuntimeLimits` in `", scratch0!(), "`. - mov ", scratch0!(), ", 8[", arg0!(), "] + mov ", scratch0!(), ", 8[", callee_vmctx!(), "] // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. mov 24[", scratch0!(), "], rbp @@ -140,3 +47,18 @@ macro_rules! 
wasm_to_libcall_trampoline {
         );
     };
 }
+
+#[cfg(test)]
+mod wasm_to_libcall_trampoline_offsets_tests {
+    use wasmtime_environ::{Module, PtrSize, VMOffsets};
+
+    #[test]
+    fn test() {
+        let module = Module::new();
+        let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module);
+
+        assert_eq!(8, offsets.vmctx_runtime_limits());
+        assert_eq!(24, offsets.ptr.vmruntime_limits_last_wasm_exit_fp());
+        assert_eq!(32, offsets.ptr.vmruntime_limits_last_wasm_exit_pc());
+    }
+}
diff --git a/crates/runtime/src/traphandlers/backtrace.rs b/crates/runtime/src/traphandlers/backtrace.rs
index 0efed892bffa..7efe97dbc831 100644
--- a/crates/runtime/src/traphandlers/backtrace.rs
+++ b/crates/runtime/src/traphandlers/backtrace.rs
@@ -133,7 +133,14 @@ impl Backtrace {
             None => {
                 let pc = *(*state.limits).last_wasm_exit_pc.get();
                 let fp = *(*state.limits).last_wasm_exit_fp.get();
-                assert_ne!(pc, 0);
+
+                if pc == 0 {
+                    // Host function calling another host function that
+                    // traps. No Wasm on the stack.
+                    assert_eq!(fp, 0);
+                    return;
+                }
+
                 (pc, fp)
             }
         };
@@ -165,6 +172,22 @@ impl Backtrace {
                 return;
             }
 
+            // We save `CallThreadState` linked list entries for various kinds
+            // of {native,array} x {native,array} calls -- and we technically
+            // "shouldn't" because these calls can't enter Wasm -- because our
+            // Wasm call path unconditionally calls
+            // `wasmtime_runtime::catch_traps` even when the callee is not
+            // actually Wasm. We do this because the host-to-Wasm call path is
+            // very hot and these host-to-host calls that flow through that code
+            // path are very rare and also not hot. Anyway, these unnecessary
+            // `catch_traps` calls result in these null/empty `CallThreadState`
+            // entries. Recognize and ignore them.
+            if state.old_last_wasm_entry_sp() == 0 {
+                debug_assert_eq!(state.old_last_wasm_exit_fp(), 0);
+                debug_assert_eq!(state.old_last_wasm_exit_pc(), 0);
+                continue;
+            }
+
             if let ControlFlow::Break(()) = Self::trace_through_wasm(
                 state.old_last_wasm_exit_pc(),
                 state.old_last_wasm_exit_fp(),
@@ -180,61 +203,37 @@ impl Backtrace {
     }
 
     /// Walk through a contiguous sequence of Wasm frames starting with the
-    /// frame at the given PC and FP and ending at `first_wasm_sp`.
+    /// frame at the given PC and FP and ending at `trampoline_sp`.
     unsafe fn trace_through_wasm(
         mut pc: usize,
         mut fp: usize,
-        first_wasm_sp: usize,
+        trampoline_sp: usize,
         mut f: impl FnMut(Frame) -> ControlFlow<()>,
     ) -> ControlFlow<()> {
         log::trace!("=== Tracing through contiguous sequence of Wasm frames ===");
-        log::trace!("first_wasm_sp = 0x{:016x}", first_wasm_sp);
+        log::trace!("trampoline_sp = 0x{:016x}", trampoline_sp);
         log::trace!("   initial pc = 0x{:016x}", pc);
         log::trace!("   initial fp = 0x{:016x}", fp);
 
-        // In our host-to-Wasm trampoline, we save `-1` as a sentinal SP
-        // value for when the callee is not actually a core Wasm
-        // function (as determined by looking at the callee `vmctx`). If
-        // we encounter `-1`, this is an empty sequence of Wasm frames
-        // where a host called a host function so the following
-        // happened:
-        //
-        // * We entered the host-to-wasm-trampoline, saved (an invalid
-        //   sentinal for) entry SP, and tail called to the "Wasm"
-        //   callee,
-        //
-        // * entered the Wasm-to-host trampoline, saved the exit FP and
-        //   PC, and tail called to the host callee,
-        //
-        // * and are now in host code.
-        //
-        // Ultimately, this means that there are 0 Wasm frames in this
-        // contiguous sequence of Wasm frames, and we have nothing to
-        // walk through here.
-        if first_wasm_sp == -1_isize as usize {
-            log::trace!("=== Done tracing (empty sequence of Wasm frames) ===");
-            return ControlFlow::Continue(());
-        }
-
-        // We use `0` as a sentinal value for when there is not any Wasm
-        // on the stack and these values are non-existant. If we
-        // actually entered Wasm (see above guard for `-1`) then, then
-        // by the time we got here we should have either exited Wasm
-        // through the Wasm-to-host trampoline and properly set these
-        // values, or we should have caught a trap in a signal handler
-        // and also properly recovered these values in that case.
+        // We already checked for this case in the `trace_with_trap_state`
+        // caller.
         assert_ne!(pc, 0);
         assert_ne!(fp, 0);
-        assert_ne!(first_wasm_sp, 0);
+        assert_ne!(trampoline_sp, 0);
 
-        // The stack grows down, and therefore any frame pointer we are
-        // dealing with should be less than the stack pointer on entry
-        // to Wasm.
-        assert!(first_wasm_sp >= fp, "{first_wasm_sp:#x} >= {fp:#x}");
-
-        arch::assert_entry_sp_is_aligned(first_wasm_sp);
+        arch::assert_entry_sp_is_aligned(trampoline_sp);
 
         loop {
+            // At the start of each iteration of the loop, we know that `fp` is
+            // a frame pointer from Wasm code. Therefore, we know it is not
+            // being used as an extra general-purpose register, and it is safe
+            // to dereference it to get the PC and the next older frame pointer.
+
+            // The stack grows down, and therefore any frame pointer we are
+            // dealing with should be less than the stack pointer on entry
+            // to Wasm.
+            assert!(trampoline_sp >= fp, "{trampoline_sp:#x} >= {fp:#x}");
+
             arch::assert_fp_is_aligned(fp);
 
             log::trace!("--- Tracing through one Wasm frame ---");
@@ -243,20 +242,6 @@ impl Backtrace {
 
             f(Frame { pc, fp })?;
 
-            // If our FP has reached the SP upon entry to Wasm from the
-            // host, then we've successfully walked all the Wasm frames,
-            // and have now reached a host frame. We're done iterating
-            // through this contiguous sequence of Wasm frames.
-            if arch::reached_entry_sp(fp, first_wasm_sp) {
-                log::trace!("=== Done tracing contiguous sequence of Wasm frames ===");
-                return ControlFlow::Continue(());
-            }
-
-            // If we didn't return above, then we know we are still in a
-            // Wasm frame, and since Cranelift maintains frame pointers,
-            // we know that the FP isn't an arbitrary value and it is
-            // safe to dereference it to read the next PC/FP.
-
             pc = arch::get_next_older_pc_from_fp(fp);
 
             // We rely on this offset being zero for all supported architectures
@@ -265,7 +250,51 @@ impl Backtrace {
             // code as well!
             assert_eq!(arch::NEXT_OLDER_FP_FROM_FP_OFFSET, 0);
 
+            // Get the next older frame pointer from the current Wasm frame
+            // pointer.
+            //
+            // The next older frame pointer may or may not be a Wasm frame's
+            // frame pointer, but it is trusted either way (i.e. is actually a
+            // frame pointer and not being used as a general-purpose register)
+            // because we always enter Wasm from the host via a trampoline, and
+            // this trampoline maintains a proper frame pointer.
+            //
+            // We want to detect when we've reached the trampoline, and break
+            // out of this stack-walking loop. All of our architectures' stacks
+            // grow down and look something vaguely like this:
+            //
+            //     | ...               |
+            //     | Native Frames     |
+            //     | ...               |
+            //     |-------------------|
+            //     | ...               | <-- Trampoline FP            |
+            //     | Trampoline Frame  |                              |
+            //     | ...               | <-- Trampoline SP            |
+            //     |-------------------|                            Stack
+            //     | Return Address    |                            Grows
+            //     | Previous FP       | <-- Wasm FP                Down
+            //     | ...               |                              |
+            //     | Wasm Frames       |                              |
+            //     | ...               
| V + // + // The trampoline records its own stack pointer (`trampoline_sp`), + // which is guaranteed to be above all Wasm frame pointers but at or + // below its own frame pointer. It is usually two words above the + // Wasm frame pointer (at least on x86-64, exact details vary across + // architectures) but not always: if the first Wasm function called + // by the host has many arguments, some of them could be passed on + // the stack in between the return address and the trampoline's + // frame. + // + // To check when we've reached the trampoline frame, it is therefore + // sufficient to check when the next frame pointer is greater than + // `trampoline_sp`. let next_older_fp = *(fp as *mut usize).add(arch::NEXT_OLDER_FP_FROM_FP_OFFSET); + if next_older_fp > trampoline_sp { + log::trace!("=== Done tracing contiguous sequence of Wasm frames ==="); + return ControlFlow::Continue(()); + } + // Because the stack always grows down, the older FP must be greater // than the current FP. assert!(next_older_fp > fp, "{next_older_fp:#x} > {fp:#x}"); diff --git a/crates/runtime/src/traphandlers/backtrace/aarch64.rs b/crates/runtime/src/traphandlers/backtrace/aarch64.rs index 063bdf8db145..4f89e0d1241e 100644 --- a/crates/runtime/src/traphandlers/backtrace/aarch64.rs +++ b/crates/runtime/src/traphandlers/backtrace/aarch64.rs @@ -34,12 +34,6 @@ pub unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize { // And the current frame pointer points to the next older frame pointer. pub const NEXT_OLDER_FP_FROM_FP_OFFSET: usize = 0; -pub fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool { - // Calls in aarch64 push two i64s (old FP and return PC) so our entry SP is - // two i64s above the first Wasm FP. - fp == first_wasm_sp - 16 -} - pub fn assert_entry_sp_is_aligned(sp: usize) { assert_eq!(sp % 16, 0, "stack should always be aligned to 16"); } diff --git a/crates/runtime/src/traphandlers/backtrace/riscv64.rs b/crates/runtime/src/traphandlers/backtrace/riscv64.rs index 44badb60506e..adb789971921 100644 --- a/crates/runtime/src/traphandlers/backtrace/riscv64.rs +++ b/crates/runtime/src/traphandlers/backtrace/riscv64.rs @@ -6,12 +6,6 @@ pub unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize { // And the current frame pointer points to the next older frame pointer. pub const NEXT_OLDER_FP_FROM_FP_OFFSET: usize = 0; -pub fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool { - // Calls in riscv64 push two i64s (old FP and return PC) so our entry SP is - // two i64s above the first Wasm FP. - fp == first_wasm_sp - 16 -} - pub fn assert_entry_sp_is_aligned(sp: usize) { assert_eq!(sp % 16, 0, "stack should always be aligned to 16"); } diff --git a/crates/runtime/src/traphandlers/backtrace/s390x.rs b/crates/runtime/src/traphandlers/backtrace/s390x.rs index 4cc46a9d9e61..46df876c60f2 100644 --- a/crates/runtime/src/traphandlers/backtrace/s390x.rs +++ b/crates/runtime/src/traphandlers/backtrace/s390x.rs @@ -9,13 +9,6 @@ pub unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize { // by the current "FP". pub const NEXT_OLDER_FP_FROM_FP_OFFSET: usize = 0; -pub fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool { - // The "FP" (backchain pointer) holds the value of the stack pointer at - // function entry. If this equals the value the stack pointer had when we - // first entered a Wasm function, we are done. 
- fp == first_wasm_sp -} - pub fn assert_entry_sp_is_aligned(sp: usize) { assert_eq!(sp % 8, 0, "stack should always be aligned to 8"); } diff --git a/crates/runtime/src/traphandlers/backtrace/x86_64.rs b/crates/runtime/src/traphandlers/backtrace/x86_64.rs index 7c250c5e706a..50c27fbead3a 100644 --- a/crates/runtime/src/traphandlers/backtrace/x86_64.rs +++ b/crates/runtime/src/traphandlers/backtrace/x86_64.rs @@ -7,23 +7,8 @@ pub unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize { // And the current frame pointer points to the next older frame pointer. pub const NEXT_OLDER_FP_FROM_FP_OFFSET: usize = 0; -pub fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool { - // When the FP is just below the SP (because we are in a function prologue - // where the `call` pushed the return pointer, but the callee hasn't pushed - // the frame pointer yet) we are done. - fp == first_wasm_sp - 8 -} - pub fn assert_entry_sp_is_aligned(sp: usize) { - // The stack pointer should always be aligned to 16 bytes *except* inside - // function prologues where the return PC is pushed to the stack but before - // the old frame pointer has been saved to the stack via `push rbp`. And - // this happens to be exactly where we are inside of our host-to-Wasm - // trampoline that records the value of SP when we first enter - // Wasm. Therefore, the SP should *always* be 8-byte aligned but *never* - // 16-byte aligned. - assert_eq!(sp % 8, 0); - assert_eq!(sp % 16, 8); + assert_eq!(sp % 16, 0, "stack should always be aligned to 16"); } pub fn assert_fp_is_aligned(fp: usize) { diff --git a/crates/runtime/src/vmcontext.rs b/crates/runtime/src/vmcontext.rs index 9fd0ba5aa511..59e174e1fcf2 100644 --- a/crates/runtime/src/vmcontext.rs +++ b/crates/runtime/src/vmcontext.rs @@ -11,24 +11,72 @@ use std::marker; use std::ptr::NonNull; use std::sync::atomic::{AtomicUsize, Ordering}; use std::u32; -pub use vm_host_func_context::VMHostFuncContext; -use wasmtime_environ::DefinedMemoryIndex; +pub use vm_host_func_context::{VMArrayCallHostFuncContext, VMNativeCallHostFuncContext}; +use wasmtime_environ::{DefinedMemoryIndex, VMCONTEXT_MAGIC}; -pub const VMCONTEXT_MAGIC: u32 = u32::from_le_bytes(*b"core"); +/// A function pointer that exposes the array calling convention. +/// +/// Regardless of the underlying Wasm function type, all functions using the +/// array calling convention have the same Rust signature. +/// +/// Arguments: +/// +/// * Callee `vmctx` for the function itself. +/// +/// * Caller's `vmctx` (so that host functions can access the linear memory of +/// their Wasm callers). +/// +/// * A pointer to a buffer of `ValRaw`s where both arguments are passed into +/// this function, and where results are returned from this function. +/// +/// * The capacity of the `ValRaw` buffer. Must always be at least +/// `max(len(wasm_params), len(wasm_results))`. +pub type VMArrayCallFunction = + unsafe extern "C" fn(*mut VMOpaqueContext, *mut VMOpaqueContext, *mut ValRaw, usize); + +/// A function pointer that exposes the native calling convention. +/// +/// Different Wasm function types end up mapping to different Rust function +/// types, so this isn't simply a type alias the way that `VMArrayCallFunction` +/// is. +/// +/// This is the default calling convention for the target (e.g. System-V or +/// fast-call) except multiple return values are handled by returning the first +/// return value in a register and everything else through a return-pointer. 
+#[repr(transparent)]
+pub struct VMNativeCallFunction(VMFunctionBody);
+
+/// A function pointer that exposes the Wasm calling convention.
+///
+/// In practice, different Wasm function types end up mapping to different Rust
+/// function types, so this isn't simply a type alias the way that
+/// `VMArrayCallFunction` is. However, the exact details of the calling
+/// convention are left to the Wasm compiler (e.g. Cranelift or Winch). Runtime
+/// code never does anything with these function pointers except shuffle them
+/// around and pass them back to Wasm.
+#[repr(transparent)]
+pub struct VMWasmCallFunction(VMFunctionBody);
 
 /// An imported function.
 #[derive(Debug, Copy, Clone)]
 #[repr(C)]
 pub struct VMFunctionImport {
-    /// A pointer to the imported function body.
-    pub body: NonNull<VMFunctionBody>,
+    /// Function pointer to use when calling this imported function from Wasm.
+    pub wasm_call: NonNull<VMWasmCallFunction>,
+
+    /// Function pointer to use when calling this imported function from
+    /// native code.
+    pub native_call: NonNull<VMNativeCallFunction>,
+
+    /// Function pointer to use when calling this imported function with the
+    /// "array" calling convention that `Func::new` et al use.
+    pub array_call: VMArrayCallFunction,
 
     /// The VM state associated with this function.
     ///
-    /// For core wasm instances this will be `*mut VMContext` but for the
-    /// upcoming implementation of the component model this will be something
-    /// else. The actual definition of what this pointer points to depends on
-    /// the definition of `func_ptr` and what compiled it.
+    /// For Wasm functions defined by core wasm instances this will be `*mut
+    /// VMContext`, but for lifted/lowered component model functions this will
+    /// be a `VMComponentContext`, and for a host function it will be a
+    /// `VMHostFuncContext`, etc.
     pub vmctx: *mut VMOpaqueContext,
 }
 
@@ -53,8 +101,16 @@ mod test_vmfunction_import {
             usize::from(offsets.size_of_vmfunction_import())
         );
         assert_eq!(
-            offset_of!(VMFunctionImport, body),
-            usize::from(offsets.vmfunction_import_body())
+            offset_of!(VMFunctionImport, wasm_call),
+            usize::from(offsets.vmfunction_import_wasm_call())
+        );
+        assert_eq!(
+            offset_of!(VMFunctionImport, native_call),
+            usize::from(offsets.vmfunction_import_native_call())
+        );
+        assert_eq!(
+            offset_of!(VMFunctionImport, array_call),
+            usize::from(offsets.vmfunction_import_array_call())
         );
         assert_eq!(
             offset_of!(VMFunctionImport, vmctx),
@@ -577,22 +633,50 @@ impl Default for VMSharedSignatureIndex {
     }
 }
 
-/// The VM caller-checked "anyfunc" record, for caller-side signature checking.
-/// It consists of the actual function pointer and a signature id to be checked
-/// by the caller.
+/// The VM caller-checked "funcref" record, for caller-side signature checking.
+///
+/// It consists of function pointer(s), a signature id to be checked by the
+/// caller, and the vmctx closure associated with this function.
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct VMCallerCheckedFuncRef {
-    /// Function body.
-    pub func_ptr: NonNull<VMFunctionBody>,
+    /// Function pointer for this funcref if being called via the native calling
+    /// convention.
+    pub native_call: NonNull<VMNativeCallFunction>,
+
+    /// Function pointer for this funcref if being called via the "array"
+    /// calling convention that `Func::new` et al use.
+    pub array_call: VMArrayCallFunction,
+
+    /// Function pointer for this funcref if being called via the calling
+    /// convention we use when compiling Wasm.
+    ///
+    /// Most functions come with a function pointer that we can use when they
+    /// are called from Wasm. 
The notable exception is when we `Func::wrap` a
+    /// host function, and we don't have a Wasm compiler on hand to compile a
+    /// Wasm-to-native trampoline for the function. In this case, we leave
+    /// `wasm_call` empty until the function is passed as an import to Wasm (or
+    /// otherwise exposed to Wasm via tables/globals). At this point, we look up
+    /// a Wasm-to-native trampoline for the function in the Wasm module's
+    /// compiled code and use that to fill in `VMFunctionImport::wasm_call`.
+    /// **However**, there is no guarantee that the Wasm module has a trampoline
+    /// for this function's signature. The Wasm module only has trampolines for
+    /// its own types, and if this function isn't of one of those types, then
+    /// the Wasm module will not have a trampoline for it. This is okay, because
+    /// it means that the Wasm cannot actually call this function. But it does
+    /// mean that this field needs to be an `Option` even though it is non-null
+    /// the vast majority of the time.
+    pub wasm_call: Option<NonNull<VMWasmCallFunction>>,
+
     /// Function signature id.
     pub type_index: VMSharedSignatureIndex,
+
     /// The VM state associated with this function.
     ///
-    /// For core wasm instances this will be `*mut VMContext` but for the
-    /// upcoming implementation of the component model this will be something
-    /// else. The actual definition of what this pointer points to depends on
-    /// the definition of `func_ptr` and what compiled it.
+    /// The actual definition of what this pointer points to depends on the
+    /// function being referenced: for core Wasm functions, this is a `*mut
+    /// VMContext`, for host functions it is a `*mut VMHostFuncContext`, and for
+    /// component functions it is a `*mut VMComponentContext`.
     pub vmctx: *mut VMOpaqueContext,
     // If more elements are added here, remember to add offset_of tests below!
 }
@@ -608,7 +692,7 @@ mod test_vmcaller_checked_anyfunc {
     use wasmtime_environ::{Module, PtrSize, VMOffsets};
 
     #[test]
-    fn check_vmcaller_checked_anyfunc_offsets() {
+    fn check_vmcaller_checked_func_ref_offsets() {
         let module = Module::new();
         let offsets = VMOffsets::new(size_of::<*mut u8>() as u8, &module);
         assert_eq!(
@@ -616,8 +700,16 @@ mod test_vmcaller_checked_anyfunc {
             usize::from(offsets.ptr.size_of_vmcaller_checked_func_ref())
         );
         assert_eq!(
-            offset_of!(VMCallerCheckedFuncRef, func_ptr),
-            usize::from(offsets.ptr.vmcaller_checked_func_ref_func_ptr())
+            offset_of!(VMCallerCheckedFuncRef, native_call),
+            usize::from(offsets.ptr.vmcaller_checked_func_ref_native_call())
+        );
+        assert_eq!(
+            offset_of!(VMCallerCheckedFuncRef, array_call),
+            usize::from(offsets.ptr.vmcaller_checked_func_ref_array_call())
+        );
+        assert_eq!(
+            offset_of!(VMCallerCheckedFuncRef, wasm_call),
+            usize::from(offsets.ptr.vmcaller_checked_func_ref_wasm_call())
         );
         assert_eq!(
             offset_of!(VMCallerCheckedFuncRef, type_index),
@@ -793,6 +885,16 @@ mod test_vmruntime_limits {
     use std::mem::size_of;
     use wasmtime_environ::{Module, PtrSize, VMOffsets};
 
+    #[test]
+    fn vmctx_runtime_limits_offset() {
+        let module = Module::new();
+        let offsets = VMOffsets::new(size_of::<*mut u8>() as u8, &module);
+        assert_eq!(
+            offsets.vmctx_runtime_limits(),
+            offsets.ptr.vmcontext_runtime_limits().into()
+        );
+    }
+
     #[test]
     fn field_offsets() {
         let module = Module::new();
@@ -1101,45 +1203,6 @@ impl ValRaw {
     }
 }
 
-/// Type definition of the trampoline used to enter WebAssembly from the host.
-///
-/// This function type is what's generated for the entry trampolines that are
-/// compiled into a WebAssembly module's image. 
Note that trampolines are not -/// always used by Wasmtime since the `TypedFunc` API allows bypassing the -/// trampoline and directly calling the underlying wasm function (at the time of -/// this writing). -/// -/// The trampoline's arguments here are: -/// -/// * `*mut VMOpaqueContext` - this a contextual pointer defined within the -/// context of the receiving function pointer. For now this is always `*mut -/// VMContext` but with the component model it may be the case that this is a -/// different type of pointer. -/// -/// * `*mut VMContext` - this is the "caller" context, which at this time is -/// always unconditionally core wasm (even in the component model). This -/// contextual pointer cannot be `NULL` and provides information necessary to -/// resolve the caller's context for the `Caller` API in Wasmtime. -/// -/// * `*const VMFunctionBody` - this is the indirect function pointer which is -/// the actual target function to invoke. This function uses the System-V ABI -/// for its argumenst and a semi-custom ABI for the return values (one return -/// value is returned directly, multiple return values have the first one -/// returned directly and remaining ones returned indirectly through a -/// stack pointer). This function pointer may be Cranelift-compiled code or it -/// may also be a host-compiled trampoline (e.g. when a host function calls a -/// host function through the `wasmtime::Func` wrapper). The definition of the -/// first argument of this function depends on what this receiving function -/// pointer desires. -/// -/// * `*mut ValRaw` - this is storage space for both arguments and results of -/// the function. The trampoline will read the arguments from this array to -/// pass to the function pointer provided. The results are then written to the -/// array afterwards (both reads and writes start at index 0). It's the -/// caller's responsibility to make sure this array is appropriately sized. -pub type VMTrampoline = - unsafe extern "C" fn(*mut VMOpaqueContext, *mut VMContext, *const VMFunctionBody, *mut ValRaw); - /// An "opaque" version of `VMContext` which must be explicitly casted to a /// target context. /// @@ -1169,7 +1232,17 @@ impl VMOpaqueContext { /// Helper function to clearly indicate that casts are desired. #[inline] - pub fn from_vm_host_func_context(ptr: *mut VMHostFuncContext) -> *mut VMOpaqueContext { + pub fn from_vm_array_call_host_func_context( + ptr: *mut VMArrayCallHostFuncContext, + ) -> *mut VMOpaqueContext { + ptr.cast() + } + + /// Helper function to clearly indicate that casts are desired. + #[inline] + pub fn from_vm_native_call_host_func_context( + ptr: *mut VMNativeCallHostFuncContext, + ) -> *mut VMOpaqueContext { ptr.cast() } } diff --git a/crates/runtime/src/vmcontext/vm_host_func_context.rs b/crates/runtime/src/vmcontext/vm_host_func_context.rs index 1eedc8dfb88a..a74223bc5516 100644 --- a/crates/runtime/src/vmcontext/vm_host_func_context.rs +++ b/crates/runtime/src/vmcontext/vm_host_func_context.rs @@ -2,34 +2,32 @@ //! //! Keep in sync with `wasmtime_environ::VMHostFuncOffsets`. -use wasmtime_environ::VM_HOST_FUNC_MAGIC; +use crate::VMCallerCheckedFuncRef; -use super::{VMCallerCheckedFuncRef, VMFunctionBody, VMOpaqueContext, VMSharedSignatureIndex}; -use std::{ - any::Any, - ptr::{self, NonNull}, -}; +use super::VMOpaqueContext; +use std::any::Any; +use wasmtime_environ::{VM_ARRAY_CALL_HOST_FUNC_MAGIC, VM_NATIVE_CALL_HOST_FUNC_MAGIC}; -/// The `VM*Context` for host functions. +/// The `VM*Context` for array-call host functions. 
 ///
-/// Its `magic` field must always be `wasmtime_environ::VM_HOST_FUNC_MAGIC`, and
-/// this is how you can determine whether a `VM*Context` is a
-/// `VMHostFuncContext` versus a different kind of context.
+/// Its `magic` field must always be
+/// `wasmtime_environ::VM_ARRAY_CALL_HOST_FUNC_MAGIC`, and this is how you can
+/// determine whether a `VM*Context` is a `VMArrayCallHostFuncContext` versus a
+/// different kind of context.
 #[repr(C)]
-pub struct VMHostFuncContext {
+pub struct VMArrayCallHostFuncContext {
     magic: u32,
     // _padding: u32, // (on 64-bit systems)
-    pub(crate) host_func: NonNull<VMFunctionBody>,
-    wasm_to_host_trampoline: VMCallerCheckedFuncRef,
+    pub(crate) funcref: VMCallerCheckedFuncRef,
     host_state: Box<dyn Any + Send + Sync>,
 }
 
 // Declare that this type is send/sync, it's the responsibility of
 // `VMHostFuncContext::new` callers to uphold this guarantee.
-unsafe impl Send for VMHostFuncContext {}
-unsafe impl Sync for VMHostFuncContext {}
+unsafe impl Send for VMArrayCallHostFuncContext {}
+unsafe impl Sync for VMArrayCallHostFuncContext {}
 
-impl VMHostFuncContext {
+impl VMArrayCallHostFuncContext {
     /// Create the context for the given host function.
     ///
     /// # Safety
@@ -37,44 +35,108 @@ impl VMHostFuncContext {
     /// The `host_func` must be a pointer to a host (not Wasm) function and it
     /// must be `Send` and `Sync`.
     pub unsafe fn new(
-        host_func: NonNull<VMFunctionBody>,
-        signature: VMSharedSignatureIndex,
+        funcref: VMCallerCheckedFuncRef,
         host_state: Box<dyn Any + Send + Sync>,
-    ) -> Box<VMHostFuncContext> {
-        let wasm_to_host_trampoline = VMCallerCheckedFuncRef {
-            func_ptr: NonNull::new(crate::trampolines::wasm_to_host_trampoline as _).unwrap(),
-            type_index: signature,
-            vmctx: ptr::null_mut(),
-        };
-        let mut ctx = Box::new(VMHostFuncContext {
-            magic: wasmtime_environ::VM_HOST_FUNC_MAGIC,
-            host_func,
-            wasm_to_host_trampoline,
+    ) -> Box<VMArrayCallHostFuncContext> {
+        debug_assert!(funcref.vmctx.is_null());
+        let mut ctx = Box::new(VMArrayCallHostFuncContext {
+            magic: wasmtime_environ::VM_ARRAY_CALL_HOST_FUNC_MAGIC,
+            funcref,
             host_state,
         });
-        ctx.wasm_to_host_trampoline.vmctx =
-            VMOpaqueContext::from_vm_host_func_context(&*ctx as *const _ as *mut _);
+        ctx.funcref.vmctx = VMOpaqueContext::from_vm_array_call_host_func_context(&mut *ctx);
         ctx
     }
 
-    /// Get the Wasm-to-host trampoline for this host function context.
-    pub fn wasm_to_host_trampoline(&self) -> NonNull<VMCallerCheckedFuncRef> {
-        NonNull::from(&self.wasm_to_host_trampoline)
+    /// Get the host state for this host function context.
+    #[inline]
+    pub fn host_state(&self) -> &(dyn Any + Send + Sync) {
+        &*self.host_state
+    }
+
+    /// Get this context's funcref.
+    #[inline]
+    pub fn funcref(&self) -> &VMCallerCheckedFuncRef {
+        &self.funcref
+    }
+
+    /// Helper function to cast between context types using a debug assertion to
+    /// protect against some mistakes.
+    #[inline]
+    pub unsafe fn from_opaque(opaque: *mut VMOpaqueContext) -> *mut VMArrayCallHostFuncContext {
+        // See comments in `VMContext::from_opaque` for this debug assert
+        debug_assert_eq!((*opaque).magic, VM_ARRAY_CALL_HOST_FUNC_MAGIC);
+        opaque.cast()
+    }
+}
+
+/// The `VM*Context` for native-call host functions.
+///
+/// Its `magic` field must always be
+/// `wasmtime_environ::VM_NATIVE_CALL_HOST_FUNC_MAGIC`, and this is how you can
+/// determine whether a `VM*Context` is a `VMNativeCallHostFuncContext` versus a
+/// different kind of context.
+#[repr(C)]
+pub struct VMNativeCallHostFuncContext {
+    magic: u32,
+    // _padding: u32, // (on 64-bit systems)
+    funcref: VMCallerCheckedFuncRef,
+    host_state: Box<dyn Any + Send + Sync>,
+}
+
+#[test]
+fn vmnative_call_host_func_context_offsets() {
+    use memoffset::offset_of;
+    use wasmtime_environ::{HostPtr, PtrSize};
+    assert_eq!(
+        usize::from(HostPtr.vmnative_call_host_func_context_funcref()),
+        offset_of!(VMNativeCallHostFuncContext, funcref)
+    );
+}
+
+// Declare that this type is send/sync, it's the responsibility of
+// `VMHostFuncContext::new` callers to uphold this guarantee.
+unsafe impl Send for VMNativeCallHostFuncContext {}
+unsafe impl Sync for VMNativeCallHostFuncContext {}
+
+impl VMNativeCallHostFuncContext {
+    /// Create the context for the given host function.
+    ///
+    /// # Safety
+    ///
+    /// The `host_func` must be a pointer to a host (not Wasm) function and it
+    /// must be `Send` and `Sync`.
+    pub unsafe fn new(
+        funcref: VMCallerCheckedFuncRef,
+        host_state: Box<dyn Any + Send + Sync>,
+    ) -> Box<VMNativeCallHostFuncContext> {
+        let mut ctx = Box::new(VMNativeCallHostFuncContext {
+            magic: wasmtime_environ::VM_NATIVE_CALL_HOST_FUNC_MAGIC,
+            funcref,
+            host_state,
+        });
+        ctx.funcref.vmctx = VMOpaqueContext::from_vm_native_call_host_func_context(&mut *ctx);
+        ctx
     }
 
     /// Get the host state for this host function context.
+    #[inline]
     pub fn host_state(&self) -> &(dyn Any + Send + Sync) {
         &*self.host_state
     }
-}
 
-impl VMHostFuncContext {
+    /// Get this context's funcref.
+    #[inline]
+    pub fn funcref(&self) -> &VMCallerCheckedFuncRef {
+        &self.funcref
+    }
+
     /// Helper function to cast between context types using a debug assertion to
     /// protect against some mistakes.
     #[inline]
-    pub unsafe fn from_opaque(opaque: *mut VMOpaqueContext) -> *mut VMHostFuncContext {
+    pub unsafe fn from_opaque(opaque: *mut VMOpaqueContext) -> *mut VMNativeCallHostFuncContext {
         // See comments in `VMContext::from_opaque` for this debug assert
-        debug_assert_eq!((*opaque).magic, VM_HOST_FUNC_MAGIC);
+        debug_assert_eq!((*opaque).magic, VM_NATIVE_CALL_HOST_FUNC_MAGIC);
         opaque.cast()
     }
 }
diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml
index 3ea14a0e2979..87abea69ee7a 100644
--- a/crates/wasmtime/Cargo.toml
+++ b/crates/wasmtime/Cargo.toml
@@ -44,6 +44,7 @@ rayon = { version = "1.0", optional = true }
 object = { workspace = true }
 async-trait = { workspace = true, optional = true }
 encoding_rs = { version = "0.8.31", optional = true }
+bumpalo = "3.11.0"
 
 [target.'cfg(target_os = "windows")'.dependencies.windows-sys]
 workspace = true
@@ -134,4 +135,3 @@ component-model = [
     "dep:wasmtime-component-util",
     "dep:encoding_rs",
 ]
-
diff --git a/crates/wasmtime/src/component/component.rs b/crates/wasmtime/src/component/component.rs
index 8b534ede0a21..6067de1d7b7e 100644
--- a/crates/wasmtime/src/component/component.rs
+++ b/crates/wasmtime/src/component/component.rs
@@ -1,21 +1,24 @@
 use crate::code::CodeObject;
+use crate::module::ModuleFunctionIndices;
 use crate::signatures::SignatureCollection;
 use crate::{Engine, Module};
 use anyhow::{bail, Context, Result};
 use serde::{Deserialize, Serialize};
-use std::collections::{BTreeSet, HashMap};
+use std::any::Any;
 use std::fs;
 use std::mem;
 use std::path::Path;
 use std::ptr::NonNull;
 use std::sync::Arc;
 use wasmtime_environ::component::{
-    ComponentTypes, GlobalInitializer, LoweredIndex, RuntimeAlwaysTrapIndex,
+    AllCallFunc, ComponentTypes, GlobalInitializer, LoweredIndex, RuntimeAlwaysTrapIndex,
     RuntimeTranscoderIndex, StaticModuleIndex, Translator,
 };
-use wasmtime_environ::{EntityRef, FunctionLoc, 
ObjectKind, PrimaryMap, ScopeVec, SignatureIndex};
+use wasmtime_environ::{FunctionLoc, ObjectKind, PrimaryMap, ScopeVec};
 use wasmtime_jit::{CodeMemory, CompiledModuleInfo};
-use wasmtime_runtime::{MmapVec, VMFunctionBody, VMTrampoline};
+use wasmtime_runtime::{
+    MmapVec, VMArrayCallFunction, VMFunctionBody, VMNativeCallFunction, VMWasmCallFunction,
+};
 
 /// A compiled WebAssembly Component.
 //
@@ -49,10 +52,15 @@ struct CompiledComponentInfo {
     /// Where lowered function trampolines are located within the `text`
     /// section of `code_memory`.
     ///
-    /// These trampolines are the function pointer within the
-    /// `VMCallerCheckedFuncRef` and will delegate indirectly to a host function
-    /// pointer when called.
-    lowerings: PrimaryMap<LoweredIndex, FunctionLoc>,
+    /// These are the
+    ///
+    /// 1. Wasm-call,
+    /// 2. array-call, and
+    /// 3. native-call
+    ///
+    /// function pointers that end up in a `VMCallerCheckedFuncRef` for each
+    /// lowering.
+    lowerings: PrimaryMap<LoweredIndex, AllCallFunc<FunctionLoc>>,
 
     /// Where the "always trap" functions are located within the `text` section
     /// of `code_memory`.
@@ -61,15 +69,17 @@ struct CompiledComponentInfo {
     /// functions that are `canon lift`'d then immediately `canon lower`'d. The
     /// `u32` value here is the offset of the trap instruction from the start fo
     /// the function.
-    always_trap: PrimaryMap<RuntimeAlwaysTrapIndex, FunctionLoc>,
+    always_trap: PrimaryMap<RuntimeAlwaysTrapIndex, AllCallFunc<FunctionLoc>>,
 
     /// Where all the cranelift-generated transcode functions are located in the
     /// compiled image of this component.
-    transcoders: PrimaryMap<RuntimeTranscoderIndex, FunctionLoc>,
+    transcoders: PrimaryMap<RuntimeTranscoderIndex, AllCallFunc<FunctionLoc>>,
+}
 
-    /// Extra trampolines other than those contained in static modules
-    /// necessary for this component.
-    trampolines: Vec<(SignatureIndex, FunctionLoc)>,
+pub(crate) struct AllCallFuncPointers {
+    pub wasm_call: NonNull<VMWasmCallFunction>,
+    pub array_call: VMArrayCallFunction,
+    pub native_call: NonNull<VMNativeCallFunction>,
 }
 
 #[derive(Serialize, Deserialize)]
@@ -188,42 +198,26 @@ impl Component {
 
         // Compile all core wasm modules, in parallel, which will internally
        // compile all their functions in parallel as well.
-        let module_funcs = engine.run_maybe_parallel(modules.values_mut().collect(), |module| {
+        let compilations = engine.run_maybe_parallel(modules.values_mut().collect(), |module| {
             Module::compile_functions(engine, module, types.module_types())
         })?;
 
-        // Compile all host-to-wasm trampolines where the required set of
-        // trampolines is unioned from all core wasm modules plus what the
-        // component itself needs.
-        let module_trampolines = modules
-            .iter()
-            .flat_map(|(_, m)| m.exported_signatures.iter().copied())
-            .collect::<BTreeSet<_>>();
-        let trampolines = module_trampolines
-            .iter()
-            .copied()
-            .chain(
-                // All lowered functions will require a trampoline to be available in
-                // case they're used when entering wasm. For example a lowered function
-                // could be immediately lifted in which case we'll need a trampoline to
-                // call that lowered function.
-                //
-                // Most of the time trampolines can come from the core wasm modules
-                // since lifted functions come from core wasm. For these esoteric cases
-                // though we may have to compile trampolines specifically into the
-                // component object as well in case core wasm doesn't provide the
-                // necessary trampoline.
- component.initializers.iter().filter_map(|init| match init { - GlobalInitializer::LowerImport(i) => Some(i.canonical_abi), - GlobalInitializer::AlwaysTrap(i) => Some(i.canonical_abi), - _ => None, - }), - ) - .collect::>(); - let compiled_trampolines = engine - .run_maybe_parallel(trampolines.iter().cloned().collect(), |i| { - compiler.compile_host_to_wasm_trampoline(&types[i]) - })?; + let mut compiled_funcs = vec![]; + let wasm_to_native_trampoline_indices = Module::compile_wasm_to_native_trampolines( + engine, + modules.values().as_slice(), + types.module_types(), + &mut compiled_funcs, + )?; + + let mut indices = vec![]; + for ((i, translation), compilation) in modules.into_iter().zip(compilations) { + let prefix = format!("wasm_{}_", i.as_u32()); + indices.push(( + compiled_funcs.len(), + ModuleFunctionIndices::new(translation, compilation, &prefix, &mut compiled_funcs), + )); + } // Compile all transcoders required which adapt from a // core-wasm-specific ABI (e.g. 32 or 64-bit) into the host transcoder @@ -236,11 +230,17 @@ impl Component { _ => None, }) .collect(); - let transcoders = engine.run_maybe_parallel(transcoders, |info| { - compiler - .component_compiler() - .compile_transcoder(&component, info, &types) - })?; + let transcoder_indices = flatten_all_calls( + &mut compiled_funcs, + engine.run_maybe_parallel(transcoders, |info| -> Result<_> { + Ok(( + info.symbol_name(), + compiler + .component_compiler() + .compile_transcoder(&component, info, &types)?, + )) + })?, + ); // Compile all "always trap" functions which are small typed shims that // exits to solely trap immediately for components. @@ -252,11 +252,17 @@ impl Component { _ => None, }) .collect(); - let always_trap = engine.run_maybe_parallel(always_trap, |info| { - compiler - .component_compiler() - .compile_always_trap(&types[info.canonical_abi]) - })?; + let always_trap_indices = flatten_all_calls( + &mut compiled_funcs, + engine.run_maybe_parallel(always_trap, |info| -> Result<_> { + Ok(( + info.symbol_name(), + compiler + .component_compiler() + .compile_always_trap(&types[info.canonical_abi])?, + )) + })?, + ); // Compile all "lowerings" which are adapters that go from core wasm // into the host which will process the canonical ABI. @@ -268,134 +274,60 @@ impl Component { _ => None, }) .collect(); - let lowerings = engine.run_maybe_parallel(lowerings, |lowering| { - compiler - .component_compiler() - .compile_lowered_trampoline(&component, lowering, &types) - })?; - - // Collect the results of all of the function-based compilations above - // into one large list of functions to get appended into the text - // section of the final module. 
- let mut funcs = Vec::new(); - let mut module_func_start_index = Vec::new(); - let mut func_index_to_module_index = Vec::new(); - let mut func_infos = Vec::new(); - for (i, list) in module_funcs.into_iter().enumerate() { - module_func_start_index.push(func_index_to_module_index.len()); - let mut infos = Vec::new(); - for (j, (info, func)) in list.into_iter().enumerate() { - func_index_to_module_index.push(i); - let name = format!("_wasm{i}_function{j}"); - funcs.push((name, func)); - infos.push(info); - } - func_infos.push(infos); - } - for (sig, func) in trampolines.iter().zip(compiled_trampolines) { - let name = format!("_wasm_trampoline{}", sig.as_u32()); - funcs.push((name, func)); - } - let ntranscoders = transcoders.len(); - for (i, func) in transcoders.into_iter().enumerate() { - let name = format!("_wasm_component_transcoder{i}"); - funcs.push((name, func)); - } - let nalways_trap = always_trap.len(); - for (i, func) in always_trap.into_iter().enumerate() { - let name = format!("_wasm_component_always_trap{i}"); - funcs.push((name, func)); - } - let nlowerings = lowerings.len(); - for (i, func) in lowerings.into_iter().enumerate() { - let name = format!("_wasm_component_lowering{i}"); - funcs.push((name, func)); - } + let lowering_indices = flatten_all_calls( + &mut compiled_funcs, + engine.run_maybe_parallel(lowerings, |lowering| -> Result<_> { + Ok(( + lowering.symbol_name(), + compiler + .component_compiler() + .compile_lowered_trampoline(&component, lowering, &types)?, + )) + })?, + ); let mut object = compiler.object(ObjectKind::Component)?; - let locs = compiler.append_code(&mut object, &funcs, tunables, &|i, idx| { - // Map from the `i`th function which is requesting the relocation to - // the index in `modules` that the function belongs to. Using that - // metadata we can resolve `idx: FuncIndex` to a `DefinedFuncIndex` - // to the index of that module's function that's being called. - // - // Note that this will panic if `i` is a function beyond the initial - // set of core wasm module functions. That's intentional, however, - // since trampolines and otherwise should not have relocations to - // resolve. - let module_index = func_index_to_module_index[i]; - let defined_index = modules[StaticModuleIndex::new(module_index)] - .module - .defined_func_index(idx) - .unwrap(); - // Additionally use the module index to determine where that - // module's list of functions started at to factor in as an offset - // as well. - let offset = module_func_start_index[module_index]; - defined_index.index() + offset - })?; + let locs = compiler.append_code( + &mut object, + &compiled_funcs, + tunables, + &|caller_index, callee_index| { + // Find the index of the module that contains the function we are calling. + let module_index = indices.partition_point(|(i, _)| *i <= caller_index) - 1; + indices[module_index].1.resolve_reloc(callee_index) + }, + )?; engine.append_compiler_info(&mut object); engine.append_bti(&mut object); - // Disassemble the result of the appending to the text section, where - // each function is in the module, into respective maps. 
- let mut locs = locs.into_iter().map(|(_sym, loc)| loc); - let funcs = func_infos - .into_iter() - .map(|infos| { - infos - .into_iter() - .zip(&mut locs) - .collect::>() - }) - .collect::>(); - let signature_to_trampoline = trampolines - .iter() - .cloned() - .zip(&mut locs) - .collect::>(); - let transcoders = locs - .by_ref() - .take(ntranscoders) - .collect::>(); - let always_trap = locs - .by_ref() - .take(nalways_trap) - .collect::>(); - let lowerings = locs - .by_ref() - .take(nlowerings) - .collect::>(); - assert!(locs.next().is_none()); - // Convert all `ModuleTranslation` instances into `CompiledModuleInfo` // through an `ObjectBuilder` here. This is then used to create the // final `mmap` which is the final compilation artifact. let mut builder = wasmtime_jit::ObjectBuilder::new(object, tunables); let mut static_modules = PrimaryMap::new(); - for ((_, module), funcs) in modules.into_iter().zip(funcs) { - // Build the list of trampolines for this module from its set of - // exported signatures, which is the list of expected trampolines, - // from the set of trampolines that were compiled for everything - // within this component. - let trampolines = module - .exported_signatures - .iter() - .map(|sig| (*sig, signature_to_trampoline[sig])) - .collect(); - let info = builder.append(module, funcs, trampolines)?; + for (_, module_indices) in indices { + let info = module_indices.append_to_object( + &locs, + &wasm_to_native_trampoline_indices, + &mut builder, + )?; static_modules.push(info); } let info = CompiledComponentInfo { - always_trap, component, - lowerings, - trampolines: trampolines - .difference(&module_trampolines) - .map(|i| (*i, signature_to_trampoline[i])) + always_trap: always_trap_indices + .into_iter() + .map(|x| x.map(|i| locs[i].1)) + .collect(), + lowerings: lowering_indices + .into_iter() + .map(|x| x.map(|i| locs[i].1)) + .collect(), + transcoders: transcoder_indices + .into_iter() + .map(|x| x.map(|i| locs[i].1)) .collect(), - transcoders, }; let artifacts = ComponentArtifacts { info, @@ -429,20 +361,8 @@ impl Component { // Create a signature registration with the `Engine` for all trampolines // and core wasm types found within this component, both for the // component and for all included core wasm modules. - let signatures = SignatureCollection::new_for_module( - engine.signatures(), - types.module_types(), - static_modules - .iter() - .flat_map(|(_, m)| m.trampolines.iter().copied()) - .chain(info.trampolines.iter().copied()) - .map(|(sig, loc)| { - let trampoline = code_memory.text()[loc.start as usize..].as_ptr(); - (sig, unsafe { - mem::transmute::<*const u8, VMTrampoline>(trampoline) - }) - }), - ); + let signatures = + SignatureCollection::new_for_module(engine.signatures(), types.module_types()); // Assemble the `CodeObject` artifact which is shared by all core wasm // modules as well as the final component. 
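The index bookkeeping above -- push three trampolines per item into one flat `compiled_funcs` list, remember the base index, then convert each index into its final `FunctionLoc` with `x.map(|i| locs[i].1)` -- is easier to see in isolation. The following standalone sketch illustrates just that mapping pattern; this `AllCallFunc` is a simplified stand-in for the real `wasmtime_environ::component::AllCallFunc<T>`, and the offsets are hypothetical:

```rust
// Standalone illustration of the "one artifact, three entry points" mapping.
#[derive(Clone, Copy, Debug, PartialEq)]
struct AllCallFunc<T> {
    wasm_call: T,
    array_call: T,
    native_call: T,
}

impl<T> AllCallFunc<T> {
    // Apply `f` to each entry, mirroring the `x.map(|i| locs[i].1)` calls
    // that turn flat-list indices into text-section locations.
    fn map<U>(self, mut f: impl FnMut(T) -> U) -> AllCallFunc<U> {
        AllCallFunc {
            wasm_call: f(self.wasm_call),
            array_call: f(self.array_call),
            native_call: f(self.native_call),
        }
    }
}

fn main() {
    // Flattening pushes the three trampolines of one lowering as three
    // consecutive entries, so they are recorded as `i`, `i + 1`, `i + 2`.
    let indices = AllCallFunc {
        wasm_call: 0usize,
        array_call: 1,
        native_call: 2,
    };

    // Hypothetical text-section offsets standing in for `FunctionLoc`s.
    let text_offsets = [0x1000u32, 0x1040, 0x1080];
    let locs = indices.map(|i| text_offsets[i]);

    assert_eq!(
        locs,
        AllCallFunc {
            wasm_call: 0x1000,
            array_call: 0x1040,
            native_call: 0x1080,
        }
    );
}
```

Keeping the three entry points in one generic struct means a single `map` converts the bookkeeping wholesale, so the wasm/array/native pointers cannot drift out of sync.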
@@ -491,19 +411,55 @@ impl Component { self.inner.code.code_memory().text() } - pub(crate) fn lowering_ptr(&self, index: LoweredIndex) -> NonNull { - let info = &self.inner.info.lowerings[index]; - self.func(info) + pub(crate) fn lowering_ptrs(&self, index: LoweredIndex) -> AllCallFuncPointers { + let AllCallFunc { + wasm_call, + array_call, + native_call, + } = &self.inner.info.lowerings[index]; + AllCallFuncPointers { + wasm_call: self.func(wasm_call).cast(), + array_call: unsafe { + mem::transmute::, VMArrayCallFunction>( + self.func(array_call), + ) + }, + native_call: self.func(native_call).cast(), + } } - pub(crate) fn always_trap_ptr(&self, index: RuntimeAlwaysTrapIndex) -> NonNull { - let loc = &self.inner.info.always_trap[index]; - self.func(loc) + pub(crate) fn always_trap_ptrs(&self, index: RuntimeAlwaysTrapIndex) -> AllCallFuncPointers { + let AllCallFunc { + wasm_call, + array_call, + native_call, + } = &self.inner.info.always_trap[index]; + AllCallFuncPointers { + wasm_call: self.func(wasm_call).cast(), + array_call: unsafe { + mem::transmute::, VMArrayCallFunction>( + self.func(array_call), + ) + }, + native_call: self.func(native_call).cast(), + } } - pub(crate) fn transcoder_ptr(&self, index: RuntimeTranscoderIndex) -> NonNull { - let info = &self.inner.info.transcoders[index]; - self.func(info) + pub(crate) fn transcoder_ptrs(&self, index: RuntimeTranscoderIndex) -> AllCallFuncPointers { + let AllCallFunc { + wasm_call, + array_call, + native_call, + } = &self.inner.info.transcoders[index]; + AllCallFuncPointers { + wasm_call: self.func(wasm_call).cast(), + array_call: unsafe { + mem::transmute::, VMArrayCallFunction>( + self.func(array_call), + ) + }, + native_call: self.func(native_call).cast(), + } } fn func(&self, loc: &FunctionLoc) -> NonNull { @@ -528,3 +484,36 @@ impl Component { Ok(self.code_object().code_memory().mmap().to_vec()) } } + +/// Flatten a list of grouped `AllCallFunc>` into the flat +/// list of all compiled functions. +fn flatten_all_calls( + compiled_funcs: &mut Vec<(String, Box)>, + all_calls: Vec<(String, AllCallFunc>)>, +) -> Vec> { + compiled_funcs.reserve(3 * all_calls.len()); + + all_calls + .into_iter() + .map( + |( + prefix, + AllCallFunc { + wasm_call, + array_call, + native_call, + }, + )| { + let i = compiled_funcs.len(); + compiled_funcs.push((format!("{prefix}_wasm_call"), wasm_call)); + compiled_funcs.push((format!("{prefix}_array_call"), array_call)); + compiled_funcs.push((format!("{prefix}_native_call"), native_call)); + AllCallFunc { + wasm_call: i + 0, + array_call: i + 1, + native_call: i + 2, + } + }, + ) + .collect() +} diff --git a/crates/wasmtime/src/component/func.rs b/crates/wasmtime/src/component/func.rs index 9a311a469ecd..9b4317e9937b 100644 --- a/crates/wasmtime/src/component/func.rs +++ b/crates/wasmtime/src/component/func.rs @@ -12,7 +12,7 @@ use wasmtime_environ::component::{ CanonicalAbiInfo, CanonicalOptions, ComponentTypes, CoreDef, RuntimeComponentInstanceIndex, TypeFuncIndex, MAX_FLAT_PARAMS, MAX_FLAT_RESULTS, }; -use wasmtime_runtime::{Export, ExportFunction, VMTrampoline}; +use wasmtime_runtime::{Export, ExportFunction}; /// A helper macro to safely map `MaybeUninit` to `MaybeUninit` where `U` /// is a field projection within `T`. 
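The hunks that follow drop the stored `VMTrampoline` and instead invoke the funcref's `array_call` pointer directly, threading an explicit `params_and_returns_capacity` alongside the `ValRaw` buffer. A rough sketch of the buffer contract those callers uphold is below; this `ValRaw` is a simplified stand-in for Wasmtime's real union of the same name, and the indirect call itself is elided:

```rust
// Sketch of the array-call buffer contract for a Wasm function of type
// (i32, i64) -> f32.
#[derive(Clone, Copy)]
union ValRaw {
    i32: i32,
    i64: i64,
    f32: u32, // stored as a bit pattern
}

fn main() {
    const PARAMS: usize = 2;
    const RESULTS: usize = 1;
    // The same buffer carries arguments in and results out, so its capacity
    // must be at least max(len(params), len(results)).
    const CAP: usize = if PARAMS > RESULTS { PARAMS } else { RESULTS };

    let mut space = [ValRaw { i64: 0 }; CAP];
    space[0] = ValRaw { i32: 7 };
    space[1] = ValRaw { i64: 8 };

    // An array-call function pointer would be invoked roughly as:
    //
    //     (funcref.array_call)(funcref.vmctx, caller_vmctx,
    //                          space.as_mut_ptr(), CAP);
    //
    // after which the result would be read back from the start of the buffer:
    let result = f32::from_bits(unsafe { space[0].f32 });
    let _ = result;
}
```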
@@ -92,14 +92,13 @@ pub struct Func(Stored); #[doc(hidden)] pub struct FuncData { - trampoline: VMTrampoline, export: ExportFunction, ty: TypeFuncIndex, types: Arc, options: Options, instance: Instance, component_instance: RuntimeComponentInstanceIndex, - post_return: Option<(ExportFunction, VMTrampoline)>, + post_return: Option, post_return_arg: Option, } @@ -116,20 +115,17 @@ impl Func { Export::Function(f) => f, _ => unreachable!(), }; - let trampoline = store.lookup_trampoline(unsafe { export.anyfunc.as_ref() }); let memory = options .memory .map(|i| NonNull::new(data.instance().runtime_memory(i)).unwrap()); let realloc = options.realloc.map(|i| data.instance().runtime_realloc(i)); let post_return = options.post_return.map(|i| { let anyfunc = data.instance().runtime_post_return(i); - let trampoline = store.lookup_trampoline(unsafe { anyfunc.as_ref() }); - (ExportFunction { anyfunc }, trampoline) + ExportFunction { anyfunc } }); let component_instance = options.instance; let options = unsafe { Options::new(store.id(), memory, realloc, options.string_encoding) }; Func(store.store_data_mut().insert(FuncData { - trampoline, export, options, ty, @@ -419,7 +415,6 @@ impl Func { LowerReturn: Copy, { let FuncData { - trampoline, export, options, instance, @@ -476,8 +471,8 @@ impl Func { crate::Func::call_unchecked_raw( store, export.anyfunc, - trampoline, space.as_mut_ptr().cast(), + mem::size_of_val(space) / mem::size_of::(), )?; // Note that `.assume_init_ref()` here is unsafe but we're relying @@ -619,12 +614,12 @@ impl Func { // Note that if this traps (returns an error) this function // intentionally leaves the instance in a "poisoned" state where it // can no longer be entered because `may_enter` is `false`. - if let Some((func, trampoline)) = post_return { + if let Some(func) = post_return { crate::Func::call_unchecked_raw( &mut store, func.anyfunc, - trampoline, &post_return_arg as *const ValRaw as *mut ValRaw, + 1, )?; } diff --git a/crates/wasmtime/src/component/instance.rs b/crates/wasmtime/src/component/instance.rs index 81c526c1c60e..cbc7529c0b63 100644 --- a/crates/wasmtime/src/component/instance.rs +++ b/crates/wasmtime/src/component/instance.rs @@ -16,6 +16,8 @@ use wasmtime_environ::component::{ use wasmtime_environ::{EntityIndex, EntityType, Global, GlobalInit, PrimaryMap, WasmType}; use wasmtime_runtime::component::{ComponentInstance, OwnedComponentInstance}; +use super::component::AllCallFuncPointers; + /// An instantiated component. 
/// /// This is similar to [`crate::Instance`] except that it represents an @@ -311,14 +313,23 @@ impl<'a> Instantiator<'a> { RuntimeImport::Func(func) => func, _ => unreachable!(), }; + let AllCallFuncPointers { + wasm_call, + array_call, + native_call, + } = self.component.lowering_ptrs(import.index); + let type_index = self + .component + .signatures() + .shared_signature(import.canonical_abi) + .expect("found unregistered signature"); self.data.state.set_lowering( import.index, func.lowering(), - self.component.lowering_ptr(import.index), - self.component - .signatures() - .shared_signature(import.canonical_abi) - .expect("found unregistered signature"), + wasm_call, + native_call, + array_call, + type_index, ); // The `func` provided here must be retained within the `Store` itself @@ -332,24 +343,38 @@ impl<'a> Instantiator<'a> { } fn always_trap(&mut self, trap: &AlwaysTrap) { - self.data.state.set_always_trap( - trap.index, - self.component.always_trap_ptr(trap.index), - self.component - .signatures() - .shared_signature(trap.canonical_abi) - .expect("found unregistered signature"), - ); + let AllCallFuncPointers { + wasm_call, + array_call, + native_call, + } = self.component.always_trap_ptrs(trap.index); + let signature = self + .component + .signatures() + .shared_signature(trap.canonical_abi) + .expect("found unregistered signature"); + self.data + .state + .set_always_trap(trap.index, wasm_call, native_call, array_call, signature); } fn transcoder(&mut self, transcoder: &Transcoder) { + let AllCallFuncPointers { + wasm_call, + array_call, + native_call, + } = self.component.transcoder_ptrs(transcoder.index); + let signature = self + .component + .signatures() + .shared_signature(transcoder.signature) + .expect("found unregistered signature"); self.data.state.set_transcoder( transcoder.index, - self.component.transcoder_ptr(transcoder.index), - self.component - .signatures() - .shared_signature(transcoder.signature) - .expect("found unregistered signature"), + wasm_call, + native_call, + array_call, + signature, ); } diff --git a/crates/wasmtime/src/externals.rs b/crates/wasmtime/src/externals.rs index 152ec643930f..39a81c840fd4 100644 --- a/crates/wasmtime/src/externals.rs +++ b/crates/wasmtime/src/externals.rs @@ -243,7 +243,7 @@ impl Global { bail!("value provided does not match the type of this global"); } unsafe { - let wasmtime_export = generate_global_export(store, &ty, val)?; + let wasmtime_export = generate_global_export(store, ty, val); Ok(Global::from_wasmtime_global(wasmtime_export, store)) } } @@ -320,7 +320,7 @@ impl Global { Val::F64(f) => *definition.as_u64_mut() = f, Val::FuncRef(f) => { *definition.as_anyfunc_mut() = f.map_or(ptr::null(), |f| { - f.caller_checked_anyfunc(store).as_ptr().cast() + f.caller_checked_func_ref(store).as_ptr().cast() }); } Val::ExternRef(x) => { @@ -496,7 +496,7 @@ impl Table { unsafe { match (*table).get(index)? 
{ runtime::TableElement::FuncRef(f) => { - let func = Func::from_caller_checked_anyfunc(store, f); + let func = Func::from_caller_checked_func_ref(store, f); Some(Val::FuncRef(func)) } runtime::TableElement::ExternRef(None) => Some(Val::ExternRef(None)), diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index 35f76922d1ae..77f4895cc12c 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -1,6 +1,6 @@ use crate::store::{StoreData, StoreOpaque, Stored}; use crate::{ - AsContext, AsContextMut, CallHook, Engine, Extern, FuncType, Instance, StoreContext, + AsContext, AsContextMut, CallHook, Engine, Extern, FuncType, Instance, Module, StoreContext, StoreContextMut, Val, ValRaw, ValType, }; use anyhow::{bail, Context as _, Error, Result}; @@ -8,11 +8,11 @@ use std::future::Future; use std::mem; use std::panic::{self, AssertUnwindSafe}; use std::pin::Pin; -use std::ptr::NonNull; +use std::ptr::{self, NonNull}; use std::sync::Arc; use wasmtime_runtime::{ - ExportFunction, InstanceHandle, VMCallerCheckedFuncRef, VMContext, VMFunctionBody, - VMFunctionImport, VMHostFuncContext, VMOpaqueContext, VMSharedSignatureIndex, VMTrampoline, + ExportFunction, InstanceHandle, VMArrayCallHostFuncContext, VMCallerCheckedFuncRef, VMContext, + VMFunctionImport, VMNativeCallHostFuncContext, VMOpaqueContext, VMSharedSignatureIndex, }; /// A WebAssembly function which can be called. @@ -181,6 +181,18 @@ pub struct Func(Stored); pub(crate) struct FuncData { kind: FuncKind, + // A pointer to the in-store `VMCallerCheckedFuncRef` for this function, if + // any. + // + // When a function is passed to Wasm but doesn't have a Wasm-to-native + // trampoline, we have to patch it in. But that requires mutating the + // `VMCallerCheckedFuncRef`, and this function could be shared across + // threads. So we instead copy and pin the `VMCallerCheckedFuncRef` into + // `StoreOpaque::func_refs`, where we can safely patch the field without + // worrying about synchronization and we hold a pointer to it here so we can + // reuse it rather than re-copy if it is passed to Wasm again. + in_store_func_ref: Option, + // This is somewhat expensive to load from the `Engine` and in most // optimized use cases (e.g. `TypedFunc`) it's not actually needed or it's // only needed rarely. To handle that this is an optionally-contained field @@ -191,6 +203,34 @@ pub(crate) struct FuncData { ty: Option>, } +use in_store_func_ref::InStoreFuncRef; +mod in_store_func_ref { + use super::*; + + #[derive(Copy, Clone)] + pub struct InStoreFuncRef(NonNull); + + impl InStoreFuncRef { + /// Create a new `InStoreFuncRef`. + /// + /// Safety: Callers must ensure that the given `func_ref` is pinned + /// inside a store, and that this resulting `InStoreFuncRef` is only + /// used in conjuction with that store and on its same thread. + pub unsafe fn new(func_ref: NonNull) -> InStoreFuncRef { + InStoreFuncRef(func_ref) + } + + pub fn func_ref(&self) -> NonNull { + self.0 + } + } + + // Safety: The `InStoreFuncRef::new` constructor puts the correctness + // responsibility on its callers. + unsafe impl Send for InStoreFuncRef {} + unsafe impl Sync for InStoreFuncRef {} +} + /// The three ways that a function can be created and referenced from within a /// store. enum FuncKind { @@ -199,10 +239,7 @@ enum FuncKind { /// function. The instance's `InstanceHandle` is already owned by the store /// and we just have some pointers into that which represent how to call the /// function. 
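The `InStoreFuncRef` wrapper above is an instance of a small, reusable pattern: keep the constructor `unsafe` so that the `Send`/`Sync` impls are justified by a caller-supplied invariant rather than by the type system. A minimal, self-contained sketch of the pattern (the `Pinned` name is illustrative, not from this patch):

    use std::ptr::NonNull;

    /// A pointer pinned inside some owner. `new` is `unsafe` so that the
    /// `Send`/`Sync` impls below are justified by the caller's promise that
    /// the pointee outlives this handle and is only touched on the owner's
    /// thread.
    #[derive(Copy, Clone)]
    pub struct Pinned<T>(NonNull<T>);

    impl<T> Pinned<T> {
        /// Safety: `ptr` must stay valid for this handle's lifetime and must
        /// only be used on the owner's thread.
        pub unsafe fn new(ptr: NonNull<T>) -> Pinned<T> {
            Pinned(ptr)
        }

        pub fn get(&self) -> NonNull<T> {
            self.0
        }
    }

    // Safety: deferred to the obligations placed on `Pinned::new`'s callers.
    unsafe impl<T> Send for Pinned<T> {}
    unsafe impl<T> Sync for Pinned<T> {}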
- StoreOwned { - trampoline: VMTrampoline, - export: ExportFunction, - }, + StoreOwned { export: ExportFunction }, /// A function is shared across possibly other stores, hence the `Arc`. This /// variant happens when a `Linker`-defined function is instantiated within @@ -495,7 +532,7 @@ impl Func { }) } - pub(crate) unsafe fn from_caller_checked_anyfunc( + pub(crate) unsafe fn from_caller_checked_func_ref( store: &mut StoreOpaque, raw: *mut VMCallerCheckedFuncRef, ) -> Option { @@ -881,27 +918,32 @@ impl Func { &self, mut store: impl AsContextMut, params_and_returns: *mut ValRaw, + params_and_returns_capacity: usize, ) -> Result<()> { let mut store = store.as_context_mut(); let data = &store.0.store_data()[self.0]; let anyfunc = data.export().anyfunc; - let trampoline = data.trampoline(); - Self::call_unchecked_raw(&mut store, anyfunc, trampoline, params_and_returns) + Self::call_unchecked_raw( + &mut store, + anyfunc, + params_and_returns, + params_and_returns_capacity, + ) } pub(crate) unsafe fn call_unchecked_raw( store: &mut StoreContextMut<'_, T>, anyfunc: NonNull, - trampoline: VMTrampoline, params_and_returns: *mut ValRaw, + params_and_returns_capacity: usize, ) -> Result<()> { invoke_wasm_and_catch_traps(store, |caller| { - let trampoline = wasmtime_runtime::prepare_host_to_wasm_trampoline(caller, trampoline); - trampoline( - anyfunc.as_ref().vmctx, - caller, - anyfunc.as_ref().func_ptr.as_ptr(), + let anyfunc = anyfunc.as_ref(); + (anyfunc.array_call)( + anyfunc.vmctx, + caller.cast::(), params_and_returns, + params_and_returns_capacity, ) }) } @@ -917,7 +959,7 @@ impl Func { /// caller must guarantee that `raw` is owned by the `store` provided and is /// valid within the `store`. pub unsafe fn from_raw(mut store: impl AsContextMut, raw: usize) -> Option { - Func::from_caller_checked_anyfunc(store.as_context_mut().0, raw as *mut _) + Func::from_caller_checked_func_ref(store.as_context_mut().0, raw as *mut _) } /// Extracts the raw value of this `Func`, which is owned by `store`. @@ -930,8 +972,9 @@ impl Func { /// The returned value is only valid for as long as the store is alive and /// this function is properly rooted within it. Additionally this function /// should not be liberally used since it's a very low-level knob. 
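For orientation, `call_unchecked_raw` above now enters Wasm through the array calling convention: a single `ValRaw` buffer carries arguments in and results back out, with its capacity passed alongside for callee-side checks. A simplified, self-contained sketch of the pointer shape (local stand-in types, not the real `wasmtime_runtime` definitions):

    /// Stand-in for the runtime's opaque context type.
    #[repr(C)]
    pub struct VMOpaqueContext {
        _opaque: [u8; 0],
    }

    /// Stand-in for the runtime's raw value union.
    #[derive(Copy, Clone)]
    #[repr(C)]
    pub union ValRaw {
        pub i32: i32,
        pub i64: i64,
        pub f32: u32,
        pub f64: u64,
    }

    /// The array calling convention: one buffer is used for both arguments
    /// and results, so its capacity must be at least the larger of the two.
    pub type ArrayCall = unsafe extern "C" fn(
        *mut VMOpaqueContext, // callee vmctx
        *mut VMOpaqueContext, // caller vmctx
        *mut ValRaw,          // args/results buffer
        usize,                // buffer capacity, in `ValRaw` slots
    );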
- pub unsafe fn to_raw(&self, store: impl AsContext) -> usize { - self.caller_checked_anyfunc(store.as_context().0).as_ptr() as usize + pub unsafe fn to_raw(&self, mut store: impl AsContextMut) -> usize { + self.caller_checked_func_ref(store.as_context_mut().0) + .as_ptr() as usize } /// Invokes this function with the `params` given, returning the results @@ -1050,7 +1093,7 @@ impl Func { } unsafe { - self.call_unchecked(&mut *store, values_vec.as_mut_ptr())?; + self.call_unchecked(&mut *store, values_vec.as_mut_ptr(), values_vec_size)?; } for ((i, slot), val) in results.iter_mut().enumerate().zip(&values_vec) { @@ -1063,31 +1106,74 @@ impl Func { } #[inline] - pub(crate) fn caller_checked_anyfunc( + pub(crate) fn caller_checked_func_ref( &self, - store: &StoreOpaque, + store: &mut StoreOpaque, ) -> NonNull { - store.store_data()[self.0].export().anyfunc + let func_data = &mut store.store_data_mut()[self.0]; + if let Some(in_store) = func_data.in_store_func_ref { + in_store.func_ref() + } else { + let func_ref = func_data.export().anyfunc; + unsafe { + if func_ref.as_ref().wasm_call.is_none() { + let func_ref = store.func_refs().push(func_ref.as_ref().clone()); + store.store_data_mut()[self.0].in_store_func_ref = + Some(InStoreFuncRef::new(func_ref)); + store.fill_func_refs(); + func_ref + } else { + func_ref + } + } + } } pub(crate) unsafe fn from_wasmtime_function( export: ExportFunction, store: &mut StoreOpaque, ) -> Self { - let anyfunc = export.anyfunc.as_ref(); - let trampoline = store.lookup_trampoline(&*anyfunc); - Func::from_func_kind(FuncKind::StoreOwned { trampoline, export }, store) + Func::from_func_kind(FuncKind::StoreOwned { export }, store) } fn from_func_kind(kind: FuncKind, store: &mut StoreOpaque) -> Self { - Func(store.store_data_mut().insert(FuncData { kind, ty: None })) + Func(store.store_data_mut().insert(FuncData { + kind, + in_store_func_ref: None, + ty: None, + })) } - pub(crate) fn vmimport(&self, store: &mut StoreOpaque) -> VMFunctionImport { + pub(crate) fn vmimport(&self, store: &mut StoreOpaque, module: &Module) -> VMFunctionImport { unsafe { - let f = self.caller_checked_anyfunc(store); + let f = { + let func_data = &mut store.store_data_mut()[self.0]; + // If we already patched this `funcref.wasm_call` and saved a + // copy in the store, use the patched version. Otherwise, use + // the potentially un-patched version. + if let Some(func_ref) = func_data.in_store_func_ref { + func_ref.func_ref() + } else { + func_data.export().anyfunc + } + }; VMFunctionImport { - body: f.as_ref().func_ptr, + wasm_call: if let Some(wasm_call) = f.as_ref().wasm_call { + wasm_call + } else { + // Assert that this is a native-call function, since those + // are the only ones that could be missing a `wasm_call` + // trampoline. + let _ = VMNativeCallHostFuncContext::from_opaque(f.as_ref().vmctx); + + let sig = self.sig_index(store.store_data()); + module.runtime_info().wasm_to_native_trampoline(sig).expect( + "must have a wasm-to-native trampoline for this signature if the Wasm \ + module is importing a function of this signature", + ) + }, + native_call: f.as_ref().native_call, + array_call: f.as_ref().array_call, vmctx: f.as_ref().vmctx, } } @@ -1674,11 +1760,10 @@ for_each_function_signature!(impl_host_abi); /// This trait should not be implemented by external users, it's only intended /// as an implementation detail of this crate. 
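The `wasm_call` selection in `vmimport` above is what keeps the everyday embedding pattern below working even though `Func::wrap` never runs a compiler: the importing module supplies the Wasm-to-native trampoline for the signature. A sketch against the public API (typical usage; exact signatures can differ slightly between wasmtime versions):

    fn import_a_host_func() -> anyhow::Result<()> {
        use wasmtime::{Engine, Func, Instance, Module, Store};

        let engine = Engine::default();
        let mut store = Store::new(&engine, ());

        // No compiler runs here, so this host function's funcref starts out
        // with `wasm_call: None`...
        let host = Func::wrap(&mut store, |x: i32| x + 1);

        // ...and this module carries a wasm-to-native trampoline for the
        // `(i32) -> i32` signature, which `vmimport` patches in at
        // instantiation time.
        let module = Module::new(
            &engine,
            r#"(module (import "" "" (func (param i32) (result i32))))"#,
        )?;
        let _instance = Instance::new(&mut store, &module, &[host.into()])?;
        Ok(())
    }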
pub trait IntoFunc: Send + Sync + 'static { + /// Convert this function into a `VM{Array,Native}CallHostFuncContext` and + /// internal `VMCallerCheckedFuncRef`. #[doc(hidden)] - fn into_func( - self, - engine: &Engine, - ) -> (Box, VMSharedSignatureIndex, VMTrampoline); + fn into_func(self, engine: &Engine) -> HostContext; } /// A structure representing the caller's context when creating a function @@ -1850,7 +1935,7 @@ macro_rules! impl_into_func { $($args: WasmTy,)* R: WasmRet, { - fn into_func(self, engine: &Engine) -> (Box, VMSharedSignatureIndex, VMTrampoline) { + fn into_func(self, engine: &Engine) -> HostContext { let f = move |_: Caller<'_, T>, $($args:$args),*| { self($($args),*) }; @@ -1866,17 +1951,18 @@ macro_rules! impl_into_func { $($args: WasmTy,)* R: WasmRet, { - fn into_func(self, engine: &Engine) -> (Box, VMSharedSignatureIndex, VMTrampoline) { - /// This shim is called by Wasm code, constructs a `Caller`, - /// calls the wrapped host function, and returns the translated - /// result back to Wasm. + fn into_func(self, engine: &Engine) -> HostContext { + /// This shim is a regular, non-closure function we can stuff + /// inside `VMCallerCheckedFuncRef::native_call`. /// - /// Note that this shim's ABI must *exactly* match that expected - /// by Cranelift, since Cranelift is generating raw function - /// calls directly to this function. - unsafe extern "C" fn wasm_to_host_shim( + /// It reads the actual callee closure out of + /// `VMNativeCallHostFuncContext::host_state`, forwards + /// arguments to that function, and finally forwards the results + /// back out to the caller. It also handles traps and panics + /// along the way. + unsafe extern "C" fn native_call_shim( vmctx: *mut VMOpaqueContext, - caller_vmctx: *mut VMContext, + caller_vmctx: *mut VMOpaqueContext, $( $args: $args::Abi, )* retptr: R::Retptr, ) -> R::Abi @@ -1897,8 +1983,9 @@ macro_rules! impl_into_func { // destructors. As a result anything requiring a destructor // should be part of this block, and the long-jmp-ing // happens after the block in handling `CallResult`. + let caller_vmctx = VMContext::from_opaque(caller_vmctx); let result = Caller::with(caller_vmctx, |mut caller| { - let vmctx = VMHostFuncContext::from_opaque(vmctx); + let vmctx = VMNativeCallHostFuncContext::from_opaque(vmctx); let state = (*vmctx).host_state(); // Double-check ourselves in debug mode, but we control @@ -1964,33 +2051,26 @@ macro_rules! impl_into_func { /// It reads the arguments out of the incoming `args` array, /// calls the given function pointer, and then stores the result /// back into the `args` array. - unsafe extern "C" fn host_to_wasm_trampoline<$($args,)* R>( + unsafe extern "C" fn array_call_trampoline( callee_vmctx: *mut VMOpaqueContext, - caller_vmctx: *mut VMContext, - ptr: *const VMFunctionBody, + caller_vmctx: *mut VMOpaqueContext, args: *mut ValRaw, + _args_len: usize ) where + F: Fn(Caller<'_, T>, $( $args ),*) -> R + 'static, $($args: WasmTy,)* R: WasmRet, { - let ptr = mem::transmute::< - *const VMFunctionBody, - unsafe extern "C" fn( - *mut VMOpaqueContext, - *mut VMContext, - $( $args::Abi, )* - R::Retptr, - ) -> R::Abi, - >(ptr); - let mut _n = 0; $( + debug_assert!(_n < _args_len); let $args = $args::abi_from_raw(args.add(_n)); _n += 1; )* + R::wrap_trampoline(args, |retptr| { - ptr(callee_vmctx, caller_vmctx, $( $args, )* retptr) + native_call_shim::(callee_vmctx, caller_vmctx, $( $args, )* retptr) }); } @@ -2001,17 +2081,23 @@ macro_rules! 
impl_into_func { let shared_signature_id = engine.signatures().register(ty.as_wasm_func_type()); - let trampoline = host_to_wasm_trampoline::<$($args,)* R>; + let array_call = array_call_trampoline::; + let native_call = NonNull::new(native_call_shim:: as *mut _).unwrap(); let ctx = unsafe { - VMHostFuncContext::new( - NonNull::new(wasm_to_host_shim:: as *mut _).unwrap(), - shared_signature_id, + VMNativeCallHostFuncContext::new( + VMCallerCheckedFuncRef { + native_call, + array_call, + wasm_call: None, + type_index: shared_signature_id, + vmctx: ptr::null_mut(), + }, Box::new(self), ) }; - (ctx, shared_signature_id, trampoline) + ctx.into() } } } @@ -2019,6 +2105,24 @@ macro_rules! impl_into_func { for_each_function_signature!(impl_into_func); +#[doc(hidden)] +pub enum HostContext { + Native(Box), + Array(Box), +} + +impl From> for HostContext { + fn from(ctx: Box) -> Self { + HostContext::Native(ctx) + } +} + +impl From> for HostContext { + fn from(ctx: Box) -> Self { + HostContext::Array(ctx) + } +} + /// Representation of a host-defined function. /// /// This is used for `Func::new` but also for `Linker`-defined functions. For @@ -2030,16 +2134,7 @@ for_each_function_signature!(impl_into_func); /// `Store` itself, but that's an unsafe contract of using this for now /// rather than part of the struct type (to avoid `Func` in the API). pub(crate) struct HostFunc { - // The host function context that is shared with our host-to-Wasm - // trampoline. - ctx: Box, - - // The index for this function's signature within the engine-wide shared - // signature registry. - signature: VMSharedSignatureIndex, - - // Trampoline to enter this function from Rust. - host_to_wasm_trampoline: VMTrampoline, + ctx: HostContext, // Stored to unregister this function's signature with the engine when this // is dropped. @@ -2077,9 +2172,9 @@ impl HostFunc { Ok(result) }) }; - let (ctx, signature, trampoline) = crate::trampoline::create_function(&ty, func, engine) + let ctx = crate::trampoline::create_array_call_function(&ty, func, engine) .expect("failed to create function"); - HostFunc::_new(engine, ctx, signature, trampoline) + HostFunc::_new(engine, ctx.into()) } /// Analog of [`Func::wrap`] @@ -2087,22 +2182,15 @@ impl HostFunc { engine: &Engine, func: impl IntoFunc, ) -> Self { - let (ctx, signature, trampoline) = func.into_func(engine); - HostFunc::_new(engine, ctx, signature, trampoline) + let ctx = func.into_func(engine); + HostFunc::_new(engine, ctx) } /// Requires that this function's signature is already registered within /// `Engine`. This happens automatically during the above two constructors. - fn _new( - engine: &Engine, - ctx: Box, - signature: VMSharedSignatureIndex, - trampoline: VMTrampoline, - ) -> Self { + fn _new(engine: &Engine, ctx: HostContext) -> Self { HostFunc { ctx, - signature, - host_to_wasm_trampoline: trampoline, engine: engine.clone(), } } @@ -2138,9 +2226,25 @@ impl HostFunc { /// The caller must arrange for the `Arc` to be "rooted" in the store /// provided via another means, probably by pushing to /// `StoreOpaque::rooted_host_funcs`. - pub unsafe fn to_func_store_rooted(self: &Arc, store: &mut StoreOpaque) -> Func { + /// + /// Similarly, the caller must arrange for `rooted_func_ref` to be rooted in + /// the same store. 
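Both `HostContext` variants wrap a vmctx whose funcref is fully populated except for `wasm_call`. A deliberately simplified picture of the state a freshly created host function starts in (illustrative field types only, not the real layout):

    /// Toy model of a freshly created host function's funcref: every entry
    /// point is known up front except the Wasm-to-native one, which remains
    /// a hole until a module (or an `InstancePre`) supplies a trampoline for
    /// the signature.
    struct MiniFuncRef {
        native_call: usize,       // native-ABI entry point
        array_call: usize,        // `ValRaw`-buffer entry point
        wasm_call: Option<usize>, // hole: patched with a module's trampoline
        type_index: u32,          // engine-wide shared signature index
    }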
+ pub unsafe fn to_func_store_rooted( + self: &Arc, + store: &mut StoreOpaque, + rooted_func_ref: Option>, + ) -> Func { self.validate_store(store); - Func::from_func_kind(FuncKind::RootedHost(RootedHostFunc::new(self)), store) + + if rooted_func_ref.is_some() { + debug_assert!(self.funcref().wasm_call.is_none()); + debug_assert!(matches!(self.ctx, HostContext::Native(_))); + } + + Func::from_func_kind( + FuncKind::RootedHost(RootedHostFunc::new(self, rooted_func_ref)), + store, + ) } /// Same as [`HostFunc::to_func`], different ownership. @@ -2161,12 +2265,23 @@ impl HostFunc { } pub(crate) fn sig_index(&self) -> VMSharedSignatureIndex { - self.signature + self.funcref().type_index + } + + pub(crate) fn funcref(&self) -> &VMCallerCheckedFuncRef { + match &self.ctx { + HostContext::Native(ctx) => ctx.funcref(), + HostContext::Array(ctx) => ctx.funcref(), + } + } + + pub(crate) fn host_ctx(&self) -> &HostContext { + &self.ctx } fn export_func(&self) -> ExportFunction { ExportFunction { - anyfunc: self.ctx.wasm_to_host_trampoline(), + anyfunc: NonNull::from(self.funcref()), } } } @@ -2174,22 +2289,12 @@ impl HostFunc { impl Drop for HostFunc { fn drop(&mut self) { unsafe { - self.engine.signatures().unregister(self.signature); + self.engine.signatures().unregister(self.sig_index()); } } } impl FuncData { - #[inline] - pub(crate) fn trampoline(&self) -> VMTrampoline { - match &self.kind { - FuncKind::StoreOwned { trampoline, .. } => *trampoline, - FuncKind::SharedHost(host) => host.host_to_wasm_trampoline, - FuncKind::RootedHost(host) => host.host_to_wasm_trampoline, - FuncKind::Host(host) => host.host_to_wasm_trampoline, - } - } - #[inline] fn export(&self) -> ExportFunction { self.kind.export() @@ -2206,7 +2311,9 @@ impl FuncKind { match self { FuncKind::StoreOwned { export, .. } => *export, FuncKind::SharedHost(host) => host.export_func(), - FuncKind::RootedHost(host) => host.export_func(), + FuncKind::RootedHost(rooted) => ExportFunction { + anyfunc: NonNull::from(rooted.func_ref()), + }, FuncKind::Host(host) => host.export_func(), } } @@ -2218,8 +2325,9 @@ use self::rooted::*; /// `RootedHostFunc` instead of accidentally safely allowing access to its /// constructor. mod rooted { + use wasmtime_runtime::VMCallerCheckedFuncRef; + use super::HostFunc; - use std::ops::Deref; use std::ptr::NonNull; use std::sync::Arc; @@ -2228,7 +2336,10 @@ mod rooted { /// /// For more documentation see `FuncKind::RootedHost`, `InstancePre`, and /// `HostFunc::to_func_store_rooted`. - pub(crate) struct RootedHostFunc(NonNull); + pub(crate) struct RootedHostFunc { + func: NonNull, + func_ref: Option>, + } // These are required due to the usage of `NonNull` but should be safe // because `HostFunc` is itself send/sync. @@ -2240,16 +2351,31 @@ mod rooted { /// access to the pointer given at any time, including outside the /// window of validity of `func`, so callers must not use the return /// value past the lifetime of the provided `func`. - pub(crate) unsafe fn new(func: &Arc) -> RootedHostFunc { - RootedHostFunc(NonNull::from(&**func)) + /// + /// Similarly, callers must ensure that the given `func_ref` is valid + /// for the lifetime of the return value. + pub(crate) unsafe fn new( + func: &Arc, + func_ref: Option>, + ) -> RootedHostFunc { + RootedHostFunc { + func: NonNull::from(&**func), + func_ref, + } + } - } - impl Deref for RootedHostFunc { - type Target = HostFunc; + pub(crate) fn func(&self) -> &HostFunc { + // Safety invariants are upheld by the `RootedHostFunc::new` caller.
+ unsafe { self.func.as_ref() } + } - fn deref(&self) -> &HostFunc { - unsafe { self.0.as_ref() } + pub(crate) fn func_ref(&self) -> &VMCallerCheckedFuncRef { + if let Some(f) = self.func_ref { + // Safety invariants are upheld by the `RootedHostFunc::new` caller. + unsafe { f.as_ref() } + } else { + self.func().funcref() + } } } } diff --git a/crates/wasmtime/src/func/typed.rs b/crates/wasmtime/src/func/typed.rs index 733e60720091..53e44edddd16 100644 --- a/crates/wasmtime/src/func/typed.rs +++ b/crates/wasmtime/src/func/typed.rs @@ -4,9 +4,10 @@ use crate::{AsContextMut, ExternRef, Func, FuncType, StoreContextMut, ValRaw, Va use anyhow::{bail, Result}; use std::marker; use std::mem::{self, MaybeUninit}; -use std::ptr; +use std::ptr::{self, NonNull}; use wasmtime_runtime::{ - VMCallerCheckedFuncRef, VMContext, VMFunctionBody, VMOpaqueContext, VMSharedSignatureIndex, + VMCallerCheckedFuncRef, VMContext, VMNativeCallFunction, VMOpaqueContext, + VMSharedSignatureIndex, }; /// A statically typed WebAssembly function. @@ -84,7 +85,7 @@ where !store.0.async_support(), "must use `call_async` with async stores" ); - let func = self.func.caller_checked_anyfunc(store.0); + let func = self.func.caller_checked_func_ref(store.0); unsafe { Self::call_raw(&mut store, func, params) } } @@ -122,7 +123,7 @@ where ); store .on_fiber(|store| { - let func = self.func.caller_checked_anyfunc(store.0); + let func = self.func.caller_checked_func_ref(store.0); unsafe { Self::call_raw(store, func, params) } }) .await? @@ -177,12 +178,8 @@ where let result = invoke_wasm_and_catch_traps(store, |caller| { let (anyfunc, ret, params, returned) = &mut captures; let anyfunc = anyfunc.as_ref(); - let result = Params::invoke::( - anyfunc.func_ptr.as_ptr(), - anyfunc.vmctx, - caller, - *params, - ); + let result = + Params::invoke::(anyfunc.native_call, anyfunc.vmctx, caller, *params); ptr::write(ret.as_mut_ptr(), result); *returned = true }); @@ -443,7 +440,7 @@ unsafe impl WasmTy for Option { #[inline] fn into_abi(self, store: &mut StoreOpaque) -> Self::Abi { if let Some(f) = self { - f.caller_checked_anyfunc(store).as_ptr() + f.caller_checked_func_ref(store).as_ptr() } else { ptr::null_mut() } @@ -451,7 +448,7 @@ unsafe impl WasmTy for Option { #[inline] unsafe fn from_abi(abi: Self::Abi, store: &mut StoreOpaque) -> Self { - Func::from_caller_checked_anyfunc(store, abi) + Func::from_caller_checked_func_ref(store, abi) } } @@ -475,7 +472,7 @@ pub unsafe trait WasmParams: Send { #[doc(hidden)] unsafe fn invoke( - func: *const VMFunctionBody, + func: NonNull, vmctx1: *mut VMOpaqueContext, vmctx2: *mut VMContext, abi: Self::Abi, @@ -505,7 +502,7 @@ where } unsafe fn invoke( - func: *const VMFunctionBody, + func: NonNull, vmctx1: *mut VMOpaqueContext, vmctx2: *mut VMContext, abi: Self::Abi, @@ -565,13 +562,13 @@ macro_rules! impl_wasm_params { } unsafe fn invoke( - func: *const VMFunctionBody, + func: NonNull, vmctx1: *mut VMOpaqueContext, vmctx2: *mut VMContext, abi: Self::Abi, ) -> R::ResultAbi { let fnptr = mem::transmute::< - *const VMFunctionBody, + NonNull, unsafe extern "C" fn( *mut VMOpaqueContext, *mut VMContext, @@ -588,7 +585,6 @@ macro_rules! impl_wasm_params { // Upon returning `R::call` will convert all the returns back // into `R`. 
::call(|retptr| { - let fnptr = wasmtime_runtime::prepare_host_to_wasm_trampoline(vmctx2, fnptr); fnptr(vmctx1, vmctx2, $($t,)* retptr) }) } diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index cbb2f0b2ae66..317379a810e9 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -7,11 +7,13 @@ use crate::{ }; use anyhow::{anyhow, bail, Context, Result}; use std::mem; +use std::ptr::NonNull; use std::sync::Arc; use wasmtime_environ::{EntityType, FuncIndex, GlobalIndex, MemoryIndex, PrimaryMap, TableIndex}; use wasmtime_runtime::{ - Imports, InstanceAllocationRequest, StorePtr, VMContext, VMFunctionBody, VMFunctionImport, - VMGlobalImport, VMMemoryImport, VMOpaqueContext, VMTableImport, + Imports, InstanceAllocationRequest, StorePtr, VMCallerCheckedFuncRef, VMContext, + VMFunctionImport, VMGlobalImport, VMMemoryImport, VMNativeCallFunction, VMOpaqueContext, + VMTableImport, }; /// An instantiated WebAssembly module. @@ -163,7 +165,7 @@ impl Instance { })?; let mut owned_imports = OwnedImports::new(module); for import in imports { - owned_imports.push(import, store); + owned_imports.push(import, store, module); } Ok(owned_imports) } @@ -251,6 +253,7 @@ impl Instance { // Register the module just before instantiation to ensure we keep the module // properly referenced while in use by the store. store.modules_mut().register_module(module); + store.fill_func_refs(); // The first thing we do is issue an instance allocation request // to the instance allocator. This, on success, will give us an @@ -338,16 +341,14 @@ impl Instance { // trap-handling configuration in `store` as well. let instance = store.0.instance_mut(id); let f = instance.get_exported_func(start); - let vmctx = instance.vmctx_ptr(); + let caller_vmctx = instance.vmctx_ptr(); unsafe { super::func::invoke_wasm_and_catch_traps(store, |_default_caller| { - let trampoline = mem::transmute::< - *const VMFunctionBody, - unsafe extern "C" fn(*mut VMOpaqueContext, *mut VMContext), - >(f.anyfunc.as_ref().func_ptr.as_ptr()); - let trampoline = - wasmtime_runtime::prepare_host_to_wasm_trampoline(vmctx, trampoline); - trampoline(f.anyfunc.as_ref().vmctx, vmctx) + let func = mem::transmute::< + NonNull, + extern "C" fn(*mut VMOpaqueContext, *mut VMContext), + >(f.anyfunc.as_ref().native_call); + func(f.anyfunc.as_ref().vmctx, caller_vmctx) })?; } Ok(()) @@ -567,10 +568,10 @@ impl OwnedImports { self.globals.clear(); } - fn push(&mut self, item: &Extern, store: &mut StoreOpaque) { + fn push(&mut self, item: &Extern, store: &mut StoreOpaque, module: &Module) { match item { Extern::Func(i) => { - self.functions.push(i.vmimport(store)); + self.functions.push(i.vmimport(store, module)); } Extern::Global(i) => { self.globals.push(i.vmimport(store)); @@ -595,7 +596,9 @@ impl OwnedImports { wasmtime_runtime::Export::Function(f) => { let f = f.anyfunc.as_ref(); self.functions.push(VMFunctionImport { - body: f.func_ptr, + wasm_call: f.wasm_call.unwrap(), + native_call: f.native_call, + array_call: f.array_call, vmctx: f.vmctx, }); } @@ -658,9 +661,42 @@ pub struct InstancePre { /// preallocate space in a `Store` up front for all entries to be inserted. host_funcs: usize, + /// The `VMCallerCheckedFuncRef`s for the functions in `items` that do not + /// have a `wasm_call` trampoline. We pre-allocate and pre-patch these + /// `VMCallerCheckedFuncRef`s so that we don't have to do it at + /// instantiation time. + /// + /// This is an `Arc<[T]>` for the same reason as `items`. 
+ func_refs: Arc<[PrePatchedFuncRef]>, + _marker: std::marker::PhantomData T>, } +pub(crate) use pre_patched_func_ref::PrePatchedFuncRef; +mod pre_patched_func_ref { + use super::*; + + pub struct PrePatchedFuncRef(VMCallerCheckedFuncRef); + + impl PrePatchedFuncRef { + /// Safety: callers must arrange for the given `func_ref` to be usable + /// in a `Send + Sync` manner (i.e. its associated `Module` is kept + /// alive or `Func` is alive and supports these things) and that the + /// `wasm_call` field is already patched in, if necessary. + pub unsafe fn new(func_ref: VMCallerCheckedFuncRef) -> PrePatchedFuncRef { + PrePatchedFuncRef(func_ref) + } + + pub fn func_ref(&self) -> &VMCallerCheckedFuncRef { + &self.0 + } + } + + // Safety: This is upheld by `PrePatchedFuncRef::new` callers. + unsafe impl Send for InstancePre {} + unsafe impl Sync for InstancePre {} +} + /// InstancePre's clone does not require T: Clone impl Clone for InstancePre { fn clone(&self) -> Self { @@ -668,6 +704,7 @@ impl Clone for InstancePre { module: self.module.clone(), items: self.items.clone(), host_funcs: self.host_funcs, + func_refs: self.func_refs.clone(), _marker: self._marker, } } @@ -685,17 +722,33 @@ impl InstancePre { pub(crate) unsafe fn new(module: &Module, items: Vec) -> Result> { typecheck(module, &items, |cx, ty, item| cx.definition(ty, &item.ty()))?; - let host_funcs = items - .iter() - .filter(|i| match i { - Definition::HostFunc(_) => true, - _ => false, - }) - .count(); + let mut func_refs = vec![]; + let mut host_funcs = 0; + for item in &items { + match item { + Definition::Extern(_, _) => {} + Definition::HostFunc(f) => { + host_funcs += 1; + if f.funcref().wasm_call.is_none() { + // `f` needs its `VMCallerCheckedFuncRef::wasm_call` + // patched with a Wasm-to-native trampoline. + debug_assert!(matches!(f.host_ctx(), crate::HostContext::Native(_))); + func_refs.push(PrePatchedFuncRef::new(VMCallerCheckedFuncRef { + wasm_call: module + .runtime_info() + .wasm_to_native_trampoline(f.sig_index()), + ..*f.funcref() + })); + } + } + } + } + Ok(InstancePre { module: module.clone(), items: items.into(), host_funcs, + func_refs: func_refs.into(), _marker: std::marker::PhantomData, }) } @@ -723,8 +776,13 @@ impl InstancePre { /// [`Engine`] than the [`InstancePre`] originally came from. 
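The payoff of the pre-patching above is easiest to see at the API level: `wasm_call` holes in `Linker`-defined host functions are resolved once, when the `InstancePre` is built, instead of on every instantiation. A sketch using the public API (signatures approximate; they have shifted a little across wasmtime versions):

    fn amortize_instantiation(
        engine: &wasmtime::Engine,
        module: &wasmtime::Module,
    ) -> anyhow::Result<()> {
        use wasmtime::{Linker, Store};

        let mut linker: Linker<()> = Linker::new(engine);
        // A host function with a `wasm_call` hole, as described above.
        linker.func_wrap("host", "inc", |x: i32| x + 1)?;

        // The hole is patched here, once, against `module`'s trampolines...
        let pre = linker.instantiate_pre(module)?;

        // ...so none of these instantiations repeat that work.
        for _ in 0..3 {
            let mut store = Store::new(engine, ());
            let _instance = pre.instantiate(&mut store)?;
        }
        Ok(())
    }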
pub fn instantiate(&self, mut store: impl AsContextMut) -> Result { let mut store = store.as_context_mut(); - let imports = - pre_instantiate_raw(&mut store.0, &self.module, &self.items, self.host_funcs)?; + let imports = pre_instantiate_raw( + &mut store.0, + &self.module, + &self.items, + self.host_funcs, + &self.func_refs, + )?; // This unsafety should be handled by the type-checking performed by the // constructor of `InstancePre` to assert that all the imports we're passing @@ -752,8 +810,13 @@ impl InstancePre { T: Send, { let mut store = store.as_context_mut(); - let imports = - pre_instantiate_raw(&mut store.0, &self.module, &self.items, self.host_funcs)?; + let imports = pre_instantiate_raw( + &mut store.0, + &self.module, + &self.items, + self.host_funcs, + &self.func_refs, + )?; // This unsafety should be handled by the type-checking performed by the // constructor of `InstancePre` to assert that all the imports we're passing @@ -773,6 +836,7 @@ fn pre_instantiate_raw( module: &Module, items: &Arc<[Definition]>, host_funcs: usize, + func_refs: &Arc<[PrePatchedFuncRef]>, ) -> Result { if host_funcs > 0 { // Any linker-defined function of the `Definition::HostFunc` variant @@ -786,8 +850,10 @@ fn pre_instantiate_raw( // items into the store once. This avoids cloning each individual item // below. store.push_rooted_funcs(items.clone()); + store.push_instance_pre_func_refs(func_refs.clone()); } + let mut func_refs = func_refs.iter().map(|f| NonNull::from(f.func_ref())); let mut imports = OwnedImports::new(module); for import in items.iter() { if !import.comes_from_same_store(store) { @@ -797,8 +863,21 @@ fn pre_instantiate_raw( // `InstancePre` where the `T` of the original item should match the // `T` of the store. Additionally the rooting necessary has happened // above. - let item = unsafe { import.to_extern_store_rooted(store) }; - imports.push(&item, store); + let item = match import { + Definition::Extern(e, _) => e.clone(), + Definition::HostFunc(func) => unsafe { + func.to_func_store_rooted( + store, + if func.funcref().wasm_call.is_none() { + Some(func_refs.next().unwrap()) + } else { + None + }, + ) + .into() + }, + }; + imports.push(&item, store, module); } Ok(imports) diff --git a/crates/wasmtime/src/linker.rs b/crates/wasmtime/src/linker.rs index 62821362a963..d1758a915ea2 100644 --- a/crates/wasmtime/src/linker.rs +++ b/crates/wasmtime/src/linker.rs @@ -1339,15 +1339,6 @@ impl Definition { } } - /// Note the unsafety here is due to calling - /// `HostFunc::to_func_store_rooted`. 
- pub(crate) unsafe fn to_extern_store_rooted(&self, store: &mut StoreOpaque) -> Extern { - match self { - Definition::Extern(e, _) => e.clone(), - Definition::HostFunc(func) => func.to_func_store_rooted(store).into(), - } - } - pub(crate) fn comes_from_same_store(&self, store: &StoreOpaque) -> bool { match self { Definition::Extern(e, _) => e.comes_from_same_store(store), diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 8e366bf50cbe..7112a67e3721 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -7,20 +7,25 @@ use crate::{ use anyhow::{bail, Context, Result}; use once_cell::sync::OnceCell; use std::any::Any; +use std::collections::BTreeMap; use std::fs; use std::mem; use std::ops::Range; use std::path::Path; +use std::ptr::NonNull; use std::sync::Arc; use wasmparser::{Parser, ValidPayload, Validator}; use wasmtime_environ::{ - DefinedFuncIndex, DefinedMemoryIndex, HostPtr, ModuleEnvironment, ModuleTranslation, - ModuleTypes, ObjectKind, PrimaryMap, VMOffsets, WasmFunctionInfo, + DefinedFuncIndex, DefinedMemoryIndex, FuncIndex, FunctionLoc, HostPtr, ModuleEnvironment, + ModuleTranslation, ModuleType, ModuleTypes, ObjectKind, PrimaryMap, SignatureIndex, VMOffsets, + WasmFunctionInfo, +}; +use wasmtime_jit::{ + CodeMemory, CompiledFunctionInfo, CompiledModule, CompiledModuleInfo, ObjectBuilder, }; -use wasmtime_jit::{CodeMemory, CompiledModule, CompiledModuleInfo}; use wasmtime_runtime::{ - CompiledModuleId, MemoryImage, MmapVec, ModuleMemoryImages, VMFunctionBody, - VMSharedSignatureIndex, + CompiledModuleId, MemoryImage, MmapVec, ModuleMemoryImages, VMArrayCallFunction, + VMNativeCallFunction, VMSharedSignatureIndex, VMWasmCallFunction, }; mod registry; @@ -129,6 +134,134 @@ struct ModuleInner { offsets: VMOffsets, } +pub(crate) struct CompileFunctionResult { + info: WasmFunctionInfo, + function: Box, + // These trampolines are only present if the function can escape. + array_to_wasm_trampoline: Option>, + native_to_wasm_trampoline: Option>, +} + +pub(crate) struct ModuleFunctionIndices<'a> { + translation: ModuleTranslation<'a>, + func_infos: PrimaryMap, + + // Indices within the associated `compiled_funcs` for various types of code. 
+ func_indices: Vec, + array_to_wasm_trampoline_indices: Vec<(usize, DefinedFuncIndex)>, + native_to_wasm_trampoline_indices: Vec<(usize, DefinedFuncIndex)>, +} + +impl<'a> ModuleFunctionIndices<'a> { + pub(crate) fn new( + translation: ModuleTranslation<'a>, + function_compilations: Vec, + symbol_prefix: &str, + compiled_funcs: &mut Vec<(String, Box)>, + ) -> Self { + let mut func_infos = PrimaryMap::with_capacity(function_compilations.len()); + let mut func_indices = Vec::with_capacity(function_compilations.len()); + let mut array_to_wasm_trampoline_indices = vec![]; + let mut native_to_wasm_trampoline_indices = vec![]; + + for CompileFunctionResult { + info, + function, + array_to_wasm_trampoline, + native_to_wasm_trampoline, + } in function_compilations + { + let def_idx = func_infos.push(info); + let idx = translation.module.func_index(def_idx).as_u32(); + + if let Some(array_to_wasm_trampoline) = array_to_wasm_trampoline { + let sym = format!("{symbol_prefix}_array_to_wasm_trampoline_{idx}"); + array_to_wasm_trampoline_indices.push((compiled_funcs.len(), def_idx)); + compiled_funcs.push((sym, array_to_wasm_trampoline)); + } + + if let Some(native_to_wasm_trampoline) = native_to_wasm_trampoline { + let sym = format!("{symbol_prefix}_native_to_wasm_trampoline_{idx}"); + native_to_wasm_trampoline_indices.push((compiled_funcs.len(), def_idx)); + compiled_funcs.push((sym, native_to_wasm_trampoline)); + } + + let sym = format!("{symbol_prefix}_function_{idx}"); + func_indices.push(compiled_funcs.len()); + compiled_funcs.push((sym, function)); + } + + ModuleFunctionIndices { + translation, + func_infos, + func_indices, + array_to_wasm_trampoline_indices, + native_to_wasm_trampoline_indices, + } + } + + pub(crate) fn resolve_reloc(&self, idx: FuncIndex) -> usize { + let defined = self.translation.module.defined_func_index(idx).unwrap(); + self.func_indices[defined.as_u32() as usize] + } + + pub(crate) fn append_to_object( + self, + locs: &[(object::write::SymbolId, FunctionLoc)], + wasm_to_native_trampoline_indices: &[(usize, SignatureIndex)], + object: &mut ObjectBuilder, + ) -> Result { + let funcs: PrimaryMap = self + .func_infos + .into_iter() + .enumerate() + .zip(self.func_indices.iter().copied().map(|i| locs[i].1)) + .map( + |((defined_func_index, (_id, wasm_func_info)), wasm_func_loc)| { + let defined_func_index = + DefinedFuncIndex::from_u32(u32::try_from(defined_func_index).unwrap()); + + let array_to_wasm_trampoline_index = self + .array_to_wasm_trampoline_indices + .binary_search_by_key(&defined_func_index, |(_i, def_func_idx)| { + *def_func_idx + }) + .ok(); + let array_to_wasm_trampoline = array_to_wasm_trampoline_index.map(|i| { + let compiled_func_index = self.array_to_wasm_trampoline_indices[i].0; + locs[compiled_func_index].1 + }); + + let native_to_wasm_trampoline_index = self + .native_to_wasm_trampoline_indices + .binary_search_by_key(&defined_func_index, |(_i, def_func_idx)| { + *def_func_idx + }) + .ok(); + let native_to_wasm_trampoline = native_to_wasm_trampoline_index.map(|i| { + let compiled_func_index = self.native_to_wasm_trampoline_indices[i].0; + locs[compiled_func_index].1 + }); + + CompiledFunctionInfo::new( + wasm_func_info, + wasm_func_loc, + array_to_wasm_trampoline, + native_to_wasm_trampoline, + ) + }, + ) + .collect(); + + let wasm_to_native_trampolines = wasm_to_native_trampoline_indices + .iter() + .map(|&(i, sig_idx)| (sig_idx, locs[i].1)) + .collect(); + + object.append(self.translation, funcs, wasm_to_native_trampolines) + } +} + impl Module { /// 
Creates a new WebAssembly `Module` from the given in-memory `bytes`. /// @@ -369,6 +502,44 @@ impl Module { Module::new(engine, &*mmap) } + #[cfg(compiler)] + pub(crate) fn compile_wasm_to_native_trampolines( + engine: &Engine, + translations: &[ModuleTranslation<'_>], + types: &ModuleTypes, + compiled_funcs: &mut Vec<(String, Box)>, + ) -> Result> { + let mut sigs = BTreeMap::new(); + for trans in translations.iter() { + sigs.extend(trans.module.types.iter().filter_map(|(_, ty)| match ty { + ModuleType::Function(ty) => Some((*ty, trans)), + })); + } + + let trampolines = engine.run_maybe_parallel( + sigs.into_iter().collect(), + |(sig_index, translation)| -> Result<_> { + let wasm_func_ty = &types[sig_index]; + Ok(( + format!("wasm_to_native_trampoline[{}]", sig_index.as_u32()), + sig_index, + engine + .compiler() + .compile_wasm_to_native_trampoline(&translation, wasm_func_ty)?, + )) + }, + )?; + + let mut indices = Vec::with_capacity(trampolines.len()); + for (symbol, sig, trampoline) in trampolines { + let idx = compiled_funcs.len(); + indices.push((idx, sig)); + compiled_funcs.push((symbol, trampoline)); + } + + Ok(indices) + } + /// Converts an input binary-encoded WebAssembly module to compilation /// artifacts and type information. /// @@ -403,78 +574,39 @@ impl Module { let types = types.finish(); // Afterwards compile all functions and trampolines required by the - // module. - let signatures = translation.exported_signatures.clone(); - let (funcs, trampolines) = engine.join_maybe_parallel( - // In one (possibly) parallel task all wasm functions are compiled - // in parallel. Note that this is also where the actual validation - // of all function bodies happens as well. - || Self::compile_functions(engine, &mut translation, &types), - // In another (possibly) parallel task all trampolines necessary - // for untyped host-to-wasm entry are compiled. Note that this - // isn't really expected to take all that long, it's moreso "well - // if we're using rayon why not use it here too". - || -> Result<_> { - engine.run_maybe_parallel(signatures, |sig| { - let ty = &types[sig]; - Ok(compiler.compile_host_to_wasm_trampoline(ty)?) - }) - }, - ); - - // Weave the separate list of compiled functions into one list, storing - // the other metadata off to the side for now. - let funcs = funcs?; - let trampolines = trampolines?; - let mut func_infos = PrimaryMap::with_capacity(funcs.len()); - let mut compiled_funcs = Vec::with_capacity(funcs.len() + trampolines.len()); - for (info, func) in funcs { - let idx = func_infos.push(info); - let sym = format!( - "_wasm_function_{}", - translation.module.func_index(idx).as_u32() - ); - compiled_funcs.push((sym, func)); - } - for (sig, func) in translation.exported_signatures.iter().zip(trampolines) { - let sym = format!("_trampoline_{}", sig.as_u32()); - compiled_funcs.push((sym, func)); - } + // module. Note that this is also where the actual validation of all + // function bodies happens as well. + let funcs = Self::compile_functions(engine, &mut translation, &types)?; + let mut compiled_funcs = vec![]; + + let wasm_to_native_trampolines = Module::compile_wasm_to_native_trampolines( + engine, + std::slice::from_ref(&translation), + &types, + &mut compiled_funcs, + )?; + + let module_func_indices = + ModuleFunctionIndices::new(translation, funcs, "wasm", &mut compiled_funcs); // Emplace all compiled functions into the object file with any other // sections associated with code as well. 
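One detail of `compile_wasm_to_native_trampolines` above worth spelling out: trampolines are keyed by signature, not by function, so any number of imports sharing a type cost a single trampoline. The dedup, reduced to a self-contained sketch (stand-in index and code types):

    use std::collections::BTreeMap;

    type SigIdx = u32;

    /// Compile one trampoline per distinct signature, no matter how many
    /// functions use that signature.
    fn dedup_trampolines(
        sigs_in_use: impl IntoIterator<Item = SigIdx>,
        compile: impl Fn(SigIdx) -> Vec<u8>,
    ) -> BTreeMap<SigIdx, Vec<u8>> {
        let mut compiled = BTreeMap::new();
        for sig in sigs_in_use {
            compiled.entry(sig).or_insert_with(|| compile(sig));
        }
        compiled
    }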
let mut obj = engine.compiler().object(ObjectKind::Module)?; - let locs = compiler.append_code(&mut obj, &compiled_funcs, tunables, &|i, idx| { - assert!(i < func_infos.len()); - let defined = translation.module.defined_func_index(idx).unwrap(); - defined.as_u32() as usize + let locs = compiler.append_code(&mut obj, &compiled_funcs, tunables, &|_i, idx| { + module_func_indices.resolve_reloc(idx) })?; // If requested, generate and add dwarf information. - if tunables.generate_native_debuginfo && !func_infos.is_empty() { - let mut locs = locs.iter(); - let mut funcs = compiled_funcs.iter(); - let funcs = (0..func_infos.len()) - .map(|_| (locs.next().unwrap().0, &*funcs.next().unwrap().1)) + if tunables.generate_native_debuginfo && !module_func_indices.func_indices.is_empty() { + let funcs = module_func_indices + .func_indices + .iter() + .copied() + .map(|i| (locs[i].0, &*compiled_funcs[i].1)) .collect(); - compiler.append_dwarf(&mut obj, &translation, &funcs)?; + compiler.append_dwarf(&mut obj, &module_func_indices.translation, &funcs)?; } - // Process all the results of compilation into a final state for our - // internal representation. - let mut locs = locs.into_iter(); - let funcs = func_infos - .into_iter() - .map(|(_, info)| (info, locs.next().unwrap().1)) - .collect(); - let trampolines = translation - .exported_signatures - .iter() - .cloned() - .map(|i| (i, locs.next().unwrap().1)) - .collect(); - assert!(locs.next().is_none()); - // Insert `Engine` and type-level information into the compiled // artifact so if this module is deserialized later it contains all // information necessary. @@ -488,7 +620,8 @@ impl Module { engine.append_bti(&mut obj); let mut obj = wasmtime_jit::ObjectBuilder::new(obj, tunables); - let info = obj.append(translation, funcs, trampolines)?; + let info = + module_func_indices.append_to_object(&locs, &wasm_to_native_trampolines, &mut obj)?; obj.serialize_info(&(&info, &types)); let mmap = obj.finish()?; @@ -500,24 +633,59 @@ impl Module { engine: &Engine, translation: &mut ModuleTranslation<'_>, types: &ModuleTypes, - ) -> Result)>> { + ) -> Result> { let tunables = &engine.config().tunables; let functions = mem::take(&mut translation.function_body_inputs); let functions = functions.into_iter().collect::>(); let compiler = engine.compiler(); - let funcs = engine.run_maybe_parallel(functions, |(index, func)| { - let offset = func.body.range().start; - let result = compiler.compile_function(&translation, index, func, tunables, types); - result.with_context(|| { - let index = translation.module.func_index(index); - let name = match translation.debuginfo.name_section.func_names.get(&index) { - Some(name) => format!(" (`{}`)", name), - None => String::new(), - }; - let index = index.as_u32(); - format!("failed to compile wasm function {index}{name} at offset {offset:#x}") - }) - })?; + let funcs = + engine.run_maybe_parallel(functions, |(def_func_index, func)| -> Result<_> { + let func_index = translation.module.func_index(def_func_index); + let offset = func.body.range().start; + + let (info, function) = compiler + .compile_function(&translation, def_func_index, func, tunables, types) + .with_context(|| { + let name = match translation + .debuginfo + .name_section + .func_names + .get(&func_index) + { + Some(name) => format!(" (`{}`)", name), + None => String::new(), + }; + let func_index = func_index.as_u32(); + format!( + "failed to compile wasm function {func_index}{name} at offset {offset:#x}" + ) + })?; + + let (array_to_wasm_trampoline, 
native_to_wasm_trampoline) = + if translation.module.functions[func_index].is_escaping() { + ( + Some(compiler.compile_array_to_wasm_trampoline( + &translation, + types, + def_func_index, + )?), + Some(compiler.compile_native_to_wasm_trampoline( + &translation, + types, + def_func_index, + )?), + ) + } else { + (None, None) + }; + + Ok(CompileFunctionResult { + info, + function, + array_to_wasm_trampoline, + native_to_wasm_trampoline, + }) + })?; // If configured attempt to use static memory initialization which // can either at runtime be implemented as a single memcpy to @@ -640,13 +808,7 @@ impl Module { // Note that the unsafety here should be ok since the `trampolines` // field should only point to valid trampoline function pointers // within the text section. - let signatures = SignatureCollection::new_for_module( - engine.signatures(), - &types, - info.trampolines - .iter() - .map(|(idx, f)| (*idx, unsafe { code_memory.vmtrampoline(*f) })), - ); + let signatures = SignatureCollection::new_for_module(engine.signatures(), &types); // Package up all our data into a `CodeObject` and delegate to the final // step of module compilation. @@ -1171,12 +1333,46 @@ impl wasmtime_runtime::ModuleRuntimeInfo for ModuleInner { self.module.module() } - fn function(&self, index: DefinedFuncIndex) -> *mut VMFunctionBody { - self.module + fn function(&self, index: DefinedFuncIndex) -> NonNull { + let ptr = self + .module .finished_function(index) .as_ptr() - .cast::() - .cast_mut() + .cast::() + .cast_mut(); + NonNull::new(ptr).unwrap() + } + + fn native_to_wasm_trampoline( + &self, + index: DefinedFuncIndex, + ) -> Option> { + let ptr = self + .module + .native_to_wasm_trampoline(index)? + .as_ptr() + .cast::() + .cast_mut(); + Some(NonNull::new(ptr).unwrap()) + } + + fn array_to_wasm_trampoline(&self, index: DefinedFuncIndex) -> Option { + let ptr = self.module.array_to_wasm_trampoline(index)?.as_ptr(); + Some(unsafe { mem::transmute::<*const u8, VMArrayCallFunction>(ptr) }) + } + + fn wasm_to_native_trampoline( + &self, + signature: VMSharedSignatureIndex, + ) -> Option> { + let sig = self.code.signatures().local_signature(signature)?; + let ptr = self + .module + .wasm_to_native_trampoline(sig) + .as_ptr() + .cast::() + .cast_mut(); + Some(NonNull::new(ptr).unwrap()) } fn memory_image(&self, memory: DefinedMemoryIndex) -> Result>> { @@ -1263,7 +1459,25 @@ impl wasmtime_runtime::ModuleRuntimeInfo for BareModuleInfo { &self.module } - fn function(&self, _index: DefinedFuncIndex) -> *mut VMFunctionBody { + fn function(&self, _index: DefinedFuncIndex) -> NonNull { + unreachable!() + } + + fn array_to_wasm_trampoline(&self, _index: DefinedFuncIndex) -> Option { + unreachable!() + } + + fn native_to_wasm_trampoline( + &self, + _index: DefinedFuncIndex, + ) -> Option> { + unreachable!() + } + + fn wasm_to_native_trampoline( + &self, + _signature: VMSharedSignatureIndex, + ) -> Option> { unreachable!() } diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs index ac2cc9c0a61c..459c139bb530 100644 --- a/crates/wasmtime/src/module/registry.rs +++ b/crates/wasmtime/src/module/registry.rs @@ -8,10 +8,11 @@ use once_cell::sync::Lazy; use std::collections::btree_map::Entry; use std::{ collections::BTreeMap, + ptr::NonNull, sync::{Arc, RwLock}, }; use wasmtime_jit::CodeMemory; -use wasmtime_runtime::{ModuleInfo, VMCallerCheckedFuncRef, VMTrampoline}; +use wasmtime_runtime::{ModuleInfo, VMSharedSignatureIndex, VMWasmCallFunction}; /// Used for registering modules with a 
store. /// @@ -124,12 +125,6 @@ impl ModuleRegistry { assert!(prev.is_none()); } - /// Looks up a trampoline from an anyfunc. - pub fn lookup_trampoline(&self, anyfunc: &VMCallerCheckedFuncRef) -> Option { - let (code, _offset) = self.code(anyfunc.func_ptr.as_ptr() as usize)?; - code.code.signatures().trampoline(anyfunc.type_index) - } - /// Fetches trap information about a program counter in a backtrace. pub fn lookup_trap_code(&self, pc: usize) -> Option { let (code, offset) = self.code(pc)?; @@ -149,6 +144,27 @@ impl ModuleRegistry { let info = FrameInfo::new(module, offset)?; Some((info, module)) } + + pub fn wasm_to_native_trampoline( + &self, + sig: VMSharedSignatureIndex, + ) -> Option> { + // TODO: We are doing a linear search over each module. This is fine for + // now because we typically have very few modules per store (almost + // always one, in fact). If this linear search ever becomes a + // bottleneck, we could avoid it by incrementally and lazily building a + // `VMSharedSignatureIndex` to `SignatureIndex` map. + // + // See also the comment in `ModuleInner::wasm_to_native_trampoline`. + for (_, code) in self.loaded_code.values() { + for module in code.modules.values() { + if let Some(trampoline) = module.runtime_info().wasm_to_native_trampoline(sig) { + return Some(trampoline); + } + } + } + None + } } impl LoadedCode { diff --git a/crates/wasmtime/src/signatures.rs b/crates/wasmtime/src/signatures.rs index d3c65eb87f03..e34de4638842 100644 --- a/crates/wasmtime/src/signatures.rs +++ b/crates/wasmtime/src/signatures.rs @@ -7,7 +7,7 @@ use std::{ }; use std::{convert::TryFrom, sync::Arc}; use wasmtime_environ::{ModuleTypes, PrimaryMap, SignatureIndex, WasmFuncType}; -use wasmtime_runtime::{VMSharedSignatureIndex, VMTrampoline}; +use wasmtime_runtime::VMSharedSignatureIndex; /// Represents a collection of shared signatures. /// @@ -19,27 +19,19 @@ use wasmtime_runtime::{VMSharedSignatureIndex, VMTrampoline}; pub struct SignatureCollection { registry: Arc>, signatures: PrimaryMap, - trampolines: HashMap, + reverse_signatures: HashMap, } impl SignatureCollection { - /// Creates a signature collection for a module given the module's signatures - /// and trampolines. - pub fn new_for_module( - registry: &SignatureRegistry, - types: &ModuleTypes, - trampolines: impl Iterator, - ) -> Self { - let (signatures, trampolines) = registry - .0 - .write() - .unwrap() - .register_for_module(types, trampolines); + /// Creates a signature collection for a module given the module's signatures. + pub fn new_for_module(registry: &SignatureRegistry, types: &ModuleTypes) -> Self { + let signatures = registry.0.write().unwrap().register_for_module(types); + let reverse_signatures = signatures.iter().map(|(k, v)| (*v, k)).collect(); Self { registry: registry.0.clone(), signatures, - trampolines, + reverse_signatures, } } @@ -57,15 +49,15 @@ impl SignatureCollection { self.signatures.get(index).copied() } - /// Gets a trampoline for a registered signature. - pub fn trampoline(&self, index: VMSharedSignatureIndex) -> Option { - self.trampolines.get(&index).copied() + /// Get the module-local signature index for the given shared signature index. 
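The `reverse_signatures` map referenced above is simply the inverse of the module's `signatures` table, making the shared-to-local lookup needed for trampoline retrieval a constant-time hash probe. In miniature (plain integer indices standing in for `SignatureIndex` and `VMSharedSignatureIndex`):

    use std::collections::HashMap;

    struct SigCollectionSketch {
        /// Local index -> engine-wide shared index (dense).
        shared_by_local: Vec<u32>,
        /// Engine-wide shared index -> local index (the inverse map).
        local_by_shared: HashMap<u32, usize>,
    }

    impl SigCollectionSketch {
        fn new(shared_by_local: Vec<u32>) -> Self {
            let local_by_shared = shared_by_local
                .iter()
                .enumerate()
                .map(|(local, &shared)| (shared, local))
                .collect();
            SigCollectionSketch { shared_by_local, local_by_shared }
        }

        fn local_signature(&self, shared: u32) -> Option<usize> {
            self.local_by_shared.get(&shared).copied()
        }
    }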
+ pub fn local_signature(&self, index: VMSharedSignatureIndex) -> Option { + self.reverse_signatures.get(&index).copied() } } impl Drop for SignatureCollection { fn drop(&mut self) { - if !self.signatures.is_empty() || !self.trampolines.is_empty() { + if !self.signatures.is_empty() { self.registry.write().unwrap().unregister_signatures(self); } } @@ -88,24 +80,13 @@ impl SignatureRegistryInner { fn register_for_module( &mut self, types: &ModuleTypes, - trampolines: impl Iterator, - ) -> ( - PrimaryMap, - HashMap, - ) { + ) -> PrimaryMap { let mut sigs = PrimaryMap::default(); - let mut map = HashMap::default(); - for (idx, ty) in types.wasm_signatures() { let b = sigs.push(self.register(ty)); assert_eq!(idx, b); } - - for (index, trampoline) in trampolines { - map.insert(sigs[index], trampoline); - } - - (sigs, map) + sigs } fn register(&mut self, ty: &WasmFuncType) -> VMSharedSignatureIndex { @@ -154,18 +135,8 @@ impl SignatureRegistryInner { } fn unregister_signatures(&mut self, collection: &SignatureCollection) { - // If the collection has a populated signatures map, use it to deregister - // This is always 1:1 from entry to registration - if !collection.signatures.is_empty() { - for (_, index) in collection.signatures.iter() { - self.unregister_entry(*index, 1); - } - } else { - // Otherwise, use the trampolines map, which has reference counts related - // to the stored index - for (index, _) in collection.trampolines.iter() { - self.unregister_entry(*index, 1); - } + for (_, index) in collection.signatures.iter() { + self.unregister_entry(*index, 1); } } diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index 71a9a54f7f89..b36e9650c73e 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -76,12 +76,13 @@ //! contents of `StoreOpaque`. This is an invariant that we, as the authors of //! `wasmtime`, must uphold for the public interface to be safe. +use crate::instance::PrePatchedFuncRef; use crate::linker::Definition; use crate::module::BareModuleInfo; +use crate::trampoline::VMHostGlobalContext; use crate::{module::ModuleRegistry, Engine, Module, Trap, Val, ValRaw}; use anyhow::{anyhow, bail, Result}; use std::cell::UnsafeCell; -use std::collections::HashMap; use std::convert::TryFrom; use std::fmt; use std::future::Future; @@ -95,15 +96,16 @@ use std::sync::Arc; use std::task::{Context, Poll}; use wasmtime_runtime::{ InstanceAllocationRequest, InstanceAllocator, InstanceHandle, ModuleInfo, - OnDemandInstanceAllocator, SignalHandler, StorePtr, VMCallerCheckedFuncRef, VMContext, - VMExternRef, VMExternRefActivationsTable, VMRuntimeLimits, VMSharedSignatureIndex, - VMTrampoline, WasmFault, + OnDemandInstanceAllocator, SignalHandler, StorePtr, VMContext, VMExternRef, + VMExternRefActivationsTable, VMRuntimeLimits, WasmFault, }; mod context; pub use self::context::*; mod data; pub use self::data::*; +mod func_refs; +use func_refs::FuncRefs; /// A [`Store`] is a collection of WebAssembly instances and host-defined state. /// @@ -278,11 +280,8 @@ pub struct StoreOpaque { signal_handler: Option>>, externref_activations_table: VMExternRefActivationsTable, modules: ModuleRegistry, - - // See documentation on `StoreOpaque::lookup_trampoline` for what these - // fields are doing. 
- host_trampolines: HashMap, - host_func_trampolines_registered: usize, + func_refs: FuncRefs, + host_globals: Vec>, // Numbers of resources instantiated in this store, and their limits instance_count: usize, @@ -478,8 +477,8 @@ impl Store { signal_handler: None, externref_activations_table: VMExternRefActivationsTable::new(), modules: ModuleRegistry::default(), - host_trampolines: HashMap::default(), - host_func_trampolines_registered: 0, + func_refs: FuncRefs::default(), + host_globals: Vec::new(), instance_count: 0, instance_limit: crate::DEFAULT_INSTANCE_LIMIT, memory_count: 0, @@ -1179,6 +1178,22 @@ impl StoreOpaque { &mut self.modules } + pub(crate) fn func_refs(&mut self) -> &mut FuncRefs { + &mut self.func_refs + } + + pub(crate) fn fill_func_refs(&mut self) { + self.func_refs.fill(&mut self.modules); + } + + pub(crate) fn push_instance_pre_func_refs(&mut self, func_refs: Arc<[PrePatchedFuncRef]>) { + self.func_refs.push_instance_pre_func_refs(func_refs); + } + + pub(crate) fn host_globals(&mut self) -> &mut Vec> { + &mut self.host_globals + } + pub unsafe fn add_instance(&mut self, handle: InstanceHandle, ondemand: bool) -> InstanceId { self.instances.push(StoreInstance { handle: handle.clone(), @@ -1216,92 +1231,6 @@ impl StoreOpaque { unsafe { wasmtime_runtime::gc(&self.modules, &mut self.externref_activations_table) } } - /// Looks up the corresponding `VMTrampoline` which can be used to enter - /// wasm given an anyfunc function pointer. - /// - /// This is a somewhat complicated implementation at this time, unfortnately. - /// Trampolines are a sort of side-channel of information which is - /// specifically juggled by the `wasmtime` crate in a careful fashion. The - /// sources for trampolines are: - /// - /// * Compiled modules - each compiled module has a trampoline for all - /// signatures of functions that escape the module (e.g. exports and - /// `ref.func`-able functions) - /// * `Func::new` - host-defined functions with a dynamic signature get an - /// on-the-fly-compiled trampoline (e.g. JIT-compiled as part of the - /// `Func::new` call). - /// * `Func::wrap` - host-defined functions where the trampoline is - /// monomorphized in Rust and compiled by LLVM. - /// - /// The purpose of this function is that given some wasm function pointer we - /// need to find the trampoline for it. For compiled wasm modules this is - /// pretty easy, the code pointer of the function pointer will point us - /// at a wasm module which has a table of trampolines-by-type that we can - /// lookup. - /// - /// If this lookup fails, however, then we're trying to get the trampoline - /// for a wasm function pointer defined by the host. The trampoline isn't - /// actually stored in the wasm function pointer itself so we need - /// side-channels of information. To achieve this a lazy scheme is - /// implemented here based on the assumption that most trampoline lookups - /// happen for wasm-defined functions, not host-defined functions. - /// - /// The `Store` already has a list of all functions in - /// `self.store_data().funcs`, it's just not indexed in a nice fashion by - /// type index or similar. To solve this there's an internal map in each - /// store, `host_trampolines`, which maps from a type index to the - /// store-owned trampoline. The actual population of this map, however, is - /// deferred to this function itself. 
-    ///
-    /// Most of the time we are looking up a Wasm function's trampoline when
-    /// calling this function, and we don't want to make insertion of a host
-    /// function into the store more expensive than it has to be. We could
-    /// update the `host_trampolines` whenever a host function is inserted into
-    /// the store, but this is a relatively expensive hash map insertion.
-    /// Instead the work is deferred until we actually look up that trampoline
-    /// in this method.
-    ///
-    /// This all means that if the lookup of the trampoline fails within
-    /// `self.host_trampolines` we lazily populate `self.host_trampolines` by
-    /// iterating over `self.store_data().funcs`, inserting trampolines as we
-    /// go. If we find the right trampoline then it's returned.
-    pub fn lookup_trampoline(&mut self, anyfunc: &VMCallerCheckedFuncRef) -> VMTrampoline {
-        // First try to see if the `anyfunc` belongs to any module. Each module
-        // has its own map of trampolines-per-type-index and the code pointer in
-        // the `anyfunc` will enable us to quickly find a module.
-        if let Some(trampoline) = self.modules.lookup_trampoline(anyfunc) {
-            return trampoline;
-        }
-
-        // Next consult the list of store-local host trampolines. This is
-        // primarily populated by functions created by `Func::new` or similar
-        // creation functions, host-defined functions.
-        if let Some(trampoline) = self.host_trampolines.get(&anyfunc.type_index) {
-            return *trampoline;
-        }
-
-        // If no trampoline was found then it means that it hasn't been loaded
-        // into `host_trampolines` yet. Skip over all the ones we've looked at
-        // so far and start inserting into `self.host_trampolines`, returning
-        // the actual trampoline once found.
-        for f in self
-            .store_data
-            .funcs()
-            .skip(self.host_func_trampolines_registered)
-        {
-            self.host_func_trampolines_registered += 1;
-            self.host_trampolines.insert(f.sig_index(), f.trampoline());
-            if f.sig_index() == anyfunc.type_index {
-                return f.trampoline();
-            }
-        }
-
-        // If reached this is a bug in Wasmtime. Lookup of a trampoline should
-        // only happen for wasm functions or host functions, all of which should
-        // be indexed by the above.
-        panic!("trampoline missing")
-    }
-
     /// Yields the async context, assuming that we are executing on a fiber and
     /// that fiber is not in the process of dying. This function will return
     /// None in the latter case (the fiber is dying), and panic if
diff --git a/crates/wasmtime/src/store/data.rs b/crates/wasmtime/src/store/data.rs
index 22f45f8f1ad3..3d36f8420554 100644
--- a/crates/wasmtime/src/store/data.rs
+++ b/crates/wasmtime/src/store/data.rs
@@ -96,10 +96,6 @@ impl StoreData {
         true
     }
 
-    pub(crate) fn funcs(&self) -> impl Iterator<Item = &FuncData> {
-        self.funcs.iter()
-    }
-
     pub(crate) fn reserve_funcs(&mut self, count: usize) {
         self.funcs.reserve(count);
     }
diff --git a/crates/wasmtime/src/store/func_refs.rs b/crates/wasmtime/src/store/func_refs.rs
new file mode 100644
index 000000000000..f25d87a81597
--- /dev/null
+++ b/crates/wasmtime/src/store/func_refs.rs
@@ -0,0 +1,111 @@
+use crate::{instance::PrePatchedFuncRef, module::ModuleRegistry};
+use std::{ptr::NonNull, sync::Arc};
+use wasmtime_runtime::{VMCallerCheckedFuncRef, VMNativeCallHostFuncContext};
+
+/// An arena of `VMCallerCheckedFuncRef`s.
+///
+/// Allows a store to pin and own funcrefs so that it can patch in trampolines
+/// for `VMCallerCheckedFuncRef`s that are missing a `wasm_call` trampoline and
+/// need Wasm to supply it.
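+///
+/// A rough sketch of the intended flow (illustrative only, not a doctest;
+/// `push` and `fill` are the methods defined below, reached via the store's
+/// `func_refs`/`fill_func_refs` accessors, and `host_func_ref` is a
+/// hypothetical host-created funcref):
+///
+/// ```ignore
+/// // A host-created funcref starts with `wasm_call: None` because no
+/// // compiled module has supplied a trampoline for its type yet.
+/// let func_ref = unsafe { store.func_refs().push(host_func_ref) };
+///
+/// // Later, e.g. around instantiation, the store patches every remaining
+/// // hole whose type now has a wasm-to-native trampoline in some
+/// // registered module.
+/// store.fill_func_refs();
+/// ```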
+#[derive(Default)]
+pub struct FuncRefs {
+    /// A bump allocation arena where we allocate `VMCallerCheckedFuncRef`s such
+    /// that they are pinned and owned.
+    bump: SendSyncBump,
+
+    /// Pointers into `self.bump` for entries that need their `wasm_call` field
+    /// filled in.
+    with_holes: Vec<UnpatchedFuncRef>,
+
+    /// Pinned `VMCallerCheckedFuncRef`s that had their `wasm_call` field
+    /// pre-patched when constructing an `InstancePre`, and which we need to
+    /// keep alive for our owning store's lifetime.
+    instance_pre_func_refs: Vec<Arc<[PrePatchedFuncRef]>>,
+}
+
+use send_sync_bump::SendSyncBump;
+mod send_sync_bump {
+    #[derive(Default)]
+    pub struct SendSyncBump(bumpalo::Bump);
+
+    impl SendSyncBump {
+        pub fn alloc<T>(&mut self, val: T) -> &mut T {
+            self.0.alloc(val)
+        }
+    }
+
+    // Safety: We require `&mut self` on the only public method, which means it
+    // is safe to send `&SendSyncBump` references across threads because they
+    // can't actually do anything with it.
+    unsafe impl Sync for SendSyncBump {}
+}
+
+use unpatched_func_ref::UnpatchedFuncRef;
+mod unpatched_func_ref {
+    use super::*;
+
+    pub struct UnpatchedFuncRef(NonNull<VMCallerCheckedFuncRef>);
+
+    impl UnpatchedFuncRef {
+        /// Safety: Callers must ensure that the given `func_ref` and resulting
+        /// wrapped value are used in a `Send + Sync` compatible way.
+        pub unsafe fn new(func_ref: &VMCallerCheckedFuncRef) -> UnpatchedFuncRef {
+            debug_assert!(func_ref.wasm_call.is_none());
+            UnpatchedFuncRef(NonNull::from(func_ref))
+        }
+
+        pub fn func_ref(&self) -> NonNull<VMCallerCheckedFuncRef> {
+            self.0
+        }
+    }
+
+    // Safety: It is `UnpatchedFuncRef::new` callers' responsibility to uphold
+    // this.
+    unsafe impl Send for UnpatchedFuncRef {}
+    unsafe impl Sync for UnpatchedFuncRef {}
+}
+
+impl FuncRefs {
+    /// Push the given `VMCallerCheckedFuncRef` into this arena, returning a
+    /// pinned pointer to it.
+    ///
+    /// # Safety
+    ///
+    /// You may only access the return value on the same thread as this
+    /// `FuncRefs` and only while the store holding this `FuncRefs` exists.
+    pub unsafe fn push(
+        &mut self,
+        func_ref: VMCallerCheckedFuncRef,
+    ) -> NonNull<VMCallerCheckedFuncRef> {
+        debug_assert!(func_ref.wasm_call.is_none());
+        // Debug assert that the vmctx is a `VMNativeCallHostFuncContext` as
+        // that is the only kind that can have holes.
+        let _ = unsafe { VMNativeCallHostFuncContext::from_opaque(func_ref.vmctx) };
+
+        let func_ref = self.bump.alloc(func_ref);
+        self.with_holes.push(UnpatchedFuncRef::new(func_ref));
+        NonNull::from(func_ref)
+    }
+
+    /// Patch any `VMCallerCheckedFuncRef::wasm_call`s that need filling in.
+    pub fn fill(&mut self, modules: &ModuleRegistry) {
+        self.with_holes.retain_mut(|f| {
+            unsafe {
+                let func_ref = f.func_ref().as_mut();
+                debug_assert!(func_ref.wasm_call.is_none());
+
+                // Debug assert that the vmctx is a `VMNativeCallHostFuncContext` as
+                // that is the only kind that can have holes.
+                let _ = VMNativeCallHostFuncContext::from_opaque(func_ref.vmctx);
+
+                func_ref.wasm_call = modules.wasm_to_native_trampoline(func_ref.type_index);
+
+                // `retain_mut` keeps entries for which we return `true`, i.e.
+                // those whose hole was not filled by any module registered so
+                // far.
+                func_ref.wasm_call.is_none()
+            }
+        });
+    }
+
+    /// Push pre-patched `VMCallerCheckedFuncRef`s from an `InstancePre`.
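+    ///
+    /// These funcrefs already had their `wasm_call` holes patched when the
+    /// `InstancePre` was constructed, so they never go through `with_holes`;
+    /// they are only retained here so the pointers stay valid for the owning
+    /// store's entire lifetime.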
+    pub fn push_instance_pre_func_refs(&mut self, func_refs: Arc<[PrePatchedFuncRef]>) {
+        self.instance_pre_func_refs.push(func_refs);
+    }
+}
diff --git a/crates/wasmtime/src/trampoline.rs b/crates/wasmtime/src/trampoline.rs
index 94294f53ffeb..cc9435131305 100644
--- a/crates/wasmtime/src/trampoline.rs
+++ b/crates/wasmtime/src/trampoline.rs
@@ -5,19 +5,19 @@ mod global;
 mod memory;
 mod table;
 
+pub use self::func::*;
+pub use self::global::*;
 pub(crate) use memory::MemoryCreatorProxy;
 
-pub use self::func::*;
-use self::global::create_global;
 use self::memory::create_memory;
 use self::table::create_table;
 use crate::module::BareModuleInfo;
 use crate::store::{InstanceId, StoreOpaque};
-use crate::{GlobalType, MemoryType, TableType, Val};
+use crate::{MemoryType, TableType};
 use anyhow::Result;
 use std::any::Any;
 use std::sync::Arc;
-use wasmtime_environ::{GlobalIndex, MemoryIndex, Module, TableIndex};
+use wasmtime_environ::{MemoryIndex, Module, TableIndex};
 use wasmtime_runtime::{
     Imports, InstanceAllocationRequest, InstanceAllocator, OnDemandInstanceAllocator,
     SharedMemory, StorePtr, VMFunctionImport, VMSharedSignatureIndex,
@@ -54,17 +54,6 @@ fn create_handle(
     }
 }
 
-pub fn generate_global_export(
-    store: &mut StoreOpaque,
-    gt: &GlobalType,
-    val: Val,
-) -> Result<wasmtime_runtime::ExportGlobal> {
-    let instance = create_global(store, gt, val)?;
-    Ok(store
-        .instance_mut(instance)
-        .get_exported_global(GlobalIndex::from_u32(0)))
-}
-
 pub fn generate_memory_export(
     store: &mut StoreOpaque,
     m: &MemoryType,
diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs
index 587990759c1b..45c49bb82710 100644
--- a/crates/wasmtime/src/trampoline/func.rs
+++ b/crates/wasmtime/src/trampoline/func.rs
@@ -6,7 +6,7 @@ use std::panic::{self, AssertUnwindSafe};
 use std::ptr::NonNull;
 use wasmtime_jit::{CodeMemory, ProfilingAgent};
 use wasmtime_runtime::{
-    VMContext, VMHostFuncContext, VMOpaqueContext, VMSharedSignatureIndex, VMTrampoline,
+    VMArrayCallHostFuncContext, VMCallerCheckedFuncRef, VMContext, VMOpaqueContext,
 };
 
 struct TrampolineState<F> {
@@ -15,9 +15,16 @@
     code_memory: CodeMemory,
 }
 
-unsafe extern "C" fn stub_fn<F>(
+/// Shim to call a host-defined function that uses the array calling convention.
+///
+/// Together with `VMArrayCallHostFuncContext`, this implements the transition
+/// from a raw, non-closure function pointer to a Rust closure that associates
+/// data and function together.
+///
+/// Also shepherds panics and traps across Wasm.
+unsafe extern "C" fn array_call_shim<F>(
     vmctx: *mut VMOpaqueContext,
-    caller_vmctx: *mut VMContext,
+    caller_vmctx: *mut VMOpaqueContext,
     values_vec: *mut ValRaw,
     values_vec_len: usize,
 ) where
@@ -37,7 +44,7 @@
     // have any. To prevent leaks we avoid having any local destructors by
     // avoiding local variables.
     let result = panic::catch_unwind(AssertUnwindSafe(|| {
-        let vmctx = VMHostFuncContext::from_opaque(vmctx);
+        let vmctx = VMArrayCallHostFuncContext::from_opaque(vmctx);
         // Double-check ourselves in debug mode, but we control
         // the `Any` here so an unsafe downcast should also
         // work.
@@ -45,7 +52,7 @@
         debug_assert!(state.is::<TrampolineState<F>>());
         let state = &*(state as *const _ as *const TrampolineState<F>);
         let values_vec = std::slice::from_raw_parts_mut(values_vec, values_vec_len);
-        (state.func)(caller_vmctx, values_vec)
+        (state.func)(VMContext::from_opaque(caller_vmctx), values_vec)
     }));
 
     match result {
@@ -104,22 +111,26 @@ fn register_trampolines(profiler: &dyn ProfilingAgent, code: &CodeMemory) {
 }
 
 #[cfg(compiler)]
-pub fn create_function<F>(
+pub fn create_array_call_function<F>(
     ft: &FuncType,
     func: F,
     engine: &Engine,
-) -> Result<(Box<VMHostFuncContext>, VMSharedSignatureIndex, VMTrampoline)>
+) -> Result<Box<VMArrayCallHostFuncContext>>
 where
     F: Fn(*mut VMContext, &mut [ValRaw]) -> Result<()> + Send + Sync + 'static,
 {
+    use std::ptr;
+
     let mut obj = engine
         .compiler()
        .object(wasmtime_environ::ObjectKind::Module)?;
-    let (t1, t2) = engine.compiler().emit_trampoline_obj(
-        ft.as_wasm_func_type(),
-        stub_fn::<F> as usize,
-        &mut obj,
-    )?;
+    let (wasm_call_range, native_call_range) = engine
+        .compiler()
+        .emit_trampolines_for_array_call_host_func(
+            ft.as_wasm_func_type(),
+            array_call_shim::<F> as usize,
+            &mut obj,
+        )?;
     engine.append_bti(&mut obj);
     let obj = wasmtime_jit::ObjectBuilder::new(obj, &engine.config().tunables).finish()?;
 
@@ -134,19 +145,27 @@
     // we know their start/length.
     let text = code_memory.text();
-    let host_trampoline = text[t1.start as usize..][..t1.length as usize].as_ptr();
-    let wasm_trampoline = text[t2.start as usize..].as_ptr() as *mut _;
-    let wasm_trampoline = NonNull::new(wasm_trampoline).unwrap();
+
+    let array_call = array_call_shim::<F>;
+
+    let wasm_call = text[wasm_call_range.start as usize..].as_ptr() as *mut _;
+    let wasm_call = Some(NonNull::new(wasm_call).unwrap());
+
+    let native_call = text[native_call_range.start as usize..].as_ptr() as *mut _;
+    let native_call = NonNull::new(native_call).unwrap();
 
     let sig = engine.signatures().register(ft.as_wasm_func_type());
 
     unsafe {
-        let ctx = VMHostFuncContext::new(
-            wasm_trampoline,
-            sig,
+        Ok(VMArrayCallHostFuncContext::new(
+            VMCallerCheckedFuncRef {
+                array_call,
+                wasm_call,
+                native_call,
+                type_index: sig,
+                vmctx: ptr::null_mut(),
+            },
             Box::new(TrampolineState { func, code_memory }),
-        );
-        let host_trampoline = std::mem::transmute::<*const u8, VMTrampoline>(host_trampoline);
-        Ok((ctx, sig, host_trampoline))
+        ))
     }
 }
diff --git a/crates/wasmtime/src/trampoline/global.rs b/crates/wasmtime/src/trampoline/global.rs
index e84601bd81f2..8657e2f7ed13 100644
--- a/crates/wasmtime/src/trampoline/global.rs
+++ b/crates/wasmtime/src/trampoline/global.rs
@@ -1,81 +1,72 @@
-use crate::store::{InstanceId, StoreOpaque};
-use crate::trampoline::create_handle;
+use crate::store::StoreOpaque;
 use crate::{GlobalType, Mutability, Val};
-use anyhow::Result;
-use wasmtime_environ::{
-    AnyfuncIndex, EntityIndex, Global, GlobalInit, Module, ModuleType, SignatureIndex,
-};
-use wasmtime_runtime::VMFunctionImport;
+use std::ptr;
+use wasmtime_environ::GlobalInit;
+use wasmtime_runtime::VMGlobalDefinition;
 
-pub fn create_global(store: &mut StoreOpaque, gt: &GlobalType, val: Val) -> Result<InstanceId> {
-    let mut module = Module::new();
-    let mut func_imports = Vec::new();
-    let mut externref_init = None;
-    let mut one_signature = None;
+#[repr(C)]
+pub struct VMHostGlobalContext {
+    ty: GlobalType,
+    global: VMGlobalDefinition,
+}
 
-    let global = Global {
-        wasm_ty: gt.content().to_wasm_type(),
-        mutability: match gt.mutability() {
-            Mutability::Const => false,
-            Mutability::Var => true,
-        },
-        initializer: match val {
-            Val::I32(i) =>
GlobalInit::I32Const(i), - Val::I64(i) => GlobalInit::I64Const(i), - Val::F32(f) => GlobalInit::F32Const(f), - Val::F64(f) => GlobalInit::F64Const(f), - Val::V128(i) => GlobalInit::V128Const(i.into()), - Val::ExternRef(None) | Val::FuncRef(None) => GlobalInit::RefNullConst, - Val::ExternRef(Some(x)) => { - // There is no `GlobalInit` variant for using an existing - // `externref` that isn't an import (because Wasm can't create - // an `externref` by itself). Therefore, initialize the global - // as null, and then monkey patch it after instantiation below. - externref_init = Some(x); - GlobalInit::RefNullConst +impl Drop for VMHostGlobalContext { + fn drop(&mut self) { + match self.ty.content() { + crate::ValType::I32 + | crate::ValType::I64 + | crate::ValType::F32 + | crate::ValType::F64 + | crate::ValType::V128 + | crate::ValType::FuncRef => { + // Nothing to drop. } - Val::FuncRef(Some(f)) => { - // Add a function import to the stub module, and then initialize - // our global with a `ref.func` to grab that imported function. - let f = f.caller_checked_anyfunc(store); - let f = unsafe { f.as_ref() }; - let sig_id = SignatureIndex::from_u32(0); - one_signature = Some(f.type_index); - module.types.push(ModuleType::Function(sig_id)); - let func_index = module.push_escaped_function(sig_id, AnyfuncIndex::from_u32(0)); - module.num_imported_funcs = 1; - module.num_escaped_funcs = 1; - module - .initializers - .push(wasmtime_environ::Initializer::Import { - name: "".into(), - field: "".into(), - index: EntityIndex::Function(func_index), - }); + crate::ValType::ExternRef => unsafe { + ptr::drop_in_place(self.global.as_externref_mut()) + }, + } + } +} - func_imports.push(VMFunctionImport { - body: f.func_ptr, - vmctx: f.vmctx, - }); +pub fn generate_global_export( + store: &mut StoreOpaque, + ty: GlobalType, + val: Val, +) -> wasmtime_runtime::ExportGlobal { + let mut ctx = Box::new(VMHostGlobalContext { + ty, + global: VMGlobalDefinition::new(), + }); - GlobalInit::RefFunc(func_index) + unsafe { + match val { + Val::I32(x) => *ctx.global.as_i32_mut() = x, + Val::I64(x) => *ctx.global.as_i64_mut() = x, + Val::F32(x) => *ctx.global.as_f32_bits_mut() = x, + Val::F64(x) => *ctx.global.as_f64_bits_mut() = x, + Val::V128(x) => *ctx.global.as_u128_mut() = x, + Val::FuncRef(f) => { + *ctx.global.as_anyfunc_mut() = f.map_or(ptr::null_mut(), |f| { + f.caller_checked_func_ref(store).as_ptr() + }) } - }, - }; - - let global_id = module.globals.push(global); - module - .exports - .insert(String::new(), EntityIndex::Global(global_id)); - let id = create_handle(module, store, Box::new(()), &func_imports, one_signature)?; - - if let Some(x) = externref_init { - let instance = store.instance_mut(id); - let g = instance.get_exported_global(global_id); - unsafe { - *(*g.definition).as_externref_mut() = Some(x.inner); + Val::ExternRef(x) => *ctx.global.as_externref_mut() = x.map(|x| x.inner), } } - Ok(id) + let ret = wasmtime_runtime::ExportGlobal { + definition: &mut ctx.global as *mut _, + global: wasmtime_environ::Global { + wasm_ty: ctx.ty.content().to_wasm_type(), + mutability: match ctx.ty.mutability() { + Mutability::Const => false, + Mutability::Var => true, + }, + // TODO: This is just a dummy value; nothing should actually read + // this. We should probably remove this field from the struct. 
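+            // Host-defined globals are read and written through `definition`
+            // above, which points directly at `ctx.global`, so this constant
+            // is never consulted in practice.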
+            initializer: GlobalInit::I32Const(0),
+        },
+    };
+    store.host_globals().push(ctx);
+    ret
+}
diff --git a/crates/wasmtime/src/values.rs b/crates/wasmtime/src/values.rs
index 377c309249a2..bba872f35769 100644
--- a/crates/wasmtime/src/values.rs
+++ b/crates/wasmtime/src/values.rs
@@ -195,7 +195,7 @@ impl Val {
                     bail!("cross-`Store` values are not supported in tables");
                 }
                 Ok(TableElement::FuncRef(
-                    f.caller_checked_anyfunc(store).as_ptr(),
+                    f.caller_checked_func_ref(store).as_ptr(),
                 ))
             }
             (Val::FuncRef(None), ValType::FuncRef) => Ok(TableElement::FuncRef(ptr::null_mut())),
diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs
index 6d18f7194b12..c2eab1b9aaff 100644
--- a/crates/winch/src/compiler.rs
+++ b/crates/winch/src/compiler.rs
@@ -76,21 +76,33 @@ impl wasmtime_environ::Compiler for Compiler {
         ))
     }
 
-    fn compile_host_to_wasm_trampoline(
+    fn compile_array_to_wasm_trampoline(
         &self,
-        ty: &wasmtime_environ::WasmFuncType,
+        translation: &ModuleTranslation<'_>,
+        types: &ModuleTypes,
+        index: DefinedFuncIndex,
     ) -> Result<Box<dyn Any + Send>, CompileError> {
-        let wasm_ty = wasmparser::FuncType::new(
-            ty.params().iter().copied().map(Into::into),
-            ty.returns().iter().copied().map(Into::into),
-        );
+        let _ = (translation, types, index);
+        todo!()
+    }
 
-        let buffer = self
-            .isa
-            .host_to_wasm_trampoline(&wasm_ty)
-            .map_err(|e| CompileError::Codegen(format!("{:?}", e)))?;
+    fn compile_native_to_wasm_trampoline(
+        &self,
+        translation: &ModuleTranslation<'_>,
+        types: &ModuleTypes,
+        index: DefinedFuncIndex,
+    ) -> Result<Box<dyn Any + Send>, CompileError> {
+        let _ = (translation, types, index);
+        todo!()
+    }
 
-        Ok(Box::new(CompiledFunction(buffer)))
+    fn compile_wasm_to_native_trampoline(
+        &self,
+        translation: &ModuleTranslation<'_>,
+        wasm_func_ty: &wasmtime_environ::WasmFuncType,
+    ) -> Result<Box<dyn Any + Send>, CompileError> {
+        let _ = (translation, wasm_func_ty);
+        todo!()
     }
 
     fn append_code(
@@ -133,12 +145,15 @@
         Ok(ret)
     }
 
-    fn emit_trampoline_obj(
+    fn emit_trampolines_for_array_call_host_func(
         &self,
-        _ty: &wasmtime_environ::WasmFuncType,
-        _host_fn: usize,
-        _obj: &mut wasmtime_environ::object::write::Object<'static>,
+        ty: &wasmtime_environ::WasmFuncType,
+        // Actually `host_fn: VMArrayCallFunction` but that type is not
+        // available in `wasmtime-environ`.
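+        // The generated trampolines are expected to embed this raw address as
+        // their call target, which is why a plain `usize` suffices here.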
+ host_fn: usize, + obj: &mut Object<'static>, ) -> Result<(FunctionLoc, FunctionLoc)> { + drop((ty, host_fn, obj)); todo!() } diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock index d18118ac367a..ddb79f2d6ca1 100644 --- a/supply-chain/imports.lock +++ b/supply-chain/imports.lock @@ -1,6 +1,13 @@ # cargo-vet imports lock +[[publisher.bumpalo]] +version = "3.12.0" +when = "2023-01-17" +user-id = 696 +user-login = "fitzgen" +user-name = "Nick Fitzgerald" + [[publisher.wasm-mutate]] version = "0.2.23" when = "2023-04-13" diff --git a/tests/all/async_functions.rs b/tests/all/async_functions.rs index f2c5e74ec79e..fdb26f247955 100644 --- a/tests/all/async_functions.rs +++ b/tests/all/async_functions.rs @@ -539,6 +539,7 @@ async fn resume_separate_thread3() { #[tokio::test] async fn recursive_async() -> Result<()> { + let _ = env_logger::try_init(); let mut store = async_store(); let m = Module::new( store.engine(), diff --git a/tests/all/call_hook.rs b/tests/all/call_hook.rs index 4dc7417342c9..f5c61fdf4560 100644 --- a/tests/all/call_hook.rs +++ b/tests/all/call_hook.rs @@ -92,7 +92,7 @@ fn call_wrapped_func() -> Result<(), Error> { Val::F32(3.0f32.to_bits()).to_raw(&mut store), Val::F64(4.0f64.to_bits()).to_raw(&mut store), ]; - f.call_unchecked(&mut store, args.as_mut_ptr())?; + f.call_unchecked(&mut store, args.as_mut_ptr(), args.len())?; } n += 1; diff --git a/tests/all/func.rs b/tests/all/func.rs index 30bc0c95b615..a1d37f174f82 100644 --- a/tests/all/func.rs +++ b/tests/all/func.rs @@ -3,6 +3,368 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst}; use std::sync::Arc; use wasmtime::*; +#[test] +fn call_wasm_to_wasm() -> Result<()> { + let wasm = wat::parse_str( + r#" + (module + (func (result i32 i32 i32) + i32.const 1 + i32.const 2 + i32.const 3 + ) + (func (export "run") (result i32 i32 i32) + call 0 + ) + ) + "#, + )?; + let mut store = Store::<()>::default(); + let module = Module::new(store.engine(), &wasm)?; + let instance = Instance::new(&mut store, &module, &[])?; + let func = instance + .get_typed_func::<(), (i32, i32, i32)>(&mut store, "run") + .unwrap(); + let results = func.call(&mut store, ())?; + assert_eq!(results, (1, 2, 3)); + Ok(()) +} + +#[test] +fn call_wasm_to_native() -> Result<()> { + let wasm = wat::parse_str( + r#" + (module + (import "" "" (func (result i32 i32 i32))) + (func (export "run") (result i32 i32 i32) + call 0 + ) + ) + "#, + )?; + let mut store = Store::<()>::default(); + let module = Module::new(store.engine(), &wasm)?; + let import_func = Func::wrap(&mut store, || (1_i32, 2_i32, 3_i32)); + let instance = Instance::new(&mut store, &module, &[import_func.into()])?; + let func = instance + .get_typed_func::<(), (i32, i32, i32)>(&mut store, "run") + .unwrap(); + let results = func.call(&mut store, ())?; + assert_eq!(results, (1, 2, 3)); + Ok(()) +} + +#[test] +fn call_wasm_to_array() -> Result<()> { + let wasm = wat::parse_str( + r#" + (module + (import "" "" (func (result i32 i32 i32))) + (func (export "run") (result i32 i32 i32) + call 0 + ) + ) + "#, + )?; + let mut store = Store::<()>::default(); + let module = Module::new(store.engine(), &wasm)?; + let import_func = Func::new( + &mut store, + FuncType::new(vec![], vec![ValType::I32, ValType::I32, ValType::I32]), + |_, _params, results| { + results[0] = Val::I32(1); + results[1] = Val::I32(2); + results[2] = Val::I32(3); + Ok(()) + }, + ); + let instance = Instance::new(&mut store, &module, &[import_func.into()])?; + let func = instance + .get_typed_func::<(), 
(i32, i32, i32)>(&mut store, "run") + .unwrap(); + let results = func.call(&mut store, ())?; + assert_eq!(results, (1, 2, 3)); + Ok(()) +} + +#[test] +fn call_native_to_wasm() -> Result<()> { + let wasm = wat::parse_str( + r#" + (module + (func (export "run") (result i32 i32 i32) + i32.const 42 + i32.const 420 + i32.const 4200 + ) + ) + "#, + )?; + let mut store = Store::<()>::default(); + let module = Module::new(store.engine(), &wasm)?; + let instance = Instance::new(&mut store, &module, &[])?; + let func = instance + .get_typed_func::<(), (i32, i32, i32)>(&mut store, "run") + .unwrap(); + let results = func.call(&mut store, ())?; + assert_eq!(results, (42, 420, 4200)); + Ok(()) +} + +#[test] +fn call_native_to_native() -> Result<()> { + let mut store = Store::<()>::default(); + + let func = Func::wrap(&mut store, |a: i32, b: i32, c: i32| -> (i32, i32, i32) { + (b, c, a) + }); + let func = func.typed::<(i32, i32, i32), (i32, i32, i32)>(&store)?; + let results = func.call(&mut store, (1, 2, 3))?; + assert_eq!(results, (2, 3, 1)); + Ok(()) +} + +#[test] +fn call_native_to_array() -> Result<()> { + let mut store = Store::<()>::default(); + + let func = Func::new( + &mut store, + FuncType::new( + [ValType::I32, ValType::I32, ValType::I32], + [ValType::I32, ValType::I32, ValType::I32], + ), + |_caller, params, results| { + results[0] = params[2].clone(); + results[1] = params[0].clone(); + results[2] = params[1].clone(); + Ok(()) + }, + ); + let func = func.typed::<(i32, i32, i32), (i32, i32, i32)>(&store)?; + let results = func.call(&mut store, (1, 2, 3))?; + assert_eq!(results, (3, 1, 2)); + Ok(()) +} + +#[test] +fn call_array_to_wasm() -> Result<()> { + let wasm = wat::parse_str( + r#" + (module + (func (export "run") (param i32 i32 i32) (result i32 i32 i32) + local.get 1 + local.get 2 + local.get 0 + ) + ) + "#, + )?; + let mut store = Store::<()>::default(); + let module = Module::new(store.engine(), &wasm)?; + let instance = Instance::new(&mut store, &module, &[])?; + let func = instance.get_func(&mut store, "run").unwrap(); + let mut results = [Val::I32(0), Val::I32(0), Val::I32(0)]; + func.call( + &mut store, + &[Val::I32(10), Val::I32(20), Val::I32(30)], + &mut results, + )?; + assert_eq!(results[0].i32(), Some(20)); + assert_eq!(results[1].i32(), Some(30)); + assert_eq!(results[2].i32(), Some(10)); + Ok(()) +} + +#[test] +fn call_array_to_native() -> Result<()> { + let mut store = Store::<()>::default(); + let func = Func::wrap(&mut store, |a: i32, b: i32, c: i32| -> (i32, i32, i32) { + (a * 10, b * 10, c * 10) + }); + let mut results = [Val::I32(0), Val::I32(0), Val::I32(0)]; + func.call( + &mut store, + &[Val::I32(10), Val::I32(20), Val::I32(30)], + &mut results, + )?; + assert_eq!(results[0].i32(), Some(100)); + assert_eq!(results[1].i32(), Some(200)); + assert_eq!(results[2].i32(), Some(300)); + Ok(()) +} + +#[test] +fn call_array_to_array() -> Result<()> { + let mut store = Store::<()>::default(); + let func = Func::new( + &mut store, + FuncType::new( + [ValType::I32, ValType::I32, ValType::I32], + [ValType::I32, ValType::I32, ValType::I32], + ), + |_caller, params, results| { + results[0] = params[2].clone(); + results[1] = params[0].clone(); + results[2] = params[1].clone(); + Ok(()) + }, + ); + let mut results = [Val::I32(0), Val::I32(0), Val::I32(0)]; + func.call( + &mut store, + &[Val::I32(10), Val::I32(20), Val::I32(30)], + &mut results, + )?; + assert_eq!(results[0].i32(), Some(30)); + assert_eq!(results[1].i32(), Some(10)); + assert_eq!(results[2].i32(), Some(20)); + 
Ok(())
+}
+
+#[test]
+fn call_indirect_native_from_wasm_import_global() -> Result<()> {
+    let wasm = wat::parse_str(
+        r#"
+            (module
+                (import "" "" (global funcref))
+                (table 1 1 funcref)
+                (func (export "run") (result i32 i32 i32)
+                    i32.const 0
+                    global.get 0
+                    table.set
+                    i32.const 0
+                    call_indirect (result i32 i32 i32)
+                )
+            )
+        "#,
+    )?;
+    let mut store = Store::<()>::default();
+    let module = Module::new(store.engine(), &wasm)?;
+    let func = Func::wrap(&mut store, || -> (i32, i32, i32) { (10, 20, 30) });
+    let global = Global::new(
+        &mut store,
+        GlobalType::new(ValType::FuncRef, Mutability::Const),
+        Val::FuncRef(Some(func)),
+    )?;
+    let instance = Instance::new(&mut store, &module, &[global.into()])?;
+    let func = instance.get_typed_func::<(), (i32, i32, i32)>(&mut store, "run")?;
+    let results = func.call(&mut store, ())?;
+    assert_eq!(results, (10, 20, 30));
+    Ok(())
+}
+
+#[test]
+fn call_indirect_native_from_wasm_import_table() -> Result<()> {
+    let wasm = wat::parse_str(
+        r#"
+            (module
+                (import "" "" (table 1 1 funcref))
+                (func (export "run") (result i32 i32 i32)
+                    i32.const 0
+                    call_indirect (result i32 i32 i32)
+                )
+            )
+        "#,
+    )?;
+    let mut store = Store::<()>::default();
+    let module = Module::new(store.engine(), &wasm)?;
+    let func = Func::wrap(&mut store, || -> (i32, i32, i32) { (10, 20, 30) });
+    let table = Table::new(
+        &mut store,
+        TableType::new(ValType::FuncRef, 1, Some(1)),
+        Val::FuncRef(Some(func)),
+    )?;
+    let instance = Instance::new(&mut store, &module, &[table.into()])?;
+    let func = instance.get_typed_func::<(), (i32, i32, i32)>(&mut store, "run")?;
+    let results = func.call(&mut store, ())?;
+    assert_eq!(results, (10, 20, 30));
+    Ok(())
+}
+
+#[test]
+fn call_indirect_native_from_wasm_import_func_returns_funcref() -> Result<()> {
+    let wasm = wat::parse_str(
+        r#"
+            (module
+                (import "" "" (func (result funcref)))
+                (table 1 1 funcref)
+                (func (export "run") (result i32 i32 i32)
+                    i32.const 0
+                    call 0
+                    table.set
+                    i32.const 0
+                    call_indirect (result i32 i32 i32)
+                )
+            )
+        "#,
+    )?;
+    let mut store = Store::<()>::default();
+    let module = Module::new(store.engine(), &wasm)?;
+    let func = Func::wrap(&mut store, || -> (i32, i32, i32) { (10, 20, 30) });
+    let get_func = Func::wrap(&mut store, move || -> Option<Func> { Some(func) });
+    let instance = Instance::new(&mut store, &module, &[get_func.into()])?;
+    let func = instance.get_typed_func::<(), (i32, i32, i32)>(&mut store, "run")?;
+    let results = func.call(&mut store, ())?;
+    assert_eq!(results, (10, 20, 30));
+    Ok(())
+}
+
+#[test]
+fn call_indirect_native_from_exported_table() -> Result<()> {
+    let wasm = wat::parse_str(
+        r#"
+            (module
+                (table (export "table") 1 1 funcref)
+                (func (export "run") (result i32 i32 i32)
+                    i32.const 0
+                    call_indirect (result i32 i32 i32)
+                )
+            )
+        "#,
+    )?;
+    let mut store = Store::<()>::default();
+    let module = Module::new(store.engine(), &wasm)?;
+    let func = Func::wrap(&mut store, || -> (i32, i32, i32) { (10, 20, 30) });
+    let instance = Instance::new(&mut store, &module, &[])?;
+    let table = instance.get_table(&mut store, "table").unwrap();
+    table.set(&mut store, 0, func.into())?;
+    let run = instance.get_typed_func::<(), (i32, i32, i32)>(&mut store, "run")?;
+    let results = run.call(&mut store, ())?;
+    assert_eq!(results, (10, 20, 30));
+    Ok(())
+}
+
+// wasm exports global, host puts native-call funcref in global, wasm calls funcref
+#[test]
+fn call_indirect_native_from_exported_global() -> Result<()> {
+    let wasm = wat::parse_str(
+        r#"
+            (module
+                (global (export
"global") (mut funcref) (ref.null func)) + (table 1 1 funcref) + (func (export "run") (result i32 i32 i32) + i32.const 0 + global.get 0 + table.set + i32.const 0 + call_indirect (result i32 i32 i32) + ) + ) + "#, + )?; + let mut store = Store::<()>::default(); + let module = Module::new(store.engine(), &wasm)?; + let func = Func::wrap(&mut store, || -> (i32, i32, i32) { (10, 20, 30) }); + let instance = Instance::new(&mut store, &module, &[])?; + let global = instance.get_global(&mut store, "global").unwrap(); + global.set(&mut store, func.into())?; + let run = instance.get_typed_func::<(), (i32, i32, i32)>(&mut store, "run")?; + let results = run.call(&mut store, ())?; + assert_eq!(results, (10, 20, 30)); + Ok(()) +} + #[test] fn func_constructors() { let mut store = Store::<()>::default(); @@ -449,6 +811,8 @@ fn func_write_nothing() -> anyhow::Result<()> { #[test] fn return_cross_store_value() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let wasm = wat::parse_str( r#" (import "" "" (func (result funcref))) diff --git a/tests/all/traps.rs b/tests/all/traps.rs index 77dcf38654c3..84d8fe587b56 100644 --- a/tests/all/traps.rs +++ b/tests/all/traps.rs @@ -1365,3 +1365,106 @@ fn wasm_fault_address_reported_by_default() -> Result<()> { ); Ok(()) } + +#[test] +fn trap_with_array_to_wasm_stack_args() -> Result<()> { + let engine = Engine::default(); + let mut store = Store::new(&engine, ()); + let module = Module::new( + &engine, + r#" + (module + (func $trap + unreachable) + (func $run (param i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64) + call $trap) + (export "run" (func $run)) + ) + "#, + )?; + + let instance = Instance::new(&mut store, &module, &[])?; + let run = instance.get_func(&mut store, "run").unwrap(); + + let err = run + .call( + &mut store, + &[ + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + Val::I64(0), + ], + &mut [], + ) + .unwrap_err(); + assert!(err.is::()); + + let trace = err.downcast_ref::().unwrap(); + assert_eq!(trace.frames().len(), 2); + assert_eq!(trace.frames()[0].func_name(), Some("trap")); + assert_eq!(trace.frames()[1].func_name(), Some("run")); + + Ok(()) +} + +#[test] +fn trap_with_native_to_wasm_stack_args() -> Result<()> { + let engine = Engine::default(); + let mut store = Store::new(&engine, ()); + let module = Module::new( + &engine, + r#" + (module + (func $trap + unreachable) + (func $run (param i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64 i64) + call $trap) + (export "run" (func $run)) + ) + "#, + )?; + + let instance = Instance::new(&mut store, &module, &[])?; + let run = instance.get_func(&mut store, "run").unwrap(); + + let err = run + .typed::<( + i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64, + ), ()>(&mut store)? 
+        .call(&mut store, (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
+        .unwrap_err();
+    assert!(err.is::<Trap>());
+
+    let trace = err.downcast_ref::<WasmBacktrace>().unwrap();
+    assert_eq!(trace.frames().len(), 2);
+    assert_eq!(trace.frames()[0].func_name(), Some("trap"));
+    assert_eq!(trace.frames()[1].func_name(), Some("run"));
+
+    Ok(())
+}
diff --git a/tests/all/winch.rs b/tests/all/winch.rs
index 289a69260f73..5d993accfdae 100644
--- a/tests/all/winch.rs
+++ b/tests/all/winch.rs
@@ -2,6 +2,7 @@ use anyhow::Result;
 use wasmtime::*;
 
 #[test]
+#[ignore]
 fn compiles_with_winch() -> Result<()> {
     let mut c = Config::new();
 
@@ -41,6 +42,7 @@ fn compiles_with_winch() -> Result<()> {
 }
 
 #[test]
+#[ignore]
 fn compiles_with_winch_stack_arguments() -> Result<()> {
     let mut c = Config::new();
 
diff --git a/winch/codegen/src/trampoline.rs b/winch/codegen/src/trampoline.rs
index 3fdf187c10f5..3d579efe195f 100644
--- a/winch/codegen/src/trampoline.rs
+++ b/winch/codegen/src/trampoline.rs
@@ -59,12 +59,12 @@ where
 
     /// Emit the host to wasm trampoline.
     pub fn emit_host_to_wasm(&mut self, ty: &FuncType) {
-        // The host to wasm trampoline is currently hard coded (see
-        // vmcontext.rs in the wasmtime-runtime crate, VMTrampoline).
-        // The first two parameters are VMContexts (not used at this
-        // time). The third parameter is the function pointer to call.
-        // The fourth parameter is an address to storage space for
-        // both the return value and the arguments to the function.
+        // The host to wasm trampoline is currently hard coded (see vmcontext.rs
+        // in the wasmtime-runtime crate, `VMArrayCallFunction`). The first two
+        // parameters are VMContexts (not used at this time). The third
+        // parameter is the function pointer to call. The fourth parameter is
+        // an address to storage space for both the return value and the
+        // arguments to the function.
        let trampoline_ty = FuncType::new(
            vec![ValType::I64, ValType::I64, ValType::I64, ValType::I64],
            vec![],