From aefede4552c58bae779c661fdc7c7b83a2bcdaac Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Mon, 27 Sep 2021 20:31:41 +0000 Subject: [PATCH] Bug 1731856 - Prototype relaxed-SIMD min/max instructions. r=lth See https://github.com/WebAssembly/relaxed-simd/issues/33 Differential Revision: https://phabricator.services.mozilla.com/D126390 --- js/src/jit-test/lib/wasm-binary.js | 4 + .../jit-test/tests/wasm/simd/experimental.js | 85 ++++++++++++++++++- js/src/jit/MacroAssembler.h | 24 ++++++ js/src/jit/arm64/CodeGenerator-arm64.cpp | 12 +++ js/src/jit/arm64/MacroAssembler-arm64-inl.h | 40 +++++++++ .../x86-shared/CodeGenerator-x86-shared.cpp | 12 +++ .../MacroAssembler-x86-shared-inl.h | 20 +++++ js/src/wasm/WasmBaselineCompile.cpp | 37 ++++++++ js/src/wasm/WasmConstants.h | 8 +- js/src/wasm/WasmIonCompile.cpp | 9 ++ js/src/wasm/WasmOpIter.cpp | 4 + js/src/wasm/WasmValidate.cpp | 9 ++ 12 files changed, 259 insertions(+), 5 deletions(-) diff --git a/js/src/jit-test/lib/wasm-binary.js b/js/src/jit-test/lib/wasm-binary.js index 7ae0be157d537..8be4ec4014ea7 100644 --- a/js/src/jit-test/lib/wasm-binary.js +++ b/js/src/jit-test/lib/wasm-binary.js @@ -137,6 +137,10 @@ const F32x4RelaxedFmaCode = 0xaf; const F32x4RelaxedFmsCode = 0xb0; const F64x2RelaxedFmaCode = 0xcf; const F64x2RelaxedFmsCode = 0xd0; +const F32x4RelaxedMin = 0xb4; +const F32x4RelaxedMax = 0xe2; +const F64x2RelaxedMin = 0xd4; +const F64x2RelaxedMax = 0xee; // SIMD wormhole opcodes. 
const WORMHOLE_SELFTEST = 0; diff --git a/js/src/jit-test/tests/wasm/simd/experimental.js b/js/src/jit-test/tests/wasm/simd/experimental.js index 94f95778a857b..3d7c5fcda07a7 100644 --- a/js/src/jit-test/tests/wasm/simd/experimental.js +++ b/js/src/jit-test/tests/wasm/simd/experimental.js @@ -10,6 +10,11 @@ function wasmEval(bytes, imports) { return new WebAssembly.Instance(new WebAssembly.Module(bytes), imports); } +function wasmValidateAndEval(bytes, imports) { + assertEq(WebAssembly.validate(bytes), true, "test of WasmValidate.cpp"); + return wasmEval(bytes, imports); +} + function get(arr, loc, len) { let res = []; for ( let i=0; i < len; i++ ) { @@ -82,7 +87,7 @@ for ( let [opcode, as, xs, ys, operator] of [[F32x4RelaxedFmaCode, fas, fxs, fys var k = xs.length; var ans = iota(k).map((i) => operator(as[i], xs[i], ys[i])) - var ins = wasmEval(moduleWithSections([ + var ins = wasmValidateAndEval(moduleWithSections([ sigSection([v2vSig]), declSection([0]), memorySection(1), @@ -102,4 +107,82 @@ for ( let [opcode, as, xs, ys, operator] of [[F32x4RelaxedFmaCode, fas, fxs, fys ins.exports.run(); var result = get(mem, 0, k); assertSame(result, ans); + + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(0), + ...V128Load(0), + SimdPrefix, varU32(opcode)])]})])]))); +} + + +// Relaxed MIN/MAX, https://github.com/WebAssembly/relaxed-simd/issues/33 + +const Neg0 = -1/Infinity; +var minMaxTests = [ + {a: 0, b: 0, min: 0, max: 0, }, + {a: Neg0, b: Neg0, min: Neg0, max: Neg0, }, + {a: 1/3, b: 2/3, min: 1/3, max: 2/3, }, + {a: -1/3, b: -2/3, min: -2/3, max: -1/3, }, + {a: -1000, b: 1, min: -1000, max: 1, }, + {a: 10, b: -2, min: -2, max: 10, }, +]; + +for (let k of [4, 2]) { + const minOpcode = k == 4 ? 
F32x4RelaxedMin : F64x2RelaxedMin; + const maxOpcode = k == 4 ? F32x4RelaxedMax : F64x2RelaxedMax; + + var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0, 0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "min"}, + {funcIndex: 1, name: "max"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(minOpcode)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(maxOpcode)])]})])])); + for (let i = 0; i < minMaxTests.length; i++) { + var Ty = k == 4 ? Float32Array : Float64Array; + var mem = new Ty(ins.exports.mem.buffer); + var minResult = new Ty(k); + var maxResult = new Ty(k); + for (let j = 0; j < k; j++) { + const {a, b, min, max } = minMaxTests[(j + i) % minMaxTests.length]; + mem[j + k] = a; + mem[j + k * 2] = b; + minResult[j] = min; + maxResult[j] = max; + } + ins.exports.min(); + var result = get(mem, 0, k); + assertSame(result, minResult); + ins.exports.max(); + var result = get(mem, 0, k); + assertSame(result, maxResult); + } + + for (let op of [minOpcode, maxOpcode]) { + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0, 0]), + memorySection(1), + exportSection([]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(0), + SimdPrefix, varU32(op)])]})])]))); + } } diff --git a/js/src/jit/MacroAssembler.h b/js/src/jit/MacroAssembler.h index 4a7046bbadfa2..b484d866e424c 100644 --- a/js/src/jit/MacroAssembler.h +++ b/js/src/jit/MacroAssembler.h @@ -3485,6 +3485,30 @@ class MacroAssembler : public MacroAssemblerSpecific { inline void fmsFloat64x2(FloatRegister src1, FloatRegister src2, FloatRegister srcDest) DEFINED_ON(x86_shared, arm64); + inline void minFloat32x4Relaxed(FloatRegister src, FloatRegister srcDest) + DEFINED_ON(x86_shared, arm64); + + inline void 
minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) DEFINED_ON(arm64); + + inline void maxFloat32x4Relaxed(FloatRegister src, FloatRegister srcDest) + DEFINED_ON(x86_shared, arm64); + + inline void maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) DEFINED_ON(arm64); + + inline void minFloat64x2Relaxed(FloatRegister src, FloatRegister srcDest) + DEFINED_ON(x86_shared, arm64); + + inline void minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) DEFINED_ON(arm64); + + inline void maxFloat64x2Relaxed(FloatRegister src, FloatRegister srcDest) + DEFINED_ON(x86_shared, arm64); + + inline void maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) DEFINED_ON(arm64); + public: // ======================================================================== // Truncate floating point. diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp index 4434aacfc4e8d..3d7b36ea978bf 100644 --- a/js/src/jit/arm64/CodeGenerator-arm64.cpp +++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp @@ -3372,6 +3372,18 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) { case wasm::SimdOp::I16x8Q15MulrSatS: masm.q15MulrSatInt16x8(lhs, rhs, dest); break; + case wasm::SimdOp::F32x4RelaxedMin: + masm.minFloat32x4Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4RelaxedMax: + masm.maxFloat32x4Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2RelaxedMin: + masm.minFloat64x2Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2RelaxedMax: + masm.maxFloat64x2Relaxed(lhs, rhs, dest); + break; default: MOZ_CRASH("Binary SimdOp not implemented"); } diff --git a/js/src/jit/arm64/MacroAssembler-arm64-inl.h b/js/src/jit/arm64/MacroAssembler-arm64-inl.h index bf014eb97a8ef..bfa68046131aa 100644 --- a/js/src/jit/arm64/MacroAssembler-arm64-inl.h +++ b/js/src/jit/arm64/MacroAssembler-arm64-inl.h @@ -3849,6 +3849,46 @@ void 
MacroAssembler::fmsFloat64x2(FloatRegister src1, FloatRegister src2, Fmls(Simd2D(srcDest), Simd2D(src1), Simd2D(src2)); } +void MacroAssembler::minFloat32x4Relaxed(FloatRegister src, + FloatRegister srcDest) { + Fmin(Simd4S(srcDest), Simd4S(src), Simd4S(srcDest)); +} + +void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmin(Simd4S(dest), Simd4S(rhs), Simd4S(lhs)); +} + +void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src, + FloatRegister srcDest) { + Fmax(Simd4S(srcDest), Simd4S(src), Simd4S(srcDest)); +} + +void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmax(Simd4S(dest), Simd4S(rhs), Simd4S(lhs)); +} + +void MacroAssembler::minFloat64x2Relaxed(FloatRegister src, + FloatRegister srcDest) { + Fmin(Simd2D(srcDest), Simd2D(src), Simd2D(srcDest)); +} + +void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmin(Simd2D(dest), Simd2D(rhs), Simd2D(lhs)); +} + +void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src, + FloatRegister srcDest) { + Fmax(Simd2D(srcDest), Simd2D(src), Simd2D(srcDest)); +} + +void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmax(Simd2D(dest), Simd2D(rhs), Simd2D(lhs)); +} + //}}} check_macroassembler_style // =============================================================== diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp index da0df6c0cf372..67fc9b58c2ce0 100644 --- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -2662,6 +2662,18 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) { case wasm::SimdOp::I16x8Q15MulrSatS: masm.q15MulrSatInt16x8(rhs, lhsDest); break; + case wasm::SimdOp::F32x4RelaxedMin: + masm.minFloat32x4Relaxed(rhs, lhsDest); + break; + case 
wasm::SimdOp::F32x4RelaxedMax: + masm.maxFloat32x4Relaxed(rhs, lhsDest); + break; + case wasm::SimdOp::F64x2RelaxedMin: + masm.minFloat64x2Relaxed(rhs, lhsDest); + break; + case wasm::SimdOp::F64x2RelaxedMax: + masm.maxFloat64x2Relaxed(rhs, lhsDest); + break; # ifdef ENABLE_WASM_SIMD_WORMHOLE case wasm::SimdOp::MozWHSELFTEST: masm.loadConstantSimd128(wasm::WormholeSignature(), lhsDest); diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h index b7dbdd6d86789..6d405978fa8d7 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h @@ -2872,6 +2872,26 @@ void MacroAssembler::fmsFloat64x2(FloatRegister src1, FloatRegister src2, subFloat64x2(scratch, srcDest); } +void MacroAssembler::minFloat32x4Relaxed(FloatRegister src, + FloatRegister srcDest) { + vminps(Operand(src), srcDest, srcDest); +} + +void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src, + FloatRegister srcDest) { + vmaxps(Operand(src), srcDest, srcDest); +} + +void MacroAssembler::minFloat64x2Relaxed(FloatRegister src, + FloatRegister srcDest) { + vminpd(Operand(src), srcDest, srcDest); +} + +void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src, + FloatRegister srcDest) { + vmaxpd(Operand(src), srcDest, srcDest); +} + // ======================================================================== // Truncate floating point. 
diff --git a/js/src/wasm/WasmBaselineCompile.cpp b/js/src/wasm/WasmBaselineCompile.cpp index 6befc98a72808..e8a67915ff30d 100644 --- a/js/src/wasm/WasmBaselineCompile.cpp +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -7389,6 +7389,23 @@ static void RelaxedFmsF64x2(MacroAssembler& masm, RegV128 rs1, RegV128 rs2, RegV128 rsd) { masm.fmsFloat64x2(rs1, rs2, rsd); } + +static void RelaxedMinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.minFloat32x4Relaxed(rs, rsd); +} + +static void RelaxedMaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.maxFloat32x4Relaxed(rs, rsd); +} + +static void RelaxedMinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.minFloat64x2Relaxed(rs, rsd); +} + +static void RelaxedMaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.maxFloat64x2Relaxed(rs, rsd); +} + # endif void BaseCompiler::emitVectorAndNot() { @@ -9008,6 +9025,26 @@ bool BaseCompiler::emitBody() { } CHECK_NEXT(dispatchTernary1(RelaxedFmsF64x2, ValType::V128)); break; + case uint32_t(SimdOp::F32x4RelaxedMin): + if (!moduleEnv_.v128RelaxedEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchVectorBinary(RelaxedMinF32x4)); + case uint32_t(SimdOp::F32x4RelaxedMax): + if (!moduleEnv_.v128RelaxedEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchVectorBinary(RelaxedMaxF32x4)); + case uint32_t(SimdOp::F64x2RelaxedMin): + if (!moduleEnv_.v128RelaxedEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchVectorBinary(RelaxedMinF64x2)); + case uint32_t(SimdOp::F64x2RelaxedMax): + if (!moduleEnv_.v128RelaxedEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchVectorBinary(RelaxedMaxF64x2)); # endif default: break; diff --git a/js/src/wasm/WasmConstants.h b/js/src/wasm/WasmConstants.h index 131626bbef969..24360cb23d32b 100644 --- a/js/src/wasm/WasmConstants.h +++ b/js/src/wasm/WasmConstants.h @@ -683,7 +683,7 @@ enum class SimdOp { I32x4Sub = 
0xb1, // SubSatS = 0xb2 // SubSatU = 0xb3 - // Dot = 0xb4 + F32x4RelaxedMin = 0xb4, I32x4Mul = 0xb5, I32x4MinS = 0xb6, I32x4MinU = 0xb7, @@ -715,7 +715,7 @@ enum class SimdOp { I64x2Sub = 0xd1, // Unused = 0xd2 // Unused = 0xd3 - // Dot = 0xd4 + F64x2RelaxedMin = 0xd4, I64x2Mul = 0xd5, I64x2Eq = 0xd6, I64x2Ne = 0xd7, @@ -729,7 +729,7 @@ enum class SimdOp { I64x2ExtMulHighUI32x4 = 0xdf, F32x4Abs = 0xe0, F32x4Neg = 0xe1, - // Round = 0xe2 + F32x4RelaxedMax = 0xe2, F32x4Sqrt = 0xe3, F32x4Add = 0xe4, F32x4Sub = 0xe5, @@ -741,7 +741,7 @@ enum class SimdOp { F32x4PMax = 0xeb, F64x2Abs = 0xec, F64x2Neg = 0xed, - // Round = 0xee + F64x2RelaxedMax = 0xee, F64x2Sqrt = 0xef, F64x2Add = 0xf0, F64x2Sub = 0xf1, diff --git a/js/src/wasm/WasmIonCompile.cpp b/js/src/wasm/WasmIonCompile.cpp index 62cf4406f65b4..369d68ddc5173 100644 --- a/js/src/wasm/WasmIonCompile.cpp +++ b/js/src/wasm/WasmIonCompile.cpp @@ -5463,6 +5463,15 @@ static bool EmitBodyExprs(FunctionCompiler& f) { } CHECK(EmitTernarySimd128(f, SimdOp(op.b1))); } + case uint32_t(SimdOp::F32x4RelaxedMin): + case uint32_t(SimdOp::F32x4RelaxedMax): + case uint32_t(SimdOp::F64x2RelaxedMin): + case uint32_t(SimdOp::F64x2RelaxedMax): { + if (!f.moduleEnv().v128RelaxedEnabled()) { + return f.iter().unrecognizedOpcode(&op); + } + CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1))); + } # endif default: diff --git a/js/src/wasm/WasmOpIter.cpp b/js/src/wasm/WasmOpIter.cpp index cb7ee6cb41b5b..9c2d334bfe423 100644 --- a/js/src/wasm/WasmOpIter.cpp +++ b/js/src/wasm/WasmOpIter.cpp @@ -496,6 +496,10 @@ OpKind wasm::Classify(OpBytes op) { case SimdOp::I64x2ExtMulLowUI32x4: case SimdOp::I64x2ExtMulHighUI32x4: case SimdOp::I16x8Q15MulrSatS: + case SimdOp::F32x4RelaxedMin: + case SimdOp::F32x4RelaxedMax: + case SimdOp::F64x2RelaxedMin: + case SimdOp::F64x2RelaxedMax: WASM_SIMD_OP(OpKind::Binary); case SimdOp::I8x16Neg: case SimdOp::I16x8Neg: diff --git a/js/src/wasm/WasmValidate.cpp b/js/src/wasm/WasmValidate.cpp index 
1f0440047fb50..8867af47f8511 100644 --- a/js/src/wasm/WasmValidate.cpp +++ b/js/src/wasm/WasmValidate.cpp @@ -1017,6 +1017,15 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env, CHECK( iter.readTernary(ValType::V128, &nothing, &nothing, &nothing)); } + case uint32_t(SimdOp::F32x4RelaxedMin): + case uint32_t(SimdOp::F32x4RelaxedMax): + case uint32_t(SimdOp::F64x2RelaxedMin): + case uint32_t(SimdOp::F64x2RelaxedMax): { + if (!env.v128RelaxedEnabled()) { + return iter.unrecognizedOpcode(&op); + } + CHECK(iter.readBinary(ValType::V128, &nothing, &nothing)); + } # endif default: