bytecodealliance · alexcrichton · Jul 29, 2022 · Jul 23, 2022 · Jul 25, 2022 · Jul 29, 2022
@@ -770,12 +770,6 @@ impl Ieee32 {
         f32::from_bits(self.0)
     }
 
-    /// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a
-    /// more accurate result than an unfused multiply-add.
-    pub fn mul_add(&self, a: Self, b: Self) -> Self {
-        Self::with_float(self.as_f32().mul_add(a.as_f32(), b.as_f32()))
-    }
-
     /// Returns the square root of self.
     pub fn sqrt(self) -> Self {
         Self::with_float(self.as_f32().sqrt())
@@ -962,12 +956,6 @@ impl Ieee64 {
         f64::from_bits(self.0)
     }
 
-    /// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a
-    /// more accurate result than an unfused multiply-add.
-    pub fn mul_add(&self, a: Self, b: Self) -> Self {
-        Self::with_float(self.as_f64().mul_add(a.as_f64(), b.as_f64()))
-    }
-
     /// Returns the square root of self.
     pub fn sqrt(self) -> Self {
         Self::with_float(self.as_f64().sqrt())

@@ -0,0 +1,25 @@
+test interpret
+
+; The interpreter can run `fma.clif` on most platforms. However, on `x86_64-pc-windows-gnu` we
+; use libm, which has issues with some inputs. We should delete this file and enable the interpreter
+; on the main `fma.clif` file once those issues are fixed.
+
+; See: https://github.com/bytecodealliance/wasmtime/pull/4517
+; See: https://github.com/rust-lang/libm/issues/263
+
+function %fma_f32(f32, f32, f32) -> f32 {
+block0(v0: f32, v1: f32, v2: f32):
+    v3 = fma v0, v1, v2
+    return v3
+}
+; run: %fma_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
+; run: %fma_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14
+
+
+function %fma_f64(f64, f64, f64) -> f64 {
+block0(v0: f64, v1: f64, v2: f64):
+    v3 = fma v0, v1, v2
+    return v3
+}
+; run: %fma_f64(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
+; run: %fma_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21
@@ -1,4 +1,3 @@
-test interpret
 test run
 target aarch64
 target s390x
@@ -38,6 +37,15 @@ block0(v0: f32, v1: f32, v2: f32):
 ; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.0) == 0x0.0
 ; run: %fma_f32(0x0.0, 0x0.0, 0x0.000002p-126) == 0x0.000002p-126
 
+; Regression tests for x86_64-pc-windows-gnu
+; See: https://github.com/bytecodealliance/wasmtime/issues/4512
+; run: %fma_f32(0x1.0p100, 0x1.0p100, -Inf) == -Inf
+; run: %fma_f32(0x1.fffffep23, 0x1.000004p28, 0x1.fcp5) == 0x1.000002p52
+; run: %fma_f32(0x1.84ae3p125, 0x1.6p-141, 0x1.0p-149) == 0x1.0b37c2p-15
+; run: %fma_f32(0x1.00001p50, 0x1.1p50, 0x1.0p-149) == 0x1.100012p100
+; run: %fma_f32(0x1.000002p50, 0x1.8p50, -0x1.0p-149) == 0x1.800002p100
+; run: %fma_f32(0x1.83bd78p4, -0x1.cp118, -0x1.344108p-2) == -0x1.5345cap123
+
 
 
 ;; The IEEE754 Standard does not make a lot of guarantees about what
@@ -98,6 +106,14 @@ block0(v0: f64, v1: f64, v2: f64):
 ; run: %fma_f64(0x0.0, 0x0.0, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022
 
 
+; Regression tests for x86_64-pc-windows-gnu
+; See: https://github.com/bytecodealliance/wasmtime/issues/4512
+; run: %fma_f64(0x1.0p1000, 0x1.0p1000, -Inf) == -Inf
+; run: %fma_f64(-0x1.4f8ac19291ffap1023, 0x1.39c33c8d39b7p-1025, 0x1.ee11f685e2e12p-1) == 0x1.2071b0283f156p-1
+; run: %fma_f64(0x1.0000000000008p500, 0x1.1p500, 0x1.0p-1074) == 0x1.1000000000009p1000
+; run: %fma_f64(0x1.0000000000001p500, 0x1.8p500, -0x1.0p-1074) == 0x1.8000000000001p1000
+; run: %fma_f64(0x0.ffffffep513, 0x1.0000002p511, -0x1.0p-1074) == 0x1.fffffffffffffp1023
+
 ;; The IEEE754 Standard does not make a lot of guarantees about what
 ;; comes out of NaN-producing operations, so we just check if it's a NaN
 function %fma_is_nan_f64(f64, f64, f64) -> i32 {

@@ -17,6 +17,9 @@ log = { version = "0.4.8", default-features = false }
 smallvec = "1.6.1"
 thiserror = "1.0.15"
 
+[target.x86_64-pc-windows-gnu.dependencies]
+libm = "0.2"
+
 [dev-dependencies]
 cranelift-frontend = { path = "../frontend", version = "0.87.0" }
 cranelift-reader = { path = "../reader", version = "0.87.0" }

@@ -555,10 +555,32 @@ impl Value for DataValue {
     fn fma(self, b: Self, c: Self) -> ValueResult<Self> {
         match (self, b, c) {
             (DataValue::F32(a), DataValue::F32(b), DataValue::F32(c)) => {
-                Ok(DataValue::F32(a.mul_add(b, c)))
+                // The `fma` function for `x86_64-pc-windows-gnu` is incorrect. Use `libm`'s instead.
+                // See: https://github.com/bytecodealliance/wasmtime/issues/4512
+                #[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))]
+                let res = libm::fmaf(a.as_f32(), b.as_f32(), c.as_f32());
+
+                #[cfg(not(all(
+                    target_arch = "x86_64",
+                    target_os = "windows",
+                    target_env = "gnu"
+                )))]
+                let res = a.as_f32().mul_add(b.as_f32(), c.as_f32());
+
+                Ok(DataValue::F32(res.into()))
             }
             (DataValue::F64(a), DataValue::F64(b), DataValue::F64(c)) => {
-                Ok(DataValue::F64(a.mul_add(b, c)))
+                #[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))]
+                let res = libm::fma(a.as_f64(), b.as_f64(), c.as_f64());
+
+                #[cfg(not(all(
+                    target_arch = "x86_64",
+                    target_os = "windows",
+                    target_env = "gnu"
+                )))]
+                let res = a.as_f64().mul_add(b.as_f64(), c.as_f64());
+
+                Ok(DataValue::F64(res.into()))
             }
             (a, _b, _c) => Err(ValueError::InvalidType(ValueTypeClass::Float, a.ty())),
         }