diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 076dcaadf8..3d40e666bf 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -194,7 +194,8 @@ pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(minps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { - simd_fmin(a, b) + // See the `test_mm_min_ps` test why this can't be implemented using `simd_fmin`. + minps(a, b) } /// Compares the first single-precision (32-bit) floating-point element of `a` @@ -219,7 +220,8 @@ pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(maxps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 { - simd_fmax(a, b) + // See the `test_mm_min_ps` test why this can't be implemented using `simd_fmax`. + maxps(a, b) } /// Bitwise AND of packed single-precision (32-bit) floating-point elements. @@ -1935,8 +1937,12 @@ extern "C" { fn rsqrtps(a: __m128) -> __m128; #[link_name = "llvm.x86.sse.min.ss"] fn minss(a: __m128, b: __m128) -> __m128; + #[link_name = "llvm.x86.sse.min.ps"] + fn minps(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.max.ss"] fn maxss(a: __m128, b: __m128) -> __m128; + #[link_name = "llvm.x86.sse.max.ps"] + fn maxps(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.movmsk.ps"] fn movmskps(a: __m128) -> i32; #[link_name = "llvm.x86.sse.cmp.ps"] @@ -2634,6 +2640,21 @@ mod tests { let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_min_ps(a, b); assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); + + // `_mm_min_ps` can **not** be implemented using the `simd_min` rust intrinsic. `simd_min` + // is lowered by the llvm codegen backend to `llvm.minnum.v*` llvm intrinsic. This intrinsic + // doesn't specify how -0.0 is handled. Unfortunately it happens to behave different from + // the `minps` x86 instruction on x86. The `llvm.minnum.v*` llvm intrinsic equals + // `r1` to `a` and `r2` to `b`. + let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0); + let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_min_ps(a, b)); + let r2: [u8; 16] = transmute(_mm_min_ps(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present } #[simd_test(enable = "sse")]