From 70a6abfd29c20fbaa7bee58259a16a62ed48ad93 Mon Sep 17 00:00:00 2001
From: Afonso Bordado
Date: Sat, 4 Nov 2023 19:37:36 +0000
Subject: [PATCH] Add unsigned saturating add/sub intrinsics for aarch64

---
// Reviewer notes. This region, between the three-dash separator and the
// first diff header, is commentary and is not part of the change itself.
//
// example/neon.rs
//   * Adds test_vqsub_u8 and test_vqadd_u8, both gated on
//     target_arch = aarch64, and calls them from main() right after
//     test_vpadd_u8().
//   * Each test builds two u8x8 vectors, passes them through the intrinsic
//     via transmute, and compares the result with a hard-coded expected
//     vector using assert_eq!.
//   * The expected vectors pin the saturating behaviour: in test_vqsub_u8
//     every lane where b is larger than a (1 - 30, 5 - 34, 6 - 0xff, 7 - 36)
//     must come out as 0; in test_vqadd_u8 every lane whose sum exceeds 0xff
//     (6 + 0xff, 0xff + 37) must come out as 0xff.
//
// src/intrinsics/llvm_aarch64.rs
//   * In codegen_aarch64_llvm_intrinsic_call, the match guard that accepted
//     intrinsic names starting with llvm.aarch64.neon.sqadd.v now also
//     accepts names starting with llvm.aarch64.neon.uqadd.v, and the
//     sqsub.v guard now also accepts uqsub.v. Only the guards change; the
//     arm bodies shown are unchanged context lines.
//   * NOTE(review): the per-lane closure bodies lie outside these hunks.
//     Presumably they choose signed or unsigned saturation from the lane
//     type -- confirm before relying on the shared arms.

 example/neon.rs                | 21 +++++++++++++++++++++
 src/intrinsics/llvm_aarch64.rs |  8 ++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/example/neon.rs b/example/neon.rs
index 6ea053d0b0046..1033713732240 100644
--- a/example/neon.rs
+++ b/example/neon.rs
@@ -180,6 +180,24 @@ unsafe fn test_vpadd_u8() {
     assert_eq!(r, e);
 }
 
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vqsub_u8() {
+    let a = u8x8::from([1, 2, 3, 4, 5, 6, 7, 0xff]);
+    let b = u8x8::from([30, 1, 1, 1, 34, 0xff, 36, 37]);
+    let r: u8x8 = transmute(vqsub_u8(transmute(a), transmute(b)));
+    let e = u8x8::from([0, 1, 2, 3, 0, 0, 0, 218]);
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vqadd_u8() {
+    let a = u8x8::from([1, 2, 3, 4, 5, 6, 7, 0xff]);
+    let b = u8x8::from([30, 1, 1, 1, 34, 0xff, 36, 37]);
+    let r: u8x8 = transmute(vqadd_u8(transmute(a), transmute(b)));
+    let e = u8x8::from([31, 3, 4, 5, 39, 0xff, 43, 0xff]);
+    assert_eq!(r, e);
+}
+
 #[cfg(target_arch = "aarch64")]
 fn main() {
     unsafe {
@@ -204,6 +222,9 @@ fn main() {
         test_vpadd_u16();
         test_vpadd_u32();
         test_vpadd_u8();
+
+        test_vqsub_u8();
+        test_vqadd_u8();
     }
 }
 
diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs
index fdad5a474d6d1..ee098be1fce6b 100644
--- a/src/intrinsics/llvm_aarch64.rs
+++ b/src/intrinsics/llvm_aarch64.rs
@@ -44,7 +44,9 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             });
         }
 
-        _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") => {
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v")
+            || intrinsic.starts_with("llvm.aarch64.neon.uqadd.v") =>
+        {
             intrinsic_args!(fx, args => (x, y); intrinsic);
 
             simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
@@ -52,7 +54,9 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             });
         }
 
-        _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v") => {
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v")
+            || intrinsic.starts_with("llvm.aarch64.neon.uqsub.v") =>
+        {
             intrinsic_args!(fx, args => (x, y); intrinsic);
 
             simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {