From cfa778db0cb332d9ef72b9a09e18dc4cec03d800 Mon Sep 17 00:00:00 2001 From: zhongyunde 00443407 Date: Sat, 9 Sep 2023 07:27:20 -0400 Subject: [PATCH] [ARM][ISel] Fix crash of ISD::FMINNUM/FMAXNUM The instructions for ISD::FMINNUM/FMAXNUM should be legal if HasFPARMv8 && HasNEON. For the combination of armv7+fp-armv8, armv7 implies that the HasNEON feature is on, and fp-armv8 matches the HasFPARMv8 feature, so it is legal. Fixes https://github.com/llvm/llvm-project/issues/65820 --- llvm/lib/Target/ARM/ARMInstrNEON.td | 16 +- .../CodeGen/ARM/minnum-maxnum-intrinsics.ll | 468 +++++++++--------- 2 files changed, 234 insertions(+), 250 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 9cc083a220c01..f31e1e9f97892 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5711,19 +5711,19 @@ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" i def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, N3RegFrm, NoItinerary, "vmaxnm", "f32", v2f32, v2f32, fmaxnum, 1>, - Requires<[HasV8, HasNEON]>; + Requires<[HasFPARMv8, HasNEON]>; def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, N3RegFrm, NoItinerary, "vmaxnm", "f32", v4f32, v4f32, fmaxnum, 1>, - Requires<[HasV8, HasNEON]>; + Requires<[HasFPARMv8, HasNEON]>; def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1, N3RegFrm, NoItinerary, "vmaxnm", "f16", v4f16, v4f16, fmaxnum, 1>, - Requires<[HasV8, HasNEON, HasFullFP16]>; + Requires<[HasFPARMv8, HasNEON, HasFullFP16]>; def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1, N3RegFrm, NoItinerary, "vmaxnm", "f16", v8f16, v8f16, fmaxnum, 1>, - Requires<[HasV8, HasNEON, HasFullFP16]>; + Requires<[HasFPARMv8, HasNEON, HasFullFP16]>; } // VMIN : Vector Minimum @@ -5753,19 +5753,19 @@ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" i def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, N3RegFrm, NoItinerary, 
"vminnm", "f32", v2f32, v2f32, fminnum, 1>, - Requires<[HasV8, HasNEON]>; + Requires<[HasFPARMv8, HasNEON]>; def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, N3RegFrm, NoItinerary, "vminnm", "f32", v4f32, v4f32, fminnum, 1>, - Requires<[HasV8, HasNEON]>; + Requires<[HasFPARMv8, HasNEON]>; def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1, N3RegFrm, NoItinerary, "vminnm", "f16", v4f16, v4f16, fminnum, 1>, - Requires<[HasV8, HasNEON, HasFullFP16]>; + Requires<[HasFPARMv8, HasNEON, HasFullFP16]>; def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1, N3RegFrm, NoItinerary, "vminnm", "f16", v8f16, v8f16, fminnum, 1>, - Requires<[HasV8, HasNEON, HasFullFP16]>; + Requires<[HasFPARMv8, HasNEON, HasFullFP16]>; } // Vector Pairwise Operations. diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll index e17075d067c26..be741f536ac75 100644 --- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=armv7 -mattr=+neon %s -o - | FileCheck %s --check-prefix=ARMV7 -; RUN: llc -mtriple=armv8.2-a -mattr=+fp-armv8 %s -o - | FileCheck %s --check-prefix=ARMV8 +; RUN: llc -mtriple=armv7 -mattr=+fp-armv8 %s -o - | FileCheck %s --check-prefix=ARMV8 +; RUN: llc -mtriple=armv8.2a -mattr=+fp-armv8 %s -o - | FileCheck %s --check-prefix=ARMV8 ; RUN: llc -mtriple=armv8.1m-none-none-eabi -mattr=+mve.fp,+fp64 %s -o - | FileCheck %s --check-prefix=ARMV8M declare float @llvm.minnum.f32(float, float) @@ -29,7 +30,7 @@ define float @fminnum32_intrinsic(float %x, float %y) { ; ARMV8-NEXT: vmov s2, r0 ; ARMV8-NEXT: vminnm.f32 s0, s2, s0 ; ARMV8-NEXT: vmov r0, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnum32_intrinsic: ; ARMV8M: @ %bb.0: @@ -59,7 +60,7 @@ define float @fminnum32_nsz_intrinsic(float %x, float %y) { ; 
ARMV8-NEXT: vmov s2, r0 ; ARMV8-NEXT: vminnm.f32 s0, s2, s0 ; ARMV8-NEXT: vmov r0, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnum32_nsz_intrinsic: ; ARMV8M: @ %bb.0: @@ -89,7 +90,7 @@ define float @fminnum32_non_zero_intrinsic(float %x) { ; ARMV8-NEXT: vmov s2, r0 ; ARMV8-NEXT: vminnm.f32 s0, s2, s0 ; ARMV8-NEXT: vmov r0, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnum32_non_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -119,7 +120,7 @@ define float @fmaxnum32_intrinsic(float %x, float %y) { ; ARMV8-NEXT: vmov s2, r0 ; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 ; ARMV8-NEXT: vmov r0, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnum32_intrinsic: ; ARMV8M: @ %bb.0: @@ -149,7 +150,7 @@ define float @fmaxnum32_nsz_intrinsic(float %x, float %y) { ; ARMV8-NEXT: vmov s2, r0 ; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 ; ARMV8-NEXT: vmov r0, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnum32_nsz_intrinsic: ; ARMV8M: @ %bb.0: @@ -183,7 +184,7 @@ define float @fmaxnum32_zero_intrinsic(float %x) { ; ARMV8-NEXT: vmov s2, r0 ; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 ; ARMV8-NEXT: vmov r0, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ARMV8-NEXT: .p2align 2 ; ARMV8-NEXT: @ %bb.1: ; ARMV8-NEXT: .LCPI5_0: @@ -221,7 +222,7 @@ define float @fmaxnum32_non_zero_intrinsic(float %x) { ; ARMV8-NEXT: vmov s2, r0 ; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 ; ARMV8-NEXT: vmov r0, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnum32_non_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -251,7 +252,7 @@ define double @fminnum64_intrinsic(double %x, double %y) { ; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vminnm.f64 d16, d17, d16 ; ARMV8-NEXT: vmov r0, r1, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnum64_intrinsic: ; ARMV8M: @ %bb.0: @@ -281,7 +282,7 @@ define double @fminnum64_nsz_intrinsic(double %x, double %y) { ; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vminnm.f64 d16, 
d17, d16 ; ARMV8-NEXT: vmov r0, r1, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnum64_nsz_intrinsic: ; ARMV8M: @ %bb.0: @@ -316,7 +317,7 @@ define double @fminnum64_zero_intrinsic(double %x) { ; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vminnm.f64 d16, d17, d16 ; ARMV8-NEXT: vmov r0, r1, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ARMV8-NEXT: .p2align 3 ; ARMV8-NEXT: @ %bb.1: ; ARMV8-NEXT: .LCPI9_0: @@ -356,7 +357,7 @@ define double @fminnum64_non_zero_intrinsic(double %x) { ; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vminnm.f64 d16, d17, d16 ; ARMV8-NEXT: vmov r0, r1, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnum64_non_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -386,7 +387,7 @@ define double@fmaxnum64_intrinsic(double %x, double %y) { ; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 ; ARMV8-NEXT: vmov r0, r1, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnum64_intrinsic: ; ARMV8M: @ %bb.0: @@ -416,7 +417,7 @@ define double@fmaxnum64_nsz_intrinsic(double %x, double %y) { ; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 ; ARMV8-NEXT: vmov r0, r1, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnum64_nsz_intrinsic: ; ARMV8M: @ %bb.0: @@ -442,16 +443,11 @@ define double @fmaxnum64_zero_intrinsic(double %x) { ; ; ARMV8-LABEL: fmaxnum64_zero_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr d16, .LCPI13_0 +; ARMV8-NEXT: vmov.i32 d16, #0x0 ; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 ; ARMV8-NEXT: vmov r0, r1, d16 -; ARMV8-NEXT: mov pc, lr -; ARMV8-NEXT: .p2align 3 -; ARMV8-NEXT: @ %bb.1: -; ARMV8-NEXT: .LCPI13_0: -; ARMV8-NEXT: .long 0 @ double 0 -; ARMV8-NEXT: .long 0 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnum64_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -486,7 +482,7 @@ define double @fmaxnum64_non_zero_intrinsic(double %x) { ; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vmaxnm.f64 d16, 
d17, d16 ; ARMV8-NEXT: vmov r0, r1, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnum64_non_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -524,23 +520,14 @@ define <4 x float> @fminnumv432_intrinsic(<4 x float> %x, <4 x float> %y) { ; ; ARMV8-LABEL: fminnumv432_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr s0, [sp, #4] -; ARMV8-NEXT: vmov s12, r1 -; ARMV8-NEXT: vldr s2, [sp, #8] -; ARMV8-NEXT: vmov s10, r2 -; ARMV8-NEXT: vminnm.f32 s0, s12, s0 -; ARMV8-NEXT: vldr s4, [sp, #12] -; ARMV8-NEXT: vldr s6, [sp] -; ARMV8-NEXT: vmov s14, r0 -; ARMV8-NEXT: vmov r1, s0 -; ARMV8-NEXT: vminnm.f32 s0, s10, s2 -; ARMV8-NEXT: vmov s8, r3 -; ARMV8-NEXT: vminnm.f32 s6, s14, s6 -; ARMV8-NEXT: vmov r2, s0 -; ARMV8-NEXT: vminnm.f32 s0, s8, s4 -; ARMV8-NEXT: vmov r0, s6 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: mov r0, sp +; ARMV8-NEXT: vld1.64 {d18, d19}, [r0] +; ARMV8-NEXT: vminnm.f32 q8, q8, q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnumv432_intrinsic: ; ARMV8M: @ %bb.0: @@ -570,23 +557,14 @@ define <4 x float> @fminnumv432_nsz_intrinsic(<4 x float> %x, <4 x float> %y) { ; ; ARMV8-LABEL: fminnumv432_nsz_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr s0, [sp, #4] -; ARMV8-NEXT: vmov s12, r1 -; ARMV8-NEXT: vldr s2, [sp, #8] -; ARMV8-NEXT: vmov s10, r2 -; ARMV8-NEXT: vminnm.f32 s0, s12, s0 -; ARMV8-NEXT: vldr s4, [sp, #12] -; ARMV8-NEXT: vldr s6, [sp] -; ARMV8-NEXT: vmov s14, r0 -; ARMV8-NEXT: vmov r1, s0 -; ARMV8-NEXT: vminnm.f32 s0, s10, s2 -; ARMV8-NEXT: vmov s8, r3 -; ARMV8-NEXT: vminnm.f32 s6, s14, s6 -; ARMV8-NEXT: vmov r2, s0 -; ARMV8-NEXT: vminnm.f32 s0, s8, s4 -; ARMV8-NEXT: vmov r0, s6 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: mov r0, sp +; ARMV8-NEXT: vld1.64 {d18, d19}, [r0] +; ARMV8-NEXT: vminnm.f32 q8, q8, 
q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnumv432_nsz_intrinsic: ; ARMV8M: @ %bb.0: @@ -615,20 +593,13 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) { ; ; ARMV8-LABEL: fminnumv432_non_zero_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vmov.f32 s0, #-1.000000e+00 -; ARMV8-NEXT: vmov s4, r2 -; ARMV8-NEXT: vmov s6, r1 -; ARMV8-NEXT: vminnm.f32 s4, s4, s0 -; ARMV8-NEXT: vmov s8, r0 -; ARMV8-NEXT: vminnm.f32 s6, s6, s0 -; ARMV8-NEXT: vmov s2, r3 -; ARMV8-NEXT: vminnm.f32 s8, s8, s0 -; ARMV8-NEXT: vminnm.f32 s0, s2, s0 -; ARMV8-NEXT: vmov r0, s8 -; ARMV8-NEXT: vmov r1, s6 -; ARMV8-NEXT: vmov r2, s4 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: vmov.f32 q9, #-1.000000e+00 +; ARMV8-NEXT: vminnm.f32 q8, q8, q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnumv432_non_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -674,25 +645,21 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; ; ARMV8-LABEL: fminnumv432_one_zero_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr s0, .LCPI18_0 -; ARMV8-NEXT: vmov s8, r1 -; ARMV8-NEXT: vmov.f32 s2, #-1.000000e+00 -; ARMV8-NEXT: vminnm.f32 s0, s8, s0 -; ARMV8-NEXT: vmov s6, r2 -; ARMV8-NEXT: vmov s10, r0 -; ARMV8-NEXT: vmov r1, s0 -; ARMV8-NEXT: vminnm.f32 s0, s6, s2 -; ARMV8-NEXT: vmov s4, r3 -; ARMV8-NEXT: vminnm.f32 s10, s10, s2 -; ARMV8-NEXT: vmov r2, s0 -; ARMV8-NEXT: vminnm.f32 s0, s4, s2 -; ARMV8-NEXT: vmov r0, s10 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr -; ARMV8-NEXT: .p2align 2 +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: adr r0, .LCPI18_0 +; ARMV8-NEXT: vld1.64 {d18, d19}, [r0:128] +; ARMV8-NEXT: vminnm.f32 q8, q8, q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr +; ARMV8-NEXT: .p2align 4 ; ARMV8-NEXT: 
@ %bb.1: ; ARMV8-NEXT: .LCPI18_0: +; ARMV8-NEXT: .long 0xbf800000 @ float -1 ; ARMV8-NEXT: .long 0x00000000 @ float 0 +; ARMV8-NEXT: .long 0xbf800000 @ float -1 +; ARMV8-NEXT: .long 0xbf800000 @ float -1 ; ; ARMV8M-LABEL: fminnumv432_one_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -740,23 +707,14 @@ define <4 x float> @fmaxnumv432_intrinsic(<4 x float> %x, <4 x float> %y) { ; ; ARMV8-LABEL: fmaxnumv432_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr s0, [sp, #4] -; ARMV8-NEXT: vmov s12, r1 -; ARMV8-NEXT: vldr s2, [sp, #8] -; ARMV8-NEXT: vmov s10, r2 -; ARMV8-NEXT: vmaxnm.f32 s0, s12, s0 -; ARMV8-NEXT: vldr s4, [sp, #12] -; ARMV8-NEXT: vldr s6, [sp] -; ARMV8-NEXT: vmov s14, r0 -; ARMV8-NEXT: vmov r1, s0 -; ARMV8-NEXT: vmaxnm.f32 s0, s10, s2 -; ARMV8-NEXT: vmov s8, r3 -; ARMV8-NEXT: vmaxnm.f32 s6, s14, s6 -; ARMV8-NEXT: vmov r2, s0 -; ARMV8-NEXT: vmaxnm.f32 s0, s8, s4 -; ARMV8-NEXT: vmov r0, s6 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: mov r0, sp +; ARMV8-NEXT: vld1.64 {d18, d19}, [r0] +; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnumv432_intrinsic: ; ARMV8M: @ %bb.0: @@ -786,23 +744,14 @@ define <4 x float> @fmaxnumv432_nsz_intrinsic(<4 x float> %x, <4 x float> %y) { ; ; ARMV8-LABEL: fmaxnumv432_nsz_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr s0, [sp, #4] -; ARMV8-NEXT: vmov s12, r1 -; ARMV8-NEXT: vldr s2, [sp, #8] -; ARMV8-NEXT: vmov s10, r2 -; ARMV8-NEXT: vmaxnm.f32 s0, s12, s0 -; ARMV8-NEXT: vldr s4, [sp, #12] -; ARMV8-NEXT: vldr s6, [sp] -; ARMV8-NEXT: vmov s14, r0 -; ARMV8-NEXT: vmov r1, s0 -; ARMV8-NEXT: vmaxnm.f32 s0, s10, s2 -; ARMV8-NEXT: vmov s8, r3 -; ARMV8-NEXT: vmaxnm.f32 s6, s14, s6 -; ARMV8-NEXT: vmov r2, s0 -; ARMV8-NEXT: vmaxnm.f32 s0, s8, s4 -; ARMV8-NEXT: vmov r0, s6 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: vmov d17, r2, r3 +; 
ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: mov r0, sp +; ARMV8-NEXT: vld1.64 {d18, d19}, [r0] +; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnumv432_nsz_intrinsic: ; ARMV8M: @ %bb.0: @@ -849,24 +798,13 @@ define <4 x float> @fmaxnumv432_zero_intrinsic(<4 x float> %x) { ; ; ARMV8-LABEL: fmaxnumv432_zero_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr s0, .LCPI21_0 -; ARMV8-NEXT: vmov s4, r2 -; ARMV8-NEXT: vmov s6, r1 -; ARMV8-NEXT: vmov s8, r0 -; ARMV8-NEXT: vmaxnm.f32 s6, s6, s0 -; ARMV8-NEXT: vmov s2, r3 -; ARMV8-NEXT: vmaxnm.f32 s8, s8, s0 -; ARMV8-NEXT: vmaxnm.f32 s4, s4, s0 -; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 -; ARMV8-NEXT: vmov r0, s8 -; ARMV8-NEXT: vmov r1, s6 -; ARMV8-NEXT: vmov r2, s4 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr -; ARMV8-NEXT: .p2align 2 -; ARMV8-NEXT: @ %bb.1: -; ARMV8-NEXT: .LCPI21_0: -; ARMV8-NEXT: .long 0x00000000 @ float 0 +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: vmov.i32 q9, #0x0 +; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnumv432_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -912,24 +850,13 @@ define <4 x float> @fmaxnumv432_minus_zero_intrinsic(<4 x float> %x) { ; ; ARMV8-LABEL: fmaxnumv432_minus_zero_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr s0, .LCPI22_0 -; ARMV8-NEXT: vmov s4, r2 -; ARMV8-NEXT: vmov s6, r1 -; ARMV8-NEXT: vmov s8, r0 -; ARMV8-NEXT: vmaxnm.f32 s6, s6, s0 -; ARMV8-NEXT: vmov s2, r3 -; ARMV8-NEXT: vmaxnm.f32 s8, s8, s0 -; ARMV8-NEXT: vmaxnm.f32 s4, s4, s0 -; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 -; ARMV8-NEXT: vmov r0, s8 -; ARMV8-NEXT: vmov r1, s6 -; ARMV8-NEXT: vmov r2, s4 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr -; ARMV8-NEXT: .p2align 2 -; ARMV8-NEXT: @ %bb.1: -; ARMV8-NEXT: .LCPI22_0: -; ARMV8-NEXT: .long 0x80000000 @ float -0 +; ARMV8-NEXT: vmov d17, r2, r3 +; 
ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: vmov.i32 q9, #0x80000000 +; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnumv432_minus_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -957,20 +884,13 @@ define <4 x float> @fmaxnumv432_non_zero_intrinsic(<4 x float> %x) { ; ; ARMV8-LABEL: fmaxnumv432_non_zero_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vmov.f32 s0, #1.000000e+00 -; ARMV8-NEXT: vmov s4, r2 -; ARMV8-NEXT: vmov s6, r1 -; ARMV8-NEXT: vmaxnm.f32 s4, s4, s0 -; ARMV8-NEXT: vmov s8, r0 -; ARMV8-NEXT: vmaxnm.f32 s6, s6, s0 -; ARMV8-NEXT: vmov s2, r3 -; ARMV8-NEXT: vmaxnm.f32 s8, s8, s0 -; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 -; ARMV8-NEXT: vmov r0, s8 -; ARMV8-NEXT: vmov r1, s6 -; ARMV8-NEXT: vmov r2, s4 -; ARMV8-NEXT: vmov r3, s0 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmov d16, r0, r1 +; ARMV8-NEXT: vmov.f32 q9, #1.000000e+00 +; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnumv432_non_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -1004,15 +924,19 @@ define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) { ; ; ARMV8-LABEL: fminnumv264_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr d16, [sp, #8] -; ARMV8-NEXT: vmov d18, r2, r3 -; ARMV8-NEXT: vldr d17, [sp] -; ARMV8-NEXT: vmov d19, r0, r1 -; ARMV8-NEXT: vminnm.f64 d16, d18, d16 -; ARMV8-NEXT: vminnm.f64 d17, d19, d17 +; ARMV8-NEXT: mov r12, sp +; ARMV8-NEXT: vld1.64 {d16, d17}, [r12] +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vmov d19, r2, r3 +; ARMV8-NEXT: vcmp.f64 d16, d18 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vcmp.f64 d17, d19 +; ARMV8-NEXT: vselgt.f64 d18, d18, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vselgt.f64 d16, d19, d17 ; ARMV8-NEXT: vmov r2, r3, d16 -; ARMV8-NEXT: vmov r0, r1, d17 -; ARMV8-NEXT: mov pc, lr +; 
ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnumv264_intrinsic: ; ARMV8M: @ %bb.0: @@ -1052,15 +976,19 @@ define <2 x double> @fminnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y) ; ; ARMV8-LABEL: fminnumv264_nsz_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr d16, [sp, #8] -; ARMV8-NEXT: vmov d18, r2, r3 -; ARMV8-NEXT: vldr d17, [sp] -; ARMV8-NEXT: vmov d19, r0, r1 -; ARMV8-NEXT: vminnm.f64 d16, d18, d16 -; ARMV8-NEXT: vminnm.f64 d17, d19, d17 +; ARMV8-NEXT: mov r12, sp +; ARMV8-NEXT: vld1.64 {d16, d17}, [r12] +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vmov d19, r2, r3 +; ARMV8-NEXT: vcmp.f64 d16, d18 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vcmp.f64 d17, d19 +; ARMV8-NEXT: vselgt.f64 d18, d18, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vselgt.f64 d16, d19, d17 ; ARMV8-NEXT: vmov r2, r3, d16 -; ARMV8-NEXT: vmov r0, r1, d17 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnumv264_nsz_intrinsic: ; ARMV8M: @ %bb.0: @@ -1100,14 +1028,18 @@ define <2 x double> @fminnumv264_non_zero_intrinsic(<2 x double> %x) { ; ; ARMV8-LABEL: fminnumv264_non_zero_intrinsic: ; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov d17, r0, r1 ; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00 -; ARMV8-NEXT: vmov d18, r0, r1 -; ARMV8-NEXT: vmov d17, r2, r3 -; ARMV8-NEXT: vminnm.f64 d18, d18, d16 -; ARMV8-NEXT: vminnm.f64 d16, d17, d16 -; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vcmp.f64 d16, d17 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov d18, r2, r3 +; ARMV8-NEXT: vcmp.f64 d16, d18 +; ARMV8-NEXT: vselgt.f64 d17, d17, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov r0, r1, d17 +; ARMV8-NEXT: vselgt.f64 d16, d18, d16 ; ARMV8-NEXT: vmov r2, r3, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnumv264_non_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -1146,20 +1078,19 @@ define <2 x double> @fminnumv264_one_zero_intrinsic(<2 x double> %x) { ; ; ARMV8-LABEL: fminnumv264_one_zero_intrinsic: ; 
ARMV8: @ %bb.0: -; ARMV8-NEXT: vmov.f64 d16, #-1.000000e+00 -; ARMV8-NEXT: vldr d17, .LCPI27_0 -; ARMV8-NEXT: vmov d18, r0, r1 ; ARMV8-NEXT: vmov d19, r2, r3 -; ARMV8-NEXT: vminnm.f64 d16, d18, d16 -; ARMV8-NEXT: vminnm.f64 d17, d19, d17 -; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vcmp.f64 d19, #0 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vmov.f64 d16, #-1.000000e+00 +; ARMV8-NEXT: vcmp.f64 d16, d18 +; ARMV8-NEXT: vmov.i32 d17, #0x0 +; ARMV8-NEXT: vmovlt.f64 d17, d19 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr ; ARMV8-NEXT: vmov r2, r3, d17 -; ARMV8-NEXT: mov pc, lr -; ARMV8-NEXT: .p2align 3 -; ARMV8-NEXT: @ %bb.1: -; ARMV8-NEXT: .LCPI27_0: -; ARMV8-NEXT: .long 0 @ double 0 -; ARMV8-NEXT: .long 0 +; ARMV8-NEXT: vselgt.f64 d16, d18, d16 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fminnumv264_one_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -1204,15 +1135,19 @@ define <2 x double> @fmaxnumv264_intrinsic(<2 x double> %x, <2 x double> %y) { ; ; ARMV8-LABEL: fmaxnumv264_intrinsic: ; ARMV8: @ %bb.0: -; ARMV8-NEXT: vldr d16, [sp, #8] -; ARMV8-NEXT: vmov d18, r2, r3 -; ARMV8-NEXT: vldr d17, [sp] -; ARMV8-NEXT: vmov d19, r0, r1 -; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16 -; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17 +; ARMV8-NEXT: mov r12, sp +; ARMV8-NEXT: vld1.64 {d16, d17}, [r12] +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vcmp.f64 d18, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov d19, r2, r3 +; ARMV8-NEXT: vcmp.f64 d19, d17 +; ARMV8-NEXT: vselgt.f64 d18, d18, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vselgt.f64 d16, d19, d17 ; ARMV8-NEXT: vmov r2, r3, d16 -; ARMV8-NEXT: vmov r0, r1, d17 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnumv264_intrinsic: ; ARMV8M: @ %bb.0: @@ -1252,15 +1187,19 @@ define <2 x double> @fmaxnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y) ; ; ARMV8-LABEL: fmaxnumv264_nsz_intrinsic: ; ARMV8: @ %bb.0: -; 
ARMV8-NEXT: vldr d16, [sp, #8] -; ARMV8-NEXT: vmov d18, r2, r3 -; ARMV8-NEXT: vldr d17, [sp] -; ARMV8-NEXT: vmov d19, r0, r1 -; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16 -; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17 +; ARMV8-NEXT: mov r12, sp +; ARMV8-NEXT: vld1.64 {d16, d17}, [r12] +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vcmp.f64 d18, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov d19, r2, r3 +; ARMV8-NEXT: vcmp.f64 d19, d17 +; ARMV8-NEXT: vselgt.f64 d18, d18, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vselgt.f64 d16, d19, d17 ; ARMV8-NEXT: vmov r2, r3, d16 -; ARMV8-NEXT: vmov r0, r1, d17 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnumv264_nsz_intrinsic: ; ARMV8M: @ %bb.0: @@ -1305,23 +1244,24 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) { ; ; ARMV8-LABEL: fmaxnumv264_zero_intrinsic: ; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov d18, r0, r1 ; ARMV8-NEXT: vldr d16, .LCPI30_0 -; ARMV8-NEXT: vmov d18, r2, r3 -; ARMV8-NEXT: vldr d17, .LCPI30_1 -; ARMV8-NEXT: vmov d19, r0, r1 -; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16 -; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17 -; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: vcmp.f64 d18, #0 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov d19, r2, r3 +; ARMV8-NEXT: vcmp.f64 d19, d16 +; ARMV8-NEXT: vmov.i32 d17, #0x0 +; ARMV8-NEXT: vselgt.f64 d17, d18, d17 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr ; ARMV8-NEXT: vmov r0, r1, d17 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: vselgt.f64 d16, d19, d16 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: bx lr ; ARMV8-NEXT: .p2align 3 ; ARMV8-NEXT: @ %bb.1: ; ARMV8-NEXT: .LCPI30_0: ; ARMV8-NEXT: .long 0 @ double -0 ; ARMV8-NEXT: .long 2147483648 -; ARMV8-NEXT: .LCPI30_1: -; ARMV8-NEXT: .long 0 @ double 0 -; ARMV8-NEXT: .long 0 ; ; ARMV8M-LABEL: fmaxnumv264_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -1375,13 +1315,17 @@ define <2 x double> @fmaxnumv264_minus_zero_intrinsic(<2 x double> %x) { ; ARMV8-LABEL: 
fmaxnumv264_minus_zero_intrinsic: ; ARMV8: @ %bb.0: ; ARMV8-NEXT: vldr d16, .LCPI31_0 -; ARMV8-NEXT: vmov d18, r0, r1 -; ARMV8-NEXT: vmov d17, r2, r3 -; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16 -; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 -; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vmov d18, r2, r3 +; ARMV8-NEXT: vcmp.f64 d17, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vcmp.f64 d18, d16 +; ARMV8-NEXT: vselgt.f64 d17, d17, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov r0, r1, d17 +; ARMV8-NEXT: vselgt.f64 d16, d18, d16 ; ARMV8-NEXT: vmov r2, r3, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ARMV8-NEXT: .p2align 3 ; ARMV8-NEXT: @ %bb.1: ; ARMV8-NEXT: .LCPI31_0: @@ -1431,13 +1375,17 @@ define <2 x double> @fmaxnumv264_non_zero_intrinsic(<2 x double> %x) { ; ARMV8-LABEL: fmaxnumv264_non_zero_intrinsic: ; ARMV8: @ %bb.0: ; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00 -; ARMV8-NEXT: vmov d18, r0, r1 -; ARMV8-NEXT: vmov d17, r2, r3 -; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16 -; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 -; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vcmp.f64 d17, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov d18, r2, r3 +; ARMV8-NEXT: vcmp.f64 d18, d16 +; ARMV8-NEXT: vselgt.f64 d17, d17, d16 +; ARMV8-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8-NEXT: vmov r0, r1, d17 +; ARMV8-NEXT: vselgt.f64 d16, d18, d16 ; ARMV8-NEXT: vmov r2, r3, d16 -; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: bx lr ; ; ARMV8M-LABEL: fmaxnumv264_non_zero_intrinsic: ; ARMV8M: @ %bb.0: @@ -1456,3 +1404,39 @@ define <2 x double> @fmaxnumv264_non_zero_intrinsic(<2 x double> %x) { %a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double>) ret <2 x double> %a } + +define void @pr65820(ptr %y, <4 x float> %splat) { +; ARMV7-LABEL: pr65820: +; ARMV7: @ %bb.0: @ %entry +; ARMV7-NEXT: vmov d16, r2, r3 +; ARMV7-NEXT: vdup.32 q8, d16[0] +; ARMV7-NEXT: vcgt.f32 q9, q8, #0 +; ARMV7-NEXT: vand q8, q8, 
q9 +; ARMV7-NEXT: vst1.32 {d16, d17}, [r0] +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: pr65820: +; ARMV8: @ %bb.0: @ %entry +; ARMV8-NEXT: vmov d16, r2, r3 +; ARMV8-NEXT: vmov.i32 q9, #0x0 +; ARMV8-NEXT: vdup.32 q8, d16[0] +; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9 +; ARMV8-NEXT: vst1.32 {d16, d17}, [r0] +; ARMV8-NEXT: bx lr +; +; ARMV8M-LABEL: pr65820: +; ARMV8M: @ %bb.0: @ %entry +; ARMV8M-NEXT: vmov d0, r2, r3 +; ARMV8M-NEXT: vmov r1, s0 +; ARMV8M-NEXT: vmov.i32 q0, #0x0 +; ARMV8M-NEXT: vdup.32 q1, r1 +; ARMV8M-NEXT: vmaxnm.f32 q0, q1, q0 +; ARMV8M-NEXT: vstrw.32 q0, [r0] +; ARMV8M-NEXT: bx lr +entry: + %broadcast.splat = shufflevector <4 x float> %splat, <4 x float> zeroinitializer, <4 x i32> zeroinitializer + %0 = fcmp ogt <4 x float> %broadcast.splat, zeroinitializer + %1 = select <4 x i1> %0, <4 x float> %broadcast.splat, <4 x float> zeroinitializer + store <4 x float> %1, ptr %y, align 4 + ret void +}