-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][GlobalISel] Select SHL({Z|S}EXT, DUP Imm) into {U|S}HLL Imm #96782
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: None (chuongg3) Changes
=>
Patch is 28.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96782.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index dd54520c8ddad..2bb6ee6dc2b87 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7879,6 +7879,20 @@ def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
(SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(shl (v8i16 (zext (v8i8 V64:$Rm))), (v8i16 (AArch64dup (i32 imm:$size)))),
+ (USHLLv8i8_shift V64:$Rm, (i32 imm:$size))>;
+def : Pat<(shl (v4i32 (zext (v4i16 V64:$Rm))), (v4i32 (AArch64dup (i32 imm:$size)))),
+ (USHLLv4i16_shift V64:$Rm, (i32 imm:$size))>;
+def : Pat<(shl (v2i64 (zext (v2i32 V64:$Rm))), (v2i64 (AArch64dup (i64 imm:$size)))),
+ (USHLLv2i32_shift V64:$Rm, (trunc_imm imm:$size))>;
+
+def : Pat<(shl (v8i16 (sext (v8i8 V64:$Rm))), (v8i16 (AArch64dup (i32 imm:$size)))),
+ (SSHLLv8i8_shift V64:$Rm, (i32 imm:$size))>;
+def : Pat<(shl (v4i32 (sext (v4i16 V64:$Rm))), (v4i32 (AArch64dup (i32 imm:$size)))),
+ (SSHLLv4i16_shift V64:$Rm, (i32 imm:$size))>;
+def : Pat<(shl (v2i64 (sext (v2i32 V64:$Rm))), (v2i64 (AArch64dup (i64 imm:$size)))),
+ (SSHLLv2i32_shift V64:$Rm, (trunc_imm imm:$size))>;
+
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 4a7c82b393c10..df342c8beef19 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5536,7 +5536,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
}
if (CV->getSplatValue()) {
- APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
+ APInt DefBits = APInt::getSplat(
+ DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
MachineInstr *NewOp;
bool Inv = false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 4aa6999d1d3ca..6bbf7cc689abb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -420,6 +420,25 @@ void AArch64RegisterBankInfo::applyMappingImpl(
MI.getOperand(2).setReg(Ext.getReg(0));
return applyDefaultMapping(OpdMapper);
}
+ case AArch64::G_DUP: {
+ // Extend a GPR scalar operand narrower than 32 bits to 32 bits.
+ Builder.setInsertPt(*MI.getParent(), MI.getIterator());
+
+ Register ConstReg;
+ auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
+ ConstReg =
+ Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
+ ConstMI->eraseFromParent();
+ } else {
+ ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
+ .getReg(0);
+ }
+ MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
+ MI.getOperand(1).setReg(ConstReg);
+ return applyDefaultMapping(OpdMapper);
+ }
default:
llvm_unreachable("Don't know how to handle that operation");
}
@@ -774,8 +793,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
(getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*ScalarDef, MRI, TRI)))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
- else
+ else {
+ if (ScalarTy.getSizeInBits() < 32 &&
+ getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank)
+ MappingID = 1;
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
+ }
break;
}
case TargetOpcode::G_TRUNC: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index c97a00ccdd455..2b9ef7acd4a4d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -18,8 +18,8 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
;
; GISEL-LABEL: combine_vec_udiv_uniform:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI0_0
-; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
+; GISEL-NEXT: mov w8, #25645 // =0x642d
+; GISEL-NEXT: dup v1.8h, w8
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
index 4cd6eef531ce0..66c8c2efda9bc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
@@ -16,10 +16,11 @@ body: |
; CHECK-LABEL: name: v4s32_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
@@ -37,10 +38,11 @@ body: |
; CHECK-LABEL: name: v4s64_gpr
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
@@ -58,10 +60,11 @@ body: |
; CHECK-LABEL: name: v2s32_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $w0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
@@ -79,10 +82,11 @@ body: |
; CHECK-LABEL: name: v4s32_fpr
; CHECK: liveins: $s0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $s0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
@@ -100,10 +104,11 @@ body: |
; CHECK-LABEL: name: v2s64_fpr
; CHECK: liveins: $d0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
@@ -121,10 +126,11 @@ body: |
; CHECK-LABEL: name: v2s32_fpr
; CHECK: liveins: $s0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $s0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
@@ -142,10 +148,11 @@ body: |
; CHECK-LABEL: name: v2s64_fpr_copy
; CHECK: liveins: $d0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%6:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %6(<2 x s64>)
@@ -163,11 +170,13 @@ body: |
; CHECK-LABEL: name: v416s8_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8)
- ; CHECK: $q0 = COPY [[DUP]](<16 x s8>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8)
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%trunc:_(s8) = G_TRUNC %0(s32)
%1:_(<16 x s8>) = G_DUP %trunc(s8)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 307aa397eabbb..5aff8e0351487 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -994,9 +994,9 @@ define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
;
; CHECK-GI-LABEL: smull_noextvec_v8i8_v8i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NEXT: mov w8, #-999 // =0xfffffc19
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT: dup v1.8h, w8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp3 = sext <8 x i8> %arg to <8 x i16>
@@ -1088,29 +1088,13 @@ define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use UMULL if the BUILD_VECTOR element values are too big.
-; CHECK-NEON-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-NEON: // %bb.0:
-; CHECK-NEON-NEXT: mov w8, #999 // =0x3e7
-; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEON-NEXT: dup v1.8h, w8
-; CHECK-NEON-NEXT: mul v0.8h, v0.8h, v1.8h
-; CHECK-NEON-NEXT: ret
-;
-; CHECK-SVE-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: mov w8, #999 // =0x3e7
-; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-SVE-NEXT: dup v1.8h, w8
-; CHECK-SVE-NEXT: mul v0.8h, v0.8h, v1.8h
-; CHECK-SVE-NEXT: ret
-;
-; CHECK-GI-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI38_0
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0]
-; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #999 // =0x3e7
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp3 = zext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
ret <8 x i16> %tmp4
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 170ba7292ae60..cb85bbda80a80 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -109,29 +109,11 @@ define <4 x i32> @movi4s_lsl16() {
}
define <4 x i32> @movi4s_fneg() {
-; CHECK-NOFP16-SD-LABEL: movi4s_fneg:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: movi4s_fneg:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: movi4s_fneg:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: movi4s_fneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #240, lsl #8
+; CHECK-NEXT: fneg v0.4s, v0.4s
+; CHECK-NEXT: ret
ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
}
@@ -308,23 +290,17 @@ define <8 x i16> @mvni8h_neg() {
; CHECK-NOFP16-SD-NEXT: dup v0.8h, w8
; CHECK-NOFP16-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: mvni8h_neg:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.8h, #240
-; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h
-; CHECK-FP16-SD-NEXT: ret
+; CHECK-FP16-LABEL: mvni8h_neg:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: movi v0.8h, #240
+; CHECK-FP16-NEXT: fneg v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
+; CHECK-NOFP16-GI-NEXT: mov w8, #-32528 // =0xffff80f0
+; CHECK-NOFP16-GI-NEXT: dup v0.8h, w8
; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: mvni8h_neg:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.8h, #240
-; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h
-; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
}
@@ -494,29 +470,11 @@ define <2 x double> @fmov2d() {
}
define <2 x double> @fmov2d_neg0() {
-; CHECK-NOFP16-SD-LABEL: fmov2d_neg0:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: fmov2d_neg0:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000
-; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: fmov2d_neg0:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
-; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: fmov2d_neg0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: fneg v0.2d, v0.2d
+; CHECK-NEXT: ret
ret <2 x double> <double -0.0, double -0.0>
}
@@ -581,5 +539,4 @@ define <2 x i32> @movi1d() {
ret <2 x i32> %1
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
; CHECK-NOFP16: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
index d10d551805a6b..a06bc0856c9f1 100644
--- a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
+++ b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
@@ -1,56 +1,78 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) {
-; CHECK: test_sshll_v8i8:
-; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
+; CHECK-LABEL: test_sshll_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.8h, v0.8b, #3
+; CHECK-NEXT: ret
%1 = sext <8 x i8> %a to <8 x i16>
%tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %tmp
}
define <4 x i32> @test_sshll_v4i16(<4 x i16> %a) {
-; CHECK: test_sshll_v4i16:
-; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
+; CHECK-LABEL: test_sshll_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.4s, v0.4h, #9
+; CHECK-NEXT: ret
%1 = sext <4 x i16> %a to <4 x i32>
%tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9>
ret <4 x i32> %tmp
}
define <2 x i64> @test_sshll_v2i32(<2 x i32> %a) {
-; CHECK: test_sshll_v2i32:
-; CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
+; CHECK-LABEL: test_sshll_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.2d, v0.2s, #19
+; CHECK-NEXT: ret
%1 = sext <2 x i32> %a to <2 x i64>
%tmp = shl <2 x i64> %1, <i64 19, i64 19>
ret <2 x i64> %tmp
}
define <8 x i16> @test_ushll_v8i8(<8 x i8> %a) {
-; CHECK: test_ushll_v8i8:
-; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
+; CHECK-LABEL: test_ushll_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v0.8b, #3
+; CHECK-NEXT: ret
%1 = zext <8 x i8> %a to <8 x i16>
%tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %tmp
}
define <4 x i32> @test_ushll_v4i16(<4 x i16> %a) {
-; CHECK: test_ushll_v4i16:
-; CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
+; CHECK-LABEL: test_ushll_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.4s, v0.4h, #9
+; CHECK-NEXT: ret
%1 = zext <4 x i16> %a to <4 x i32>
%tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9>
ret <4 x i32> %tmp
}
define <2 x i64> @test_ushll_v2i32(<2 x i32> %a) {
-; CHECK: test_ushll_v2i32:
-; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
+; CHECK-LABEL: test_ushll_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.2d, v0.2s, #19
+; CHECK-NEXT: ret
%1 = zext <2 x i32> %a to <2 x i64>
%tmp = shl <2 x i64> %1, <i64 19, i64 19>
ret <2 x i64> %tmp
}
define <8 x i16> @test_sshll2_v16i8(<16 x i8> %a) {
-; CHECK: test_sshll2_v16i8:
-; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
+; CHECK-SD-LABEL: test_sshll2_v16i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll2 v0.8h, v0.16b, #3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sshll2_v16i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #3
+; CHECK-GI-NEXT: ret
%1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = sext <8 x i8> %1 to <8 x i16>
%tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -58,8 +80,16 @@ define <8 x i16> @test_sshll2_v16i8(<16 x i8> %a) {
}
define <4 x i32> @test_sshll2_v8i16(<8 x ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The patterns LGTM once the other patch goes in.
G_DUP's immediate operand gets extended in RegBankSelect to allow for better pattern matching in TableGen for #96782
c0a6902
to
7a424a5
Compare
7a424a5
to
c588392
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new ranges LGTM. Good job catching that one.
c588392
to
452e6a2
Compare
…llvm#96782) First commit's PR is llvm#96780 Combines the following instructions: `ushll r0, r0, #0` `shl r0, r0, #3` Into: `ushll r0, r0, #3`
…llvm#96782) First commit's PR is llvm#96780 Combines the following instructions: `ushll r0, r0, #0` `shl r0, r0, #3` Into: `ushll r0, r0, #3`
First commit's PR is #96780
Combines the following instructions:
ushll r0, r0, #0
shl r0, r0, #3
Into:
ushll r0, r0, #3