Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[TwoAddressInstruction] Update LiveIntervals after INSERT_SUBREG with undef read (llvm#66211)
Browse files Browse the repository at this point in the history

Update LiveIntervals after rewriting:
  %reg = INSERT_SUBREG undef %reg, %subreg, subidx
to:
  undef %reg:subidx = COPY %subreg

D113044 implemented this for the non-undef case.
jayfoad authored and ZijunZhaoCCK committed Sep 19, 2023

Verified

This commit was signed with the committer’s verified signature.
tomaka Pierre Krieger
1 parent 9ecd87a commit 300c743
Showing 4 changed files with 1,019 additions and 503 deletions.
12 changes: 8 additions & 4 deletions llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
Original file line number Diff line number Diff line change
@@ -1868,12 +1868,16 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// %reg.subidx.
LaneBitmask LaneMask =
TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg());
SlotIndex Idx = LIS->getInstructionIndex(*mi);
SlotIndex Idx = LIS->getInstructionIndex(*mi).getRegSlot();
for (auto &S : LI.subranges()) {
if ((S.LaneMask & LaneMask).none()) {
LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
LiveRange::iterator DefSeg = std::next(UseSeg);
S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
LiveRange::iterator DefSeg = S.FindSegmentContaining(Idx);
if (mi->getOperand(0).isUndef()) {
S.removeValNo(DefSeg->valno);
} else {
LiveRange::iterator UseSeg = std::prev(DefSeg);
S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
}
}
}

52 changes: 35 additions & 17 deletions llvm/test/CodeGen/Thumb2/mve-fmath.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS

define arm_aapcs_vfpcc <4 x float> @sqrt_float32_t(<4 x float> %src) {
; CHECK-LABEL: sqrt_float32_t:
@@ -1085,21 +1087,37 @@ entry:
}

define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: copysign_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov r0, lr, d2
; CHECK-NEXT: vmov r0, r3, d1
; CHECK-NEXT: vmov r12, r2, d0
; CHECK-NEXT: lsrs r1, r1, #31
; CHECK-NEXT: bfi r3, r1, #31, #1
; CHECK-NEXT: lsr.w r1, lr, #31
; CHECK-NEXT: bfi r2, r1, #31, #1
; CHECK-NEXT: vmov d1, r0, r3
; CHECK-NEXT: vmov d0, r12, r2
; CHECK-NEXT: pop {r7, pc}
; CHECK-LV-LABEL: copysign_float64_t:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: .save {r7, lr}
; CHECK-LV-NEXT: push {r7, lr}
; CHECK-LV-NEXT: vmov r0, r1, d3
; CHECK-LV-NEXT: vmov r0, lr, d2
; CHECK-LV-NEXT: vmov r0, r3, d1
; CHECK-LV-NEXT: vmov r12, r2, d0
; CHECK-LV-NEXT: lsrs r1, r1, #31
; CHECK-LV-NEXT: bfi r3, r1, #31, #1
; CHECK-LV-NEXT: lsr.w r1, lr, #31
; CHECK-LV-NEXT: bfi r2, r1, #31, #1
; CHECK-LV-NEXT: vmov d1, r0, r3
; CHECK-LV-NEXT: vmov d0, r12, r2
; CHECK-LV-NEXT: pop {r7, pc}
;
; CHECK-LIS-LABEL: copysign_float64_t:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: .save {r4, lr}
; CHECK-LIS-NEXT: push {r4, lr}
; CHECK-LIS-NEXT: vmov r0, r12, d3
; CHECK-LIS-NEXT: vmov r0, lr, d2
; CHECK-LIS-NEXT: vmov r4, r3, d1
; CHECK-LIS-NEXT: vmov r1, r2, d0
; CHECK-LIS-NEXT: lsr.w r0, r12, #31
; CHECK-LIS-NEXT: bfi r3, r0, #31, #1
; CHECK-LIS-NEXT: lsr.w r0, lr, #31
; CHECK-LIS-NEXT: bfi r2, r0, #31, #1
; CHECK-LIS-NEXT: vmov d1, r4, r3
; CHECK-LIS-NEXT: vmov d0, r1, r2
; CHECK-LIS-NEXT: pop {r4, pc}
entry:
%0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
ret <2 x double> %0
189 changes: 127 additions & 62 deletions llvm/test/CodeGen/Thumb2/mve-shuffle.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECKFP
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV,CHECKFP
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS,CHECKFP

define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle1_i32:
@@ -221,18 +223,31 @@ entry:
}

define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle3_i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmovx.f16 s2, s5
; CHECK-NEXT: vmovx.f16 s0, s4
; CHECK-NEXT: vins.f16 s5, s4
; CHECK-NEXT: vins.f16 s2, s0
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vmovx.f16 s1, s7
; CHECK-NEXT: vmov.f32 s0, s6
; CHECK-NEXT: vins.f16 s1, s7
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle3_i16:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: vmov q1, q0
; CHECK-LV-NEXT: vmovx.f16 s2, s5
; CHECK-LV-NEXT: vmovx.f16 s0, s4
; CHECK-LV-NEXT: vins.f16 s5, s4
; CHECK-LV-NEXT: vins.f16 s2, s0
; CHECK-LV-NEXT: vmov.f32 s3, s5
; CHECK-LV-NEXT: vmovx.f16 s1, s7
; CHECK-LV-NEXT: vmov.f32 s0, s6
; CHECK-LV-NEXT: vins.f16 s1, s7
; CHECK-LV-NEXT: bx lr
;
; CHECK-LIS-LABEL: shuffle3_i16:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: vmovx.f16 s5, s3
; CHECK-LIS-NEXT: vmovx.f16 s6, s1
; CHECK-LIS-NEXT: vmovx.f16 s4, s0
; CHECK-LIS-NEXT: vins.f16 s1, s0
; CHECK-LIS-NEXT: vins.f16 s6, s4
; CHECK-LIS-NEXT: vins.f16 s5, s3
; CHECK-LIS-NEXT: vmov.f32 s7, s1
; CHECK-LIS-NEXT: vmov.f32 s4, s2
; CHECK-LIS-NEXT: vmov q0, q1
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
ret <8 x i16> %out
@@ -476,42 +491,79 @@ entry:
}

define arm_aapcs_vfpcc <16 x i8> @shuffle3_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle3_i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmov.u8 r0, q0[4]
; CHECK-NEXT: vmov.8 q0[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[5]
; CHECK-NEXT: vmov.8 q0[1], r0
; CHECK-NEXT: vmov.u8 r0, q1[15]
; CHECK-NEXT: vmov.8 q0[2], r0
; CHECK-NEXT: vmov.u8 r0, q1[7]
; CHECK-NEXT: vmov.8 q0[3], r0
; CHECK-NEXT: vmov.u8 r0, q1[14]
; CHECK-NEXT: vmov.8 q0[4], r0
; CHECK-NEXT: vmov.u8 r0, q1[9]
; CHECK-NEXT: vmov.8 q0[5], r0
; CHECK-NEXT: vmov.u8 r0, q1[6]
; CHECK-NEXT: vmov.8 q0[6], r0
; CHECK-NEXT: vmov.u8 r0, q1[3]
; CHECK-NEXT: vmov.8 q0[7], r0
; CHECK-NEXT: vmov.u8 r0, q1[10]
; CHECK-NEXT: vmov.8 q0[8], r0
; CHECK-NEXT: vmov.u8 r0, q1[12]
; CHECK-NEXT: vmov.8 q0[9], r0
; CHECK-NEXT: vmov.u8 r0, q1[1]
; CHECK-NEXT: vmov.8 q0[10], r0
; CHECK-NEXT: vmov.u8 r0, q1[13]
; CHECK-NEXT: vmov.8 q0[11], r0
; CHECK-NEXT: vmov.u8 r0, q1[2]
; CHECK-NEXT: vmov.8 q0[12], r0
; CHECK-NEXT: vmov.u8 r0, q1[8]
; CHECK-NEXT: vmov.8 q0[13], r0
; CHECK-NEXT: vmov.u8 r0, q1[0]
; CHECK-NEXT: vmov.8 q0[14], r0
; CHECK-NEXT: vmov.u8 r0, q1[11]
; CHECK-NEXT: vmov.8 q0[15], r0
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle3_i8:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: vmov q1, q0
; CHECK-LV-NEXT: vmov.u8 r0, q0[4]
; CHECK-LV-NEXT: vmov.8 q0[0], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[5]
; CHECK-LV-NEXT: vmov.8 q0[1], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[15]
; CHECK-LV-NEXT: vmov.8 q0[2], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[7]
; CHECK-LV-NEXT: vmov.8 q0[3], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[14]
; CHECK-LV-NEXT: vmov.8 q0[4], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[9]
; CHECK-LV-NEXT: vmov.8 q0[5], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[6]
; CHECK-LV-NEXT: vmov.8 q0[6], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[3]
; CHECK-LV-NEXT: vmov.8 q0[7], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[10]
; CHECK-LV-NEXT: vmov.8 q0[8], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[12]
; CHECK-LV-NEXT: vmov.8 q0[9], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[1]
; CHECK-LV-NEXT: vmov.8 q0[10], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[13]
; CHECK-LV-NEXT: vmov.8 q0[11], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[2]
; CHECK-LV-NEXT: vmov.8 q0[12], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[8]
; CHECK-LV-NEXT: vmov.8 q0[13], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[0]
; CHECK-LV-NEXT: vmov.8 q0[14], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[11]
; CHECK-LV-NEXT: vmov.8 q0[15], r0
; CHECK-LV-NEXT: bx lr
;
; CHECK-LIS-LABEL: shuffle3_i8:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: vmov.u8 r0, q0[4]
; CHECK-LIS-NEXT: vmov.8 q1[0], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[5]
; CHECK-LIS-NEXT: vmov.8 q1[1], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[15]
; CHECK-LIS-NEXT: vmov.8 q1[2], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[7]
; CHECK-LIS-NEXT: vmov.8 q1[3], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[14]
; CHECK-LIS-NEXT: vmov.8 q1[4], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[9]
; CHECK-LIS-NEXT: vmov.8 q1[5], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[6]
; CHECK-LIS-NEXT: vmov.8 q1[6], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[3]
; CHECK-LIS-NEXT: vmov.8 q1[7], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[10]
; CHECK-LIS-NEXT: vmov.8 q1[8], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[12]
; CHECK-LIS-NEXT: vmov.8 q1[9], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[1]
; CHECK-LIS-NEXT: vmov.8 q1[10], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[13]
; CHECK-LIS-NEXT: vmov.8 q1[11], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[2]
; CHECK-LIS-NEXT: vmov.8 q1[12], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[8]
; CHECK-LIS-NEXT: vmov.8 q1[13], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[0]
; CHECK-LIS-NEXT: vmov.8 q1[14], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[11]
; CHECK-LIS-NEXT: vmov.8 q1[15], r0
; CHECK-LIS-NEXT: vmov q0, q1
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 15, i32 7, i32 14, i32 9, i32 6, i32 3, i32 10, i32 12, i32 1, i32 13, i32 2, i32 8, i32 0, i32 11>
ret <16 x i8> %out
@@ -1143,18 +1195,31 @@ entry:
}

define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle3_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmovx.f16 s2, s5
; CHECK-NEXT: vmovx.f16 s0, s4
; CHECK-NEXT: vins.f16 s5, s4
; CHECK-NEXT: vins.f16 s2, s0
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vmovx.f16 s1, s7
; CHECK-NEXT: vmov.f32 s0, s6
; CHECK-NEXT: vins.f16 s1, s7
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle3_f16:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: vmov q1, q0
; CHECK-LV-NEXT: vmovx.f16 s2, s5
; CHECK-LV-NEXT: vmovx.f16 s0, s4
; CHECK-LV-NEXT: vins.f16 s5, s4
; CHECK-LV-NEXT: vins.f16 s2, s0
; CHECK-LV-NEXT: vmov.f32 s3, s5
; CHECK-LV-NEXT: vmovx.f16 s1, s7
; CHECK-LV-NEXT: vmov.f32 s0, s6
; CHECK-LV-NEXT: vins.f16 s1, s7
; CHECK-LV-NEXT: bx lr
;
; CHECK-LIS-LABEL: shuffle3_f16:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: vmovx.f16 s5, s3
; CHECK-LIS-NEXT: vmovx.f16 s6, s1
; CHECK-LIS-NEXT: vmovx.f16 s4, s0
; CHECK-LIS-NEXT: vins.f16 s1, s0
; CHECK-LIS-NEXT: vins.f16 s6, s4
; CHECK-LIS-NEXT: vins.f16 s5, s3
; CHECK-LIS-NEXT: vmov.f32 s7, s1
; CHECK-LIS-NEXT: vmov.f32 s4, s2
; CHECK-LIS-NEXT: vmov q0, q1
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
ret <8 x half> %out
1,269 changes: 849 additions & 420 deletions llvm/test/CodeGen/Thumb2/mve-vld3.ll

Large diffs are not rendered by default.

0 comments on commit 300c743

Please sign in to comment.