diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 127793c4bcc5dad..1e31ac1c2d58903 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -824,8 +824,10 @@ class MachineBasicBlock
   /// Return the first instruction in MBB after I that is not a PHI, label or
   /// debug. This is the correct point to insert copies at the beginning of a
-  /// basic block.
-  iterator SkipPHIsLabelsAndDebug(iterator I, bool SkipPseudoOp = true);
+  /// basic block. \p Reg is the register being used by a spill or defined for a
+  /// restore/split during register allocation.
+  iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg = Register(),
+                                  bool SkipPseudoOp = true);
 
   /// Returns an iterator to the first terminator instruction of this basic
   /// block. If a terminator does not exist, it returns end().
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 99c972dd84cc4c1..77687b2ae0de871 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1940,8 +1940,10 @@ class TargetInstrInfo : public MCInstrInfo {
   /// True if the instruction is bound to the top of its basic block and no
   /// other instructions shall be inserted before it. This can be implemented
-  /// to prevent register allocator to insert spills before such instructions.
-  virtual bool isBasicBlockPrologue(const MachineInstr &MI) const {
+  /// to prevent the register allocator from inserting spills for \p Reg before
+  /// such instructions.
+  virtual bool isBasicBlockPrologue(const MachineInstr &MI,
+                                    Register Reg = Register()) const {
     return false;
   }
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 0b32d69afeb286f..cc1e609a27c871d 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -461,7 +461,8 @@ class StatepointState {
     if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) {
       RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad);
-      auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin());
+      auto EHPadInsertPoint =
+          EHPad->SkipPHIsLabelsAndDebug(EHPad->begin(), Reg);
       insertReloadBefore(Reg, EHPadInsertPoint, EHPad);
       LLVM_DEBUG(dbgs() << "...also reload at EHPad "
                         << printMBBReference(*EHPad) << "\n");
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 53117d63a39b355..5c3d3ff98df0e89 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -424,7 +424,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
   MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
   MachineBasicBlock::iterator MII;
   if (SrcVNI->isPHIDef())
-    MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
+    MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin(), SrcReg);
   else {
     MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
     assert(DefMI && "Defining instruction disappeared");
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index bce9039f0e48200..b2f0e2a36ceeb8d 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -221,13 +221,13 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
 
 MachineBasicBlock::iterator
 MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I,
-                                          bool SkipPseudoOp) {
+                                          Register Reg, bool SkipPseudoOp) {
   const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
 
   iterator E = end();
   while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||
                     (SkipPseudoOp && I->isPseudoProbe()) ||
-                    TII->isBasicBlockPrologue(*I)))
+                    TII->isBasicBlockPrologue(*I, Reg)))
     ++I;
   // FIXME: This needs to change if we wish to bundle labels / dbg_values
   // inside the bundle.
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 8929182f76c52b9..95d7971b6026737 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -795,8 +795,10 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
     return Start;
   }
 
-  VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
-                              MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
+  unsigned RegIdx = 0;
+  Register Reg = LIS.getInterval(Edit->get(RegIdx)).reg();
+  VNInfo *VNI = defFromParent(RegIdx, ParentVNI, Start, MBB,
+                              MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg));
   RegAssign.insert(Start, VNI->def, OpenIdx);
   LLVM_DEBUG(dump());
   return VNI->def;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e3716090d5d938b..6a31beb413c46d2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7987,16 +7987,25 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
   return ArrayRef(TargetFlags);
 }
 
-bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
+bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
+                                       Register Reg) const {
   // We need to handle instructions which may be inserted during register
   // allocation to handle the prolog. The initial prolog instruction may have
   // been separated from the start of the block by spills and copies inserted
-  // needed by the prolog.
-  uint16_t Opc = MI.getOpcode();
+  // needed by the prolog. However, the insertions for scalar registers can
+  // always be placed at the BB top as they are independent of the exec mask
+  // value.
+  bool IsNullOrVectorRegister = true;
+  if (Reg) {
+    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+    IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
+  }
 
+  uint16_t Opc = MI.getOpcode();
   // FIXME: Copies inserted in the block prolog for live-range split should also
   // be included.
-  return (isSpillOpcode(Opc) || (!MI.isTerminator() && !MI.isCopy() &&
+  return IsNullOrVectorRegister &&
+         (isSpillOpcode(Opc) || (!MI.isTerminator() && !MI.isCopy() &&
                                  MI.modifiesRegister(AMDGPU::EXEC, &RI)));
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 5dd3e3bc8ca09b2..a3717ab5e27cd96 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1132,7 +1132,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                  const ScheduleDAGMI *DAG) const override;
 
-  bool isBasicBlockPrologue(const MachineInstr &MI) const override;
+  bool isBasicBlockPrologue(const MachineInstr &MI,
+                            Register Reg = Register()) const override;
 
   MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator InsPt,
diff --git a/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir b/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir
index a5cceb622d3a4e7..f3e5653f4fce01d 100644
--- a/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir
@@ -1,6 +1,8 @@
-# RUN: not llc --crash -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -run-pass=greedy -filetype=null %s
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -run-pass=greedy --stress-regalloc=6 --verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
 
-; This test would crash while trying to split a liverange during register allocator.
+# The spills/copies inserted during RA for scalar registers that are block live-ins should be placed at the beginning of the block.
+# The COPY inserted in bb.9 during live-range splitting should precede the SPILL that was inserted earlier in the flow.
--- name: test_kernel @@ -129,14 +131,299 @@ machineFunctionInfo: stackPtrOffsetReg: '$sgpr32' sgprForEXECCopy: '$sgpr105' body: | - bb.0: + ; GCN-LABEL: name: test_kernel + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: dead undef %1.sub1:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: SI_SPILL_S32_SAVE $sgpr1, %stack.15, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.15, addrspace 5) + ; GCN-NEXT: undef %133.sub1:sgpr_64 = COPY $sgpr0 + ; GCN-NEXT: SI_SPILL_S64_SAVE %133, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) + ; GCN-NEXT: undef %118.sub0:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: %118.sub1:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: undef %191.sub0:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: %191.sub1:sgpr_64 = IMPLICIT_DEF + ; GCN-NEXT: SI_SPILL_S64_SAVE %191, %stack.19, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.19, align 4, addrspace 5) + ; GCN-NEXT: undef %122.sub0:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: %122.sub1:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 0 + ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 0 + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_MOV_B32_1]], %stack.17, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.17, addrspace 5) + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: KILL [[DEF1]] + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %122, 132, 0 :: ("amdgpu-noclobber" load (s128), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S128_SAVE [[S_LOAD_DWORDX4_IMM]], %stack.14, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.14, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %122, 188, 0 :: ("amdgpu-noclobber" load (s256), align 8, addrspace 1) + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.4, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.4(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 -1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_MOV_B32_2]], %stack.9, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.9, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %122, 120, 0 :: ("amdgpu-noclobber" load (s64), align 16, addrspace 1) + ; GCN-NEXT: SI_SPILL_S64_SAVE [[S_LOAD_DWORDX2_IMM]], %stack.18, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.18, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM1:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %122, 352, 0 :: 
("amdgpu-noclobber" load (s256), align 16, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM1]], %stack.10, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.10, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %97:sreg_64, 0, 0 + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_LOAD_DWORD_IMM]], %stack.11, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.11, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM2:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %122, 652, 0 :: ("amdgpu-noclobber" load (s256), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM2]], %stack.6, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.6, align 4, addrspace 5) + ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; GCN-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_MOV_B64_]], 0, 0 :: ("amdgpu-noclobber" load (s32), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_LOAD_DWORD_IMM1]], %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: SI_SPILL_S64_SAVE %122, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM3:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM %122, 688, 0 :: ("amdgpu-noclobber" load (s256), align 16, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM3]], %stack.4, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.4, align 4, addrspace 5) + ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 0 + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.6, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.5 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 -1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: + ; GCN-NEXT: successors: %bb.7(0x40000000), %bb.10(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_MOV_B32_4]], %stack.5, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.5, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sgpr_32 = S_LOAD_DWORD_IMM undef %123:sgpr_64, 0, 0 :: ("amdgpu-noclobber" load (s32), align 16, addrspace 1) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM4:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %124:sgpr_64, 152, 0 :: ("amdgpu-noclobber" load (s256), align 4, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM4]], %stack.20, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.20, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM5:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %125:sgpr_64, 220, 0 :: ("amdgpu-noclobber" load (s256), align 4, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM5]], %stack.16, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.16, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM6:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %126:sgpr_64, 384, 0 :: ("amdgpu-noclobber" load (s256), align 4, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM6]], %stack.13, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.13, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM undef %127:sgpr_64, 440, 0 :: ("amdgpu-noclobber" load (s512), align 8, addrspace 1) + ; GCN-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = 
S_LOAD_DWORDX16_IMM undef %128:sgpr_64, 584, 0 :: ("amdgpu-noclobber" load (s512), align 16, addrspace 1) + ; GCN-NEXT: SI_SPILL_S512_SAVE [[S_LOAD_DWORDX16_IMM1]], %stack.12, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.12, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM7:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM %118, 156, 0 :: ("amdgpu-noclobber" load (s256), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM7]], %stack.8, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.8, align 4, addrspace 5) + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.19, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.19, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sgpr_32 = S_LOAD_DWORD_IMM [[SI_SPILL_S64_RESTORE]], 0, 0 :: ("amdgpu-noclobber" load (s32), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_LOAD_DWORD_IMM3]], %stack.7, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.7, addrspace 5) + ; GCN-NEXT: SI_SPILL_S64_SAVE %118, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64 = PRED_COPY %118 + ; GCN-NEXT: dead [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: ("amdgpu-noclobber" load (s32), addrspace 1) + ; GCN-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; GCN-NEXT: [[S_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_MOV_B64_1]], 0, 0 :: ("amdgpu-noclobber" load (s32), addrspace 1) + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE1:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: undef %131.sub1:sgpr_64 = PRED_COPY [[SI_SPILL_S64_RESTORE1]].sub1 + ; GCN-NEXT: %131.sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.10, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.7 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.7: + ; GCN-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: SI_SPILL_S64_SAVE %131, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) + ; GCN-NEXT: undef %13.sub0:sgpr_64 = V_READFIRSTLANE_B32 undef %1.sub0, implicit $exec + ; GCN-NEXT: dead %13.sub1:sgpr_64 = V_READFIRSTLANE_B32 undef %1.sub1, implicit $exec + ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; GCN-NEXT: $vcc = COPY [[DEF2]] + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit $vcc + ; GCN-NEXT: S_BRANCH %bb.8 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.8: + ; GCN-NEXT: successors: %bb.9(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 -1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.9: + ; GCN-NEXT: successors: %bb.10(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY [[S_MOV_B32_5]] + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE2:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.10: + ; GCN-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORD_IMM2]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.17, implicit 
$exec, implicit $sgpr32 :: (load (s32) from %stack.17, addrspace 5) + ; GCN-NEXT: dead [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[V_CMP_GT_F32_e64_]], [[SI_SPILL_S32_RESTORE]], implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE1:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.15, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.15, addrspace 5) + ; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S32_RESTORE1]], 0, implicit-def $scc + ; GCN-NEXT: dead [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE3:%[0-9]+]]:sreg_64_xexec = SI_SPILL_S64_RESTORE %stack.18, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.18, align 4, addrspace 5) + ; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S64_RESTORE3]].sub1, 0, implicit-def $scc + ; GCN-NEXT: dead [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.20, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.20, align 4, addrspace 5) + ; GCN-NEXT: undef %196.sub0:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE]].sub0 { + ; GCN-NEXT: internal %196.sub2:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE]].sub2 + ; GCN-NEXT: internal %196.sub4:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE]].sub4 + ; GCN-NEXT: internal %196.sub7:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE]].sub7 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 %196.sub7, [[S_LOAD_DWORD_IMM5]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %196.sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %196.sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_3:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %196.sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LOAD_DWORDX8_IMM]].sub0, undef [[S_OR_B32_]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_4:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub1, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_5:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_6:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub3, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_7:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_8:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub5, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_9:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.14, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.14, align 4, addrspace 5) + ; GCN-NEXT: undef %177.sub0_sub1:sgpr_128 = PRED_COPY [[SI_SPILL_S128_RESTORE]].sub0_sub1 { + ; GCN-NEXT: internal %177.sub2:sgpr_128 = PRED_COPY [[SI_SPILL_S128_RESTORE]].sub2 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_10:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %177.sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_11:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %177.sub1, 0, implicit $mode, implicit 
$exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_12:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %177.sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF4]], [[DEF5]], implicit-def dead $scc + ; GCN-NEXT: dead [[DEF6:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[DEF7:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[DEF9:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[DEF10:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF10]], undef [[DEF10]], implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE1:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.16, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.16, align 4, addrspace 5) + ; GCN-NEXT: undef %182.sub0:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE1]].sub0 { + ; GCN-NEXT: internal %182.sub2:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE1]].sub2 + ; GCN-NEXT: internal %182.sub5:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE1]].sub5 + ; GCN-NEXT: internal %182.sub7:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE1]].sub7 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_13:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %182.sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_AND_B32_3:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[V_CMP_GT_F32_e64_8]], undef [[V_CMP_GT_F32_e64_9]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_14:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %182.sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 %182.sub5, %182.sub7, implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE2:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.10, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.10, align 4, addrspace 5) + ; GCN-NEXT: undef %163.lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE2]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16 + ; GCN-NEXT: dead [[S_OR_B32_3:%[0-9]+]]:sreg_32 = S_OR_B32 %163.sub0, %163.sub1, implicit-def dead $scc + ; GCN-NEXT: dead [[S_OR_B32_4:%[0-9]+]]:sreg_32 = S_OR_B32 %163.sub2, undef [[S_OR_B32_3]], implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE2:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.9, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.9, addrspace 5) + ; GCN-NEXT: dead [[S_AND_B32_4:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[S_OR_B32_3]], [[SI_SPILL_S32_RESTORE2]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_15:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %163.sub3, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_16:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %163.sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_17:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %163.sub5, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_18:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %163.sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE3:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.11, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.11, addrspace 5) + ; GCN-NEXT: dead 
[[V_CMP_GT_F32_e64_19:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE3]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE3:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.13, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.13, align 4, addrspace 5) + ; GCN-NEXT: undef %173.sub0:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE3]].sub0 { + ; GCN-NEXT: internal %173.sub2:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE3]].sub2 + ; GCN-NEXT: internal %173.sub4:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE3]].sub4 + ; GCN-NEXT: internal %173.sub7:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE3]].sub7 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_20:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %173.sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_21:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %173.sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF11:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_22:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %173.sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_AND_B32_5:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF11]], undef [[V_CMP_GT_F32_e64_20]], implicit-def dead $scc + ; GCN-NEXT: S_CMP_EQ_U32 %173.sub7, 0, implicit-def $scc + ; GCN-NEXT: undef %169.sub0:sgpr_512 = PRED_COPY [[S_LOAD_DWORDX16_IMM]].sub0 { + ; GCN-NEXT: internal %169.sub2:sgpr_512 = PRED_COPY [[S_LOAD_DWORDX16_IMM]].sub2 + ; GCN-NEXT: internal %169.sub4:sgpr_512 = PRED_COPY [[S_LOAD_DWORDX16_IMM]].sub4 + ; GCN-NEXT: internal %169.sub6:sgpr_512 = PRED_COPY [[S_LOAD_DWORDX16_IMM]].sub6 + ; GCN-NEXT: internal %169.sub9:sgpr_512 = PRED_COPY [[S_LOAD_DWORDX16_IMM]].sub9 + ; GCN-NEXT: internal %169.sub10:sgpr_512 = PRED_COPY [[S_LOAD_DWORDX16_IMM]].sub10 + ; GCN-NEXT: internal %169.sub13:sgpr_512 = PRED_COPY [[S_LOAD_DWORDX16_IMM]].sub13 + ; GCN-NEXT: internal %169.sub14:sgpr_512 = PRED_COPY [[S_LOAD_DWORDX16_IMM]].sub14 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_23:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %169.sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_24:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %169.sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_25:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %169.sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_26:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %169.sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_AND_B32_6:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[V_CMP_GT_F32_e64_23]], undef [[V_CMP_GT_F32_e64_23]], implicit-def dead $scc + ; GCN-NEXT: dead [[S_OR_B32_5:%[0-9]+]]:sreg_32 = S_OR_B32 %169.sub10, %169.sub9, implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_27:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %169.sub13, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_28:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %169.sub14, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.12, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.12, align 4, addrspace 5) + ; GCN-NEXT: undef %161.sub1:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub1 { + ; GCN-NEXT: internal %161.sub5:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub5 + ; GCN-NEXT: internal %161.sub6:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub6 + ; GCN-NEXT: internal %161.sub9:sgpr_512 = PRED_COPY 
[[SI_SPILL_S512_RESTORE]].sub9 + ; GCN-NEXT: internal %161.sub10:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub10 + ; GCN-NEXT: internal %161.sub12:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub12 + ; GCN-NEXT: internal %161.sub15:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub15 + ; GCN-NEXT: } + ; GCN-NEXT: S_CMP_EQ_U32 %161.sub1, 0, implicit-def $scc + ; GCN-NEXT: dead [[DEF12:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_29:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %161.sub5, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_30:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %161.sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF13:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_31:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %161.sub9, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_32:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %161.sub10, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF14:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[S_AND_B32_7:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF14]], undef [[DEF13]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_33:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %161.sub12, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE4:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.6, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.6, align 4, addrspace 5) + ; GCN-NEXT: undef %148.lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE4]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16 + ; GCN-NEXT: dead [[S_OR_B32_6:%[0-9]+]]:sreg_32 = S_OR_B32 %148.sub0, %161.sub15, implicit-def dead $scc + ; GCN-NEXT: dead [[DEF15:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_34:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %148.sub1, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_35:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %148.sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF16:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_36:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %148.sub3, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_37:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %148.sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF17:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_38:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %148.sub5, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_39:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %148.sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_AND_B32_8:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF17]], undef [[DEF16]], implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE5:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.4, align 4, addrspace 5) + ; GCN-NEXT: undef %141.sub0_sub1_sub2_sub3_sub4_sub5:sgpr_256 = PRED_COPY [[SI_SPILL_S256_RESTORE5]].sub0_sub1_sub2_sub3_sub4_sub5 + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_40:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %141.sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_41:%[0-9]+]]:sreg_32 = 
V_CMP_GT_F32_e64 0, 0, 0, %141.sub1, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE4:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5) + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_42:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE4]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_43:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %141.sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_44:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, %141.sub3, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_OR_B32_7:%[0-9]+]]:sreg_32 = S_OR_B32 %141.sub4, %141.sub5, implicit-def dead $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S32_RESTORE4]], 0, implicit-def $scc + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE5:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.5, addrspace 5) + ; GCN-NEXT: dead [[S_AND_B32_9:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[S_OR_B32_7]], [[SI_SPILL_S32_RESTORE5]], implicit-def dead $scc + ; GCN-NEXT: dead [[S_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[SI_SPILL_S64_RESTORE2]], 0, 0 :: ("amdgpu-noclobber" load (s32), align 8, addrspace 1) + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE6:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.8, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.8, align 4, addrspace 5) + ; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S256_RESTORE6]].sub7, 0, implicit-def $scc + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE6:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.7, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.7, addrspace 5) + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_45:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE6]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[DEF18:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[S_AND_B32_10:%[0-9]+]]:sreg_32 = S_AND_B32 [[DEF18]], undef [[S_LOAD_DWORD_IMM6]], implicit-def dead $scc + ; GCN-NEXT: dead [[S_AND_B32_11:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[S_AND_B32_10]], [[PRED_COPY1]], implicit-def dead $scc + ; GCN-NEXT: $vcc = COPY undef [[S_AND_B32_11]] + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.12, implicit $vcc + ; GCN-NEXT: S_BRANCH %bb.11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.11: + ; GCN-NEXT: successors: %bb.12(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.12: + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE4:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5) + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], [[SI_SPILL_S64_RESTORE4]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE5:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], [[SI_SPILL_S64_RESTORE5]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GCN-NEXT: S_ENDPGM 0 + bb.0: successors: %bb.1, %bb.2 liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13 %0:vgpr_32 = IMPLICIT_DEF undef %1.sub1:vreg_64 = IMPLICIT_DEF - %109:sgpr_32 = COPY undef $sgpr1 - undef %93.sub1:sgpr_64 = COPY undef $sgpr0 + %109:sgpr_32 = COPY $sgpr1 + undef %93.sub1:sgpr_64 = COPY $sgpr0 undef 
%106.sub0:sgpr_64 = V_READFIRSTLANE_B32 undef %0, implicit $exec %106.sub1:sgpr_64 = V_READFIRSTLANE_B32 undef %0, implicit $exec undef %105.sub0:sgpr_64 = V_READFIRSTLANE_B32 undef %0, implicit $exec