AMDGPU: size SCC copy lowering by the copy's register class, not the wave size

SIFixSGPRCopies::fixSCCCopies used to choose S_CSELECT_B32/B64 and
S_AND_B32/B64 (together with EXEC_LO/EXEC and the temporary's register class)
from GCNSubtarget::isWave32(). With this patch it looks up the register class
of the non-SCC operand of the COPY, asserts that it is 32 or 64 bits wide, and
selects the 32- or 64-bit opcode and SReg_32/SReg_64 class from that size.
A MIR test is added that copies SCC to and from a 32-bit register (gfx1030)
and a 64-bit register (gfx906).

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 60cd9d4c3c35a2..645b6b3b374e0e 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1093,7 +1093,6 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
 }
 
 void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
-  bool IsWave32 = MF.getSubtarget<GCNSubtarget>().isWave32();
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
        ++BI) {
     MachineBasicBlock *MBB = &*BI;
@@ -1106,13 +1105,18 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
       Register SrcReg = MI.getOperand(1).getReg();
       Register DstReg = MI.getOperand(0).getReg();
       if (SrcReg == AMDGPU::SCC) {
+        const TargetRegisterClass *DstRC =
+            TRI->getRegClassForOperandReg(*MRI, MI.getOperand(0));
+        unsigned DstRegSize = TRI->getRegSizeInBits(*DstRC);
+        assert((DstRegSize == 64 || DstRegSize == 32) &&
+               "Expected SCC dst to be 64 or 32 bits");
+        bool IsDst32Bit = DstRegSize == 32;
         Register SCCCopy = MRI->createVirtualRegister(
-            TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
+            IsDst32Bit ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass);
+        unsigned Opcode =
+            IsDst32Bit ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64;
         I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
-                    MI.getDebugLoc(),
-                    TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32
-                                      : AMDGPU::S_CSELECT_B64),
-                    SCCCopy)
+                    MI.getDebugLoc(), TII->get(Opcode), SCCCopy)
                 .addImm(-1)
                 .addImm(0);
         I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
@@ -1122,9 +1126,16 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
         continue;
       }
       if (DstReg == AMDGPU::SCC) {
-        unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
-        Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
-        Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
+        const TargetRegisterClass *SrcRC =
+            TRI->getRegClassForOperandReg(*MRI, MI.getOperand(1));
+        unsigned SrcRegSize = TRI->getRegSizeInBits(*SrcRC);
+        assert((SrcRegSize == 64 || SrcRegSize == 32) &&
+               "Expected SCC src to be 64 or 32 bits");
+        bool IsSrc32Bit = SrcRegSize == 32;
+        unsigned Opcode = IsSrc32Bit ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+        Register Exec = IsSrc32Bit ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+        Register Tmp = MRI->createVirtualRegister(
+            IsSrc32Bit ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass);
         I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
                     MI.getDebugLoc(), TII->get(Opcode))
                 .addReg(Tmp, getDefRegState(true))
diff --git a/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir b/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir
new file mode 100644
index 00000000000000..7ee7cf05b95911
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir
@@ -0,0 +1,46 @@
+# RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GFX906
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GFX1030
+
+---
+
+# GFX1030-LABEL: name: waterfall_kills_scc_gfx1030
+# GFX1030: %1:sreg_32 = S_CSELECT_B32 -1, 0, implicit $scc
+# GFX1030: %2:sreg_32 = S_AND_B32 %0, $exec_lo, implicit-def $scc
+
+name: waterfall_kills_scc_gfx1030
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+
+    %1:sreg_32 = COPY $scc
+
+  bb.1:
+    successors: %bb.1(0x80000000), %bb.2(0x40000000)
+
+    $exec = S_XOR_B64_term $exec, -1, implicit-def $scc
+    SI_WATERFALL_LOOP %bb.2, implicit $exec
+
+  bb.2:
+    $scc = COPY %1
+...
+
+# GFX906-LABEL: name: waterfall_kills_scc_gfx906
+# GFX906: %1:sreg_64 = S_CSELECT_B64 -1, 0, implicit $scc
+# GFX906: %2:sreg_64 = S_AND_B64 %0, $exec, implicit-def $scc
+---
+name: waterfall_kills_scc_gfx906
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+
+    %1:sreg_64_xexec = COPY $scc
+
+  bb.1:
+    successors: %bb.1(0x80000000), %bb.2(0x40000000)
+
+    $exec = S_XOR_B64_term $exec, -1, implicit-def $scc
+    SI_WATERFALL_LOOP %bb.2, implicit $exec
+
+  bb.2:
+    $scc = COPY %1
+...