Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Use 32-bit SGPR for save/restore of SCC #68367

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1093,7 +1093,6 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
}

void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
bool IsWave32 = MF.getSubtarget<GCNSubtarget>().isWave32();
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
++BI) {
MachineBasicBlock *MBB = &*BI;
Expand All @@ -1106,13 +1105,18 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
Register SrcReg = MI.getOperand(1).getReg();
Register DstReg = MI.getOperand(0).getReg();
if (SrcReg == AMDGPU::SCC) {
const TargetRegisterClass *DstRC =
TRI->getRegClassForOperandReg(*MRI, MI.getOperand(0));
unsigned DstRegSize = TRI->getRegSizeInBits(*DstRC);
assert((DstRegSize == 64 || DstRegSize == 32) &&
"Expected SCC dst to be 64 or 32 bits");
bool IsDst32Bit = DstRegSize == 32;
Register SCCCopy = MRI->createVirtualRegister(
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
IsDst32Bit ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass);
unsigned Opcode =
IsDst32Bit ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64;
I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
MI.getDebugLoc(),
TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64),
SCCCopy)
MI.getDebugLoc(), TII->get(Opcode), SCCCopy)
.addImm(-1)
.addImm(0);
I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
Expand All @@ -1122,9 +1126,16 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
continue;
}
if (DstReg == AMDGPU::SCC) {
unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
const TargetRegisterClass *SrcRC =
TRI->getRegClassForOperandReg(*MRI, MI.getOperand(1));
unsigned SrcRegSize = TRI->getRegSizeInBits(*SrcRC);
assert((SrcRegSize == 64 || SrcRegSize == 32) &&
"Expected SCC src to be 64 or 32 bits");
bool IsSrc32Bit = SrcRegSize == 32;
unsigned Opcode = IsSrc32Bit ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
Register Exec = IsSrc32Bit ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
Register Tmp = MRI->createVirtualRegister(
IsSrc32Bit ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass);
I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
MI.getDebugLoc(), TII->get(Opcode))
.addReg(Tmp, getDefRegState(true))
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/AMDGPU/save_restore_scc.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GFX906
# RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GFX1030

---

# GFX1030-LABEL: name: waterfall_kills_scc_gfx1030
# GFX1030: %1:sreg_32 = S_CSELECT_B32 -1, 0, implicit $scc
# GFX1030: %2:sreg_32 = S_AND_B32 %0, $exec_lo, implicit-def $scc

# Test input for the 32-bit case: bb.0 saves SCC into a 32-bit SGPR virtual
# register (%1:sreg_32), bb.1 redefines SCC inside the waterfall loop
# (S_XOR_B64_term carries an implicit-def of $scc), and bb.2 copies the saved
# value back into SCC. The check lines above expect the two SCC copies to be
# rewritten as S_CSELECT_B32 and S_AND_B32 against $exec_lo.
name: waterfall_kills_scc_gfx1030
body: |
bb.0.entry:
successors: %bb.1(0x80000000)

%1:sreg_32 = COPY $scc

bb.1:
successors: %bb.1(0x80000000), %bb.2(0x40000000)

$exec = S_XOR_B64_term $exec, -1, implicit-def $scc
SI_WATERFALL_LOOP %bb.2, implicit $exec

bb.2:
$scc = COPY %1
...

# GFX906-LABEL: name: waterfall_kills_scc_gfx906
# GFX906: %1:sreg_64 = S_CSELECT_B64 -1, 0, implicit $scc
# GFX906: %2:sreg_64 = S_AND_B64 %0, $exec, implicit-def $scc
---
# Test input for the 64-bit case: bb.0 saves SCC into a 64-bit SGPR virtual
# register (%1:sreg_64_xexec), bb.1 redefines SCC inside the waterfall loop
# (S_XOR_B64_term carries an implicit-def of $scc), and bb.2 copies the saved
# value back into SCC. The check lines above expect the two SCC copies to be
# rewritten as S_CSELECT_B64 and S_AND_B64 against $exec.
name: waterfall_kills_scc_gfx906
body: |
bb.0.entry:
successors: %bb.1(0x80000000)

%1:sreg_64_xexec = COPY $scc

bb.1:
successors: %bb.1(0x80000000), %bb.2(0x40000000)

$exec = S_XOR_B64_term $exec, -1, implicit-def $scc
SI_WATERFALL_LOOP %bb.2, implicit $exec

bb.2:
$scc = COPY %1
...