Skip to content

Commit

Permalink
[AMDGPU] RA inserted scalar instructions can be at the BB top (llvm#7…
Browse files Browse the repository at this point in the history
…2140)

During RA, we adjust the insertion point at the BB top so that spills/copies
are placed after the exec restore instructions required for divergent
control flow execution. This adjustment is, however, required only for
vector operations. Insertions for scalar registers are independent of the
exec mask value and can still go to the BB top.

Change-Id: I0ee60b84c53c73d65d8bc9b6fdfc0bcb1e86c4fe
  • Loading branch information
cdevadas authored and zhang2amd committed Nov 29, 2023
1 parent 12fa283 commit 96b5455
Show file tree
Hide file tree
Showing 9 changed files with 324 additions and 20 deletions.
6 changes: 4 additions & 2 deletions llvm/include/llvm/CodeGen/MachineBasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -824,8 +824,10 @@ class MachineBasicBlock

/// Return the first instruction in MBB after I that is not a PHI, label or
/// debug. This is the correct point to insert copies at the beginning of a
/// basic block.
iterator SkipPHIsLabelsAndDebug(iterator I, bool SkipPseudoOp = true);
/// basic block. \p Reg is the register being used by a spill or defined for a
/// restore/split during register allocation.
iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg = Register(),
bool SkipPseudoOp = true);

/// Returns an iterator to the first terminator instruction of this basic
/// block. If a terminator does not exist, it returns end().
Expand Down
6 changes: 4 additions & 2 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1940,8 +1940,10 @@ class TargetInstrInfo : public MCInstrInfo {

/// True if the instruction is bound to the top of its basic block and no
/// other instructions shall be inserted before it. This can be implemented
/// to prevent the register allocator from inserting spills before such instructions.
virtual bool isBasicBlockPrologue(const MachineInstr &MI) const {
/// to prevent the register allocator from inserting spills for \p Reg before
/// such instructions.
virtual bool isBasicBlockPrologue(const MachineInstr &MI,
Register Reg = Register()) const {
return false;
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ class StatepointState {

if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) {
RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad);
auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin());
auto EHPadInsertPoint =
EHPad->SkipPHIsLabelsAndDebug(EHPad->begin(), Reg);
insertReloadBefore(Reg, EHPadInsertPoint, EHPad);
LLVM_DEBUG(dbgs() << "...also reload at EHPad "
<< printMBBReference(*EHPad) << "\n");
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/InlineSpiller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
MachineBasicBlock::iterator MII;
if (SrcVNI->isPHIDef())
MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin(), SrcReg);
else {
MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
assert(DefMI && "Defining instruction disappeared");
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/MachineBasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,13 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {

MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I,
bool SkipPseudoOp) {
Register Reg, bool SkipPseudoOp) {
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();

iterator E = end();
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||
(SkipPseudoOp && I->isPseudoProbe()) ||
TII->isBasicBlockPrologue(*I)))
TII->isBasicBlockPrologue(*I, Reg)))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/CodeGen/SplitKit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -795,8 +795,10 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
return Start;
}

VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
unsigned RegIdx = 0;
Register Reg = LIS.getInterval(Edit->get(RegIdx)).reg();
VNInfo *VNI = defFromParent(RegIdx, ParentVNI, Start, MBB,
MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg));
RegAssign.insert(Start, VNI->def, OpenIdx);
LLVM_DEBUG(dump());
return VNI->def;
Expand Down
17 changes: 13 additions & 4 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7987,16 +7987,25 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}

bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
Register Reg) const {
// We need to handle instructions which may be inserted during register
// allocation to handle the prolog. The initial prolog instruction may have
// been separated from the start of the block by spills and copies inserted as
// needed by the prolog.
uint16_t Opc = MI.getOpcode();
// needed by the prolog. However, the insertions for scalar registers can
// always be placed at the BB top as they are independent of the exec mask
// value.
bool IsNullOrVectorRegister = true;
if (Reg) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
}

uint16_t Opc = MI.getOpcode();
// FIXME: Copies inserted in the block prolog for live-range split should also
// be included.
return (isSpillOpcode(Opc) || (!MI.isTerminator() && !MI.isCopy() &&
return IsNullOrVectorRegister &&
(isSpillOpcode(Opc) || (!MI.isTerminator() && !MI.isCopy() &&
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAGMI *DAG) const override;

bool isBasicBlockPrologue(const MachineInstr &MI) const override;
bool isBasicBlockPrologue(const MachineInstr &MI,
Register Reg = Register()) const override;

MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsPt,
Expand Down
Loading

0 comments on commit 96b5455

Please sign in to comment.