[AMDGPU]: Fall back to default mutations when iglp is not applied #93418

Status: Open. Wants to merge 2 commits into base: main. Changes shown are from all commits.
21 changes: 17 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2337,6 +2337,8 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {

ScheduleDAGMI *DAG;

std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations;
Review comment from a Contributor on the new SavedMutations member:

It's not obvious to me why this is a pointer to a vector. Can this just be an ArrayRef? Why does the mutation have to manage the other mutations? I thought all the mutations were added sequentially already, where this would just be one in the set.
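For comparison, here is a minimal, hypothetical sketch of the ArrayRef-based variant the comment suggests. It is not part of this PR or of the review thread, and it assumes the saved-mutation vector is fully built before this object is constructed and stays alive, unresized, for as long as the mutation is installed on the DAG:

// Hypothetical sketch only: a non-owning view instead of a pointer to the
// owning vector. An empty view plays the role of the null pointer, i.e.
// "no fallback requested".
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"

#include <memory>

namespace {
class IGroupLPDAGMutationSketch : public llvm::ScheduleDAGMutation {
  // Non-owning view of the mutations to fall back to.
  llvm::ArrayRef<std::unique_ptr<llvm::ScheduleDAGMutation>> SavedMutations;

public:
  explicit IGroupLPDAGMutationSketch(
      llvm::ArrayRef<std::unique_ptr<llvm::ScheduleDAGMutation>> Saved)
      : SavedMutations(Saved) {}

  void apply(llvm::ScheduleDAGInstrs *DAG) override {
    // ... IGLP handling elided; return early when a strategy was applied ...
    // Otherwise replay the saved default mutations.
    for (const std::unique_ptr<llvm::ScheduleDAGMutation> &M : SavedMutations)
      M->apply(DAG);
  }
};
} // namespace

One semantic difference to weigh: the pointer form always observes the current contents of the caller's vector and can use nullptr to mean "no fallback", while an ArrayRef snapshots a base pointer and length at construction time.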


// Organize lists of SchedGroups by their SyncID. SchedGroups /
// SCHED_GROUP_BARRIERs with different SyncIDs will have no edges added
// between them.
@@ -2379,7 +2381,10 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
AMDGPU::SchedulingPhase Phase = AMDGPU::SchedulingPhase::Initial;

IGroupLPDAGMutation() = default;
IGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase) : Phase(Phase) {}
IGroupLPDAGMutation(
AMDGPU::SchedulingPhase Phase,
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations)
: SavedMutations(SavedMutations), Phase(Phase) {}
};

unsigned SchedGroup::NumSchedGroups = 0;
@@ -2597,6 +2602,13 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
PS.solve();
return;
}

if (!SavedMutations)
return;

// We did not apply a mutation, fall back to SavedMutations
for (auto &m : *SavedMutations)
m->apply(DAG);
}

void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
@@ -2695,9 +2707,10 @@ namespace llvm {
/// same scheduling region (e.g. pre and post-RA scheduling / multiple
/// scheduling "phases"), we can reenter this mutation framework more than once
/// for a given region.
std::unique_ptr<ScheduleDAGMutation>
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase) {
return std::make_unique<IGroupLPDAGMutation>(Phase);
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
AMDGPU::SchedulingPhase Phase,
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations) {
return std::make_unique<IGroupLPDAGMutation>(Phase, SavedMutations);
}

} // end namespace llvm
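To tie the pieces together, the intended wiring looks roughly like the simplified fragment below. It mirrors the GCNSchedStrategy.cpp call sites further down; it is illustrative only and assumes access to the scheduler's mutation list, which the real code has as a derived class or friend:

// Simplified fragment modeled on GCNPostScheduleDAGMILive::schedule() below.
// 1. Stash the default mutations (e.g. load/store clustering) that are
//    already registered on the scheduler.
SavedMutations.clear();
SavedMutations.swap(Mutations);

// 2. Install the IGLP mutation and hand it a pointer to the stashed list.
addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA,
                                      &SavedMutations));

// 3. While scheduling runs, IGroupLPDAGMutation::apply() either applies an
//    IGLP strategy or, when none is applied, replays every mutation in
//    SavedMutations so the default behavior is preserved.
ScheduleDAGMI::schedule();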
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
@@ -20,8 +20,9 @@ namespace AMDGPU {
enum class SchedulingPhase { Initial, PreRAReentry, PostRA };
} // namespace AMDGPU

std::unique_ptr<ScheduleDAGMutation>
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase);
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
AMDGPU::SchedulingPhase Phase,
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations);

} // namespace llvm

8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -471,7 +471,8 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
DAG->addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial, nullptr));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
@@ -481,7 +482,8 @@ static ScheduleDAGInstrs *
createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
DAG->addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial, nullptr));
return DAG;
}

@@ -893,7 +895,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
DAG->addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA, nullptr));
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
DAG->addMutation(createVOPDPairingMutation());
return DAG;
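A brief editorial note on the nullptr arguments in this file (my reading of the diff, not text from the PR): at these scheduler-construction call sites there is no saved-mutation list to fall back to, and any default mutations added here (clustering, macro fusion, and so on) remain registered on the DAG and run on their own, so no fallback replay is wanted. The fallback is wired up later, in the GCNSchedStrategy.cpp hunks below, where the scheduler first swaps its mutation list into SavedMutations. For example, the first call site in this file, annotated:

// A null SavedMutations pointer opts out of the fallback; the clustering and
// other mutations added alongside it remain on the DAG and apply regardless.
DAG->addMutation(
    createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial,
                              /*SavedMutations=*/nullptr));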
14 changes: 8 additions & 6 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -713,8 +713,8 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
return false;

SavedMutations.swap(DAG.Mutations);
DAG.addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));
DAG.addMutation(createIGroupLPDAGMutation(
AMDGPU::SchedulingPhase::PreRAReentry, nullptr));

InitialOccupancy = DAG.MinOccupancy;
// Aggressively try to reduce register pressure in the unclustered high RP
@@ -858,7 +858,8 @@ bool GCNSchedStage::initGCNRegion() {
StageID == GCNSchedStageID::ILPInitialSchedule;
DAG.addMutation(createIGroupLPDAGMutation(
IsInitialStage ? AMDGPU::SchedulingPhase::Initial
: AMDGPU::SchedulingPhase::PreRAReentry));
: AMDGPU::SchedulingPhase::PreRAReentry,
&SavedMutations));
}

return true;
@@ -1577,15 +1578,16 @@ void GCNPostScheduleDAGMILive::schedule() {
if (HasIGLPInstrs) {
SavedMutations.clear();
SavedMutations.swap(Mutations);
addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA,
&SavedMutations));
}

ScheduleDAGMI::schedule();
}

void GCNPostScheduleDAGMILive::finalizeSchedule() {
if (HasIGLPInstrs)
SavedMutations.swap(Mutations);
}

void GCNPostScheduleDAGMILive::finalizeSchedule() {
ScheduleDAGMI::finalizeSchedule();
}
28 changes: 19 additions & 9 deletions llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
@@ -6,15 +6,25 @@
# GCN-NEXT: V_ADD_F32_e64
name: cluster_flat_loads
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
%3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
%0:vreg_64 = IMPLICIT_DEF
%1:vgpr_32 = FLAT_LOAD_DWORD %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
%2:vgpr_32 = V_ADD_F32_e64 0, killed %1:vgpr_32, 0, 1, 0, 0, implicit $mode, implicit $exec
%3:vgpr_32 = FLAT_LOAD_DWORD %0:vreg_64, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
...
---
# GCN-LABEL: name: cluster_flat_loads_iglp_opt
# GCN: FLAT_LOAD_DWORD %0, 0
# GCN-NEXT: FLAT_LOAD_DWORD %0, 4
# GCN-NEXT: V_ADD_F32_e64
name: cluster_flat_loads_iglp_opt
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vgpr_32 = FLAT_LOAD_DWORD %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
%2:vgpr_32 = V_ADD_F32_e64 0, killed %1:vgpr_32, 0, 1, 0, 0, implicit $mode, implicit $exec
%3:vgpr_32 = FLAT_LOAD_DWORD %0:vreg_64, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
IGLP_OPT 2
...