Skip to content

Commit

Permalink
* updated documentation and minor changes based on feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
sjw36 committed Nov 1, 2024
1 parent 4f957b2 commit 78a8712
Show file tree
Hide file tree
Showing 4 changed files with 215 additions and 79 deletions.
2 changes: 1 addition & 1 deletion third_party/amd/backend/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def make_ttgir(mod, metadata, options):
"num_stages == 0. Now it will not happen anymore; "
"please update to use num_stages == 2 for "
"equivalent behavior in the past.")
prefetch = int(os.getenv("TRITON_HIP_STREAM_PREFETCH_V3", "0"))
prefetch = bool(os.getenv("TRITON_HIP_STREAM_PREFETCH", "0"))
amd.passes.ttgpuir.add_stream_pipelinev2(pm, options.num_stages, prefetch)
passes.common.add_canonicalizer(pm)
amd.passes.ttgpuir.insert_instruction_sched_hints(pm)
Expand Down
2 changes: 1 addition & 1 deletion third_party/amd/include/TritonAMDGPUTransforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def TritonAMDGPUStreamPipelineV2 : Pass<"tritonamdgpu-stream-pipeline-v2", "mlir
"Number of Pipeline stages">,
Option<"prefetch", "prefetch",
"int32_t", /*default*/"0",
"Enable prefetch(V3)">
"Enable prefetch from shared memory">
];
}

Expand Down
53 changes: 53 additions & 0 deletions third_party/amd/lib/TritonAMDGPUTransforms/ReorderInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,59 @@ static bool isPureMatmulProblem(ModuleOp moduleOp) {
return foundLoop && isMatmul;
}

// Scan `block` from its first op up to (but not including) `move` and return
// an iterator to the last top-level op that limits how early `move` may be
// placed. A top-level op is limiting when it, or any op nested inside it:
//   * uses the pointer operand of `move` (only tracked when `move` is a
//     triton::LoadOp),
//   * is an atomic RMW/CAS (used for global synchronization),
//   * is a gpu barrier, or
//   * is an scf.for / scf.while loop.
// Returns block->end() when no limiting op precedes `move`.
static llvm::ilist<Operation>::iterator
findEarlyInsertionPoint(Block *block, Operation *move) {
  // Only loads carry a source pointer whose earlier uses must be respected.
  Value srcPtr;
  if (auto loadOp = dyn_cast<triton::LoadOp>(move))
    srcPtr = loadOp.getPtr();

  auto insertionPt = block->end();
  for (auto it = block->begin(); it != block->end(); ++it) {
    Operation *topLevelOp = &*it;
    // Never search past the current location of the op being moved.
    if (topLevelOp == move)
      break;

    topLevelOp->walk([&](Operation *nested) {
      // Synchronization points and loops always pin the insertion point.
      bool pinsMove =
          isa<triton::AtomicRMWOp, triton::AtomicCASOp, gpu::BarrierOp,
              scf::ForOp, scf::WhileOp>(nested);

      // Otherwise, an op that consumes the source pointer pins it as well.
      if (!pinsMove && srcPtr) {
        for (Value operand : nested->getOperands()) {
          if (operand == srcPtr) {
            pinsMove = true;
            break;
          }
        }
      }

      // Record the enclosing top-level op; later matches overwrite earlier
      // ones so the final value is the last limiting op before `move`.
      if (pinsMove)
        insertionPt = it;
    });
  }
  return insertionPt;
}

// Find the earliest user of `op` within op's own block. A user that lives in a
// nested region is represented by its ancestor op located in that block; users
// with no ancestor in the block are ignored. Returns nullptr when there is no
// such user.
static Operation *getFirstUseInSameBlock(Operation *op) {
  Block *parentBlock = op->getBlock();
  Operation *earliest = nullptr;
  for (Operation *user : op->getUsers()) {
    // Map the user to the op in `parentBlock` that contains it (if any).
    Operation *inBlock = parentBlock->findAncestorOpInBlock(*user);
    if (!inBlock)
      continue;
    // Keep whichever candidate appears first in the block.
    if (!earliest || inBlock->isBeforeInBlock(earliest))
      earliest = inBlock;
  }
  return earliest;
}

// Check if the operation opInsideLoop is inside any scf::ForOp and
// opOutsideLoop is not inside the same loop.
static bool isCrossLoopBoundary(mlir::Operation *opInsideLoop,
Expand Down
Loading

0 comments on commit 78a8712

Please sign in to comment.