Skip to content

Commit

Permalink
[mlir][gpu][bufferization] Implement BufferDeallocationOpInterface for gpu.terminator (llvm#66880)
Browse files Browse the repository at this point in the history

This is necessary to support deallocation of IR containing gpu.launch
operations, because gpu.launch does not implement the RegionBranchOpInterface.
Implementing the interface would require it to support regions with
unstructured control flow and produced arguments/results.
  • Loading branch information
maerhart authored Sep 20, 2023
1 parent 97ae760 commit 522c1d0
Show file tree
Hide file tree
Showing 10 changed files with 149 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,23 @@ class DeallocationState {
Liveness liveness;
};

namespace deallocation_impl {
/// Insert a `bufferization.dealloc` operation right before `op`, which has to
/// be a terminator without any successors (both properties are asserted). Note
/// that it is not required to have the ReturnLike trait attached. The MemRef
/// values in the `operands` argument will be added to the list of retained
/// values and their updated ownership values will be appended to the
/// `updatedOperandOwnerships` list. `op` itself is not modified in any way.
///
/// If there is nothing to deallocate and nothing to retain, no dealloc
/// operation is created and `updatedOperandOwnerships` is left unchanged.
///
/// Returns failure if at least one of the MemRefs to deallocate does not have
/// 'Unique' ownership (likely as a result of an incorrect implementation of
/// the `process` or `materializeUniqueOwnershipForMemref` interface method);
/// otherwise returns the original `op`.
FailureOr<Operation *>
insertDeallocOpForReturnLike(DeallocationState &state, Operation *op,
ValueRange operands,
SmallVectorImpl<Value> &updatedOperandOwnerships);
} // namespace deallocation_impl

} // namespace bufferization
} // namespace mlir

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===- BufferDeallocationOpInterfaceImpl.h ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_GPU_TRANSFORMS_BUFFERDEALLOCATIONOPINTERFACEIMPL_H
#define MLIR_DIALECT_GPU_TRANSFORMS_BUFFERDEALLOCATIONOPINTERFACEIMPL_H

namespace mlir {

class DialectRegistry;

namespace gpu {
/// Adds an extension to `registry` that attaches the external model
/// implementing the BufferDeallocationOpInterface to `gpu::TerminatorOp`.
void registerBufferDeallocationOpInterfaceExternalModels(
DialectRegistry &registry);
} // namespace gpu
} // namespace mlir

#endif // MLIR_DIALECT_GPU_TRANSFORMS_BUFFERDEALLOCATIONOPINTERFACEIMPL_H
2 changes: 2 additions & 0 deletions mlir/include/mlir/InitAllDialects.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "mlir/Dialect/EmitC/IR/EmitC.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h"
#include "mlir/Dialect/IRDL/IR/IRDL.h"
#include "mlir/Dialect/Index/IR/IndexDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
Expand Down Expand Up @@ -144,6 +145,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
builtin::registerCastOpInterfaceExternalModels(registry);
cf::registerBufferizableOpInterfaceExternalModels(registry);
cf::registerBufferDeallocationOpInterfaceExternalModels(registry);
gpu::registerBufferDeallocationOpInterfaceExternalModels(registry);
linalg::registerBufferizableOpInterfaceExternalModels(registry);
linalg::registerTilingInterfaceExternalModels(registry);
linalg::registerValueBoundsOpInterfaceExternalModels(registry);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,44 @@ bool ValueComparator::operator()(const Value &lhs, const Value &rhs) const {
assert(lhsRegion && "this should only happen if lhs == rhs");
return false;
}

//===----------------------------------------------------------------------===//
// Implementation utilities
//===----------------------------------------------------------------------===//

/// Places a `bufferization.dealloc` directly in front of the successor-less
/// terminator `op`. All MemRefs in `operands` are retained, and the ownership
/// values the dealloc op produces for them are appended to
/// `updatedOperandOwnerships`. Yields failure when the values to deallocate
/// cannot be collected, and `op` otherwise (also when no dealloc op is needed).
FailureOr<Operation *> deallocation_impl::insertDeallocOpForReturnLike(
    DeallocationState &state, Operation *op, ValueRange operands,
    SmallVectorImpl<Value> &updatedOperandOwnerships) {
  assert(op->hasTrait<OpTrait::IsTerminator>() && "must be a terminator");
  assert(!op->hasSuccessors() && "must not have any successors");

  Block *parentBlock = op->getBlock();
  auto loc = op->getLoc();
  // New operations are inserted immediately before the terminator.
  OpBuilder builder(op);

  // Gather everything that has to be freed (plus the guarding conditions) and
  // everything that must survive past the terminator.
  SmallVector<Value> toDealloc, deallocConditions, retainedValues;
  if (failed(state.getMemrefsAndConditionsToDeallocate(
          builder, loc, parentBlock, toDealloc, deallocConditions)))
    return failure();
  state.getMemrefsToRetain(parentBlock, /*toBlock=*/nullptr, operands,
                           retainedValues);

  // Without anything to free or retain, a dealloc op would be a no-op.
  const bool nothingToDo = toDealloc.empty() && retainedValues.empty();
  if (nothingToDo)
    return op;

  auto deallocOp = builder.create<bufferization::DeallocOp>(
      loc, toDealloc, deallocConditions, retainedValues);
  auto retainedOperands = deallocOp.getRetained();
  auto updatedConditions = deallocOp.getUpdatedConditions();

  // The results of the dealloc op are always unique ownerships, so they
  // supersede whatever was recorded for the retained values before.
  state.resetOwnerships(retainedOperands, parentBlock);
  for (auto [retainedValue, newOwnership] :
       llvm::zip(retainedOperands, updatedConditions))
    state.updateOwnership(retainedValue, newOwnership, parentBlock);

  // Hand the leading updated conditions, one per MemRef operand, back to the
  // caller.
  const unsigned memrefOperandCount = llvm::count_if(operands, isMemref);
  auto operandOwnerships = updatedConditions.take_front(memrefOperandCount);
  updatedOperandOwnerships.append(operandOwnerships.begin(),
                                  operandOwnerships.end());

  return op;
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@ static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {

/// Returns true if the type of `v` is a `BaseMemRefType`.
static bool isMemref(Value v) {
  auto valueType = v.getType();
  return valueType.isa<BaseMemRefType>();
}

/// Returns true if the value held by `operand` satisfies `isMemref`.
static bool isMemrefOperand(OpOperand &operand) {
  auto heldValue = operand.get();
  return isMemref(heldValue);
}

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -917,35 +913,16 @@ BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
MutableOperandRange operands =
op.getMutableSuccessorOperands(RegionBranchPoint::parent());

// Collect the values to deallocate and retain and use them to create the
// dealloc operation.
Block *block = op->getBlock();
SmallVector<Value> memrefs, conditions, toRetain;
if (failed(state.getMemrefsAndConditionsToDeallocate(
builder, op.getLoc(), block, memrefs, conditions)))
return failure();

state.getMemrefsToRetain(block, nullptr, OperandRange(operands), toRetain);
if (memrefs.empty() && toRetain.empty())
return op.getOperation();

auto deallocOp = builder.create<bufferization::DeallocOp>(
op.getLoc(), memrefs, conditions, toRetain);

// We want to replace the current ownership of the retained values with the
// result values of the dealloc operation as they are always unique.
state.resetOwnerships(deallocOp.getRetained(), block);
for (auto [retained, ownership] :
llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions()))
state.updateOwnership(retained, ownership, block);
SmallVector<Value> updatedOwnerships;
auto result = deallocation_impl::insertDeallocOpForReturnLike(
state, op, OperandRange(operands), updatedOwnerships);
if (failed(result) || !*result)
return result;

// Add an additional operand for every MemRef for the ownership indicator.
if (!funcWithoutDynamicOwnership) {
unsigned numMemRefs = llvm::count_if(operands, isMemrefOperand);
SmallVector<Value> newOperands{OperandRange(operands)};
auto ownershipValues =
deallocOp.getUpdatedConditions().take_front(numMemRefs);
newOperands.append(ownershipValues.begin(), ownershipValues.end());
newOperands.append(updatedOwnerships.begin(), updatedOwnerships.end());
operands.assign(newOperands);
}

Expand Down
2 changes: 2 additions & 0 deletions mlir/lib/Dialect/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ add_mlir_dialect_library(MLIRGPUDialect
add_mlir_dialect_library(MLIRGPUTransforms
Transforms/AllReduceLowering.cpp
Transforms/AsyncRegionRewriter.cpp
Transforms/BufferDeallocationOpInterfaceImpl.cpp
Transforms/DecomposeMemrefs.cpp
Transforms/GlobalIdRewriter.cpp
Transforms/KernelOutlining.cpp
Expand Down Expand Up @@ -79,6 +80,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
MLIRAffineUtils
MLIRArithDialect
MLIRAsyncDialect
MLIRBufferizationDialect
MLIRBuiltinToLLVMIRTranslation
MLIRDataLayoutInterfaces
MLIRExecutionEngineUtils
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===- BufferDeallocationOpInterfaceImpl.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"

using namespace mlir;
using namespace mlir::bufferization;

namespace {
/// External model providing the BufferDeallocationOpInterface for
/// `gpu.terminator`. The terminator is handled through the shared return-like
/// helper with an empty operand list, so no operand ownerships are produced.
struct GPUTerminatorOpInterface
    : public BufferDeallocationOpInterface::ExternalModel<
          GPUTerminatorOpInterface, gpu::TerminatorOp> {
  /// Delegates to `deallocation_impl::insertDeallocOpForReturnLike` and
  /// returns its result unchanged.
  FailureOr<Operation *> process(Operation *op, DeallocationState &state,
                                 const DeallocationOptions &options) const {
    // Required by the helper's signature; its contents are not used here.
    SmallVector<Value> scratchOwnerships;
    auto processed = deallocation_impl::insertDeallocOpForReturnLike(
        state, op, {}, scratchOwnerships);
    return processed;
  }
};

} // namespace

/// Adds an extension to `registry` that attaches `GPUTerminatorOpInterface`
/// to `gpu::TerminatorOp`.
void mlir::gpu::registerBufferDeallocationOpInterfaceExternalModels(
    DialectRegistry &registry) {
  // The unary plus turns the captureless lambda into a plain function pointer.
  auto attachModels = +[](MLIRContext *context, GPUDialect * /*dialect*/) {
    gpu::TerminatorOp::attachInterface<GPUTerminatorOpInterface>(*context);
  };
  registry.addExtension(attachModels);
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,33 +47,12 @@ struct InParallelOpInterface
/// Inserts the deallocation logic in front of an `scf.in_parallel` terminator.
///
/// Emits an error on `op` (and returns its result) when the nested body of the
/// `scf.in_parallel` is not empty. Otherwise delegates to the shared
/// `deallocation_impl::insertDeallocOpForReturnLike` helper with an empty
/// operand list and returns its result.
FailureOr<Operation *> process(Operation *op, DeallocationState &state,
                               const DeallocationOptions &options) const {
  auto inParallelOp = cast<scf::InParallelOp>(op);
  if (!inParallelOp.getBody()->empty())
    return op->emitError("only supported when nested region is empty");

  // No operands are passed on, so the updated ownerships are not needed
  // after the call; the list only satisfies the helper's signature.
  SmallVector<Value> updatedOperandOwnership;
  return deallocation_impl::insertDeallocOpForReturnLike(
      state, op, {}, updatedOperandOwnership);
}
};

Expand Down
18 changes: 18 additions & 0 deletions mlir/test/Dialect/GPU/bufferization-buffer-deallocation.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// RUN: mlir-opt %s -buffer-deallocation-pipeline --allow-unregistered-dialect | FileCheck %s

// A buffer allocated inside the body of a gpu.launch is expected to be
// deallocated inside that body, ahead of the gpu.terminator.
func.func @gpu_launch() {
%c1 = arith.constant 1 : index
gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1)
threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
%alloc = memref.alloc() : memref<2xf32>
"test.memref_user"(%alloc) : (memref<2xf32>) -> ()
gpu.terminator
}
return
}

// CHECK-LABEL: func @gpu_launch
// CHECK: gpu.launch
// CHECK: [[ALLOC:%.+]] = memref.alloc(
// CHECK: memref.dealloc [[ALLOC]]
// CHECK: gpu.terminator
1 change: 1 addition & 0 deletions utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4927,6 +4927,7 @@ cc_library(
":ArithDialect",
":AsmParser",
":AsyncDialect",
":BufferizationDialect",
":ControlFlowDialect",
":DLTIDialect",
":DialectUtils",
Expand Down

0 comments on commit 522c1d0

Please sign in to comment.