Skip to content

Commit

Permalink
[mlir][vector] Enable transfer op hoisting with dynamic indices (llvm…
Browse files Browse the repository at this point in the history
…#68500)

Recent changes (llvm#66930)
disabled vector transfer ops hoisting with view-like intermediate ops.
The recommended way is to fold subview ops into transfer op indices
before invoking hoisting. As a result, transfer op indices now involve
dynamic values instead of the static constant values previously seen with
subview ops, so hoisting no longer kicks in. This breaks
downstream users.

To fix it, this commit enables hoisting transfer ops with dynamic
indices by using `ValueBoundsConstraintSet` to prove ranges are disjoint
in `isDisjointTransferIndices`. Because that utility is used in many
places, including op folders, for now we add a flag to it and
set the flag to true only for the "heavy" transforms: hoisting and
load-store forwarding.
  • Loading branch information
antiagainst authored and Groverkss committed Oct 18, 2023
1 parent 7ef1754 commit 9d6a478
Show file tree
Hide file tree
Showing 13 changed files with 370 additions and 60 deletions.
12 changes: 7 additions & 5 deletions mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,18 @@ class Value;
namespace affine {
void registerValueBoundsOpInterfaceExternalModels(DialectRegistry &registry);

/// Compute whether the given values are equal. Return "failure" if equality
/// could not be determined. `value1`/`value2` must be index-typed.
/// Compute a constant delta of the given two values. Return "failure" if we
/// cannot determine a constant delta. `value1`/`value2` must be index-typed.
///
/// This function is similar to `ValueBoundsConstraintSet::areEqual`. To work
/// around limitations in `FlatLinearConstraints`, this function fully composes
/// This function is similar to
/// `ValueBoundsConstraintSet::computeConstantDistance`. To work around
/// limitations in `FlatLinearConstraints`, this function fully composes
/// `value1` and `value2` (if they are the result of affine.apply ops) before
/// populating the constraint set. The folding/composing logic can see
/// opportunities for simplifications that the constraint set implementation
/// cannot see.
FailureOr<bool> fullyComposeAndCheckIfEqual(Value value1, Value value2);
FailureOr<int64_t> fullyComposeAndComputeConstantDelta(Value value1,
Value value2);
} // namespace affine
} // namespace mlir

Expand Down
19 changes: 13 additions & 6 deletions mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,23 @@ bool checkSameValueRAW(TransferWriteOp defWrite, TransferReadOp read);
/// op.
bool checkSameValueWAW(TransferWriteOp write, TransferWriteOp priorWrite);

/// Same behavior as `isDisjointTransferSet` but doesn't require the operations
/// to have the same tensor/memref. This allows comparing operations accessing
/// different tensors.
/// Return true if we can prove that the transfer operations access disjoint
/// memory, without requiring the accessed tensor/memref to be the same.
///
/// If `testDynamicValueUsingBounds` is true, tries to test dynamic values
/// via ValueBoundsOpInterface.
bool isDisjointTransferIndices(VectorTransferOpInterface transferA,
VectorTransferOpInterface transferB);
VectorTransferOpInterface transferB,
bool testDynamicValueUsingBounds = false);

/// Return true if we can prove that the transfer operations access disjoint
/// memory.
/// memory, requiring the operations to access the same tensor/memref.
///
/// If `testDynamicValueUsingBounds` is true, tries to test dynamic values
/// via ValueBoundsOpInterface.
bool isDisjointTransferSet(VectorTransferOpInterface transferA,
VectorTransferOpInterface transferB);
VectorTransferOpInterface transferB,
bool testDynamicValueUsingBounds = false);

/// Return the result value of reducing two scalar/vector values with the
/// corresponding arith operation.
Expand Down
10 changes: 10 additions & 0 deletions mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@ class ValueBoundsConstraintSet {
presburger::BoundType type, AffineMap map, ValueDimList mapOperands,
StopConditionFn stopCondition = nullptr, bool closedUB = false);

/// Compute a constant delta between the given two values. Return "failure"
/// if a constant delta could not be determined.
///
/// `dim1`/`dim2` must be `nullopt` if and only if `value1`/`value2` are
/// index-typed.
static FailureOr<int64_t>
computeConstantDelta(Value value1, Value value2,
std::optional<int64_t> dim1 = std::nullopt,
std::optional<int64_t> dim2 = std::nullopt);

/// Compute whether the given values/dimensions are equal. Return "failure" if
/// equality could not be determined.
///
Expand Down
9 changes: 3 additions & 6 deletions mlir/lib/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ void mlir::affine::registerValueBoundsOpInterfaceExternalModels(
});
}

FailureOr<bool> mlir::affine::fullyComposeAndCheckIfEqual(Value value1,
Value value2) {
FailureOr<int64_t>
mlir::affine::fullyComposeAndComputeConstantDelta(Value value1, Value value2) {
assert(value1.getType().isIndex() && "expected index type");
assert(value2.getType().isIndex() && "expected index type");

Expand All @@ -123,9 +123,6 @@ FailureOr<bool> mlir::affine::fullyComposeAndCheckIfEqual(Value value1,
ValueDimList valueDims;
for (Value v : mapOperands)
valueDims.push_back({v, std::nullopt});
FailureOr<int64_t> bound = ValueBoundsConstraintSet::computeConstantBound(
return ValueBoundsConstraintSet::computeConstantBound(
presburger::BoundType::EQ, map, valueDims);
if (failed(bound))
return failure();
return *bound == 0;
}
12 changes: 6 additions & 6 deletions mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,16 @@ void mlir::linalg::hoistRedundantVectorTransfers(func::FuncOp func) {
if (auto transferWriteUse =
dyn_cast<vector::TransferWriteOp>(use.getOwner())) {
if (!vector::isDisjointTransferSet(
cast<VectorTransferOpInterface>(transferWrite.getOperation()),
cast<VectorTransferOpInterface>(
transferWriteUse.getOperation())))
cast<VectorTransferOpInterface>(*transferWrite),
cast<VectorTransferOpInterface>(*transferWriteUse),
/*testDynamicValueUsingBounds=*/true))
return WalkResult::advance();
} else if (auto transferReadUse =
dyn_cast<vector::TransferReadOp>(use.getOwner())) {
if (!vector::isDisjointTransferSet(
cast<VectorTransferOpInterface>(transferWrite.getOperation()),
cast<VectorTransferOpInterface>(
transferReadUse.getOperation())))
cast<VectorTransferOpInterface>(*transferWrite),
cast<VectorTransferOpInterface>(*transferReadUse),
/*testDynamicValueUsingBounds=*/true))
return WalkResult::advance();
} else {
// Unknown use, we cannot prove that it doesn't alias with the
Expand Down
2 changes: 2 additions & 0 deletions mlir/lib/Dialect/Vector/IR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRVectorDialect
MLIRVectorAttributesIncGen

LINK_LIBS PUBLIC
MLIRAffineDialect
MLIRArithDialect
MLIRControlFlowInterfaces
MLIRDataLayoutInterfaces
Expand All @@ -22,5 +23,6 @@ add_mlir_dialect_library(MLIRVectorDialect
MLIRMemRefDialect
MLIRSideEffectInterfaces
MLIRTensorDialect
MLIRValueBoundsOpInterface
MLIRVectorInterfaces
)
65 changes: 52 additions & 13 deletions mlir/lib/Dialect/Vector/IR/VectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "mlir/Dialect/Vector/IR/VectorOps.h"

#include "mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
Expand All @@ -30,6 +31,7 @@
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
Expand Down Expand Up @@ -168,39 +170,76 @@ bool mlir::vector::checkSameValueWAW(vector::TransferWriteOp write,
}

bool mlir::vector::isDisjointTransferIndices(
VectorTransferOpInterface transferA, VectorTransferOpInterface transferB) {
VectorTransferOpInterface transferA, VectorTransferOpInterface transferB,
bool testDynamicValueUsingBounds) {
// For simplicity only look at transfer of same type.
if (transferA.getVectorType() != transferB.getVectorType())
return false;
unsigned rankOffset = transferA.getLeadingShapedRank();
for (unsigned i = 0, e = transferA.indices().size(); i < e; i++) {
auto indexA = getConstantIntValue(transferA.indices()[i]);
auto indexB = getConstantIntValue(transferB.indices()[i]);
// If any of the indices are dynamic we cannot prove anything.
if (!indexA.has_value() || !indexB.has_value())
continue;
Value indexA = transferA.indices()[i];
Value indexB = transferB.indices()[i];
std::optional<int64_t> cstIndexA = getConstantIntValue(indexA);
std::optional<int64_t> cstIndexB = getConstantIntValue(indexB);

if (i < rankOffset) {
// For leading dimensions, if we can prove that the indices are different,
// we know we are accessing disjoint slices.
if (*indexA != *indexB)
return true;
if (cstIndexA.has_value() && cstIndexB.has_value()) {
if (*cstIndexA != *cstIndexB)
return true;
continue;
}
if (testDynamicValueUsingBounds) {
// First try to see if we can fully compose and simplify the affine
// expression as a fast track.
FailureOr<uint64_t> delta =
affine::fullyComposeAndComputeConstantDelta(indexA, indexB);
if (succeeded(delta) && *delta != 0)
return true;

FailureOr<bool> testEqual =
ValueBoundsConstraintSet::areEqual(indexA, indexB);
if (succeeded(testEqual) && !testEqual.value())
return true;
}
} else {
// For this dimension, we slice a part of the memref, so we need to make
// sure the intervals accessed don't overlap.
int64_t distance = std::abs(*indexA - *indexB);
if (distance >= transferA.getVectorType().getDimSize(i - rankOffset))
return true;
int64_t vectorDim = transferA.getVectorType().getDimSize(i - rankOffset);
if (cstIndexA.has_value() && cstIndexB.has_value()) {
int64_t distance = std::abs(*cstIndexA - *cstIndexB);
if (distance >= vectorDim)
return true;
continue;
}
if (testDynamicValueUsingBounds) {
// First try to see if we can fully compose and simplify the affine
// expression as a fast track.
FailureOr<int64_t> delta =
affine::fullyComposeAndComputeConstantDelta(indexA, indexB);
if (succeeded(delta) && std::abs(*delta) >= vectorDim)
return true;

FailureOr<int64_t> computeDelta =
ValueBoundsConstraintSet::computeConstantDelta(indexA, indexB);
if (succeeded(computeDelta)) {
if (std::abs(computeDelta.value()) >= vectorDim)
return true;
}
}
}
}
return false;
}

bool mlir::vector::isDisjointTransferSet(VectorTransferOpInterface transferA,
VectorTransferOpInterface transferB) {
VectorTransferOpInterface transferB,
bool testDynamicValueUsingBounds) {
if (transferA.source() != transferB.source())
return false;
return isDisjointTransferIndices(transferA, transferB);
return isDisjointTransferIndices(transferA, transferB,
testDynamicValueUsingBounds);
}

// Helper to iterate over n-D vector slice elements. Calculate the next
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ void TransferOptimization::deadStoreOp(vector::TransferWriteOp write) {
// Don't need to consider disjoint accesses.
if (vector::isDisjointTransferSet(
cast<VectorTransferOpInterface>(write.getOperation()),
cast<VectorTransferOpInterface>(transferOp.getOperation())))
cast<VectorTransferOpInterface>(transferOp.getOperation()),
/*testDynamicValueUsingBounds=*/true))
continue;
}
blockingAccesses.push_back(user);
Expand Down Expand Up @@ -217,7 +218,8 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
// the write.
if (vector::isDisjointTransferSet(
cast<VectorTransferOpInterface>(write.getOperation()),
cast<VectorTransferOpInterface>(read.getOperation())))
cast<VectorTransferOpInterface>(read.getOperation()),
/*testDynamicValueUsingBounds=*/true))
continue;
if (write.getSource() == read.getSource() &&
dominators.dominates(write, read) && checkSameValueRAW(write, read)) {
Expand Down
27 changes: 17 additions & 10 deletions mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,25 +484,32 @@ FailureOr<int64_t> ValueBoundsConstraintSet::computeConstantBound(
return failure();
}

FailureOr<bool>
ValueBoundsConstraintSet::areEqual(Value value1, Value value2,
std::optional<int64_t> dim1,
std::optional<int64_t> dim2) {
FailureOr<int64_t>
ValueBoundsConstraintSet::computeConstantDelta(Value value1, Value value2,
std::optional<int64_t> dim1,
std::optional<int64_t> dim2) {
#ifndef NDEBUG
assertValidValueDim(value1, dim1);
assertValidValueDim(value2, dim2);
#endif // NDEBUG

// Subtract the two values/dimensions from each other. If the result is 0,
// both are equal.
Builder b(value1.getContext());
AffineMap map = AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0,
b.getAffineDimExpr(0) - b.getAffineDimExpr(1));
FailureOr<int64_t> bound = computeConstantBound(
presburger::BoundType::EQ, map, {{value1, dim1}, {value2, dim2}});
if (failed(bound))
return computeConstantBound(presburger::BoundType::EQ, map,
{{value1, dim1}, {value2, dim2}});
}

FailureOr<bool>
ValueBoundsConstraintSet::areEqual(Value value1, Value value2,
std::optional<int64_t> dim1,
std::optional<int64_t> dim2) {
// Subtract the two values/dimensions from each other. If the result is 0,
// both are equal.
FailureOr<int64_t> delta = computeConstantDelta(value1, value2, dim1, dim2);
if (failed(delta))
return failure();
return *bound == 0;
return *delta == 0;
}

ValueBoundsConstraintSet::BoundBuilder &
Expand Down
Loading

0 comments on commit 9d6a478

Please sign in to comment.