[mlir][tensor][bufferize] Reshapes: Fix memory side effects and memory space #68195

Conversation

matthias-springer
Member

  • `tensor.collapse_shape` may bufferize to a memory read because the op may have to reallocate the source buffer (see the sketch below).
  • `tensor.reshape` should not use `bufferization.clone` for reallocation. The op has requirements with respect to the order of buffer writes/reads, so use `memref.alloc` and `memref.copy` instead. Also fix a bug where the memory space of the source buffer was not propagated to the reallocated buffer.
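
A minimal sketch of the first point (hypothetical IR, not taken from the patch): when the source of a `tensor.collapse_shape` bufferizes to a non-contiguous strided buffer, the collapsed shape cannot be expressed as a view of that buffer, so bufferization must allocate a fresh buffer and copy the source into it; that copy is the memory read.

```mlir
func.func @collapse_needs_realloc(%t: tensor<10x20xf32>) -> tensor<30xf32> {
  // Bufferizes to a non-contiguous memref.subview of the source buffer.
  %s = tensor.extract_slice %t[2, 3] [5, 6] [1, 1]
      : tensor<10x20xf32> to tensor<5x6xf32>
  // No contiguous view exists, so bufferization emits (roughly):
  //   %alloc = memref.alloc() : memref<5x6xf32>
  //   memref.copy %subview, %alloc  // reads the bufferized source
  //   memref.collapse_shape %alloc [[0, 1]] ...
  %c = tensor.collapse_shape %s [[0, 1]]
      : tensor<5x6xf32> into tensor<30xf32>
  return %c : tensor<30xf32>
}
```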

@llvmbot
Member

llvmbot commented Oct 4, 2023

@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-tensor

Changes
  • `tensor.collapse_shape` may bufferize to a memory read because the op may have to reallocate the source buffer.
  • `tensor.reshape` should not use `bufferization.clone` for reallocation. The op has requirements with respect to the order of buffer writes/reads, so use `memref.alloc` and `memref.copy` instead. Also fix a bug where the memory space of the source buffer was not propagated to the reallocated buffer (a sketch of the intended replacement pattern follows).
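
A hedged sketch of the second point (function and value names are illustrative, not from the patch): for a source with a non-identity layout in a non-default memory space, the reallocation now becomes an explicit allocation that inherits the source's memory space, followed by a copy. This mirrors the CHECK lines of the updated test in the diff below.

```mlir
func.func @reshape_realloc_sketch(
    %src: memref<2xf32, strided<[?], offset: ?>, 3>,
    %shape: memref<2xi32>) -> memref<1x2xf32, 3> {
  // Reallocate with an identity layout, preserving memory space 3.
  // Previously a bufferization.clone was emitted here, which dropped the
  // memory space and provided no ordering guarantees for writes/reads.
  %alloc = memref.alloc() : memref<2xf32, 3>
  memref.copy %src, %alloc
      : memref<2xf32, strided<[?], offset: ?>, 3> to memref<2xf32, 3>
  // memref.reshape requires the identity-layout source.
  %r = memref.reshape %alloc(%shape)
      : (memref<2xf32, 3>, memref<2xi32>) -> memref<1x2xf32, 3>
  return %r : memref<1x2xf32, 3>
}
```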

Full diff: https://github.com/llvm/llvm-project/pull/68195.diff

2 Files Affected:

  • (modified) mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp (+17-6)
  • (modified) mlir/test/Dialect/Tensor/one-shot-bufferize.mlir (+38-13)
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index b08283f0070784c..9ac9f3eae120dc9 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -119,7 +119,10 @@ struct CollapseShapeOpInterface
                                                     tensor::CollapseShapeOp> {
   bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
-    return false;
+    // tensor.collapse_shape may reallocate, at which point the source buffer is
+    // copied. I.e., there will be a memory read side effect on the bufferized
+    // source.
+    return true;
   }
 
   bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
@@ -288,6 +291,8 @@ struct ExpandShapeOpInterface
                                                     tensor::ExpandShapeOp> {
   bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
+    // In contrast to tensor.collapse_shape, this op can always be bufferized
+    // without a copy.
     return false;
   }
 
@@ -838,6 +843,7 @@ struct ReshapeOpInterface
                                                     tensor::ReshapeOp> {
   bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
+    // Depending on the layout map, the source buffer may have to be copied.
     auto reshapeOp = cast<tensor::ReshapeOp>(op);
     return &opOperand == &reshapeOp.getShapeMutable()[0];
   }
@@ -867,15 +873,20 @@ struct ReshapeOpInterface
       return failure();
 
     // memref.reshape requires the source buffer to have an identity layout.
-    // If the source memref does not have an identity layout, clone the source
+    // If the source memref does not have an identity layout, copy the source
     // into a new buffer with an identity layout.
     auto srcType = llvm::dyn_cast<MemRefType>(srcBuffer->getType());
     if (srcType && !srcType.getLayout().isIdentity()) {
-      auto identityType =
-          MemRefType::get(srcType.getShape(), srcType.getElementType());
+      FailureOr<Value> tensorAlloc = allocateTensorForShapedValue(
+          rewriter, op->getLoc(), reshapeOp.getSource(), options);
+      if (failed(tensorAlloc))
+        return failure();
+      auto memrefType = MemRefType::get(
+          srcType.getShape(), srcType.getElementType(), AffineMap(),
+          cast<BaseMemRefType>(srcBuffer->getType()).getMemorySpace());
       srcBuffer = rewriter
-                      .create<bufferization::CloneOp>(op->getLoc(),
-                                                      identityType, *srcBuffer)
+                      .create<bufferization::ToMemrefOp>(
+                          op->getLoc(), memrefType, *tensorAlloc)
                       .getResult();
     }
 
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
index 9052744a1d3f984..48eeb5c4dd077e9 100644
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -384,20 +384,45 @@ func.func @tensor.reshape() -> tensor<2x2x5xf32> {
 // -----
 
 // CHECK-LABEL: @reshape_with_non_identity_layout(
-// CHECK-SAME:    %[[INPUT:[a-zA-Z0-9]*]]: memref<2x2xf32, strided<[?, ?], offset: ?>>,
-// CHECK-SAME:    %[[LAYOUT:[a-zA-Z0-9]*]]: memref<2xi32, strided<[?], offset: ?>>)
-func.func @reshape_with_non_identity_layout(%arg0: tensor<2x2xf32>, %arg1: tensor<2xi32>) -> tensor<1x2xf32> {
-
-  // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[INPUT]][1, 0] [1, 2] [1, 1] : memref<2x2xf32, strided<[?, ?], offset: ?>> to memref<2xf32, strided<[?], offset: ?>>
-  %extracted_slice = tensor.extract_slice %arg0[1, 0] [1, 2] [1, 1] : tensor<2x2xf32> to tensor<2xf32>
+// CHECK-SAME:    %[[INPUT:[a-zA-Z0-9]*]]: memref<2x2xf32, strided<[?, ?], offset: ?>, 3>,
+// CHECK-SAME:    %[[LAYOUT:[a-zA-Z0-9]*]]: memref<2xi32, strided<[?], offset: ?>>,
+func.func @reshape_with_non_identity_layout(%arg0: memref<2x2xf32, strided<[?, ?], offset: ?>, 3>, %arg1: tensor<2xi32>, %idx: index) -> f32 {
+  %t = bufferization.to_tensor %arg0 restrict : memref<2x2xf32, strided<[?, ?], offset: ?>, 3>
+
+  // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[INPUT]][1, 0] [1, 2] [1, 1] : memref<2x2xf32, strided<[?, ?], offset: ?>, 3> to memref<2xf32, strided<[?], offset: ?>, 3>
+  %extracted_slice = tensor.extract_slice %t[1, 0] [1, 2] [1, 1] : tensor<2x2xf32> to tensor<2xf32>
+
+  // To satisfy the constraints of memref.reshape, the subview must be
+  // reallocated into a buffer with an identity layout.
+  // CHECK: %[[ALLOC:.+]] = memref.alloc() {{.*}} : memref<2xf32, 3>
+  // CHECK: memref.copy %[[SUBVIEW]], %[[ALLOC]]
+  // CHECK: %[[RESHAPED:.+]] = memref.reshape %[[ALLOC]](%[[LAYOUT]]) : (memref<2xf32, 3>, memref<2xi32, strided<[?], offset: ?>>) -> memref<1x2xf32, 3>
+  %reshape = tensor.reshape %extracted_slice(%arg1) : (tensor<2xf32>, tensor<2xi32>) -> tensor<1x2xf32>
 
-  // To satisify the constraints of memref.reshape, the subview must be cloned into
-  // a buffer with an identity layout.
-  // CHECK: %[[CLONED:.+]] = bufferization.clone %[[SUBVIEW]] : memref<2xf32, strided<[?], offset: ?>> to memref<2xf32>
-  // CHECK: %[[RESHAPED:.+]] = memref.reshape %[[CLONED]](%[[LAYOUT]]) : (memref<2xf32>, memref<2xi32, strided<[?], offset: ?>>) -> memref<1x2xf32>
+  %r = tensor.extract %reshape[%idx, %idx] : tensor<1x2xf32>
+  return %r : f32
+}
 
-  %reshape = tensor.reshape %extracted_slice(%arg1) : (tensor<2xf32>, tensor<2xi32>) -> tensor<1x2xf32>
+// -----
 
-  // CHECK: return %[[RESHAPED]] : memref<1x2xf32>
-  return %reshape : tensor<1x2xf32>
+// CHECK-LABEL: func @collapse_shape_regression(
+//  CHECK-SAME:     %[[t:.*]]: memref<10x20xf32,
+func.func @collapse_shape_regression(
+    %t: tensor<10x20xf32>, %f: f32, %idx: index) {
+  // CHECK: %[[subview:.*]] = memref.subview %[[t]]
+  %0 = tensor.extract_slice %t [2, 3] [5, 6] [1, 1]
+      : tensor<10x20xf32> to tensor<5x6xf32>
+
+  // Insert a copy because the original %0 is read later.
+  // CHECK: %[[alloc1:.*]] = memref.alloc() {{.*}} : memref<5x6xf32>
+  // CHECK: memref.copy %[[subview]], %[[alloc1]]
+  // CHECK: memref.store {{.*}}, %[[alloc1]]
+  tensor.insert %f into %0[%idx, %idx] : tensor<5x6xf32>
+
+  // Insert a copy because the shape cannot be collapsed.
+  // CHECK: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<5x6xf32>
+  // CHECK: memref.copy %[[subview]], %[[alloc2]]
+  // CHECK: memref.collapse_shape %[[alloc2]]
+  tensor.collapse_shape %0[[0, 1]] : tensor<5x6xf32> into tensor<30xf32>
+  return
 }
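
An editorial note on why the new read side effect matters, using a hypothetical reduction of the `@collapse_shape_regression` test above (not part of the patch): because `tensor.collapse_shape` now reports a read of its source, the one-shot analysis sees that `%0` is still read after the `tensor.insert` and therefore copies the buffer instead of letting the insert write in place.

```mlir
func.func @why_the_read_matters(%t: tensor<10x20xf32>, %f: f32,
                                %idx: index) -> tensor<30xf32> {
  %0 = tensor.extract_slice %t[2, 3] [5, 6] [1, 1]
      : tensor<10x20xf32> to tensor<5x6xf32>
  // If the collapse below reported no read of %0, this insert could be
  // performed in place and the collapse's reallocation copy would then
  // observe the mutated data.
  %1 = tensor.insert %f into %0[%idx, %idx] : tensor<5x6xf32>
  // Reads %0 (via the reallocation copy), so %0 must still hold the
  // original data here.
  %2 = tensor.collapse_shape %0 [[0, 1]]
      : tensor<5x6xf32> into tensor<30xf32>
  return %2 : tensor<30xf32>
}
```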

matthias-springer force-pushed the tensor_reshape_bufferization_fixes branch from e4ef784 to 178b50d on Oct 4, 2023
[mlir][tensor][bufferize] Reshapes: Fix memory side effects and memory space

* `tensor.collapse_shape` may bufferize to a memory read because the op may have to reallocate the source buffer.
* `tensor.reshape` should not use `bufferization.clone` for reallocation. The op has requirements with respect to the order of buffer writes/reads. Use `memref.alloc` and `memref.copy` instead. Also fix a bug where the memory space of the source buffer was not propagated to the reallocated buffer.