diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -280,14 +280,6 @@ /// computation. Whether this pays off or not can be very input IR-specific. bool alwaysAliasingWithDest = true; - /// If set to `true`, try to hoist allocations out of blocks as much as - /// possible. An allocation is not hoisted across allocation hoisting barriers - /// as indicated by `BufferizableOpInterface::isAllocationHoistingBarrier`. - /// - /// Examples of allocation hoisting barriers are parallel loops or ops where - /// SSA values cannot be captured from the outside. - bool hoistAllocations = true; - /// Buffer alignment for new memory allocations. unsigned int bufferAlignment = 128; @@ -618,10 +610,6 @@ getMemRefTypeWithStaticIdentityLayout(TensorType tensorType, Attribute memorySpace = {}); -/// Try to hoist all new buffer allocations until the next hoisting barrier. -LogicalResult hoistBufferAllocations(Operation *op, - const BufferizationOptions &options); - /// Create alloc/dealloc ops as specified in the bufferization options. If /// `onlyLeakingAlloc`, only those buffer allocations are processed for which no /// buffer deallocation can be created. `changed` is set to `true` if the IR was diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td @@ -259,22 +259,6 @@ return value.isa(); }] >, - InterfaceMethod< - /*desc=*/[{ - Return `true` if the op is an allocation hoisting barrier. Buffer - allocations will never be beyond such ops. E.g., ops with certain - parallel semantics may be allocation hoisting barriers. The majority - of ops, however, is not a barrier. Therefore, this method returns - `false` by default. - }], - /*retType=*/"bool", - /*methodName=*/"isAllocationHoistingBarrier", - /*args=*/(ins), - /*methodBody=*/"", - /*defaultImplementation=*/[{ - return false; - }] - >, InterfaceMethod< /*desc=*/[{ Return `true` if the `uRead` and `uWrite` do not constitute a RaW diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h @@ -91,9 +91,8 @@ LogicalResult bufferizeOp(Operation *op, BufferizationState &bufferizationState); -/// Finalize all buffer allocations. -/// * Hoist buffer allocations as much as possible. -/// * Create alloc/dealloc ops as specified by the bufferization options. +/// Finalize all buffer allocations: Create alloc/dealloc ops as specified by +/// the bufferization options. LogicalResult finalizeBuffers(Operation *op, const BufferizationOptions &options); } // namespace bufferization diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -582,63 +582,6 @@ return success(!status.wasInterrupted()); } -/// Try to hoist all new buffer allocations until the next hoisting barrier. -// TODO: Consolidate this function with the existing buffer hoisting pass. -LogicalResult -bufferization::hoistBufferAllocations(Operation *op, - const BufferizationOptions &options) { - // Nothing to do if allocation hoisting is deactivated. - if (!options.hoistAllocations) - return success(); - - // Gather all buffer allocations that were created by the bufferization. - SmallVector allocaOps; - op->walk([&](memref::AllocaOp allocaOp) { - if (allocaOp->hasAttr(kBufferAllocationAttr)) - allocaOps.push_back(allocaOp); - }); - - for (Operation *allocaOp : allocaOps) { - // TODO: Hoisting of allocs with dynamic shape not implemented. - if (!allocaOp->getOpOperands().empty()) - continue; - - Operation *op = allocaOp->getParentOp(); - while (op) { - if (auto bufferizableOp = dyn_cast(op)) { - if (bufferizableOp.isAllocationHoistingBarrier()) { - break; - } - } else { - // Op is not bufferizable: It may not be safe to hoist across this op. - break; - } - op = op->getParentOp(); - } - - // FuncOp is an allocation hoisting barrier, so this should never happen. - assert(op && "allocation hoisting barrier not found"); - - // Nothing to do if the insertion point is in the same block. - if (op == allocaOp->getParentOp()) - continue; - - // `op` may have multiple blocks. Make sure that we insert in the right one. - SmallVector blocks; - for (Region &r : op->getRegions()) - for (Block &b : r.getBlocks()) - blocks.push_back(&b); - auto *insertionBlock = llvm::find_if( - blocks, [&](Block *b) { return b->findAncestorOpInBlock(*allocaOp); }); - assert(insertionBlock != blocks.end() && "owning block not found"); - - // Move to the beginning of the block. - allocaOp->moveBefore(&(*insertionBlock)->front()); - } - - return success(); -} - //===----------------------------------------------------------------------===// // Bufferization-specific BlockAndValueMapping support with debugging. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -295,10 +295,6 @@ LogicalResult bufferization::finalizeBuffers(Operation *op, const BufferizationOptions &options) { - // Hoist buffers. - if (failed(hoistBufferAllocations(op, options))) - return failure(); - // Create allocation ops for "leaking buffers", i.e., buffer allocations that // escape block boundaries. If there are no leaking allocs, `hasLeakingAllocs` // is set to `false`. diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -549,8 +549,6 @@ // All function arguments are writable by default. return true; } - - bool isAllocationHoistingBarrier(Operation *op) const { return true; } }; } // namespace func_ext diff --git a/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp --- a/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp @@ -119,11 +119,6 @@ const AnalysisState &state) const { return BufferRelation::Equivalent; } - - bool isAllocationHoistingBarrier(Operation *op) const { - // Allocations should not be hoisted out of AssumingOps. - return true; - } }; /// Bufferization of shape.assuming_yield. Bufferized as part of their enclosing diff --git a/mlir/test/Transforms/buffer-hoisting.mlir b/mlir/test/Dialect/Bufferization/Transforms/buffer-hoisting.mlir rename from mlir/test/Transforms/buffer-hoisting.mlir rename to mlir/test/Dialect/Bufferization/Transforms/buffer-hoisting.mlir diff --git a/mlir/test/Transforms/buffer-loop-hoisting.mlir b/mlir/test/Dialect/Bufferization/Transforms/buffer-loop-hoisting.mlir rename from mlir/test/Transforms/buffer-loop-hoisting.mlir rename to mlir/test/Dialect/Bufferization/Transforms/buffer-loop-hoisting.mlir diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -buffer-loop-hoisting -split-input-file | FileCheck %s // Run fuzzer with different seeds. // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -367,18 +367,20 @@ %idx: index) -> (tensor<5xi1>, tensor<5xi1>) { - // These allocation used to be inside the scf.while loop, but they were - // hoisted. - // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[arg0]], %[[w1:.*]] = %[[arg1]]) {{.*}} { + // CHECK: %[[clone1:.*]] = bufferization.clone %[[arg1]] + // CHECK: %[[clone0:.*]] = bufferization.clone %[[arg0]] + // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[clone0]], %[[w1:.*]] = %[[clone1]]) {{.*}} { %r0, %r1 = scf.while (%w0 = %arg0, %w1 = %arg1) : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) { // CHECK: %[[condition:.*]] = memref.load %[[w0]] + // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1> // CHECK: memref.copy %[[w1]], %[[a1]] - // CHECK: %[[casted1:.*]] = memref.cast %[[a1]] + // CHECK: memref.dealloc %[[w1]] + // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1> // CHECK: memref.copy %[[w0]], %[[a0]] + // CHECK: memref.dealloc %[[w0]] // CHECK: %[[casted0:.*]] = memref.cast %[[a0]] + // CHECK: %[[casted1:.*]] = memref.cast %[[a1]] // CHECK: scf.condition(%[[condition]]) %[[casted1]], %[[casted0]] %condition = tensor.extract %w0[%idx] : tensor<5xi1> scf.condition(%condition) %w1, %w0 : tensor<5xi1>, tensor<5xi1> @@ -410,21 +412,21 @@ %idx: index) -> (tensor<5xi1>, tensor<5xi1>) { - // These allocation used to be inside the scf.while loop, but they were - // hoisted. - // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[a2:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[a3:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[arg0]], %[[w1:.*]] = %[[arg1]]) {{.*}} { + // CHECK: %[[clone1:.*]] = bufferization.clone %[[arg1]] + // CHECK: %[[clone0:.*]] = bufferization.clone %[[arg0]] + // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[clone0]], %[[w1:.*]] = %[[clone1]]) {{.*}} { %r0, %r1 = scf.while (%w0 = %arg0, %w1 = %arg1) : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) { // CHECK: %[[condition:.*]] = memref.load %[[w0]] - // CHECK: memref.copy %[[w1]], %[[a3]] - // CHECK: %[[casted3:.*]] = memref.cast %[[a3]] - // CHECK: memref.copy %[[w0]], %[[a2]] - // CHECK: %[[casted2:.*]] = memref.cast %[[a2]] - // CHECK: scf.condition(%[[condition]]) %[[casted3]], %[[casted2]] + // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1> + // CHECK: memref.copy %[[w1]], %[[a1]] + // CHECK: memref.dealloc %[[w1]] + // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1> + // CHECK: memref.copy %[[w0]], %[[a0]] + // CHECK: memref.dealloc %[[w0]] + // CHECK: %[[casted0:.*]] = memref.cast %[[a0]] + // CHECK: %[[casted1:.*]] = memref.cast %[[a1]] + // CHECK: scf.condition(%[[condition]]) %[[casted1]], %[[casted0]] %condition = tensor.extract %w0[%idx] : tensor<5xi1> scf.condition(%condition) %w1, %w0 : tensor<5xi1>, tensor<5xi1> } do { @@ -432,11 +434,14 @@ // CHECK: } do { // CHECK: ^bb0(%[[b0:.*]]: memref<5xi1, #{{.*}}>, %[[b1:.*]]: memref<5xi1, #{{.*}}): // CHECK: memref.store %{{.*}}, %[[b0]] - // CHECK: memref.copy %[[b1]], %[[a1]] - // CHECK: %[[casted1:.*]] = memref.cast %[[a1]] - // CHECK: memref.copy %[[b0]], %[[a0]] - // CHECK: %[[casted0:.*]] = memref.cast %[[a0]] - // CHECK: scf.yield %[[casted1]], %[[casted0]] + // CHECK: %[[a3:.*]] = memref.alloc() {{.*}} : memref<5xi1> + // CHECK: memref.copy %[[b1]], %[[a3]] + // CHECK: memref.dealloc %[[b1]] + // CHECK: %[[a2:.*]] = memref.alloc() {{.*}} : memref<5xi1> + // CHECK: memref.copy %[[b0]], %[[a2]] + // CHECK: %[[casted2:.*]] = memref.cast %[[a2]] + // CHECK: %[[casted3:.*]] = memref.cast %[[a3]] + // CHECK: scf.yield %[[casted3]], %[[casted2]] // CHECK: } %pos = "dummy.some_op"() : () -> (index) %val = "dummy.another_op"() : () -> (i1) @@ -444,8 +449,6 @@ scf.yield %b1, %1 : tensor<5xi1>, tensor<5xi1> } - // CHECK-DAG: memref.dealloc %[[a0]] - // CHECK-DAG: memref.dealloc %[[a1]] // CHECK: return %[[loop]]#0, %[[loop]]#1 return %r0, %r1 : tensor<5xi1>, tensor<5xi1> } @@ -454,19 +457,20 @@ // CHECK-LABEL: func @scf_while_iter_arg_result_mismatch( // CHECK-SAME: %[[arg0:.*]]: memref<5xi1, #{{.*}}>, %[[arg1:.*]]: memref<5xi1, #{{.*}}> -// CHECK: %[[alloc1:.*]] = memref.alloc() {{.*}} : memref<5xi1> // CHECK: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<5xi1> -// CHECK: scf.while (%[[arg3:.*]] = %[[arg1]]) : (memref<5xi1, #{{.*}}) -> () { +// CHECK: %[[clone:.*]] = bufferization.clone %[[arg1]] +// CHECK: scf.while (%[[arg3:.*]] = %[[clone]]) : (memref<5xi1, #{{.*}}) -> () { +// CHECK: memref.dealloc %[[arg3]] // CHECK: %[[load:.*]] = memref.load %[[arg0]] // CHECK: scf.condition(%[[load]]) // CHECK: } do { // CHECK: memref.copy %[[arg0]], %[[alloc2]] // CHECK: memref.store %{{.*}}, %[[alloc2]] +// CHECK: %[[alloc1:.*]] = memref.alloc() {{.*}} : memref<5xi1> // CHECK: memref.copy %[[alloc2]], %[[alloc1]] // CHECK: %[[casted:.*]] = memref.cast %[[alloc1]] : memref<5xi1> to memref<5xi1, #{{.*}}> // CHECK: scf.yield %[[casted]] // CHECK: } -// CHECK-DAG: memref.dealloc %[[alloc1]] // CHECK-DAG: memref.dealloc %[[alloc2]] func.func @scf_while_iter_arg_result_mismatch(%arg0: tensor<5xi1>, %arg1: tensor<5xi1>,