diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -293,20 +293,6 @@
   /// For debugging only. Should be used together with `testAnalysisOnly`.
   bool printConflicts = false;
 
-  /// If set to `true`, `getAliasingOpOperand` will return the corresponding
-  /// "out"/"dest" OpOperand for every op that has the notion of an
-  /// "out"/"dest" operand. I.e., the aliasing OpOperand of the i-th tensor
-  /// OpResult is usually the i-th "out" tensor OpOperand. This is in line
-  /// with destination-passing style and the default behavior. Op interface
-  /// implementations must follow this contract to avoid surprising behavior.
-  ///
-  /// If set to `false`, BufferizableOpInterface implementations can try to be
-  /// smart and choose to alias with "in" operands or other operands. E.g.,
-  /// the result of a `linalg.generic` op could bufferize in-place with an
-  /// "in" OpOperand if the corresponding "out" operand is not used within
-  /// the computation. Whether this pays off or not can be very input
-  /// IR-specific.
-  bool alwaysAliasingWithDest = true;
-
   /// Buffer alignment for new memory allocations.
   unsigned int bufferAlignment = 128;
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -270,10 +270,6 @@
     Option<"allowUnknownOps", "allow-unknown-ops", "bool",
            /*default=*/"false",
            "Allows unknown (not bufferizable) ops in the input IR.">,
-    Option<"alwaysAliasingWithDest", "always-aliasing-with-dest", "bool",
-           /*default=*/"true",
-           "Tensor OpResults cannot bufferize in-place with OpOperands other "
-           "than out/dest OpOperands (if the op has such operands; experimental)">,
     Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
            /*default=*/"0",
            "Test only: Analyze ops in random order with a given seed (fuzzer)">,
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -182,7 +182,6 @@
   // pass.
   opt.allowReturnAllocs = allowReturnAllocs;
   opt.allowUnknownOps = allowUnknownOps;
-  opt.alwaysAliasingWithDest = alwaysAliasingWithDest;
   opt.analysisFuzzerSeed = analysisFuzzerSeed;
   opt.createDeallocs = createDeallocs;
   opt.functionBoundaryTypeConversion =
diff --git a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -86,69 +86,6 @@
   return success();
 }
 
-/// Linalg OpResults usually bufferize in-place with their tied (output)
-/// OpOperands. However, if an output OpOperand is not used in the
-/// computation, it is better to bufferize in-place with an actually used
-/// input OpOperand; less memory will be touched that way.
-///
-/// Example:
-/// O(i, j) = A(i, j) + B(j) --> bufferizes in-place to: A(i, j) += B(j)
-///
-/// O(i, j) = A(j, i) + B(j) --> cannot bufferize in-place with A because the
-/// indexing maps are not identical
-///
-/// O(i, j) += A(i, j) + B(j) --> the output is used in the computation.
-/// This could bufferize in-place with A:
-/// A(i, j) += O(i, j) + B(j)
-/// However, we choose to bufferize in-place with O here, as there is no clear
-/// benefit of choosing A. TODO: We may want to consider both options and make
-/// an informed decision during analysis in the future.
-static DenseMap<OpOperand *, OpResult> computeAliasingPairs(LinalgOp op) {
-  DenseMap<OpOperand *, OpResult> mapping;
-  for (OpResult opResult : op->getOpResults()) {
-    OpOperand *tiedOperand =
-        op.getOutputTensorOperands()[opResult.getResultNumber()];
-    AffineMap outputIndexingMap = op.getTiedIndexingMap(tiedOperand);
-    bool onlyParallelIterators = op.getNumParallelLoops() == op.getNumLoops();
-    bool tiedOperandUsed = op.payloadUsesValueFromOperand(tiedOperand);
-
-    // If the output arg is used in the computation or at least one iterator
-    // is not parallel, try to bufferize in-place with the corresponding
-    // output tensor.
-    if (tiedOperandUsed || !onlyParallelIterators) {
-      mapping[tiedOperand] = opResult;
-      continue;
-    }
-
-    // Otherwise, try to bufferize in-place with one of the inputs.
-    OpOperand *chosenOperand = nullptr;
-    for (OpOperand *opOperand : op.getInputTensorOperands()) {
-      if (opOperand->get().getType() != opResult.getType())
-        continue;
-      if (!op.payloadUsesValueFromOperand(opOperand))
-        continue;
-      if (op.getTiedIndexingMap(opOperand) != outputIndexingMap)
-        continue;
-      // Make sure that no other OpResult already aliases with this OpOperand.
-      if (mapping.count(opOperand))
-        continue;
-      assert(op.getTiedIndexingMap(opOperand).isProjectedPermutation() &&
-             "expected projected permutation");
-      chosenOperand = opOperand;
-      break;
-    }
-
-    // No suitable input tensor found. Use the output tensor.
-    // TODO: This operand could bufferize in-place with OpOperands that have
-    // the correct type, even if they are not used inside the computation.
-    if (!chosenOperand)
-      chosenOperand = tiedOperand;
-
-    mapping[chosenOperand] = opResult;
-  }
-  return mapping;
-}
-
 /// Bufferization of linalg.generic. Replace with a new linalg.generic that
 /// operates entirely on memrefs.
 template <typename OpTy>
@@ -174,37 +111,18 @@
                        const AnalysisState &state) const {
     auto genericOp = cast<linalg::LinalgOp>(op);
 
-    // By default, the i-th OpResult may alias with the i-th "out" tensor.
-    if (state.getOptions().alwaysAliasingWithDest)
-      return {genericOp.getOutputOperand(opResult.getResultNumber())};
-
-    // We can try to be smart and alias in-place with an "in" tensor if the
-    // corresponding "out" tensor is not used in the computation. Aliasing
-    // OpOperand/OpResult pairs are computed by `computeAliasingPairs`.
-    DenseMap<OpOperand *, OpResult> pairs = computeAliasingPairs(genericOp);
-    for (OpOperand *opOperand : genericOp.getInputAndOutputOperands())
-      if (pairs[opOperand] == opResult)
-        return {opOperand};
-    return {};
+    // The i-th OpResult may alias with the i-th "out" tensor.
+    return {genericOp.getOutputOperand(opResult.getResultNumber())};
   }
 
   SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
                                             const AnalysisState &state) const {
     auto genericOp = cast<linalg::LinalgOp>(op);
 
-    // By default, the i-th "out" tensor may alias with the i-th OpResult.
-    if (state.getOptions().alwaysAliasingWithDest) {
-      if (genericOp.isOutputTensor(&opOperand))
-        return {genericOp.getTiedOpResult(&opOperand)};
-      return {};
-    }
-
-    // We can try to be smart. See comment in `getAliasingOpOperand`.
-    // Aliasing OpOperand/OpResult pairs are computed by `computeAliasingPairs`.
-    DenseMap<OpOperand *, OpResult> pairs = computeAliasingPairs(genericOp);
-    if (!pairs.count(&opOperand))
-      return {};
-    return {pairs[&opOperand]};
+    // The i-th "out" tensor may alias with the i-th OpResult.
+    if (genericOp.isOutputTensor(&opOperand))
+      return {genericOp.getTiedOpResult(&opOperand)};
+    return {};
   }
 
   BufferRelation bufferRelation(Operation *op, OpResult opResult,
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir
deleted file mode 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs always-aliasing-with-dest=0" -split-input-file | FileCheck %s
-
-// CHECK-LABEL: func @linalg_op_bufferizes_inplace_with_input
-// CHECK-SAME:    %[[t1:.*]]: memref<?x?xf32>, %[[t2:.*]]: memref<?xf32>, %[[t3:.*]]: memref<?x?xf32>
-func.func @linalg_op_bufferizes_inplace_with_input(
-    %t1: tensor<?x?xf32> {bufferization.writable = true},
-    %t2: tensor<?xf32> {bufferization.writable = false},
-    %t3: tensor<?x?xf32> {bufferization.writable = false},
-    %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
-  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t1]] : {{.*}})
-  %r = linalg.generic {
-      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                       affine_map<(d0, d1) -> (d1)>,
-                       affine_map<(d0, d1) -> (d0, d1)>],
-      iterator_types = ["parallel", "parallel"]}
-      ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
-      outs(%t3 : tensor<?x?xf32>) {
-    ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
-      %add = arith.addf %arg0, %arg1 : f32
-      linalg.yield %add : f32
-  } -> tensor<?x?xf32>
-  return %r : tensor<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @linalg_op_bufferizes_out_of_place_with_input
-// CHECK-SAME:    %[[t1:.*]]: memref<?x?xf32>, %[[t2:.*]]: memref<?xf32>, %[[t3:.*]]: memref<?x?xf32>
-func.func @linalg_op_bufferizes_out_of_place_with_input(
-    %t1: tensor<?x?xf32> {bufferization.writable = false},
-    %t2: tensor<?xf32> {bufferization.writable = false},
-    %t3: tensor<?x?xf32> {bufferization.writable = false},
-    %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
-  // CHECK: %[[alloc:.*]] = memref.alloc
-  // CHECK: memref.copy %[[t1]], %[[alloc]]
-  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[alloc]] : {{.*}})
-  %r = linalg.generic {
-      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                       affine_map<(d0, d1) -> (d1)>,
-                       affine_map<(d0, d1) -> (d0, d1)>],
-      iterator_types = ["parallel", "parallel"]}
-      ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
-      outs(%t3 : tensor<?x?xf32>) {
-    ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
-      %add = arith.addf %arg0, %arg1 : f32
-      linalg.yield %add : f32
-  } -> tensor<?x?xf32>
-  // CHECK: return %[[alloc]]
-  return %r : tensor<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @linalg_op_output_cannot_alias_with_input
-// CHECK-SAME:    %[[t1:.*]]: memref<?x?xf32>, %[[t2:.*]]: memref<?xf32>, %[[t3:.*]]: memref<?x?xf32>
-func.func @linalg_op_output_cannot_alias_with_input(
-    %t1: tensor<?x?xf32> {bufferization.writable = true},
-    %t2: tensor<?xf32> {bufferization.writable = false},
-    %t3: tensor<?x?xf32> {bufferization.writable = true},
-    %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
-  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
-  %r = linalg.generic {
-      indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>,
-                       affine_map<(d0, d1) -> (d1)>,
-                       affine_map<(d0, d1) -> (d0, d1)>],
-      iterator_types = ["parallel", "parallel"]}
-      ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
-      outs(%t3 : tensor<?x?xf32>) {
-    ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
-      %add = arith.addf %arg0, %arg1 : f32
-      linalg.yield %add : f32
-  } -> tensor<?x?xf32>
-  return %r : tensor<?x?xf32>
-}
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir
deleted file mode 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir
+++ /dev/null
@@ -1,83 +0,0 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs always-aliasing-with-dest=0" -split-input-file | FileCheck %s
-
-// This is a test case for alwaysAliasingWithDest = 0. In that case, an
-// OpResult may bufferize in-place with an "in" OpOperand or any other
-// non-"out" OpOperand.
-
-#accesses = [
-  affine_map<(i) -> (i)>,
-  affine_map<(i) -> (i)>,
-  affine_map<(i) -> (i)>
-]
-#trait = {
-  indexing_maps = #accesses,
-  iterator_types = ["parallel"]
-}
-
-// CHECK-LABEL: func @linalg_op_same_out_tensors(
-func.func @linalg_op_same_out_tensors(
-    %t1: tensor<?xf32> {bufferization.writable = true},
-// CHECK-SAME:    bufferization.access = "read-write"
-    %t2: tensor<?xf32> {bufferization.writable = true})
-// CHECK-SAME:    bufferization.access = "write"
-  -> (tensor<?xf32>, tensor<?xf32>) {
-
-  // %1 and %2 are not used in the computation, so the two OpResults do not
-  // necessarily have to bufferize in-place with the two "out" OpOperands.
-  // They bufferize in-place with the first and second OpOperand (one of
-  // which is an "in" OpOperand).
-  // CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]
-  %o:2 = linalg.generic #trait
-      ins(%t1 : tensor<?xf32>)
-      outs(%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
-    ^bb(%0: f32, %1: f32, %2 : f32) :
-      linalg.yield %0, %0 : f32, f32
-  } -> (tensor<?xf32>, tensor<?xf32>)
-
-  // CHECK: return
-  // CHECK-SAME: __equivalent_func_args__ = [0, 1]
-  return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-#accesses = [
-  affine_map<(i) -> (i)>,
-  affine_map<(i) -> (i)>,
-  affine_map<(i) -> (i)>,
-  affine_map<(i) -> (i)>
-]
-#trait = {
-  indexing_maps = #accesses,
-  iterator_types = ["parallel"]
-}
-
-// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
-func.func @linalg_op_same_out_tensors_2(
-    %t1: tensor<?xf32> {bufferization.writable = true},
-// CHECK-SAME:    bufferization.access = "read-write"
-    %t2: tensor<?xf32> {bufferization.writable = true})
-// CHECK-SAME:    bufferization.access = "write"
-  -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
-
-  // %1, %2 and %3 are not used in the computation, so the three OpResults do
-  // not necessarily have to bufferize in-place with the three "out"
-  // OpOperands. They bufferize in-place with the first, second and third
-  // OpOperand (one of which is an "in" OpOperand).
-  // In contrast to the previous test case, two of the chosen OpOperands are
-  // the same (aliasing) SSA value, which is why one of them must bufferize
-  // out-of-place.
-  // CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true", "false"]
-  %o:3 = linalg.generic #trait
-      ins(%t1 : tensor<?xf32>)
-      outs(%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
-    ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) :
-      linalg.yield %0, %0, %0 : f32, f32, f32
-  } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
-
-  // CHECK: return
-  // CHECK-SAME: __equivalent_func_args__ = [0, 1, -1]
-  return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
-}
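Note on the retained default: with the option removed, the i-th tensor OpResult of a linalg op always aliases the i-th "out" OpOperand, in line with destination-passing style. A minimal sketch of the resulting behavior (hypothetical IR, not taken from the patch's test files): the result %r below can now only bufferize in-place with %out, never with %in, even though the payload does not read %out.

  %r = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
      iterator_types = ["parallel"]}
      ins(%in : tensor<?xf32>) outs(%out : tensor<?xf32>) {
    ^bb0(%a : f32, %b : f32):
      %sum = arith.addf %a, %a : f32
      linalg.yield %sum : f32
  } -> tensor<?xf32>

If writing to %out in-place is not possible (e.g., %out is not writable), One-Shot Bufferize allocates a new destination buffer rather than falling back to reusing %in.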