diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -71,6 +71,11 @@
 /// Create a pass that bufferizes ops from the bufferization dialect.
 std::unique_ptr<Pass> createBufferizationBufferizePass();
 
+/// Create a pass that resolves out-of-place tensor OpOperands with copies.
+std::unique_ptr<Pass> createTensorCopyInsertionPass();
+std::unique_ptr<Pass>
+createTensorCopyInsertionPass(const OneShotBufferizationOptions &options);
+
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -318,6 +318,25 @@
   ];
 }
 
+def TensorCopyInsertion : Pass<"tensor-copy-insertion"> {
+  let summary = "Make all tensor IR inplaceable by inserting copies";
+  let description = [{
+    This pass runs One-Shot Analysis and inserts copies for all OpOperands that
+    were decided to bufferize out-of-place. After running this pass, a
+    bufferization can write to buffers directly (without making copies) and no
+    longer has to care about potential read-after-write conflicts.
+  }];
+  let options = [
+    Option<"allowReturnAllocs", "allow-return-allocs", "bool",
+           /*default=*/"false",
+           "Allows returning/yielding new allocations from a block.">,
+    Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
+           "bool", /*default=*/"0",
+           "Bufferize function boundaries (experimental).">,
+  ];
+  let constructor = "mlir::bufferization::createTensorCopyInsertionPass()";
+}
+
 def AllocTensorElimination : Pass<"eliminate-alloc-tensors"> {
   let summary = "Try to eliminate all alloc_tensor ops.";
   let description = [{
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
@@ -8,6 +8,7 @@
   FuncBufferizableOpInterfaceImpl.cpp
   OneShotAnalysis.cpp
   OneShotModuleBufferize.cpp
+  TensorCopyInsertion.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
@@ -0,0 +1,123 @@
+//===- TensorCopyInsertion.cpp - Resolve Bufferization Conflicts w/ Copies ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
+
+using namespace mlir;
+using namespace mlir::bufferization;
+
+static LogicalResult
+insertTensorCopies(Operation *op, const OneShotBufferizationOptions &options) {
+  OneShotAnalysisState state(op, options);
+  // Run normal One-Shot Bufferize analysis or One-Shot Module Bufferize
+  // analysis depending on whether function boundary bufferization is enabled
+  // or not.
+  if (options.bufferizeFunctionBoundaries) {
+    if (failed(analyzeModuleOp(cast<ModuleOp>(op), state)))
+      return failure();
+  } else {
+    if (failed(analyzeOp(op, state)))
+      return failure();
+  }
+
+  OpBuilder builder(op->getContext());
+  WalkResult result = op->walk([&](Operation *op) {
+    auto bufferizableOp = options.dynCastBufferizableOp(op);
+    if (!bufferizableOp)
+      return WalkResult::skip();
+
+    // Find AllocTensorOps without an `escape` attribute and add the attribute
+    // based on analysis results.
+    if (auto allocTensorOp = dyn_cast<AllocTensorOp>(op)) {
+      if (allocTensorOp.escape())
+        return WalkResult::advance();
+      bool escape = state.isTensorYielded(allocTensorOp.result());
+      if (escape && !options.allowReturnAllocs) {
+        op->emitError("illegal return of allocation detected");
+        return WalkResult::interrupt();
+      }
+      allocTensorOp.escapeAttr(builder.getBoolAttr(escape));
+      return WalkResult::advance();
+    }
+
+    // Find out-of-place tensor OpOperands and resolve them with an explicit
+    // tensor copy in the form of an AllocTensorOp.
+    builder.setInsertionPoint(op);
+    for (OpOperand &opOperand : op->getOpOperands()) {
+      if (opOperand.get().getType().isa<UnrankedTensorType>()) {
+        op->emitError("copies of unranked tensors are not supported");
+        return WalkResult::interrupt();
+      }
+      auto tensorType = opOperand.get().getType().dyn_cast<RankedTensorType>();
+      if (!tensorType)
+        continue;
+      if (state.isInPlace(opOperand))
+        continue;
+      SmallVector<OpResult> aliasingOpResults =
+          state.getAliasingOpResult(opOperand);
+      bool escape = llvm::any_of(
+          aliasingOpResults, [&](Value v) { return state.isTensorYielded(v); });
+      assert((!escape || options.allowReturnAllocs) &&
+             "analysis should have detected illegal alloc return");
+      Value copy = builder.create<AllocTensorOp>(
+          op->getLoc(), tensorType, ValueRange(), opOperand.get(), escape);
+      opOperand.set(copy);
+    }
+
+    return WalkResult::advance();
+  });
+
+  return failure(result.wasInterrupted());
+}
+
+namespace {
+struct TensorCopyInsertionPass
+    : TensorCopyInsertionBase<TensorCopyInsertionPass> {
+  TensorCopyInsertionPass()
+      : TensorCopyInsertionBase<TensorCopyInsertionPass>(),
+        options(llvm::None) {}
+  TensorCopyInsertionPass(const OneShotBufferizationOptions &options)
+      : TensorCopyInsertionBase<TensorCopyInsertionPass>(), options(options) {}
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<bufferization::BufferizationDialect>();
+  }
+
+  void runOnOperation() override {
+    if (options.hasValue()) {
+      if (failed(insertTensorCopies(getOperation(), *options)))
+        signalPassFailure();
+    } else {
+      OneShotBufferizationOptions options;
+      options.allowReturnAllocs = allowReturnAllocs;
+      options.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
+      if (failed(insertTensorCopies(getOperation(), options)))
+        signalPassFailure();
+    }
+  }
+
+private:
+  Optional<OneShotBufferizationOptions> options;
+};
+} // namespace
+
+std::unique_ptr<Pass> mlir::bufferization::createTensorCopyInsertionPass() {
+  return std::make_unique<TensorCopyInsertionPass>();
+}
+
+std::unique_ptr<Pass> mlir::bufferization::createTensorCopyInsertionPass(
+    const OneShotBufferizationOptions &options) {
+  return std::make_unique<TensorCopyInsertionPass>(options);
+}
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -0,0 +1,27 @@
+// RUN: mlir-opt %s -tensor-copy-insertion -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -tensor-copy-insertion="bufferize-function-boundaries allow-return-allocs" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
+
+// CHECK-LABEL: func @read_after_write_conflict(
+//  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+// CHECK-FUNC-LABEL: func @read_after_write_conflict(
+func.func @read_after_write_conflict(%t: tensor<?xf32>, %idx: index, %f: f32)
+  -> (tensor<?xf32>, tensor<?xf32>)
+{
+  // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[t]]) {escape = false} : tensor<?xf32>
+  // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) {escape = true} : tensor<?xf32>
+  // CHECK: %[[insert:.*]] = tensor.insert %{{.*}} into %[[copy]]
+  %0 = tensor.insert %f into %t[%idx] : tensor<?xf32>
+  // CHECK: return %[[insert]], %[[t]]
+  return %0, %t : tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @return_alloc_tensor
+// CHECK-FUNC-LABEL: func @return_alloc_tensor
+func.func @return_alloc_tensor() -> (tensor<5xf32>) {
+  // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
+  // CHECK-FUNC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
+  %0 = bufferization.alloc_tensor() : tensor<5xf32>
+  return %0 : tensor<5xf32>
+}