diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
@@ -164,9 +164,8 @@
 LogicalResult analyzeOp(Operation *op, AnalysisBufferizationState &state);
 
 /// Run One-Shot Bufferize on the given op: Analysis + Bufferization
-LogicalResult
-runOneShotBufferize(Operation *op,
-                    std::unique_ptr<AnalysisBufferizationOptions> options);
+LogicalResult runOneShotBufferize(Operation *op,
+                                  const AnalysisBufferizationOptions &options);
 
 } // namespace bufferization
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -5,6 +5,7 @@
 namespace mlir {
 namespace bufferization {
 
+struct AnalysisBufferizationOptions;
 
 //===----------------------------------------------------------------------===//
 // Passes
@@ -29,6 +30,15 @@
 /// bufferization.to_tensor and bufferization.to_memref operations.
 std::unique_ptr<OperationPass<FuncOp>> createFinalizingBufferizePass();
 
+/// Create a pass that bufferizes all ops that implement BufferizableOpInterface
+/// with One-Shot Bufferize.
+std::unique_ptr<Pass> createOneShotBufferizePass();
+
+/// Create a pass that bufferizes all ops that implement BufferizableOpInterface
+/// with One-Shot Bufferize and the specified bufferization options.
+std::unique_ptr<Pass>
+createOneShotBufferizePass(const AnalysisBufferizationOptions &options);
+
 /// Creates a pass that promotes heap-based allocations to stack-based ones.
 /// Only buffers smaller than the provided size are promoted.
 /// Dynamic shaped buffers are promoted up to the given rank.
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -149,6 +149,88 @@
   let constructor = "mlir::bufferization::createFinalizingBufferizePass()";
 }
 
+def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
+  let summary = "One-Shot Bufferize";
+  let description = [{
+    This pass bufferizes all ops that implement `BufferizableOpInterface`. It
+    first performs an inplaceability analysis on SSA use-def chains of tensor
+    values to determine which OpOperands may bufferize in-place, i.e., without
+    inserting a buffer copy. It then rewrites the IR, inserting a buffer
+    allocation and copy for each OpOperand that was decided to bufferize
+    out-of-place.
+
+    One-Shot Bufferize (and `BufferizableOpInterface`) was designed for ops that
+    are in destination-passing style. When bufferizing such ops, it is possible
+    to reuse the buffer of a tensor OpOperand for a tensor OpResult. In essence,
+    a possible destination of an operation is already passed as an SSA value.
+
+    `tensor.insert` is an example of an op in destination-passing style. E.g.,
+    when bufferizing `%t0 = tensor.insert %f into %dest[%idx]`, `buffer(%t0)` is
+    identical to `buffer(%dest)` in the absence of RaW conflicts. As a
+    counterexample, `tensor.generate` is not in destination-passing style and
+    always results in a new buffer allocation.
+
+    One-Shot Bufferize deallocates all buffers that it allocates.
+    Yielding newly allocated buffers from a block is not supported yet and such
+    IR will be rejected. For testing purposes and compatibility with partial
+    bufferization, One-Shot Bufferize can be run with
+    `allow-return-memref=1 create-deallocs=0` to allow such IR.
+
+    One-Shot Bufferize will by default reject IR that contains non-bufferizable
+    ops, i.e., ops that do not implement BufferizableOpInterface. Such IR can
+    be allowed with `allow-unknown-ops=1`. In that case, to_memref and to_tensor
+    ops will be generated at the bufferization boundary. This is useful for
+    compatibility with existing partial bufferization passes: These can
+    bufferize the remaining IR after running One-Shot Bufferize.
+
+    Note: Running One-Shot Bufferize after a partial bufferization pass is
+    currently not supported. Running partial bufferization passes after running
+    One-Shot Bufferize is supported and the recommended way to gradually
+    migrate from partial bufferization to One-Shot Bufferize.
+
+    With `dialect-filter`, bufferization can be restricted to a set of dialects.
+    If no filter is specified, all ops that implement `BufferizableOpInterface`
+    are bufferized. Ops from the `std` dialect are an exception: These ops are
+    always ignored, even if no filter is specified. When a dialect filter is
+    specified and `allow-unknown-ops` is not turned on, bufferization fails
+    when encountering an op that is not included in the filter (even if it is
+    bufferizable).
+
+    For testing/debugging purposes, `test-analysis-only=1 print-conflicts=1`
+    prints analysis results and explains why an OpOperand was decided to
+    bufferize out-of-place. This is useful for understanding why One-Shot
+    Bufferize chose to insert a certain buffer copy.
+  }];
+  let options = [
+    Option<"allowReturnMemref", "allow-return-memref", "bool",
+           /*default=*/"false",
+           "Allows the return of memrefs (for testing purposes only)">,
+    Option<"allowUnknownOps", "allow-unknown-ops", "bool",
+           /*default=*/"false",
+           "Allows unknown (not bufferizable) ops in the input IR.">,
+    Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
+           /*default=*/"0",
+           "Test only: Analyze ops in random order with a given seed (fuzzer)">,
+    Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
+           "Specify if buffers should be deallocated. For compatibility with "
+           "core bufferization passes.">,
+    ListOption<"dialectFilter", "dialect-filter", "std::string",
+               "Restrict bufferization to ops from these dialects.",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+    Option<"fullyDynamicLayoutMaps", "fully-dynamic-layout-maps", "bool",
+           /*default=*/"true",
+           "Generate MemRef types with dynamic offset+strides by default.">,
+    Option<"testAnalysisOnly", "test-analysis-only", "bool",
+           /*default=*/"false",
+           "Test only: Only run inplaceability analysis and annotate IR">,
+    Option<"printConflicts", "print-conflicts", "bool",
+           /*default=*/"false",
+           "Test only: Annotate IR with RaW conflicts. Requires "
+           "test-analysis-only.">,
+  ];
+  let constructor = "mlir::bufferization::createOneShotBufferizePass()";
+}
+
 def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
   let summary = "Promotes heap-based allocations to automatically managed "
                 "stack-based allocations";
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/AMX/AMXDialect.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/ArmNeon/ArmNeonDialect.h"
 #include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
 #include "mlir/Dialect/Async/IR/Async.h"
@@ -30,6 +31,7 @@
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
@@ -37,6 +39,7 @@
 #include "mlir/Dialect/PDL/IR/PDL.h"
 #include "mlir/Dialect/PDLInterp/IR/PDLInterp.h"
 #include "mlir/Dialect/Quant/QuantOps.h"
+#include "mlir/Dialect/SCF/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
 #include "mlir/Dialect/Shape/IR/Shape.h"
@@ -45,8 +48,10 @@
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/X86Vector/X86VectorDialect.h"
 #include "mlir/IR/Dialect.h"
 
@@ -88,8 +93,13 @@
                   tosa::TosaDialect,
                   x86vector::X86VectorDialect>();
   // clang-format on
+  arith::registerBufferizableOpInterfaceExternalModels(registry);
+  linalg::registerBufferizableOpInterfaceExternalModels(registry);
+  scf::registerBufferizableOpInterfaceExternalModels(registry);
+  tensor::registerBufferizableOpInterfaceExternalModels(registry);
   tensor::registerInferTypeOpInterfaceExternalModels(registry);
   tensor::registerTilingOpInterfaceExternalModels(registry);
+  vector::registerBufferizableOpInterfaceExternalModels(registry);
 }
 
 /// Append all the MLIR dialects to the registry contained in the given context.
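
(For orientation only, not part of the patch: with the external-model registrations above in place, a downstream tool could configure and schedule the new pass roughly as sketched below. The helper name `bufferizeModule` and the exact include set are assumptions; the option fields and the `createOneShotBufferizePass(options)` overload are the ones introduced by this patch.)

  #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
  #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
  #include "mlir/IR/BuiltinOps.h"
  #include "mlir/IR/MLIRContext.h"
  #include "mlir/InitAllDialects.h"
  #include "mlir/Pass/PassManager.h"

  // Sketch only: run One-Shot Bufferize on a module with custom options.
  static mlir::LogicalResult bufferizeModule(mlir::ModuleOp module,
                                             mlir::MLIRContext &ctx) {
    // registerAllDialects() (patched above) now also attaches the
    // BufferizableOpInterface external models for arith, linalg, scf, tensor
    // and vector.
    mlir::DialectRegistry registry;
    mlir::registerAllDialects(registry);
    ctx.appendDialectRegistry(registry);

    mlir::bufferization::AnalysisBufferizationOptions options;
    options.allowUnknownOps = true; // emit to_memref/to_tensor at the boundary
    options.createDeallocs = true;  // default: free all buffers we allocate

    mlir::PassManager pm(&ctx);
    pm.addPass(mlir::bufferization::createOneShotBufferizePass(options));
    return pm.run(module);
  }
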
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -11,9 +11,13 @@
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
 using namespace mlir::bufferization;
@@ -144,8 +148,81 @@
     signalPassFailure();
   }
 };
+
+struct OneShotBufferizePass
+    : public OneShotBufferizeBase<OneShotBufferizePass> {
+  using OneShotBufferizeBase<OneShotBufferizePass>::OneShotBufferizeBase;
+
+  explicit OneShotBufferizePass(const AnalysisBufferizationOptions &options)
+      : options(options) {}
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<bufferization::BufferizationDialect,
+                    memref::MemRefDialect>();
+  }
+
+  void runOnOperation() override {
+    AnalysisBufferizationOptions opt;
+    if (!options) {
+      // Make new bufferization options if none were provided when creating the
+      // pass.
+      opt.allowReturnMemref = allowReturnMemref;
+      opt.allowUnknownOps = allowUnknownOps;
+      opt.analysisFuzzerSeed = analysisFuzzerSeed;
+      opt.createDeallocs = createDeallocs;
+      opt.fullyDynamicLayoutMaps = fullyDynamicLayoutMaps;
+      opt.printConflicts = printConflicts;
+      opt.testAnalysisOnly = testAnalysisOnly;
+
+      BufferizationOptions::OpFilterEntry::FilterFn filterFn =
+          [&](Operation *op) {
+            // Disallow ops from the std dialect, i.e., ops related to
+            // function calls.
+            if (op->getDialect()->getNamespace() ==
+                StandardOpsDialect::getDialectNamespace())
+              return false;
+            // Filter may be specified via options.
+            if (this->dialectFilter.hasValue())
+              return llvm::find(this->dialectFilter,
+                                op->getDialect()->getNamespace()) !=
+                     this->dialectFilter.end();
+            // No filter specified: All other ops are allowed.
+            return true;
+          };
+      opt.allowOperationInFilter(filterFn);
+    } else {
+      opt = *options;
+    }
+
+    ModuleOp moduleOp = getOperation();
+    if (failed(runOneShotBufferize(moduleOp, opt))) {
+      signalPassFailure();
+      return;
+    }
+
+    if (opt.testAnalysisOnly)
+      return;
+
+    OpPassManager cleanupPipeline("builtin.module");
+    cleanupPipeline.addPass(createCanonicalizerPass());
+    cleanupPipeline.addPass(createCSEPass());
+    cleanupPipeline.addPass(createLoopInvariantCodeMotionPass());
+    (void)runPipeline(cleanupPipeline, moduleOp);
+  }
+
+private:
+  llvm::Optional<AnalysisBufferizationOptions> options;
+};
 } // namespace
 
+std::unique_ptr<Pass> mlir::bufferization::createOneShotBufferizePass() {
+  return std::make_unique<OneShotBufferizePass>();
+}
+
+std::unique_ptr<Pass> mlir::bufferization::createOneShotBufferizePass(
+    const AnalysisBufferizationOptions &options) {
+  return std::make_unique<OneShotBufferizePass>(options);
+}
+
 std::unique_ptr<OperationPass<FuncOp>>
 mlir::bufferization::createFinalizingBufferizePass() {
   return std::make_unique<FinalizingBufferizePass>();
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
@@ -799,11 +799,11 @@
 }
 
 LogicalResult bufferization::runOneShotBufferize(
-    Operation *op, std::unique_ptr<AnalysisBufferizationOptions> options) {
-  AnalysisBufferizationState state(op, *options);
+    Operation *op, const AnalysisBufferizationOptions &options) {
+  AnalysisBufferizationState state(op, options);
   if (failed(analyzeOp(op, state)))
     return failure();
-  if (options->testAnalysisOnly)
+  if (options.testAnalysisOnly)
     return success();
   return bufferizeOp(op, state);
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize-compat.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
rename from mlir/test/Dialect/Linalg/comprehensive-function-bufferize-compat.mlir
rename to mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize-compat.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
@@ -1,10 +1,10 @@
 // RUN: mlir-opt %s \
-// RUN:     -test-comprehensive-function-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0" \
+// RUN:     -one-shot-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0" \
 // RUN:     -split-input-file | \
 // RUN:   FileCheck %s --check-prefix=CHECK-NODEALLOC
 
 // RUN: mlir-opt %s \
-// RUN:     -test-comprehensive-function-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0" \
+// RUN:     -one-shot-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0" \
 // RUN:     -buffer-deallocation | \
 // RUN:   FileCheck %s --check-prefix=CHECK-BUFFERDEALLOC
 
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
rename from mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir
rename to mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
@@ -1,30 +1,28 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref allow-unknown-ops"
-split-input-file | FileCheck %s // Test bufferization using memref types that have no layout map. -// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref allow-unknown-ops fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP // Run fuzzer with different seeds. -// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null -// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null -// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null -// RUN: mlir-opt %s -allow-unregistered-dialect -test-comprehensive-function-bufferize="dialect-filter=tensor allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR -// RUN: mlir-opt %s -allow-unregistered-dialect -test-comprehensive-function-bufferize="dialect-filter=scf allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-SCF +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=tensor allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=scf allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-SCF // CHECK: #[[$MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> // CHECK-LABEL: func @use_of_unknown_op_1( -// CHECK-SAME: %[[m1:.*]]: memref +// CHECK-SAME: %[[t1:.*]]: tensor // CHECK-NO-LAYOUT-MAP-LABEL: func @use_of_unknown_op_1( -// CHECK-NO-LAYOUT-MAP-SAME: %[[m1:.*]]: memref) -func @use_of_unknown_op_1(%t1: tensor {linalg.inplaceable = true}) +// CHECK-NO-LAYOUT-MAP-SAME: %[[t1:.*]]: tensor +func @use_of_unknown_op_1(%t1: tensor) -> vector<5xf32> { // ToTensorOp is generated because the function is bufferized and has a // memref block argument. 
- // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] : memref - // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]]) - // CHECK-NO-LAYOUT-MAP: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] : memref - // CHECK-NO-LAYOUT-MAP: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]]) + // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]]) + // CHECK-NO-LAYOUT-MAP: %[[dummy:.*]] = "test.dummy_op"(%[[t1]]) %0 = "test.dummy_op"(%t1) : (tensor) -> tensor %idx = arith.constant 0 : index @@ -40,36 +38,34 @@ // ----- // CHECK-LABEL: func @use_of_unknown_op_2( -// CHECK-SAME: %[[m1:.*]]: memref {linalg.inplaceable = true}) - -> tensor { - // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] - - // CHECK: %[[dummy1:.*]] = "test.dummy_op"(%[[m1_tensor]]) +// CHECK-SAME: %[[t1:.*]]: tensor +func @use_of_unknown_op_2(%t1: tensor) -> tensor { + // CHECK: %[[dummy1:.*]] = "test.dummy_op"(%[[t1]]) %0 = "test.dummy_op"(%t1) : (tensor) -> tensor // CHECK: %[[dummy2:.*]] = "test.another_dummy_op"(%[[dummy1]]) %1 = "test.another_dummy_op"(%0) : (tensor) -> tensor - // CHECK: %[[dummy2_memref:.*]] = bufferization.to_memref %[[dummy2]] - // CHECK: return %[[dummy2_memref]] + // CHECK: return %[[dummy2]] return %1 : tensor } // ----- +// CHECK: #[[$MAP2:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + // CHECK-LABEL: func @use_of_unknown_op_3( -// CHECK-SAME: %[[m1:.*]]: memref {linalg.inplaceable = true}) +// CHECK-SAME: %[[t1:.*]]: tensor +func @use_of_unknown_op_3(%t1: tensor) -> (vector<5xf32>, vector<5xf32>) { %idx = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] // CHECK: %[[v1:.*]] = vector.transfer_read %[[m1]] %1 = vector.transfer_read %t1[%idx], %cst : tensor, vector<5xf32> - // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]]) + // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]]) %0 = "test.dummy_op"(%t1) : (tensor) -> tensor - // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] + // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref // CHECK: %[[v2:.*]] = vector.transfer_read %[[dummy_memref]] %2 = vector.transfer_read %0[%idx], %cst : tensor, vector<5xf32> @@ -80,14 +76,13 @@ // ----- // CHECK-LABEL: func @use_of_unknown_op_4( -// CHECK-SAME: %[[m1:.*]]: memref {linalg.inplaceable = true}) +// CHECK-SAME: %[[t1:.*]]: tensor +func @use_of_unknown_op_4(%t1: tensor) -> (vector<5xf32>, tensor) { %idx = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] - // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]]) + // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]]) %0 = "test.dummy_op"(%t1) : (tensor) -> tensor // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] @@ -97,40 +92,39 @@ // CHECK: %[[another_dummy:.*]] = "test.another_dummy_op"(%[[dummy]]) %2 = "test.another_dummy_op"(%0) : (tensor) -> tensor - // CHECK: %[[another_dummy_memref:.*]] = bufferization.to_memref %[[another_dummy]] - // CHECK: return %[[v1]], %[[another_dummy_memref]] + // CHECK: return %[[v1]], %[[another_dummy]] return %1, %2 : vector<5xf32>, tensor } // ----- // CHECK-LABEL: func @use_of_bufferizable_op_in_unbufferizable_op -// CHECK-SAME: %[[m1:.*]]: memref func @use_of_bufferizable_op_in_unbufferizable_op( %t1: tensor, %o: index, %s: index) -> (tensor, tensor) { + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] // CHECK: %[[subview:.*]] = 
memref.subview %[[m1]] %0 = tensor.extract_slice %t1[%o][%s][1] : tensor to tensor // CHECK: %[[subview_tensor:.*]] = bufferization.to_tensor %[[subview]] // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[subview_tensor]]) %1 = "test.dummy_op"(%0) : (tensor) -> tensor - // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] - // CHECK: return %[[subview]], %[[dummy_memref]] + // CHECK: return %[[subview_tensor]], %[[dummy]] return %0, %1 : tensor, tensor } // ----- // CHECK-LABEL: func @unused_unknown_op( -// CHECK-SAME: %[[m1:.*]]: memref func @unused_unknown_op(%t1 : tensor) -> vector<5xf32> { %idx = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - // ToTensorOp is inserted to pass in the result of the above bufferized op. - // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] + + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] // CHECK: vector.transfer_read %[[m1]] %1 = vector.transfer_read %t1[%idx], %cst : tensor, vector<5xf32> - // CHECK: "test.dummy_op"(%[[m1_tensor]]) + // CHECK: "test.dummy_op"(%[[t1]]) "test.dummy_op"(%t1) : (tensor) -> () return %1 : vector<5xf32> @@ -138,25 +132,60 @@ // ----- +// CHECK: #[[$MAP3:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @unknown_op_may_read( +func @unknown_op_may_read(%v: vector<5xf32>) + -> (tensor<10xf32>, tensor<10xf32>) { + %idx = arith.constant 0 : index + %cst = arith.constant 5.0 : f32 + + // One alloc for the init_tensor, another one because the transfer_write + // bufferizes out-of-place. + // CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32> + // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32> + // CHECK: %[[alloc_casted:.*]] = memref.cast %[[alloc]] : memref<10xf32> to memref<10xf32, #[[$MAP3]]> + // CHECK: %[[m1_casted:.*]] = memref.cast %[[m1]] : memref<10xf32> to memref<10xf32, #[[$MAP3]]> + %t1 = linalg.init_tensor [10] : tensor<10xf32> + + // CHECK: linalg.fill(%{{.*}}, %[[m1]]) + // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1_casted]] + %filled = linalg.fill(%cst, %t1) : f32, tensor<10xf32> -> tensor<10xf32> + + // The transfer_write is out-of-place because "dummy_op" may read. + // CHECK: memref.copy %[[m1]], %[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc_casted]] + %1 = vector.transfer_write %v, %filled[%idx] : vector<5xf32>, tensor<10xf32> + + // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[filled_tensor]]) + %2 = "test.dummy_op"(%filled) : (tensor<10xf32>) -> (tensor<10xf32>) + + // CHECK: memref.dealloc %[[alloc]] + // CHECK: memref.dealloc %[[m1]] + // CHECK: return %[[alloc_tensor]], %[[dummy]] + return %1, %2 : tensor<10xf32>, tensor<10xf32> +} + +// ----- + // CHECK-LABEL: func @unknown_op_not_writable -// CHECK-SAME: %[[m1:.*]]: memref func @unknown_op_not_writable( %t1 : tensor, %v : vector<5xf32>, %idx : index) -> tensor { - // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] - // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]]) + // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]]) // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] %0 = "test.dummy_op"(%t1) : (tensor) -> (tensor) // The result of an unknown op is not writable. Always generate a copy. - // Note: This copy is essential for partial bufferization. Otherwise, we could - // introducing a RaW conflict. 
// CHECK: %[[dim:.*]] = tensor.dim %[[dummy]] // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) // CHECK: memref.copy %[[dummy_memref]], %[[alloc]] // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] %1 = vector.transfer_write %v, %0[%idx] : vector<5xf32>, tensor - // CHECK: return %[[alloc]] + // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]] + // CHECK: return %[[alloc_tensor]] return %1 : tensor } diff --git a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir rename from mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir rename to mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -test-comprehensive-function-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s // Run fuzzer with different seeds. -// RUN: mlir-opt %s -test-comprehensive-function-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null -// RUN: mlir-opt %s -test-comprehensive-function-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null -// RUN: mlir-opt %s -test-comprehensive-function-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null // CHECK-LABEL: func @use_tensor_func_arg( // CHECK-SAME: %[[A:.*]]: tensor @@ -68,31 +68,4 @@ return } -// ----- -// CHECK-LABEL: func @rank_reducing -func @rank_reducing( - %i: index, %j: index, - %arg0: tensor<8x18x32xf32>) - -> tensor { - %c1 = arith.constant 1 : index - %c6 = arith.constant 6 : index - %c8 = arith.constant 8 : index - %c32 = arith.constant 32 : index - %c0 = arith.constant 0 : index - %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> - %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32> - %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor) { - %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7) - %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32> - %9 = scf.for %arg9 = %c0 to %c6 step %c1 iter_args(%arg10 = %2) -> (tensor<1x6x8xf32>) { - %11 = tensor.extract_slice %8[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x6x8xf32> to tensor<1x1x8xf32> - %12 = tensor.insert_slice %11 into %arg10[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x1x8xf32> into tensor<1x6x8xf32> - scf.yield %12 : tensor<1x6x8xf32> - } - %10 = tensor.insert_slice %9 into %arg8[%7, 0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : tensor<1x6x8xf32> into tensor - scf.yield %10 : tensor - } - return %5: tensor -} diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir --- 
a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir @@ -1355,3 +1355,35 @@ // CHECK: return %[[f]], %[[select]] return %f, %w : f32, tensor } + +// ----- + +// A regression test to make sure that we handle rank-reducing extract_slice +// correctly. + +// CHECK-LABEL: func @rank_reducing +func @rank_reducing( + %i: index, %j: index, + %arg0: tensor<8x18x32xf32>) + -> tensor { + %c1 = arith.constant 1 : index + %c6 = arith.constant 6 : index + %c8 = arith.constant 8 : index + %c32 = arith.constant 32 : index + %c0 = arith.constant 0 : index + %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor + %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32> + %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor) { + %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7) + %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32> + %9 = scf.for %arg9 = %c0 to %c6 step %c1 iter_args(%arg10 = %2) -> (tensor<1x6x8xf32>) { + %11 = tensor.extract_slice %8[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x6x8xf32> to tensor<1x1x8xf32> + %12 = tensor.insert_slice %11 into %arg10[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x1x8xf32> into tensor<1x6x8xf32> + scf.yield %12 : tensor<1x6x8xf32> + } + %10 = tensor.insert_slice %9 into %arg8[%7, 0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : tensor<1x6x8xf32> into tensor + scf.yield %10 : tensor + } + return %5: tensor +} diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp deleted file mode 100644 --- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===- TestComprehensiveBufferize.cpp - Test Comprehensive Bufferize ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements logic for testing Comprehensive Bufferize. 
-// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" -#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" -#include "mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h" -#include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Linalg/Passes.h" -#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/SCF/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Vector/IR/VectorOps.h" -#include "mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Transforms/Passes.h" - -using namespace mlir; -using namespace mlir::linalg; -using namespace mlir::linalg::comprehensive_bufferize; -using namespace mlir::bufferization; - -namespace { -/// A helper struct for FunctionBufferize and ModuleBufferize. Both passes are -/// mostly identical. -struct TestComprehensiveFunctionBufferize - : public PassWrapper> { - StringRef getArgument() const final { - return "test-comprehensive-function-bufferize"; - } - - StringRef getDescription() const final { - return "Test Comprehensive Bufferize of FuncOps (body only)."; - } - - TestComprehensiveFunctionBufferize() = default; - TestComprehensiveFunctionBufferize( - const TestComprehensiveFunctionBufferize &pass) - : PassWrapper(pass) {} - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - affine_ext::registerBufferizableOpInterfaceExternalModels(registry); - arith::registerBufferizableOpInterfaceExternalModels(registry); - linalg::registerBufferizableOpInterfaceExternalModels(registry); - scf::registerBufferizableOpInterfaceExternalModels(registry); - tensor::registerBufferizableOpInterfaceExternalModels(registry); - vector::registerBufferizableOpInterfaceExternalModels(registry); - } - - void runOnOperation() override; - - Option allowReturnMemref{ - *this, "allow-return-memref", - llvm::cl::desc("Allow returning/yielding memrefs from functions/blocks"), - llvm::cl::init(false)}; - Option allowUnknownOps{ - *this, "allow-unknown-ops", - llvm::cl::desc( - "Allows the return of memrefs (for testing purposes only)"), - llvm::cl::init(false)}; - Option testAnalysisOnly{ - *this, "test-analysis-only", - llvm::cl::desc( - "Only runs inplaceability analysis (for testing purposes only)"), - llvm::cl::init(false)}; - Option analysisFuzzerSeed{ - *this, "analysis-fuzzer-seed", - llvm::cl::desc("Analyze ops in random order with a given seed (fuzzer)"), - llvm::cl::init(0)}; - ListOption dialectFilter{ - *this, "dialect-filter", - llvm::cl::desc("Bufferize only ops from the specified dialects"), - llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated}; - Option fullyDynamicLayoutMaps{ - *this, "fully-dynamic-layout-maps", - llvm::cl::desc("Use fully dynamic layout maps on memref types"), - llvm::cl::init(true)}; - Option createDeallocs{ - *this, "create-deallocs", - llvm::cl::desc("Specify if buffers should be deallocated"), - llvm::cl::init(true)}; -}; -} // namespace - -void TestComprehensiveFunctionBufferize::runOnOperation() { - auto options = 
std::make_unique(); - options->allowReturnMemref = allowReturnMemref; - options->allowUnknownOps = allowUnknownOps; - options->testAnalysisOnly = testAnalysisOnly; - options->analysisFuzzerSeed = analysisFuzzerSeed; - options->fullyDynamicLayoutMaps = fullyDynamicLayoutMaps; - options->createDeallocs = createDeallocs; - - if (dialectFilter.hasValue()) { - options->hasFilter = true; - for (const std::string &dialectNamespace : dialectFilter) - options->allowDialectInFilter(dialectNamespace); - } - - Operation *op = getOperation(); - if (failed(runOneShotBufferize(op, std::move(options)))) - return; - - if (testAnalysisOnly) - return; - - OpPassManager cleanupPipeline("builtin.func"); - cleanupPipeline.addPass(createCanonicalizerPass()); - cleanupPipeline.addPass(createCSEPass()); - cleanupPipeline.addPass(createLoopInvariantCodeMotionPass()); - (void)this->runPipeline(cleanupPipeline, op); -} - -namespace mlir { -namespace test { -void registerTestComprehensiveFunctionBufferize() { - PassRegistration(); -} -} // namespace test -} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -64,7 +64,6 @@ void registerTestAliasAnalysisPass(); void registerTestBuiltinAttributeInterfaces(); void registerTestCallGraphPass(); -void registerTestComprehensiveFunctionBufferize(); void registerTestConstantFold(); void registerTestGpuSerializeToCubinPass(); void registerTestGpuSerializeToHsacoPass(); @@ -159,7 +158,6 @@ #if MLIR_ROCM_CONVERSIONS_ENABLED mlir::test::registerTestGpuSerializeToHsacoPass(); #endif - mlir::test::registerTestComprehensiveFunctionBufferize(); mlir::test::registerTestDecomposeCallGraphTypes(); mlir::test::registerTestDataLayoutQuery(); mlir::test::registerTestDominancePass();
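
(Not part of the patch: for downstream code that used the deleted TestComprehensiveFunctionBufferize pass as a template, a minimal sketch of driving the analysis and bufferization directly through the updated runOneShotBufferize() signature, which now takes the options by const reference instead of std::unique_ptr. The helper name, the chosen option values, and the string-based dialect filter call are illustrative assumptions modeled on the deleted test pass above.)

  #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"

  // Sketch only: roughly what the deleted test pass did, ported to the new
  // API. Assumes the BufferizableOpInterface external models have already been
  // registered (see the InitAllDialects.h change above).
  static mlir::LogicalResult bufferizeWithFilter(mlir::Operation *op) {
    mlir::bufferization::AnalysisBufferizationOptions options;
    options.allowReturnMemref = true;
    options.allowUnknownOps = true;
    // Mirror -one-shot-bufferize="dialect-filter=tensor ..." from the updated
    // RUN lines: only bufferize ops from the tensor dialect.
    options.hasFilter = true;
    options.allowDialectInFilter("tensor");
    // Options are now passed by const reference; no std::unique_ptr needed.
    return mlir::bufferization::runOneShotBufferize(op, options);
  }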