diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -50,6 +50,11 @@
 /// Placeholder for now, this is NYI.
 std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToAffineLoopsPass();
 
+/// Create a pass to convert Linalg operations which work on tensors to use
+/// buffers instead.
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertLinalgOnTensorsToBuffersPass();
+
 /// Patterns for fusing linalg operation on tensors.
 void populateLinalgTensorOpsFusionPatterns(MLIRContext *context,
                                            OwningRewritePatternList &patterns);
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -32,6 +32,12 @@
   let constructor = "mlir::createConvertLinalgToLoopsPass()";
 }
 
+def LinalgOnTensorsToBuffers : Pass<"convert-linalg-on-tensors-to-buffers", "ModuleOp"> {
+  let summary = "Convert the Linalg operations which work on tensor-type "
+                "operands or results to use buffers instead";
+  let constructor = "mlir::createConvertLinalgOnTensorsToBuffersPass()";
+}
+
 def LinalgLowerToParallelLoops
     : FunctionPass<"convert-linalg-to-parallel-loops"> {
   let summary = "Lower the operations from the linalg dialect into parallel "
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -3,6 +3,7 @@
   Interchange.cpp
   Loops.cpp
   Promotion.cpp
+  TensorsToBuffers.cpp
   Tiling.cpp
   Transforms.cpp
   Vectorization.cpp
diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
new file
--- /dev/null
+++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
@@ -0,0 +1,189 @@
+//===- TensorsToBuffers.cpp - Transformation from tensors to buffers ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the conversion from tensors to buffers on Linalg
+// operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/IR/Function.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/BufferPlacement.h"
+
+using namespace mlir;
+using ReturnOpConverter =
+    NonVoidToVoidReturnOpConverter<mlir::ReturnOp, mlir::ReturnOp,
+                                   linalg::CopyOp>;
+
+namespace {
+/// A pattern to convert Generic Linalg operations which work on tensors to
+/// use buffers. A buffer is allocated using BufferAssignmentPlacer for each
+/// operation result. The BufferPlacement pass should later be used to move
+/// the Alloc operations to the correct positions and to insert the missing
+/// Dealloc operations in the correct places.
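+///
+/// Illustrative sketch (mirroring the cases in tensors-to-buffers.mlir; the
+/// attribute lists are abbreviated with "..." and the exact IR produced
+/// before buffer placement may differ slightly):
+///
+///   %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, ...} %arg0 {
+///   ^bb0(%in: f32):
+///     %e = exp %in : f32
+///     linalg.yield %e : f32
+///   }: tensor<4xf32> -> tensor<4xf32>
+///
+/// is rewritten into a variant that operates on buffers:
+///
+///   %alloc = alloc() : memref<4xf32>
+///   linalg.generic {args_in = 1 : i64, args_out = 1 : i64, ...} %arg0, %alloc {
+///   ^bb0(%in: f32, %out: f32):
+///     %e = exp %in : f32
+///     linalg.yield %e : f32
+///   }: memref<4xf32>, memref<4xf32>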
+class GenericOpConverter
+    : public BufferAssignmentOpConversionPattern<linalg::GenericOp> {
+public:
+  using BufferAssignmentOpConversionPattern<
+      linalg::GenericOp>::BufferAssignmentOpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(linalg::GenericOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    Location loc = op.getLoc();
+    ResultRange results = op.getOperation()->getResults();
+    SmallVector<Value, 2> newArgs, newResults;
+    newArgs.reserve(operands.size() + results.size());
+    newArgs.append(operands.begin(), operands.end());
+    newResults.reserve(results.size());
+
+    // Update all types to memref types.
+    for (auto result : results) {
+      auto type = result.getType().cast<ShapedType>();
+      assert(type && "tensor to buffer conversion expects ranked results");
+      if (!type.hasStaticShape())
+        return rewriter.notifyMatchFailure(
+            op, "dynamic shapes not currently supported");
+      auto memrefType = MemRefType::get(type.getShape(), type.getElementType());
+
+      // Compute alloc position and insert a custom allocation node.
+      OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.restoreInsertionPoint(
+          bufferAssignment->computeAllocPosition(result));
+      auto alloc = rewriter.create<AllocOp>(loc, memrefType);
+      newArgs.push_back(alloc);
+      newResults.push_back(alloc);
+    }
+
+    // Generate a new linalg operation that works on buffers.
+    auto linalgOp = rewriter.create<linalg::GenericOp>(
+        loc, llvm::None, newArgs, rewriter.getI64IntegerAttr(operands.size()),
+        rewriter.getI64IntegerAttr(results.size()), op.indexing_maps(),
+        op.iterator_types(), op.docAttr(), op.library_callAttr());
+
+    // Create a new block in the region of the new Generic Op.
+    Block &oldBlock = op.getRegion().front();
+    Region &newRegion = linalgOp.region();
+    Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(),
+                                           oldBlock.getArgumentTypes());
+
+    // Add the result arguments to the new block.
+    for (auto result : newResults)
+      newBlock->addArgument(
+          result.getType().cast<ShapedType>().getElementType());
+
+    // Clone the body of the old block to the new block.
+    BlockAndValueMapping mapping;
+    for (unsigned i = 0; i < oldBlock.getNumArguments(); i++)
+      mapping.map(oldBlock.getArgument(i), newBlock->getArgument(i));
+    rewriter.setInsertionPointToEnd(newBlock);
+    for (auto &op : oldBlock.getOperations()) {
+      Operation *clonedOp = rewriter.clone(op, mapping);
+      mapping.map(op.getResults(), clonedOp->getResults());
+    }
+
+    // Replace the results of the old Generic Op with the results of the new
+    // one.
+    rewriter.replaceOp(op, newResults);
+    return success();
+  }
+};
+
+/// Populate the given list with patterns to convert Linalg operations on
+/// tensors to buffers.
+static void populateConvertLinalgOnTensorsToBuffersPattern(
+    MLIRContext *context, BufferAssignmentPlacer *placer,
+    TypeConverter *converter, OwningRewritePatternList *patterns) {
+  // clang-format off
+  patterns->insert<FunctionAndBlockSignatureConverter,
+                   GenericOpConverter,
+                   ReturnOpConverter>(context, placer, converter);
+  // clang-format on
+}
+
+/// Converts Linalg operations that work on tensor-type operands or results to
+/// work on buffers.
+struct ConvertLinalgOnTensorsToBuffers
+    : public LinalgOnTensorsToBuffersBase<ConvertLinalgOnTensorsToBuffers> {
+  void runOnOperation() override {
+    MLIRContext &context = getContext();
+    ConversionTarget target(context);
+    BufferAssignmentTypeConverter converter;
+
+    // Mark all Standard operations legal.
+    target.addLegalDialect<StandardOpsDialect>();
+
+    // Mark all Linalg operations illegal as long as they work on tensors.
+    auto isIllegalType = [&](Type type) { return !converter.isLegal(type); };
+    auto isLegalOperation = [&](Operation *op) {
+      return llvm::none_of(op->getOperandTypes(), isIllegalType) &&
+             llvm::none_of(op->getResultTypes(), isIllegalType);
+    };
+    target.addDynamicallyLegalDialect<linalg::LinalgDialect>(
+        Optional<ConversionTarget::DynamicLegalityCallbackFn>(
+            isLegalOperation));
+
+    // TODO: Considering the following dynamic legality checks, the current
+    // implementation of FunctionAndBlockSignatureConverter of Buffer
+    // Assignment will convert the function signature incorrectly. This
+    // converter moves all the return values of the function to the input
+    // argument list without considering the return value types and creates
+    // a void function. However, the NonVoidToVoidReturnOpConverter doesn't
+    // change the return operation if its operands are not tensors. The
+    // following example leaves the IR in a broken state.
+    //
+    // func @function(%arg0: f32, %arg1: tensor<4xf32>) -> (f32, f32) {
+    //   %0 = mulf %arg0, %arg0 : f32
+    //   return %0, %0 : f32, f32
+    // }
+    //
+    // broken IR after conversion:
+    //
+    // func @function(%arg0: f32, %arg1: memref<4xf32>, f32, f32) {
+    //   %0 = mulf %arg0, %arg0 : f32
+    //   return %0, %0 : f32, f32
+    // }
+    //
+    // This issue must be fixed in FunctionAndBlockSignatureConverter and
+    // NonVoidToVoidReturnOpConverter.
+
+    // Mark Standard Return operations illegal as long as one operand is
+    // a tensor.
+    target.addDynamicallyLegalOp<mlir::ReturnOp>([&](mlir::ReturnOp returnOp) {
+      return llvm::none_of(returnOp.getOperandTypes(), isIllegalType);
+    });
+
+    // Mark the function operation illegal as long as an argument is a tensor.
+    target.addDynamicallyLegalOp<FuncOp>([&](FuncOp funcOp) {
+      return converter.isSignatureLegal(funcOp.getType()) &&
+             llvm::none_of(funcOp.getType().getResults(),
+                           [&](Type type) { return type.isa<MemRefType>(); });
+    });
+
+    // Walk over all the functions to apply buffer assignment.
+    getOperation().walk([&](FuncOp function) {
+      OwningRewritePatternList patterns;
+      BufferAssignmentPlacer placer(function);
+      populateConvertLinalgOnTensorsToBuffersPattern(&context, &placer,
+                                                     &converter, &patterns);
+
+      // Apply the full conversion.
+      return WalkResult(
+          applyFullConversion(function, target, patterns, &converter));
+    });
+  }
+};
+} // end anonymous namespace
+
+std::unique_ptr<OperationPass<ModuleOp>>
+mlir::createConvertLinalgOnTensorsToBuffersPass() {
+  return std::make_unique<ConvertLinalgOnTensorsToBuffers>();
+}
diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
new file
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
@@ -0,0 +1,75 @@
+// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @multiple_results_generic_op
+func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
+  %0, %1 = linalg.generic {args_in = 1 : i64, args_out = 2 : i64, indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} %arg0 {
+    ^bb0(%gen_arg1: f32):
+      %tmp1 = exp %gen_arg1 : f32
+      linalg.yield %tmp1, %tmp1 : f32, f32
+  }: tensor<4xf32> -> (tensor<4xf32>, tensor<4xf32>)
+  return %0, %1 : tensor<4xf32>, tensor<4xf32>
+}
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]], %[[ARG2_RESULT:.*]]: [[TYPE]])
+// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK: linalg.generic
+// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
+// CHECK-NEXT: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32
+// CHECK-NEXT: %{{.*}} = exp
+// CHECK-NEXT: linalg.yield
+// CHECK-NEXT: [[TYPE]], [[TYPE]], [[TYPE]]
+// CHECK: linalg.copy(%[[FIRST_ALLOC]], %[[ARG1_RESULT]])
+// CHECK: dealloc %[[FIRST_ALLOC]]
+// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG2_RESULT]])
+// CHECK: dealloc %[[SECOND_ALLOC]]
+// CHECK: return
+
+// -----
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @chained_operations
+func @chained_operations(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+  %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 {
+    ^bb0(%gen_arg1: f32):
+      %tmp1 = exp %gen_arg1 : f32
+      linalg.yield %tmp1 : f32
+  }: tensor<4xf32> -> tensor<4xf32>
+  %1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0 {
+    ^bb0(%gen_arg2: f32):
+      %tmp2 = exp %gen_arg2 : f32
+      linalg.yield %tmp2 : f32
+  }: tensor<4xf32> -> tensor<4xf32>
+  return %1 : tensor<4xf32>
+}
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]])
+// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK: linalg.generic
+// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]]
+// CHECK: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
+// CHECK: [[TYPE]], [[TYPE]]
+// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK: linalg.generic
+// CHECK-SAME: %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
+// CHECK: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
+// CHECK: [[TYPE]], [[TYPE]]
+// CHECK: dealloc %[[FIRST_ALLOC]]
+// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG1_RESULT]])
+// CHECK: dealloc %[[SECOND_ALLOC]]
+// CHECK: return
+
+// -----
+
+// CHECK-LABEL: func @no_linalg_op
+func @no_linalg_op(%arg0: f32) -> (f32, f32) {
+  %0 = mulf %arg0, %arg0 : f32
+  return %0, %0 : f32, f32
+}
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]])
+// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]]
+// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]]
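
As the RUN line above suggests, the new conversion is meant to be followed by
the -buffer-placement pass, which moves the allocations to their correct
positions and inserts the missing deallocations. A minimal invocation (using a
hypothetical input file input.mlir that contains linalg-on-tensors operations)
would be:

  mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement input.mlir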