diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -17,12 +17,14 @@
 #include "llvm/ADT/ArrayRef.h"
 
 namespace mlir {
+class BufferAssignmentPlacer;
 class FuncOp;
 class MLIRContext;
 class ModuleOp;
 template <typename T> class OperationPass;
 class OwningRewritePatternList;
 class Pass;
+class TypeConverter;
 
 std::unique_ptr<OperationPass<FuncOp>> createLinalgFusionPass();
 std::unique_ptr<Pass> createLinalgFusionOfTensorOpsPass();
@@ -50,6 +52,11 @@
 /// Placeholder for now, this is NYI.
 std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToAffineLoopsPass();
 
+/// Create a pass to convert Linalg operations which work on tensors to use
+/// buffers instead.
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertLinalgOnTensorsToBuffersPass();
+
 /// Patterns for fusing linalg operation on tensors.
 void populateLinalgTensorOpsFusionPatterns(MLIRContext *context,
                                            OwningRewritePatternList &patterns);
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -32,6 +32,12 @@
   let constructor = "mlir::createConvertLinalgToLoopsPass()";
 }
 
+def LinalgOnTensorsToBuffers : Pass<"convert-linalg-on-tensors-to-buffers", "ModuleOp"> {
+  let summary = "Convert the Linalg operations which work on tensor-type "
+                "operands or results to use buffers instead";
+  let constructor = "mlir::createConvertLinalgOnTensorsToBuffersPass()";
+}
+
 def LinalgLowerToParallelLoops
     : FunctionPass<"convert-linalg-to-parallel-loops"> {
   let summary = "Lower the operations from the linalg dialect into parallel "
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -3,6 +3,7 @@
   LinalgTransforms.cpp
   LinalgToLoops.cpp
   Promotion.cpp
+  TensorsToBuffers.cpp
   Tiling.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
@@ -0,0 +1,148 @@
+//===- TensorsToBuffers.cpp - Transformation from tensors to buffers -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the conversion from tensors to buffers on Linalg
+// operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/IR/Function.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/BufferPlacement.h"
+
+using namespace mlir;
+
+namespace {
+/// A pattern to convert Generic Linalg operations which work on tensors to
+/// use buffers. A buffer is allocated using BufferAssignmentPlacer for each
+/// operation result. The BufferPlacement pass should be run afterwards to
+/// move the Alloc operations to the correct positions and to insert the
+/// missing Dealloc operations in the correct places.
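+///
+/// As a sketch (the exact IR also depends on the subsequent buffer
+/// placement), a generic operation such as
+///
+///   %res = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, ...}
+///     %arg0 { ... }: tensor<4xf32> -> tensor<4xf32>
+///
+/// is rewritten so that the result is produced into an allocated buffer that
+/// is appended to the operand list:
+///
+///   %buf = alloc() : memref<4xf32>
+///   linalg.generic {args_in = 1 : i64, args_out = 1 : i64, ...}
+///     %arg0, %buf { ... }: memref<4xf32>, memref<4xf32>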
+class GenericOpConverter
+    : public BufferAssignmentOpConversionPattern<linalg::GenericOp> {
+public:
+  using BufferAssignmentOpConversionPattern<
+      linalg::GenericOp>::BufferAssignmentOpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(linalg::GenericOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op.getLoc();
+    SmallVector<Value, 2> args(operands.begin(), operands.end());
+
+    // Update all types to memref types.
+    auto results = op.getOperation()->getResults();
+    SmallVector<Value, 2> newBuffers;
+    for (auto result : results) {
+      auto type = result.getType().dyn_cast<RankedTensorType>();
+      if (!type) {
+        op.emitOpError()
+            << "tensor to buffer conversion expects ranked results";
+        return failure();
+      }
+      if (!type.hasStaticShape())
+        return rewriter.notifyMatchFailure(
+            op, "dynamic shapes not currently supported");
+      auto memrefType =
+          MemRefType::get(type.getShape(), type.getElementType());
+
+      // Compute the alloc position and insert a custom allocation node.
+      OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.restoreInsertionPoint(
+          bufferAssignment->computeAllocPosition(result));
+      auto alloc = rewriter.create<AllocOp>(loc, memrefType);
+      args.push_back(alloc);
+      newBuffers.push_back(alloc);
+    }
+
+    // Generate a new linalg operation that works on buffers.
+    auto linalgOp = rewriter.create<linalg::GenericOp>(
+        loc, llvm::None, args, rewriter.getI64IntegerAttr(operands.size()),
+        rewriter.getI64IntegerAttr(results.size()), op.indexing_maps(),
+        op.iterator_types(), op.docAttr(), op.library_callAttr());
+
+    // Move the region from the old operation to the new one.
+    auto &region = linalgOp.region();
+    rewriter.inlineRegionBefore(op.region(), region, region.end());
+
+    // TODO: verify the internal memref-based linalg functionality.
+    // Append a block argument of element type for each new output buffer.
+    auto &entryBlock = region.front();
+    for (auto result : results) {
+      auto type = result.getType().cast<RankedTensorType>();
+      entryBlock.addArgument(type.getElementType());
+    }
+
+    rewriter.replaceOp(op, newBuffers);
+    return success();
+  }
+};
+
+/// Populate the given list with patterns to convert Linalg operations on
+/// tensors to buffers.
+void populateConvertLinalgOnTensorsToBuffersPattern(
+    MLIRContext *context, BufferAssignmentPlacer *placer,
+    TypeConverter *converter, OwningRewritePatternList *patterns) {
+  // clang-format off
+  patterns->insert<
+      FunctionAndBlockSignatureConverter,
+      GenericOpConverter,
+      NonVoidToVoidReturnOpConverter<
+          mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>
+  >(context, placer, converter);
+  // clang-format on
+}
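+
+// How the three patterns above fit together:
+// FunctionAndBlockSignatureConverter rewrites function signatures so that
+// tensor arguments and results become memref arguments (results are appended
+// as output arguments), GenericOpConverter allocates one buffer per
+// linalg.generic result, and NonVoidToVoidReturnOpConverter replaces the
+// value-returning terminator by linalg.copy operations into the appended
+// output arguments followed by a void return.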
+
+/// Converts Linalg operations that work on tensor-type operands or results to
+/// work on buffers.
+struct ConvertLinalgOnTensorsToBuffers
+    : mlir::PassWrapper<ConvertLinalgOnTensorsToBuffers,
+                        OperationPass<ModuleOp>> {
+  void runOnOperation() override {
+    auto &context = getContext();
+    ConversionTarget target(context);
+    BufferAssignmentTypeConverter converter;
+
+    // Make all linalg operations illegal as long as they work on tensors.
+    target.addLegalDialect<StandardOpsDialect>();
+    target.addDynamicallyLegalDialect<linalg::LinalgDialect>(
+        Optional<ConversionTarget::DynamicLegalityCallbackFn>(
+            [&](Operation *op) {
+              auto isIllegalType = [&](Type type) {
+                return !converter.isLegal(type);
+              };
+              return llvm::none_of(op->getOperandTypes(), isIllegalType) &&
+                     llvm::none_of(op->getResultTypes(), isIllegalType);
+            }));
+
+    // Mark return operations illegal as long as they return values.
+    target.addDynamicallyLegalOp<mlir::ReturnOp>(
+        [](mlir::ReturnOp returnOp) { return returnOp.getNumOperands() == 0; });
+
+    // Mark function operations illegal as long as their signatures contain a
+    // tensor type.
+    target.addDynamicallyLegalOp<FuncOp>([&](FuncOp funcOp) {
+      return converter.isSignatureLegal(funcOp.getType());
+    });
+
+    // Walk over all the functions to apply buffer assignment.
+    getOperation().walk([&](FuncOp function) {
+      OwningRewritePatternList patterns;
+      BufferAssignmentPlacer placer(function);
+      populateConvertLinalgOnTensorsToBuffersPattern(&context, &placer,
+                                                     &converter, &patterns);
+
+      // Apply the full conversion.
+      return WalkResult(
+          applyFullConversion(function, target, patterns, &converter));
+    });
+  }
+};
+} // end anonymous namespace
+
+std::unique_ptr<OperationPass<ModuleOp>>
+mlir::createConvertLinalgOnTensorsToBuffersPass() {
+  return std::make_unique<ConvertLinalgOnTensorsToBuffers>();
+}
\ No newline at end of file
diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
@@ -0,0 +1,62 @@
+// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @multiple_results_generic_op
+func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
+  %0, %1 = linalg.generic {args_in = 1 : i64, args_out = 2 : i64, indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} %arg0 {
+  ^bb0(%gen_arg1: f32):
+    %tmp1 = exp %gen_arg1 : f32
+    linalg.yield %tmp1, %tmp1 : f32, f32
+  }: tensor<4xf32> -> (tensor<4xf32>, tensor<4xf32>)
+  return %0, %1 : tensor<4xf32>, tensor<4xf32>
+}
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]], %[[ARG2_RESULT:.*]]: [[TYPE]])
+// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK-NEXT: linalg.generic
+// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
+// CHECK-NEXT: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32
+// CHECK: [[TYPE]], [[TYPE]], [[TYPE]]
+// CHECK-NEXT: linalg.copy(%[[FIRST_ALLOC]], %[[ARG1_RESULT]])
+// CHECK-NEXT: dealloc %[[FIRST_ALLOC]]
+// CHECK-NEXT: linalg.copy(%[[SECOND_ALLOC]], %[[ARG2_RESULT]])
+// CHECK-NEXT: dealloc %[[SECOND_ALLOC]]
+// CHECK-NEXT: return
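+
+// Note: the linalg.copy operations that write into the appended result
+// arguments are created by NonVoidToVoidReturnOpConverter during this pass,
+// while the dealloc operations (and the final alloc positions) come from the
+// subsequent -buffer-placement run.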
+
+// -----
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @chained_operations
+func @chained_operations(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+  %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 {
+  ^bb0(%gen_arg1: f32):
+    %tmp1 = exp %gen_arg1 : f32
+    linalg.yield %tmp1 : f32
+  }: tensor<4xf32> -> tensor<4xf32>
+  %1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0 {
+  ^bb0(%gen_arg2: f32):
+    %tmp2 = exp %gen_arg2 : f32
+    linalg.yield %tmp2 : f32
+  }: tensor<4xf32> -> tensor<4xf32>
+  return %1 : tensor<4xf32>
+}
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]])
+// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK-NEXT: linalg.generic
+// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]]
+// CHECK-NEXT: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
+// CHECK: [[TYPE]], [[TYPE]]
+// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK-NEXT: linalg.generic
+// CHECK-SAME: %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
+// CHECK-NEXT: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
+// CHECK: [[TYPE]], [[TYPE]]
+// CHECK-NEXT: dealloc %[[FIRST_ALLOC]]
+// CHECK-NEXT: linalg.copy(%[[SECOND_ALLOC]], %[[ARG1_RESULT]])
+// CHECK-NEXT: dealloc %[[SECOND_ALLOC]]
+// CHECK-NEXT: return
\ No newline at end of file