diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -57,6 +57,9 @@
 std::unique_ptr<OperationPass<ModuleOp>>
 createConvertLinalgOnTensorsToBuffersPass();
 
+/// Create a pass that removes unnecessary linalg.copy operations.
+std::unique_ptr<Pass> createLinalgCopyRemovalPass();
+
 /// Patterns for fusing linalg operation on tensors.
 void populateLinalgTensorOpsFusionPatterns(MLIRContext *context,
                                            OwningRewritePatternList &patterns);
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -11,6 +11,11 @@
 include "mlir/Pass/PassBase.td"
 
+def LinalgCopyRemoval : FunctionPass<"linalg-copy-removal"> {
+  let summary = "Remove redundant linalg.copy operations from the input IR";
+  let constructor = "mlir::createLinalgCopyRemovalPass()";
+}
+
 def LinalgFoldUnitExtentDims : FunctionPass<"linalg-fold-unit-extent-dims"> {
   let summary = "Remove unit-extent dimension in Linalg ops on tensors";
   let constructor = "mlir::createLinalgFoldUnitExtentDimsPass()";
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_dialect_library(MLIRLinalgTransforms
+  CopyRemoval.cpp
   DropUnitDims.cpp
   Fusion.cpp
   Hoisting.cpp
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CopyRemoval.cpp b/mlir/lib/Dialect/Linalg/Transforms/CopyRemoval.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Linalg/Transforms/CopyRemoval.cpp
@@ -0,0 +1,190 @@
+//===- CopyRemoval.cpp - Removing the redundant Linalg copies ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+using namespace MemoryEffects;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// CopyRemovalPass
+//===----------------------------------------------------------------------===//
+/// This pass removes redundant linalg.copy operations. When a copy is erased,
+/// the allocation and deallocation operations that become dead as a result are
+/// removed as well.
+class CopyRemovalPass
+    : public PassWrapper<CopyRemovalPass, OperationPass<>> {
+private:
+  /// List of operations that need to be removed.
+  DenseSet<Operation *> eraseList;
+
+  /// Returns the deallocation operation for `value` in `block` if it exists.
+  Operation *getDeallocationInBlock(Value value, Block *block) {
+    auto valueUsers = value.getUsers();
+    auto it = llvm::find_if(valueUsers, [&](Operation *op) {
+      auto effects = dyn_cast<MemoryEffectOpInterface>(op);
+      return effects && op->getBlock() == block && effects.hasEffect<Free>();
+    });
+    return (it == valueUsers.end() ? nullptr : *it);
+  }
+
+  /// Returns true if any operation between `start` and `end` (both excluded)
+  /// has a memory effect.
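+  /// For example, when called below with the copy as `start` and the dealloc
+  /// of the source buffer as `end`, a store lying between the two implements
+  /// MemoryEffectOpInterface and makes this return true, so the copy is
+  /// conservatively kept.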
+  bool hasMemoryEffectOpBetween(Operation *start, Operation *end) {
+    assert(start->getBlock() == end->getBlock() &&
+           "Start and end operations should be in the same block.");
+    Operation *op = start->getNextNode();
+    while (op->isBeforeInBlock(end)) {
+      if (isa<MemoryEffectOpInterface>(op))
+        return true;
+      op = op->getNextNode();
+    }
+    return false;
+  }
+
+  /// Returns true if `val` has at least one user between the `start` and
+  /// `end` operations.
+  bool hasUsersBetween(Value val, Operation *start, Operation *end) {
+    Block *block = start->getBlock();
+    assert(block == end->getBlock() &&
+           "Start and end operations should be in the same block.");
+    return llvm::any_of(val.getUsers(), [&](Operation *op) {
+      return op->getBlock() == block && start->isBeforeInBlock(op) &&
+             op->isBeforeInBlock(end);
+    });
+  }
+
+  bool areOpsInTheSameBlock(ArrayRef<Operation *> operations) {
+    llvm::SmallDenseSet<Block *, 2> blocks;
+    for (Operation *op : operations)
+      blocks.insert(op->getBlock());
+    return blocks.size() == 1;
+  }
+
+  /// Input:
+  /// func(){
+  ///   %from = alloc()
+  ///   write_to(%from)
+  ///   %to = alloc()
+  ///   copy(%from,%to)
+  ///   dealloc(%from)
+  ///   return %to
+  /// }
+  ///
+  /// Output:
+  /// func(){
+  ///   %from = alloc()
+  ///   write_to(%from)
+  ///   return %from
+  /// }
+  /// Constraints:
+  /// 1) %to, the copy and the dealloc must all be defined and lie in the same
+  /// block.
+  /// 2) This transformation cannot be applied if there is any user/alias of
+  /// `to` between the defining operation of `to` and the copy operation.
+  /// 3) This transformation cannot be applied if there is any user/alias of
+  /// `from` between the copy operation and the deallocation of `from`.
+  /// TODO: Alias analysis is not available at the moment. For now, we
+  /// conservatively check whether there is any operation with memory effects
+  /// between the copy and the deallocation.
+  void case1(linalg::CopyOp copyOp) {
+    if (eraseList.count(copyOp))
+      return;
+
+    Value from = copyOp.input();
+    Value to = copyOp.output();
+
+    Operation *copy = copyOp.getOperation();
+    Operation *fromDefiningOp = from.getDefiningOp();
+    Operation *fromFreeingOp = getDeallocationInBlock(from, copy->getBlock());
+    Operation *toDefiningOp = to.getDefiningOp();
+    if (!fromDefiningOp || !fromFreeingOp || !toDefiningOp ||
+        !areOpsInTheSameBlock({fromFreeingOp, toDefiningOp, copy}) ||
+        hasUsersBetween(to, toDefiningOp, copy) ||
+        hasUsersBetween(from, copy, fromFreeingOp) ||
+        hasMemoryEffectOpBetween(copy, fromFreeingOp))
+      return;
+
+    to.replaceAllUsesWith(from);
+    eraseList.insert(copy);
+    eraseList.insert(toDefiningOp);
+    eraseList.insert(fromFreeingOp);
+  }
+
+  /// Input:
+  /// func(){
+  ///   %to = alloc()
+  ///   %from = alloc()
+  ///   write_to(%from)
+  ///   copy(%from,%to)
+  ///   dealloc(%from)
+  ///   return %to
+  /// }
+  ///
+  /// Output:
+  /// func(){
+  ///   %to = alloc()
+  ///   write_to(%to)
+  ///   return %to
+  /// }
+  /// Constraints:
+  /// 1) %from, the copy and the dealloc must all be defined and lie in the
+  /// same block.
+  /// 2) This transformation cannot be applied if there is any user/alias of
+  /// `to` between the defining operation of `from` and the copy operation.
+  /// 3) This transformation cannot be applied if there is any user/alias of
+  /// `from` between the copy operation and the deallocation of `from`.
+  /// TODO: Alias analysis is not available at the moment. For now, we
+  /// conservatively check whether there is any operation with memory effects
+  /// between the copy and the deallocation.
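+  /// In contrast to case1 above, the copy target `to` is allocated before
+  /// `from` here, so all uses of `from` (including the write that produces its
+  /// contents) are redirected to `to` rather than the other way around.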
+  void case2(linalg::CopyOp copyOp) {
+    if (eraseList.count(copyOp))
+      return;
+
+    Value from = copyOp.input();
+    Value to = copyOp.output();
+
+    Operation *copy = copyOp.getOperation();
+    Operation *fromDefiningOp = from.getDefiningOp();
+    Operation *fromFreeingOp = getDeallocationInBlock(from, copy->getBlock());
+    if (!fromDefiningOp || !fromFreeingOp ||
+        !areOpsInTheSameBlock({fromFreeingOp, fromDefiningOp, copy}) ||
+        hasUsersBetween(to, fromDefiningOp, copy) ||
+        hasUsersBetween(from, copy, fromFreeingOp) ||
+        hasMemoryEffectOpBetween(copy, fromFreeingOp))
+      return;
+
+    from.replaceAllUsesWith(to);
+    eraseList.insert(copy);
+    eraseList.insert(fromDefiningOp);
+    eraseList.insert(fromFreeingOp);
+  }
+
+public:
+  void runOnOperation() override {
+    getOperation()->walk([&](linalg::CopyOp copyOp) {
+      case1(copyOp);
+      case2(copyOp);
+    });
+    for (Operation *op : eraseList)
+      op->erase();
+  }
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// CopyRemovalPass construction
+//===----------------------------------------------------------------------===//
+std::unique_ptr<Pass> mlir::createLinalgCopyRemovalPass() {
+  return std::make_unique<CopyRemovalPass>();
+}
diff --git a/mlir/test/Dialect/Linalg/copy-removal.mlir b/mlir/test/Dialect/Linalg/copy-removal.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/copy-removal.mlir
@@ -0,0 +1,278 @@
+// RUN: mlir-opt -linalg-copy-removal -split-input-file %s | FileCheck %s
+
+// All linalg copies except linalg.copy(%1, %9) must be removed, since the
+// defining operation of %1 and its DeallocOp live in another block.
+
+// CHECK-LABEL: func @nested_region_control_flow_div_nested
+func @nested_region_control_flow_div_nested(%arg0: index, %arg1: index) -> memref<?x?xf32> {
+  %0 = cmpi "eq", %arg0, %arg1 : index
+  %1 = alloc(%arg0, %arg0) : memref<?x?xf32>
+  // CHECK: %{{.*}} = scf.if
+  %2 = scf.if %0 -> (memref<?x?xf32>) {
+    // CHECK: %[[PERCENT3:.*]] = scf.if
+    %3 = scf.if %0 -> (memref<?x?xf32>) {
+      %c0_0 = constant 0 : index
+      %7 = dim %1, %c0_0 : memref<?x?xf32>
+      %c1_1 = constant 1 : index
+      %8 = dim %1, %c1_1 : memref<?x?xf32>
+      %9 = alloc(%7, %8) : memref<?x?xf32>
+      // CHECK: linalg.copy({{.*}}, %[[PERCENT9:.*]])
+      linalg.copy(%1, %9) : memref<?x?xf32>, memref<?x?xf32>
+      // CHECK: scf.yield %[[PERCENT9]]
+      scf.yield %9 : memref<?x?xf32>
+    } else {
+      // CHECK: %[[PERCENT7:.*]] = alloc
+      %7 = alloc(%arg0, %arg1) : memref<?x?xf32>
+      %c0_0 = constant 0 : index
+      %8 = dim %7, %c0_0 : memref<?x?xf32>
+      %c1_1 = constant 1 : index
+      %9 = dim %7, %c1_1 : memref<?x?xf32>
+      // CHECK-NOT: %{{.*}} = alloc
+      // CHECK-NOT: linalg.copy(%[[PERCENT7]], %{{.*}})
+      // CHECK-NOT: dealloc %[[PERCENT7]]
+      %10 = alloc(%8, %9) : memref<?x?xf32>
+      linalg.copy(%7, %10) : memref<?x?xf32>, memref<?x?xf32>
+      dealloc %7 : memref<?x?xf32>
+      // CHECK: scf.yield %[[PERCENT7]]
+      scf.yield %10 : memref<?x?xf32>
+    }
+    %c0 = constant 0 : index
+    %4 = dim %3, %c0 : memref<?x?xf32>
+    %c1 = constant 1 : index
+    %5 = dim %3, %c1 : memref<?x?xf32>
+    // CHECK-NOT: %{{.*}} = alloc
+    // CHECK-NOT: linalg.copy(%[[PERCENT3]], %{{.*}})
+    // CHECK-NOT: dealloc %[[PERCENT3]]
+    %6 = alloc(%4, %5) : memref<?x?xf32>
+    linalg.copy(%3, %6) : memref<?x?xf32>, memref<?x?xf32>
+    dealloc %3 : memref<?x?xf32>
+    // CHECK: scf.yield %[[PERCENT3]]
+    scf.yield %6 : memref<?x?xf32>
+  } else {
+    // CHECK: %[[PERCENT3:.*]] = alloc
+    %3 = alloc(%arg1, %arg1) : memref<?x?xf32>
+    %c0 = constant 0 : index
+    %4 = dim %3, %c0 : memref<?x?xf32>
+    %c1 = constant 1 : index
+    %5 = dim %3, %c1 : memref<?x?xf32>
+    // CHECK-NOT: %{{.*}} = alloc
+    // CHECK-NOT: linalg.copy(%[[PERCENT3]], %{{.*}})
+    // CHECK-NOT: dealloc %[[PERCENT3]]
+    %6 = alloc(%4, %5) : memref<?x?xf32>
+    linalg.copy(%3, %6) : memref<?x?xf32>, memref<?x?xf32>
+    dealloc %3 : memref<?x?xf32>
+    // CHECK: scf.yield %[[PERCENT3]]
+    scf.yield %6 : memref<?x?xf32>
+  }
+  dealloc %1 : memref<?x?xf32>
+  return %2 : memref<?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @simple_test
+func @simple_test() -> memref<5xf32> {
+  %temp = alloc() : memref<5xf32>
+  %ret = alloc() : memref<5xf32>
+  linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+  dealloc %ret : memref<5xf32>
+  return %temp : memref<5xf32>
+}
+// CHECK: () -> memref<5xf32>
+// CHECK: %[[ret:.*]] = alloc()
+// CHECK: return %[[ret]]
+
+// -----
+
+// It is legal to remove the copy operation when %ret has a usage before the
+// copy operation. The allocation of %temp and the deallocation of %ret should
+// also be removed.
+
+// CHECK-LABEL: func @test_with_ret_usage_before_copy
+func @test_with_ret_usage_before_copy() -> memref<5xf32> {
+  %ret = alloc() : memref<5xf32>
+  %temp = alloc() : memref<5xf32>
+  %c0 = constant 0 : index
+  %dimension = dim %ret, %c0 : memref<5xf32>
+  linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+  dealloc %ret : memref<5xf32>
+  return %temp : memref<5xf32>
+}
+// CHECK: %[[ret:.*]] = alloc()
+// CHECK: %{{.*}} = constant
+// CHECK: %[[DIM:.*]] = dim %[[ret]]
+// CHECK: return %[[ret]]
+
+// -----
+
+// It is illegal to remove a copy operation when %ret has a usage after the
+// copy operation.
+
+// CHECK-LABEL: func @test_with_ret_usage_after_copy
+func @test_with_ret_usage_after_copy() -> memref<5xf32> {
+  %ret = alloc() : memref<5xf32>
+  %temp = alloc() : memref<5xf32>
+  // CHECK: linalg.copy
+  linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+  %c0 = constant 0 : index
+  %dimension = dim %ret, %c0 : memref<5xf32>
+  dealloc %ret : memref<5xf32>
+  return %temp : memref<5xf32>
+}
+
+// -----
+
+// It is illegal to remove a copy operation when %temp has a usage before the
+// copy operation.
+
+// CHECK-LABEL: func @test_with_temp_usage_before_copy
+func @test_with_temp_usage_before_copy() -> memref<5xf32> {
+  %ret = alloc() : memref<5xf32>
+  %temp = alloc() : memref<5xf32>
+  %c0 = constant 0 : index
+  %dimension = dim %temp, %c0 : memref<5xf32>
+  // CHECK: linalg.copy
+  linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+  dealloc %ret : memref<5xf32>
+  return %temp : memref<5xf32>
+}
+
+// -----
+
+// It is legal to remove the copy operation when %temp has a usage after the
+// copy operation. The allocation of %temp and the deallocation of %ret should
+// also be removed.
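+// After the rewrite, the use of %temp in the linalg.generic below refers to
+// %ret, since all uses of the copy target are redirected to the copy source.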
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @test_with_temp_usage_after_copy
+func @test_with_temp_usage_after_copy() -> memref<5xf32> {
+  %ret = alloc() : memref<5xf32>
+  %temp = alloc() : memref<5xf32>
+  %res = alloc() : memref<5xf32>
+  linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %temp, %res {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<5xf32>, memref<5xf32>
+  dealloc %ret : memref<5xf32>
+  return %temp : memref<5xf32>
+}
+// CHECK: %[[ret:.*]] = alloc()
+// CHECK: %[[res:.*]] = alloc()
+// CHECK-NOT: %{{.*}} = alloc()
+// CHECK-NOT: linalg.copy
+// CHECK-NOT: dealloc %[[ret]]
+// CHECK: return %[[ret]]
+
+// -----
+
+// CHECK-LABEL: func @make_allocation
+func @make_allocation() -> memref<5xf32> {
+  %mem = alloc() : memref<5xf32>
+  return %mem : memref<5xf32>
+}
+
+// CHECK-LABEL: func @test_with_function_call
+func @test_with_function_call() -> memref<5xf32> {
+  // CHECK: %[[ret:.*]] = call @make_allocation() : () -> memref<5xf32>
+  %ret = call @make_allocation() : () -> (memref<5xf32>)
+  // CHECK-NOT: %{{.*}} = alloc
+  // CHECK-NOT: linalg.copy(%[[ret]], %{{.*}})
+  // CHECK-NOT: dealloc %[[ret]]
+  %temp = alloc() : memref<5xf32>
+  linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+  dealloc %ret : memref<5xf32>
+  // CHECK: return %[[ret]]
+  return %temp : memref<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @multiple_deallocs_in_different_blocks
+func @multiple_deallocs_in_different_blocks(%cond : i1) -> memref<5xf32> {
+  // CHECK: %[[PERCENT0:.*]] = alloc()
+  %0 = alloc() : memref<5xf32>
+  cond_br %cond, ^bb1, ^bb2
+^bb1:
+  dealloc %0 : memref<5xf32>
+  // CHECK: br ^[[BB3:.*]](%[[PERCENT0]]
+  br ^bb3(%0 : memref<5xf32>)
+^bb2:
+  // CHECK-NOT: %{{.*}} = alloc
+  // CHECK-NOT: linalg.copy(%[[PERCENT0]], %{{.*}})
+  // CHECK-NOT: dealloc %[[PERCENT0]]
+  %temp = alloc() : memref<5xf32>
+  linalg.copy(%0, %temp) : memref<5xf32>, memref<5xf32>
+  dealloc %0 : memref<5xf32>
+  // CHECK: br ^[[BB3]](%[[PERCENT0]]
+  br ^bb3(%temp : memref<5xf32>)
+^bb3(%res : memref<5xf32>):
+  return %res : memref<5xf32>
+}
+
+// -----
+
+#map0 = affine_map<(d0) -> (d0)>
+
+func @test(%arg0: memref<2xf32>, %result: memref<2xf32>){
+  // CHECK: (%[[ARG0:.*]]: memref<2xf32>, %[[RES:.*]]: memref<2xf32>)
+  // CHECK-NOT: %{{.*}} = alloc
+  %temp = alloc() : memref<2xf32>
+  // CHECK: linalg.generic
+  // CHECK-SAME: %[[ARG0]], %[[RES]]
+  // CHECK-NOT: linalg.copy
+  // CHECK-NOT: dealloc
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %temp {
+  ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+    %tmp2 = exp %gen2_arg0 : f32
+    linalg.yield %tmp2 : f32
+  }: memref<2xf32>, memref<2xf32>
+  "linalg.copy"(%temp, %result) : (memref<2xf32>, memref<2xf32>) -> ()
+  dealloc %temp : memref<2xf32>
+  // CHECK: return
+  return
+}
+
+// -----
+
+// The copy operation must not be removed, since an operation writes to %to
+// before the copy.
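+// Here the second linalg.generic writes to %to before the copy, so %to has a
+// user between the relevant allocation and the copy; this violates the
+// user/alias constraints of both rewrite cases in CopyRemoval.cpp, and the
+// copy is kept.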
+
+#map0 = affine_map<(d0) -> (d0)>
+
+func @test(%arg0: memref<2xf32>){
+  %to = alloc() : memref<2xf32>
+  %temp = alloc() : memref<2xf32>
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %temp {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<2xf32>, memref<2xf32>
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %to {
+  ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+    %tmp2 = exp %gen2_arg0 : f32
+    linalg.yield %tmp2 : f32
+  }: memref<2xf32>, memref<2xf32>
+  // CHECK: linalg.copy
+  "linalg.copy"(%temp, %to) : (memref<2xf32>, memref<2xf32>) -> ()
+  dealloc %temp : memref<2xf32>
+  return
+}
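
A minimal usage sketch (not part of the diff above): scheduling the new pass
from C++, assuming the declaration added to mlir/Dialect/Linalg/Passes.h and
the pass-manager API of this MLIR revision; the helper name
`addLinalgCopyRemoval` is hypothetical.

  #include "mlir/Dialect/Linalg/Passes.h"
  #include "mlir/Pass/PassManager.h"

  // Append the copy-removal pass to an existing pipeline. The pass walks the
  // linalg.copy operations inside whatever operation it is scheduled on.
  void addLinalgCopyRemoval(mlir::PassManager &pm) {
    pm.addPass(mlir::createLinalgCopyRemovalPass());
  }

From the command line, the pass is exercised as in the test above:

  mlir-opt -linalg-copy-removal -split-input-file mlir/test/Dialect/Linalg/copy-removal.mlir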