diff --git a/mlir/examples/toy/Ch5/toyc.cpp b/mlir/examples/toy/Ch5/toyc.cpp --- a/mlir/examples/toy/Ch5/toyc.cpp +++ b/mlir/examples/toy/Ch5/toyc.cpp @@ -15,6 +15,7 @@ #include "toy/Parser.h" #include "toy/Passes.h" +#include "mlir/Dialect/Affine/Passes.h" #include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" @@ -146,7 +147,7 @@ // Add optimizations if enabled. if (enableOpt) { optPM.addPass(mlir::createLoopFusionPass()); - optPM.addPass(mlir::createMemRefDataFlowOptPass()); + optPM.addPass(mlir::createAffineScalarReplacementPass()); } } diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp --- a/mlir/examples/toy/Ch6/toyc.cpp +++ b/mlir/examples/toy/Ch6/toyc.cpp @@ -15,6 +15,7 @@ #include "toy/Parser.h" #include "toy/Passes.h" +#include "mlir/Dialect/Affine/Passes.h" #include "mlir/ExecutionEngine/ExecutionEngine.h" #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/IR/AsmState.h" @@ -161,7 +162,7 @@ // Add optimizations if enabled. if (enableOpt) { optPM.addPass(mlir::createLoopFusionPass()); - optPM.addPass(mlir::createMemRefDataFlowOptPass()); + optPM.addPass(mlir::createAffineScalarReplacementPass()); } } diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp --- a/mlir/examples/toy/Ch7/toyc.cpp +++ b/mlir/examples/toy/Ch7/toyc.cpp @@ -15,6 +15,7 @@ #include "toy/Parser.h" #include "toy/Passes.h" +#include "mlir/Dialect/Affine/Passes.h" #include "mlir/ExecutionEngine/ExecutionEngine.h" #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/IR/AsmState.h" @@ -162,7 +163,7 @@ // Add optimizations if enabled. 
if (enableOpt) { optPM.addPass(mlir::createLoopFusionPass()); - optPM.addPass(mlir::createMemRefDataFlowOptPass()); + optPM.addPass(mlir::createAffineScalarReplacementPass()); } } diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -48,6 +48,11 @@ /// Overload relying on pass options for initialization. std::unique_ptr<OperationPass<FuncOp>> createAffineDataCopyGenerationPass(); +/// Creates a pass to replace affine memref accesses by scalars using store to +/// load forwarding and redundant load elimination; consequently also eliminate +/// dead allocs. +std::unique_ptr<OperationPass<FuncOp>> createAffineScalarReplacementPass(); + /// Creates a pass to perform tiling on loop nests. std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass(uint64_t cacheSizeBytes); diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -94,6 +94,52 @@ ]; } +def AffineScalarReplacement : FunctionPass<"affine-scalrep"> { + let summary = "Replace affine memref accesses by scalars by forwarding stores " "to loads and eliminating redundant loads"; + let description = [{ + This pass performs store to load forwarding and redundant load elimination + for affine memref accesses and potentially eliminates the entire memref + if all its accesses are forwarded. 
+ + Input + + ```mlir + func @store_load_affine_apply() -> memref<10x10xf32> { + %cf7 = constant 7.0 : f32 + %m = alloc() : memref<10x10xf32> + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { + affine.store %cf7, %m[%i0, %i1] : memref<10x10xf32> + %v0 = affine.load %m[%i0, %i1] : memref<10x10xf32> + %v1 = addf %v0, %v0 : f32 + } + } + return %m : memref<10x10xf32> + } + ``` + + Output + + ```mlir + module { + func @store_load_affine_apply() -> memref<10x10xf32> { + %cst = constant 7.000000e+00 : f32 + %0 = alloc() : memref<10x10xf32> + affine.for %arg0 = 0 to 10 { + affine.for %arg1 = 0 to 10 { + affine.store %cst, %0[%arg0, %arg1] : memref<10x10xf32> + %1 = addf %cst, %cst : f32 + } + } + return %0 : memref<10x10xf32> + } + } + ``` + }]; + let constructor = "mlir::createAffineScalarReplacementPass()"; +} + def AffineVectorize : FunctionPass<"affine-super-vectorize"> { let summary = "Vectorize to a target independent n-D vector abstraction"; let constructor = "mlir::createSuperVectorizePass()"; diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -96,10 +96,6 @@ /// variables into another ParallelLoop over less than N induction variables. std::unique_ptr<Pass> createParallelLoopCollapsingPass(); -/// Creates a pass to perform optimizations relying on memref dataflow such as -/// store to load forwarding, elimination of dead stores, and dead allocs. -std::unique_ptr<OperationPass<FuncOp>> createMemRefDataFlowOptPass(); - /// Creates a pass to strip debug information from a function. 
std::unique_ptr<Pass> createStripDebugInfoPass(); diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -480,51 +480,6 @@ let constructor = "mlir::createLoopInvariantCodeMotionPass()"; } -def MemRefDataFlowOpt : FunctionPass<"memref-dataflow-opt"> { - let summary = "Perform store/load forwarding for memrefs"; - let description = [{ - This pass performs store to load forwarding for memref's to eliminate memory - accesses and potentially the entire memref if all its accesses are - forwarded. - - Input - - ```mlir - func @store_load_affine_apply() -> memref<10x10xf32> { - %cf7 = constant 7.0 : f32 - %m = alloc() : memref<10x10xf32> - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { - affine.store %cf7, %m[%i0, %i1] : memref<10x10xf32> - %v0 = affine.load %m[%i0, %i1] : memref<10x10xf32> - %v1 = addf %v0, %v0 : f32 - } - } - return %m : memref<10x10xf32> - } - ``` - - Output - - ```mlir - module { - func @store_load_affine_apply() -> memref<10x10xf32> { - %cst = constant 7.000000e+00 : f32 - %0 = alloc() : memref<10x10xf32> - affine.for %arg0 = 0 to 10 { - affine.for %arg1 = 0 to 10 { - affine.store %cst, %0[%arg0, %arg1] : memref<10x10xf32> - %1 = addf %cst, %cst : f32 - } - } - return %0 : memref<10x10xf32> - } - } - ``` - }]; - let constructor = "mlir::createMemRefDataFlowOptPass()"; -} - def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> { let summary = "Normalize memrefs"; let description = [{ diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp rename from mlir/lib/Transforms/MemRefDataFlowOpt.cpp rename to mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp --- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp @@ -1,4 +1,4 @@ -//===- MemRefDataFlowOpt.cpp - MemRef DataFlow 
Optimization pass ------ -*-===// +//===- AffineScalarReplacement.cpp - Affine scalar replacement pass -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements a pass to forward memref stores to loads, thereby -// potentially getting rid of intermediate memref's entirely. It also removes +// This file implements a pass to forward affine memref stores to loads, thereby +// potentially getting rid of intermediate memrefs entirely. It also removes // redundant loads. // TODO: In the future, similar techniques could be used to eliminate // dead memref store's and perform more complex forwarding when support for @@ -18,11 +18,11 @@ #include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Dominance.h" #include "mlir/Support/LogicalResult.h" -#include "mlir/Transforms/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include <algorithm> @@ -66,7 +66,8 @@ // currently only eliminates the stores only if no other loads/uses (other // than dealloc) remain. // -struct MemRefDataFlowOpt : public MemRefDataFlowOptBase<MemRefDataFlowOpt> { +struct AffineScalarReplacement + : public AffineScalarReplacementBase<AffineScalarReplacement> { void runOnFunction() override; LogicalResult forwardStoreToLoad(AffineReadOpInterface loadOp); @@ -86,8 +87,9 @@ /// Creates a pass to perform optimizations relying on memref dataflow such as /// store to load forwarding, elimination of dead stores, and dead allocs. 
-std::unique_ptr<OperationPass<FuncOp>> mlir::createMemRefDataFlowOptPass() { - return std::make_unique<MemRefDataFlowOpt>(); +std::unique_ptr<OperationPass<FuncOp>> +mlir::createAffineScalarReplacementPass() { + return std::make_unique<AffineScalarReplacement>(); } // Check if the store may be reaching the load. @@ -115,7 +117,7 @@ // This is a straightforward implementation not optimized for speed. Optimize // if needed. LogicalResult -MemRefDataFlowOpt::forwardStoreToLoad(AffineReadOpInterface loadOp) { +AffineScalarReplacement::forwardStoreToLoad(AffineReadOpInterface loadOp) { // First pass over the use list to get the minimum number of surrounding // loops common between the load op and the store op, with min taken across // all store ops. @@ -206,7 +208,7 @@ // 1) loadA and loadB have mathematically equivalent affine access functions. // 2) loadB dominates loadA. // 3) loadB postdominates all the store op's that have a dependence into loadA. -void MemRefDataFlowOpt::loadCSE(AffineReadOpInterface loadOp) { +void AffineScalarReplacement::loadCSE(AffineReadOpInterface loadOp) { // The list of load op candidates for forwarding that satisfy conditions // (1) and (2) above - they will be filtered later when checking (3). SmallVector<AffineReadOpInterface, 4> fwdingCandidates; @@ -283,7 +285,7 @@ loadOpsToErase.push_back(loadOp); } -void MemRefDataFlowOpt::runOnFunction() { +void AffineScalarReplacement::runOnFunction() { // Only supports single block functions at the moment. 
FuncOp f = getFunction(); if (!llvm::hasSingleElement(f)) { diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt @@ -3,6 +3,7 @@ AffineLoopInvariantCodeMotion.cpp AffineLoopNormalize.cpp AffineParallelize.cpp + AffineScalarReplacement.cpp LoopTiling.cpp LoopUnroll.cpp LoopUnrollAndJam.cpp diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -13,7 +13,6 @@ LoopCoalescing.cpp LoopFusion.cpp LoopInvariantCodeMotion.cpp - MemRefDataFlowOpt.cpp NormalizeMemRefs.cpp OpStats.cpp ParallelLoopCollapsing.cpp diff --git a/mlir/test/Transforms/memref-dataflow-opt.mlir b/mlir/test/Dialect/Affine/scalrep.mlir rename from mlir/test/Transforms/memref-dataflow-opt.mlir rename to mlir/test/Dialect/Affine/scalrep.mlir --- a/mlir/test/Transforms/memref-dataflow-opt.mlir +++ b/mlir/test/Dialect/Affine/scalrep.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -memref-dataflow-opt | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-scalrep | FileCheck %s // CHECK-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0, d1) -> (d1 + 1)> // CHECK-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0, d1) -> (d0)>