diff --git a/mlir/include/mlir/Dialect/SCF/Passes.h b/mlir/include/mlir/Dialect/SCF/Passes.h --- a/mlir/include/mlir/Dialect/SCF/Passes.h +++ b/mlir/include/mlir/Dialect/SCF/Passes.h @@ -20,6 +20,10 @@ class Pass; +/// Creates a pass that specializes for loops for unrolling and +/// vectorization. +std::unique_ptr createForLoopSpecializationPass(); + /// Creates a loop fusion pass which fuses parallel loops. std::unique_ptr createParallelLoopFusionPass(); diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td --- a/mlir/include/mlir/Dialect/SCF/Passes.td +++ b/mlir/include/mlir/Dialect/SCF/Passes.td @@ -1,4 +1,4 @@ -//===-- Passes.td - Loop pass definition file --------------*- tablegen -*-===// +//===-- Passes.td - SCF pass definition file ---------------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,18 +11,24 @@ include "mlir/Pass/PassBase.td" -def LoopParallelLoopFusion : Pass<"parallel-loop-fusion"> { +def SCFForLoopSpecialization + : FunctionPass<"for-loop-specialization"> { + let summary = "Specialize for loops for vectorization"; + let constructor = "mlir::createForLoopSpecializationPass()"; +} + +def SCFParallelLoopFusion : Pass<"parallel-loop-fusion"> { let summary = "Fuse adjacent parallel loops"; let constructor = "mlir::createParallelLoopFusionPass()"; } -def LoopParallelLoopSpecialization +def SCFParallelLoopSpecialization : FunctionPass<"parallel-loop-specialization"> { let summary = "Specialize parallel loops for vectorization"; let constructor = "mlir::createParallelLoopSpecializationPass()"; } -def LoopParallelLoopTiling : FunctionPass<"parallel-loop-tiling"> { +def SCFParallelLoopTiling : FunctionPass<"parallel-loop-tiling"> { let summary = "Tile parallel loops"; let constructor = "mlir::createParallelLoopTilingPass()"; let options = [ diff --git
a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp --- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp @@ -64,19 +64,18 @@ /// loops after checking if the bounds are equal to that constant. This is /// beneficial if the loop will almost always have the constant bound and that /// version can be fully unrolled and vectorized. -static void specializeParallelLoopForUnrolling(ForOp op) { - auto bound = op.upper(); - auto minOp = bound.getDefiningOp(); - if (!minOp) - return; - int64_t minConstant = std::numeric_limits::max(); - for (auto expr : minOp.map().getResults()) { - if (auto constantIndex = expr.dyn_cast()) - minConstant = std::min(minConstant, constantIndex.getValue()); - } - if (minConstant == std::numeric_limits::max()) - return; - constantIndices.push_back(minConstant); +static void specializeForLoopForUnrolling(ForOp op) { + auto bound = op.upperBound(); + auto minOp = bound.getDefiningOp(); + if (!minOp) + return; + int64_t minConstant = std::numeric_limits::max(); + for (auto expr : minOp.map().getResults()) { + if (auto constantIndex = expr.dyn_cast()) + minConstant = std::min(minConstant, constantIndex.getValue()); + } + if (minConstant == std::numeric_limits::max()) + return; OpBuilder b(op); BlockAndValueMapping map; @@ -92,13 +91,24 @@ namespace { struct ParallelLoopSpecialization - : public LoopParallelLoopSpecializationBase { + : public SCFParallelLoopSpecializationBase { void runOnFunction() override { getFunction().walk([](ParallelOp op) { specializeParallelLoopForUnrolling(op); }); } }; + +struct ForLoopSpecialization + : public SCFForLoopSpecializationBase { + void runOnFunction() override { + getFunction().walk([](ForOp op) { specializeForLoopForUnrolling(op); }); + } +}; } // namespace std::unique_ptr mlir::createParallelLoopSpecializationPass() { return std::make_unique(); } + +std::unique_ptr 
mlir::createForLoopSpecializationPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp --- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp @@ -160,7 +160,7 @@ namespace { struct ParallelLoopFusion - : public LoopParallelLoopFusionBase { + : public SCFParallelLoopFusionBase { void runOnOperation() override { getOperation()->walk([&](Operation *child) { for (Region &region : child->getRegions()) diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp --- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp @@ -119,7 +119,7 @@ namespace { struct ParallelLoopTiling - : public LoopParallelLoopTilingBase { + : public SCFParallelLoopTilingBase { ParallelLoopTiling() = default; explicit ParallelLoopTiling(ArrayRef tileSizes) { this->tileSizes = tileSizes; diff --git a/mlir/test/Dialect/SCF/for-loop-specialization.mlir b/mlir/test/Dialect/SCF/for-loop-specialization.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SCF/for-loop-specialization.mlir @@ -0,0 +1,39 @@ +// RUN: mlir-opt %s -for-loop-specialization -split-input-file | FileCheck %s + +#map0 = affine_map<()[s0, s1] -> (1024, s0 - s1)> +#map1 = affine_map<()[s0, s1] -> (64, s0 - s1)> + +func @for(%outer: index, %A: memref, %B: memref, + %C: memref, %result: memref) { + %c0 = constant 0 : index + %c1 = constant 1 : index + %d0 = dim %A, %c0 : memref + %b0 = affine.min #map0()[%d0, %outer] + scf.for %i0 = %c0 to %b0 step %c1 { + %B_elem = load %B[%i0] : memref + %C_elem = load %C[%i0] : memref + %sum_elem = addf %B_elem, %C_elem : f32 + store %sum_elem, %result[%i0] : memref + } + return +} + +// CHECK-LABEL: func @for( +// CHECK-SAME: [[ARG0:%.*]]: index, [[VAL_2:%.*]]: memref, [[VAL_3:%.*]]:
memref, [[VAL_4:%.*]]: memref, [[VAL_5:%.*]]: memref) { +// CHECK: [[CST_0:%.*]] = constant 0 : index +// CHECK: [[CST_1:%.*]] = constant 1 : index +// CHECK: [[DIM_0:%.*]] = dim [[VAL_2]], [[CST_0]] : memref +// CHECK: [[MIN:%.*]] = affine.min #map0(){{\[}}[[DIM_0]], [[ARG0]]] +// CHECK: [[CST_1024:%.*]] = constant 1024 : index +// CHECK: [[PRED:%.*]] = cmpi "eq", [[MIN]], [[CST_1024]] : index +// CHECK: scf.if [[PRED]] { +// CHECK: scf.for [[IDX0:%.*]] = [[CST_0]] to [[CST_1024]] step [[CST_1]] { +// CHECK: store +// CHECK: } +// CHECK: } else { +// CHECK: scf.for [[IDX0:%.*]] = [[CST_0]] to [[MIN]] step [[CST_1]] { +// CHECK: store +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: }