diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h --- a/mlir/include/mlir/Transforms/LoopUtils.h +++ b/mlir/include/mlir/Transforms/LoopUtils.h @@ -38,8 +38,9 @@ /// Unrolls this for operation by the specified unroll factor. Returns failure /// if the loop cannot be unrolled either due to restrictions or due to invalid -/// unroll factors. +/// unroll factors. Requires positive loop bounds and step. LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor); +LogicalResult loopUnrollByFactor(loop::ForOp forOp, uint64_t unrollFactor); /// Unrolls this loop by the specified unroll factor or its trip count, /// whichever is lower. @@ -68,9 +69,10 @@ LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp, uint64_t unrollJamFactor); -/// Promotes the loop body of a AffineForOp to its containing block if the -/// AffineForOp was known to have a single iteration. +/// Promotes the loop body of a AffineForOp/loop::ForOp to its containing block +/// if the loop was known to have a single iteration. LogicalResult promoteIfSingleIteration(AffineForOp forOp); +LogicalResult promoteIfSingleIteration(loop::ForOp forOp); /// Promotes all single iteration AffineForOp's in the Function, i.e., moves /// their body into the containing Block. diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -24,6 +24,7 @@ #include "mlir/IR/Function.h" #include "mlir/IR/IntegerSet.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/Support/MathExtras.h" #include "mlir/Transforms/RegionUtils.h" #include "mlir/Transforms/Utils.h" #include "llvm/ADT/DenseMap.h" @@ -118,6 +119,34 @@ lb.erase(); } +// Build the IR that performs ceil division of a positive value by a constant: +// ceildiv(a, B) = divis(a + (B-1), B) +// where divis is rounding-to-zero division. +static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, + int64_t divisor) { + assert(divisor > 0 && "expected positive divisor"); + assert(dividend.getType().isIndex() && "expected index-typed value"); + + Value divisorMinusOneCst = builder.create(loc, divisor - 1); + Value divisorCst = builder.create(loc, divisor); + Value sum = builder.create(loc, dividend, divisorMinusOneCst); + return builder.create(loc, sum, divisorCst); +} + +// Build the IR that performs ceil division of a positive value by another +// positive value: +// ceildiv(a, b) = divis(a + (b - 1), b) +// where divis is rounding-to-zero division. +static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, + Value divisor) { + assert(dividend.getType().isIndex() && "expected index-typed value"); + + Value cstOne = builder.create(loc, 1); + Value divisorMinusOne = builder.create(loc, divisor, cstOne); + Value sum = builder.create(loc, dividend, divisorMinusOne); + return builder.create(loc, sum, divisor); +} + /// Promotes the loop body of a forOp to its containing block if the forOp /// was known to have a single iteration. // TODO(bondhugula): extend this for arbitrary affine bounds. @@ -161,6 +190,35 @@ return success(); } +/// Promotes the loop body of a forOp to its containing block if the forOp +/// it can be determined that the loop has a single iteration. +LogicalResult mlir::promoteIfSingleIteration(loop::ForOp forOp) { + auto lbCstOp = + dyn_cast_or_null(forOp.lowerBound().getDefiningOp()); + auto ubCstOp = + dyn_cast_or_null(forOp.upperBound().getDefiningOp()); + auto stepCstOp = + dyn_cast_or_null(forOp.step().getDefiningOp()); + if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.getValue() < 0 || + ubCstOp.getValue() < 0 || stepCstOp.getValue() < 0) + return failure(); + int64_t tripCount = mlir::ceilDiv(ubCstOp.getValue() - lbCstOp.getValue(), + stepCstOp.getValue()); + if (tripCount != 1) + return failure(); + auto iv = forOp.getInductionVar(); + iv.replaceAllUsesWith(lbCstOp); + + // Move the loop body operations, except for its terminator, to the loop's + // containing block. + auto *parentBlock = forOp.getOperation()->getBlock(); + forOp.getBody()->back().erase(); + parentBlock->getOperations().splice(Block::iterator(forOp), + forOp.getBody()->getOperations()); + forOp.erase(); + return success(); +} + /// Promotes all single iteration 'for' ops in `f`, i.e., moves /// their body into the containing Block. void mlir::promoteSingleIterationLoops(FuncOp f) { @@ -416,6 +474,37 @@ return loopUnrollByFactor(forOp, unrollFactor); } +// Generates unrolled copies of AffineForOp or loop::ForOp 'loopBodyBlock', with +// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap +// 'forOpIV' for each unrolled body. +static void generateUnrolledLoop( + Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, + function_ref ivRemapFn) { + // Builder to insert unrolled bodies just before the terminator of the body of + // 'forOp'. + auto builder = OpBuilder::atBlockTerminator(loopBodyBlock); + + // Keep a pointer to the last non-terminator operation in the original block + // so that we know what to clone (since we are doing this in-place). + Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2); + + // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies). + for (unsigned i = 1; i < unrollFactor; i++) { + BlockAndValueMapping operandMap; + + // If the induction variable is used, create a remapping to the value for + // this unrolled instance. + if (!forOpIV.use_empty()) { + Value ivUnroll = ivRemapFn(i, forOpIV, builder); + operandMap.map(forOpIV, ivUnroll); + } + + // Clone the original body of 'forOp'. + for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) + builder.clone(*it, operandMap); + } +} + /// Unrolls this loop by the specified factor. Returns success if the loop /// is successfully unrolled. LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp, @@ -467,38 +556,114 @@ // Scale the step of loop being unrolled by unroll factor. int64_t step = forOp.getStep(); forOp.setStep(step * unrollFactor); + generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor, + [&](unsigned i, Value iv, OpBuilder b) { + // iv' = iv + i * step + auto d0 = b.getAffineDimExpr(0); + auto bumpMap = AffineMap::get(1, 0, d0 + i * step); + return b.create(forOp.getLoc(), bumpMap, + iv); + }); - // Builder to insert unrolled bodies just before the terminator of the body of - // 'forOp'. - auto builder = OpBuilder::atBlockTerminator(forOp.getBody()); + // Promote the loop body up if this has turned into a single iteration loop. + promoteIfSingleIteration(forOp); + return success(); +} - // Keep a pointer to the last non-terminator operation in the original block - // so that we know what to clone (since we are doing this in-place). - Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2); +/// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled. +LogicalResult mlir::loopUnrollByFactor(loop::ForOp forOp, + uint64_t unrollFactor) { + assert(unrollFactor > 0 && "expected positive unroll factor"); + if (unrollFactor == 1) + return promoteIfSingleIteration(forOp); - // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies). - auto forOpIV = forOp.getInductionVar(); - for (unsigned i = 1; i < unrollFactor; i++) { - BlockAndValueMapping operandMap; + // Return if the loop body is empty. + if (llvm::hasSingleElement(forOp.getBody()->getOperations())) + return success(); - // If the induction variable is used, create a remapping to the value for - // this unrolled instance. - if (!forOpIV.use_empty()) { - // iv' = iv + 1/2/3...unrollFactor-1; - auto d0 = builder.getAffineDimExpr(0); - auto bumpMap = AffineMap::get(1, 0, d0 + i * step); - auto ivUnroll = - builder.create(forOp.getLoc(), bumpMap, forOpIV); - operandMap.map(forOpIV, ivUnroll); - } + // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate + // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases. + OpBuilder boundsBuilder(forOp); + auto loc = forOp.getLoc(); + auto step = forOp.step(); + Value upperBoundUnrolled; + Value stepUnrolled; + bool generateEpilogueLoop = true; + + auto lbCstOp = + dyn_cast_or_null(forOp.lowerBound().getDefiningOp()); + auto ubCstOp = + dyn_cast_or_null(forOp.upperBound().getDefiningOp()); + auto stepCstOp = + dyn_cast_or_null(forOp.step().getDefiningOp()); + if (lbCstOp && ubCstOp && stepCstOp) { + // Constant loop bounds computation. + int64_t lbCst = lbCstOp.getValue(); + int64_t ubCst = ubCstOp.getValue(); + int64_t stepCst = stepCstOp.getValue(); + assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 && + "expected positive loop bounds and step"); + int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst); + int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor); + int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst; + assert(upperBoundUnrolledCst <= ubCst); + int64_t stepUnrolledCst = stepCst * unrollFactor; + + // Create constant for 'upperBoundUnrolled' and set epilogue loop flag. + generateEpilogueLoop = upperBoundUnrolledCst < ubCst; + if (generateEpilogueLoop) + upperBoundUnrolled = + boundsBuilder.create(loc, upperBoundUnrolledCst); + else + upperBoundUnrolled = ubCstOp; + + // Create constant for 'stepUnrolled'. + stepUnrolled = + stepCst == stepUnrolledCst + ? step + : boundsBuilder.create(loc, stepUnrolledCst); + } else { + // Dynamic loop bounds computation. + // TODO(andydavis) Add dynamic asserts for negative lb/ub/step, or + // consider using ceilDiv from AffineApplyExpander. + auto lowerBound = forOp.lowerBound(); + auto upperBound = forOp.upperBound(); + Value diff = boundsBuilder.create(loc, upperBound, lowerBound); + Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step); + Value unrollFactorCst = + boundsBuilder.create(loc, unrollFactor); + Value tripCountRem = + boundsBuilder.create(loc, tripCount, unrollFactorCst); + // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor) + Value tripCountEvenMultiple = + boundsBuilder.create(loc, tripCount, tripCountRem); + // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step + upperBoundUnrolled = boundsBuilder.create( + loc, lowerBound, + boundsBuilder.create(loc, tripCountEvenMultiple, step)); + // Scale 'step' by 'unrollFactor'. + stepUnrolled = boundsBuilder.create(loc, step, unrollFactorCst); + } - // Clone the original body of 'forOp'. - for (auto it = forOp.getBody()->begin(); it != std::next(srcBlockEnd); - it++) { - builder.clone(*it, operandMap); - } + // Create epilogue clean up loop starting at 'upperBoundUnrolled'. + if (generateEpilogueLoop) { + OpBuilder epilogueBuilder(forOp.getOperation()->getBlock(), + std::next(Block::iterator(forOp))); + auto epilogueForOp = cast(epilogueBuilder.clone(*forOp)); + epilogueForOp.setLowerBound(upperBoundUnrolled); + promoteIfSingleIteration(epilogueForOp); } + // Create unrolled loop. + forOp.setUpperBound(upperBoundUnrolled); + forOp.setStep(stepUnrolled); + generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor, + [&](unsigned i, Value iv, OpBuilder b) { + // iv' = iv + step * i; + auto stride = b.create( + loc, step, b.create(loc, i)); + return b.create(loc, iv, stride); + }); // Promote the loop body up if this has turned into a single iteration loop. promoteIfSingleIteration(forOp); return success(); @@ -1032,34 +1197,6 @@ return ::tile(forOps, sizes, forOps.back()); } -// Build the IR that performs ceil division of a positive value by a constant: -// ceildiv(a, B) = divis(a + (B-1), B) -// where divis is rounding-to-zero division. -static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, - int64_t divisor) { - assert(divisor > 0 && "expected positive divisor"); - assert(dividend.getType().isIndex() && "expected index-typed value"); - - Value divisorMinusOneCst = builder.create(loc, divisor - 1); - Value divisorCst = builder.create(loc, divisor); - Value sum = builder.create(loc, dividend, divisorMinusOneCst); - return builder.create(loc, sum, divisorCst); -} - -// Build the IR that performs ceil division of a positive value by another -// positive value: -// ceildiv(a, b) = divis(a + (b - 1), b) -// where divis is rounding-to-zero division. -static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, - Value divisor) { - assert(dividend.getType().isIndex() && "expected index-typed value"); - - Value cstOne = builder.create(loc, 1); - Value divisorMinusOne = builder.create(loc, divisor, cstOne); - Value sum = builder.create(loc, dividend, divisorMinusOne); - return builder.create(loc, sum, divisor); -} - // Hoist the ops within `outer` that appear before `inner`. // Such ops include the ops that have been introduced by parametric tiling. // Ops that come from triangular loops (i.e. that belong to the program slice diff --git a/mlir/test/Dialect/Loops/loop-unroll.mlir b/mlir/test/Dialect/Loops/loop-unroll.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/Loops/loop-unroll.mlir @@ -0,0 +1,250 @@ +// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2' | FileCheck %s --check-prefix UNROLL-BY-2 +// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3 +// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2 +// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2 + +func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index, + %arg3: memref) { + %0 = constant 7.0 : f32 + loop.for %i0 = %arg0 to %arg1 step %arg2 { + store %0, %arg3[%i0] : memref + } + return +} +// UNROLL-BY-2-LABEL: func @dynamic_loop_unroll +// UNROLL-BY-2-SAME: %[[LB:.*0]]: index, +// UNROLL-BY-2-SAME: %[[UB:.*1]]: index, +// UNROLL-BY-2-SAME: %[[STEP:.*2]]: index, +// UNROLL-BY-2-SAME: %[[MEM:.*3]]: memref +// +// UNROLL-BY-2-DAG: %[[V0:.*]] = subi %[[UB]], %[[LB]] : index +// UNROLL-BY-2-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-2-DAG: %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index +// UNROLL-BY-2-DAG: %[[V2:.*]] = addi %[[V0]], %[[V1]] : index +// Compute trip count in V3. +// UNROLL-BY-2-DAG: %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index +// Store unroll factor in C2. +// UNROLL-BY-2-DAG: %[[C2:.*]] = constant 2 : index +// UNROLL-BY-2-DAG: %[[V4:.*]] = remi_signed %[[V3]], %[[C2]] : index +// UNROLL-BY-2-DAG: %[[V5:.*]] = subi %[[V3]], %[[V4]] : index +// UNROLL-BY-2-DAG: %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index +// Compute upper bound of unrolled loop in V7. +// UNROLL-BY-2-DAG: %[[V7:.*]] = addi %[[LB]], %[[V6]] : index +// Compute step of unrolled loop in V8. +// UNROLL-BY-2-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C2]] : index +// UNROLL-BY-2: loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] { +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-2-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index +// UNROLL-BY-2-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref +// UNROLL-BY-2-NEXT: } +// UNROLL-BY-2-NEXT: loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] { +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: } +// UNROLL-BY-2-NEXT: return + +// UNROLL-BY-3-LABEL: func @dynamic_loop_unroll +// UNROLL-BY-3-SAME: %[[LB:.*0]]: index, +// UNROLL-BY-3-SAME: %[[UB:.*1]]: index, +// UNROLL-BY-3-SAME: %[[STEP:.*2]]: index, +// UNROLL-BY-3-SAME: %[[MEM:.*3]]: memref +// +// UNROLL-BY-3-DAG: %[[V0:.*]] = subi %[[UB]], %[[LB]] : index +// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-3-DAG: %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index +// UNROLL-BY-3-DAG: %[[V2:.*]] = addi %[[V0]], %[[V1]] : index +// Compute trip count in V3. +// UNROLL-BY-3-DAG: %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index +// Store unroll factor in C3. +// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index +// UNROLL-BY-3-DAG: %[[V4:.*]] = remi_signed %[[V3]], %[[C3]] : index +// UNROLL-BY-3-DAG: %[[V5:.*]] = subi %[[V3]], %[[V4]] : index +// UNROLL-BY-3-DAG: %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index +// Compute upper bound of unrolled loop in V7. +// UNROLL-BY-3-DAG: %[[V7:.*]] = addi %[[LB]], %[[V6]] : index +// Compute step of unrolled loop in V8. +// UNROLL-BY-3-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C3]] : index +// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-3-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index +// UNROLL-BY-3-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref +// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index +// UNROLL-BY-3-NEXT: %[[V11:.*]] = muli %[[STEP]], %[[C2_IV]] : index +// UNROLL-BY-3-NEXT: %[[V12:.*]] = addi %[[IV]], %[[V11]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V12]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: return + +func @dynamic_loop_unroll_outer_by_2( + %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, + %arg5 : index, %arg6: memref) { + %0 = constant 7.0 : f32 + loop.for %i0 = %arg0 to %arg1 step %arg2 { + loop.for %i1 = %arg3 to %arg4 step %arg5 { + store %0, %arg6[%i1] : memref + } + } + return +} +// UNROLL-OUTER-BY-2-LABEL: func @dynamic_loop_unroll_outer_by_2 +// UNROLL-OUTER-BY-2-SAME: %[[LB0:.*0]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[UB0:.*1]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[STEP0:.*2]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[LB1:.*3]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[UB1:.*4]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[STEP1:.*5]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[MEM:.*6]]: memref +// +// UNROLL-OUTER-BY-2: loop.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} { +// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { +// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { +// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] { +// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { +// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: return + +func @dynamic_loop_unroll_inner_by_2( + %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, + %arg5 : index, %arg6: memref) { + %0 = constant 7.0 : f32 + loop.for %i0 = %arg0 to %arg1 step %arg2 { + loop.for %i1 = %arg3 to %arg4 step %arg5 { + store %0, %arg6[%i1] : memref + } + } + return +} +// UNROLL-INNER-BY-2-LABEL: func @dynamic_loop_unroll_inner_by_2 +// UNROLL-INNER-BY-2-SAME: %[[LB0:.*0]]: index, +// UNROLL-INNER-BY-2-SAME: %[[UB0:.*1]]: index, +// UNROLL-INNER-BY-2-SAME: %[[STEP0:.*2]]: index, +// UNROLL-INNER-BY-2-SAME: %[[LB1:.*3]]: index, +// UNROLL-INNER-BY-2-SAME: %[[UB1:.*4]]: index, +// UNROLL-INNER-BY-2-SAME: %[[STEP1:.*5]]: index, +// UNROLL-INNER-BY-2-SAME: %[[MEM:.*6]]: memref +// +// UNROLL-INNER-BY-2: loop.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] { +// UNROLL-INNER-BY-2: loop.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} { +// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-INNER-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-INNER-BY-2-NEXT: %[[V0:.*]] = muli %[[STEP1]], %[[C1_IV]] : index +// UNROLL-INNER-BY-2-NEXT: %[[V1:.*]] = addi %[[IV1]], %[[V0]] : index +// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-INNER-BY-2-NEXT: } +// UNROLL-INNER-BY-2-NEXT: loop.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] { +// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-INNER-BY-2-NEXT: } +// UNROLL-INNER-BY-2-NEXT: } +// UNROLL-INNER-BY-2-NEXT: return + +// Test that no epilogue clean-up loop is generated because the trip count is +// a multiple of the unroll factor. +func @static_loop_unroll_by_2(%arg0 : memref) { + %0 = constant 7.0 : f32 + %lb = constant 0 : index + %ub = constant 20 : index + %step = constant 1 : index + loop.for %i0 = %lb to %ub step %step { + store %0, %arg0[%i0] : memref + } + return +} +// UNROLL-BY-2-LABEL: func @static_loop_unroll_by_2 +// UNROLL-BY-2-SAME: %[[MEM:.*0]]: memref +// +// UNROLL-BY-2-DAG: %[[C0:.*]] = constant 0 : index +// UNROLL-BY-2-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-2-DAG: %[[C20:.*]] = constant 20 : index +// UNROLL-BY-2-DAG: %[[C2:.*]] = constant 2 : index +// UNROLL-BY-2: loop.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] { +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-2-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index +// UNROLL-BY-2-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-2-NEXT: } +// UNROLL-BY-2-NEXT: return + +// Test that epilogue clean up loop is generated (trip count is not +// a multiple of unroll factor). +func @static_loop_unroll_by_3(%arg0 : memref) { + %0 = constant 7.0 : f32 + %lb = constant 0 : index + %ub = constant 20 : index + %step = constant 1 : index + loop.for %i0 = %lb to %ub step %step { + store %0, %arg0[%i0] : memref + } + return +} + +// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3 +// UNROLL-BY-3-SAME: %[[MEM:.*0]]: memref +// +// UNROLL-BY-3-DAG: %[[C0:.*]] = constant 0 : index +// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-3-DAG: %[[C20:.*]] = constant 20 : index +// UNROLL-BY-3-DAG: %[[C18:.*]] = constant 18 : index +// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index +// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index +// UNROLL-BY-3-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index +// UNROLL-BY-3-NEXT: %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index +// UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: loop.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: return + +// Test that the single iteration epilogue loop body is promoted to the loops +// containing block. +func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref) { + %0 = constant 7.0 : f32 + %lb = constant 0 : index + %ub = constant 10 : index + %step = constant 1 : index + loop.for %i0 = %lb to %ub step %step { + store %0, %arg0[%i0] : memref + } + return +} +// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_promote_epilogue +// UNROLL-BY-3-SAME: %[[MEM:.*0]]: memref +// +// UNROLL-BY-3-DAG: %[[C0:.*]] = constant 0 : index +// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-3-DAG: %[[C10:.*]] = constant 10 : index +// UNROLL-BY-3-DAG: %[[C9:.*]] = constant 9 : index +// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index +// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index +// UNROLL-BY-3-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index +// UNROLL-BY-3-NEXT: %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index +// UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[C9]]] : memref +// UNROLL-BY-3-NEXT: return diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -14,6 +14,7 @@ TestLiveness.cpp TestLoopMapping.cpp TestLoopParametricTiling.cpp + TestLoopUnrolling.cpp TestOpaqueLoc.cpp TestMemRefBoundCheck.cpp TestMemRefDependenceCheck.cpp diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp new file mode 100644 --- /dev/null +++ b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp @@ -0,0 +1,68 @@ +//===-------- TestLoopUnrolling.cpp --- loop unrolling test pass ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to unroll loops by a specified unroll factor. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/LoopOps/LoopOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/LoopUtils.h" +#include "mlir/Transforms/Passes.h" + +using namespace mlir; + +namespace { + +static unsigned getNestingDepth(Operation *op) { + Operation *currOp = op; + unsigned depth = 0; + while ((currOp = currOp->getParentOp())) { + if (isa(currOp)) + depth++; + } + return depth; +} + +class TestLoopUnrollingPass + : public PassWrapper { +public: + TestLoopUnrollingPass() = default; + TestLoopUnrollingPass(const TestLoopUnrollingPass &) {} + explicit TestLoopUnrollingPass(uint64_t unrollFactorParam, + unsigned loopDepthParam) { + unrollFactor = unrollFactorParam; + loopDepth = loopDepthParam; + } + + void runOnFunction() override { + FuncOp func = getFunction(); + SmallVector loops; + func.walk([&](loop::ForOp forOp) { + if (getNestingDepth(forOp) == loopDepth) + loops.push_back(forOp); + }); + for (auto loop : loops) { + loopUnrollByFactor(loop, unrollFactor); + } + } + Option unrollFactor{*this, "unroll-factor", + llvm::cl::desc("Loop unroll factor."), + llvm::cl::init(1)}; + Option loopDepth{*this, "loop-depth", llvm::cl::desc("Loop depth."), + llvm::cl::init(0)}; +}; +} // end namespace + +namespace mlir { +void registerTestLoopUnrollingPass() { + PassRegistration( + "test-loop-unrolling", "Tests loop unrolling transformation"); +} +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -53,6 +53,7 @@ void registerTestLivenessPass(); void registerTestLoopFusion(); void registerTestLoopMappingPass(); +void registerTestLoopUnrollingPass(); void registerTestMatchers(); void registerTestMemRefDependenceCheck(); void registerTestMemRefStrideCalculation(); @@ -119,6 +120,7 @@ registerTestLivenessPass(); registerTestLoopFusion(); registerTestLoopMappingPass(); + registerTestLoopUnrollingPass(); registerTestMatchers(); registerTestMemRefDependenceCheck(); registerTestMemRefStrideCalculation();