diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h --- a/mlir/include/mlir/Dialect/SCF/Transforms.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms.h @@ -17,6 +17,7 @@ namespace mlir { +class AffineMinOp; class ConversionTarget; class LogicalResult; class MLIRContext; @@ -26,6 +27,7 @@ class RewritePatternSet; using OwningRewritePatternList = RewritePatternSet; class Operation; +class Value; namespace scf { @@ -66,10 +68,35 @@ /// ``` /// /// This function rewrites the given scf.for loop in-place and creates a new -/// scf.if operation (returned via `ifOp`) for the last iteration. +/// scf.if operation for the last iteration, returned via `ifOp`. The boundary +/// at which the loop is split (new upper bound) is returned via `splitBound`. +LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp, + Value &splitBound); + +/// Try to simplify an affine.min operation after loop peeling. This function +/// detects affine.min operations such as (ub is the previous upper bound of the +/// unpeeled loop): +/// ``` +/// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)> +/// %r = affine.min #affine.min #map(%iv)[%step, %ub] +/// ``` +/// and rewrites them into (in the case the peeled loop): +/// ``` +/// %r = %step +/// ``` +/// affine.min operations inside the generated scf.if operation are rewritten in +/// a similar way. +/// +/// This function builds up a set of constraints, capable of proving that: +/// * min(step, ub - iv) == step inside the peeled loop +/// * min(step, ub - iv) == ub - iv inside the if-statement /// -/// TODO: Simplify affine.min ops inside the new loop/if statement. -LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp); +/// Note: `ub` is the previous upper bound of the loop (before peeling). +/// `insideLoop` must be true for affine.min ops inside the loop and false for +/// affine.min ops inside the scf.for op. +LogicalResult rewritePeeledAffineOp(RewriterBase &rewriter, AffineMinOp minOp, + Value iv, Value ub, Value step, + bool insideLoop); /// Tile a parallel loop of the form /// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp --- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" +#include "mlir/Analysis/AffineStructures.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/SCF/Passes.h" #include "mlir/Dialect/SCF/SCF.h" @@ -98,7 +99,7 @@ /// into a for loop where the step divides the iteration space evenly, followed /// by an scf.if for the last (partial) iteration (if any). LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp, - scf::IfOp &ifOp) { + scf::IfOp &ifOp, Value &splitBound) { RewriterBase::InsertionGuard guard(b); auto lbInt = getConstantIntValue(forOp.lowerBound()); auto ubInt = getConstantIntValue(forOp.upperBound()); @@ -116,7 +117,7 @@ bindDims(b.getContext(), dim0, dim1, dim2); // New upper bound: %ub - (%ub - %lb) mod %step auto modMap = AffineMap::get(3, 0, {dim1 - ((dim1 - dim0) % dim2)}); - Value splitBound = b.createOrFold( + splitBound = b.createOrFold( loc, modMap, ValueRange{forOp.lowerBound(), forOp.upperBound(), forOp.step()}); @@ -153,6 +154,136 @@ return success(); } +/// Try to simplify an affine.min operation `minOp` after loop peeling. +/// +/// 1. Set up a constraint system with dimensions for `iv`, `ub` (previous +/// upper bound), `step`, `minOp` and `minOpUb` (upper bound of `minOp`). +/// 2. a. If inside the main loop: Add inequality: ub - iv >= step +/// 2. b. If inside the if-statement: Add inequality: ub - iv < step +/// 3. Add each result of `minOp` as a dimension `r_i` to the constraint system. +/// 4. Compute an upper bound of `minOp` and bind it to `minOpUb`. +/// 5. For each result of `minOp`: Prove that r_i >= minOpUb. If this is the +/// case, upper_bound(minOp) == lower_bound(minOp) and `minOp` can be +/// replaced with the that bound. +LogicalResult mlir::scf::rewritePeeledAffineOp(RewriterBase &rewriter, + AffineMinOp minOp, Value iv, + Value ub, Value step, + bool insideLoop) { + RewriterBase::InsertionGuard guard(rewriter); + AffineMap minOpMap = minOp.getAffineMap(); + + // Set up constraint system. + FlatAffineConstraints constraints; + static const unsigned kDimIv = 0; // Iteration variable + static const unsigned kDimUb = 1; // Upper loop bound before peeling + static const unsigned kDimStep = 2; // Loop step size + static const unsigned kDimMinOp = 3; // `minOp` + static const unsigned kDimMinOpUb = 4; // Upper bound of `minOp` + // Add an SSA value as a dimension to the constraint system. If the SSA value + // is a constant, a new equality is added, setting the dimension to the + // constant value. + auto addDim = [&](unsigned dimId, Value value = nullptr) { + constraints.addDimId(dimId, value); + if (value) + // Note: Fails if no constant value can be derived from `value`. + (void)constraints.addLowerOrUpperBound( + dimId, rewriter.getDimIdentityMap(), ValueRange{value}, + /*eq=*/true, /*lower=*/true, /*composeMapAndOperands=*/false); + }; + + addDim(kDimIv, iv); + addDim(kDimUb, ub); + addDim(kDimStep, step); + addDim(kDimMinOp); + addDim(kDimMinOpUb); + + // Add loop peeling invariant. This is the main piece of knowledge that + // enables AffineMinOp simplification. + if (insideLoop) { + // ub - iv >= step (equiv.: -iv + ub - step + 0*minOp + 0*minOpUb + 0 >= 0) + // Intuitively: Inside the peeled loop, every iteration is "full" iteration, + // i.e., step divides the iteration space `ub - lb` evenly. + constraints.addInequality({-1, 1, -1, 0, 0, 0}); + } else { + // ub - iv < step (equiv.: iv + -ub + step + 0*minOp + 0*minOpUb - 1 >= 0) + // Intuitively: `iv` is the split bound here, i.e., the iteration variable + // value of the very last iteration (in the unpeeled loop). At that point, + // there are less than `step` elements remaining. (Otherwise, the peeled + // loop would run for at least one more iteration.) + constraints.addInequality({1, -1, 1, 0, 0, -1}); + } + + // Add an inequality for each result expr_i of minOpMap: minOp <= expr_i + if (failed(constraints.addLowerOrUpperBound( + kDimMinOp, minOpMap, minOp.operands(), /*eq=*/false, /*lower=*/false, + /*composeMapAndOperands=*/false))) + return failure(); + + // Add a helper dimension r_i for each result expr_i of minOpMap. + unsigned resultStartDim = constraints.getNumDimIds(); + unsigned numResults = minOpMap.getNumResults(); + for (unsigned i = 0; i < minOpMap.getNumResults(); ++i) { + constraints.addDimId(resultStartDim + i); + // Add an equality: r_i = expr_i + if (failed(constraints.addLowerOrUpperBound( + resultStartDim + i, minOpMap.getSubMap({i}), minOp.operands(), + /*eq=*/true, /*lower=*/true, /*composeMapAndOperands=*/false))) + return failure(); + } + + // Try to compute an upper bound for minOp, expressed in terms of the other + // dimensions iv, ub and step. + SmallVector minOpValLb(1), minOpValUb(1); + constraints.getSliceBounds(kDimMinOp, 1, minOp.getContext(), &minOpValLb, + &minOpValUb); + if (minOpValUb.size() != 1 || minOpValUb[0].getNumResults() != 1) + return failure(); // No upper bound found. + + // Add an equality: kDimMinOpUb = minOpValUb[0] + std::vector> flattened; + if (failed(getFlattenedAffineExprs(minOpValUb[0], &flattened))) + return failure(); + assert(flattened.size() == 1); + SmallVector &eq = flattened[0]; + eq.insert(eq.begin() + kDimMinOp, 0); // Add back kDimMinOp dimension. + assert(eq.size() == constraints.getNumCols()); + eq[kDimMinOpUb] = -1; + constraints.addEquality(eq); + + // Prove that each result of minOpMap has a lower bound that is equal (or + // greater than) the upper bound of minOp (`kDimMinOpUb`). In that case, + // minOp can be replaced with the bound. I.e., prove that for each result + // expr_i (represented by dimension r_i): + // + // r_i >= minOpUb + // + // To prove this inequality, add its negation to the constraint set and prove + // that the constraint set is empty. + for (unsigned i = resultStartDim; i < resultStartDim + numResults; ++i) { + FlatAffineConstraints newConstr(constraints); + // Add inequality: r_i < minOpUb (equiv.: minOpUb - r_i - 1 >= 0) + SmallVector ineq(newConstr.getNumCols(), 0); + ineq[kDimMinOpUb] = 1; + ineq[i] = -1; + ineq[newConstr.getNumCols() - 1] = -1; + newConstr.addInequality(ineq); + // If the constraint set it empty: + // \not \exists r_i: r_i < minOpUb + // ==> \forall r_i: r_i >= minOpUb + if (!newConstr.isEmpty()) + return failure(); + } + + // Lower and upper bound of minOp are equal. Replace minOp with its upper + // bound. + auto newMap = AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0, + minOpValUb[0].getResult(0)); + rewriter.setInsertionPoint(minOp); + rewriter.replaceOpWithNewOp(minOp, newMap, + ValueRange{iv, ub, step}); + return success(); +} + namespace { struct ForLoopPeelingPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -164,13 +295,25 @@ if (forOp->hasAttr(kLoopLabel)) return failure(); + Value ub = forOp.upperBound(); scf::IfOp ifOp; - if (failed(peelForLoop(rewriter, forOp, ifOp))) + Value splitBound; + if (failed(peelForLoop(rewriter, forOp, ifOp, splitBound))) return failure(); // Apply label, so that the same loop is not rewritten a second time. rewriter.updateRootInPlace( forOp, [&]() { forOp->setAttr(kLoopLabel, rewriter.getUnitAttr()); }); + // Rewrite affine.min ops. + forOp.walk([&](AffineMinOp minOp) { + (void)scf::rewritePeeledAffineOp(rewriter, minOp, forOp.getInductionVar(), + ub, forOp.step(), /*insideLoop=*/true); + }); + ifOp.walk([&](AffineMinOp minOp) { + (void)scf::rewritePeeledAffineOp(rewriter, minOp, splitBound, ub, + forOp.step(), /*insideLoop=*/false); + }); + return success(); } }; diff --git a/mlir/test/Dialect/SCF/for-loop-peeling.mlir b/mlir/test/Dialect/SCF/for-loop-peeling.mlir --- a/mlir/test/Dialect/SCF/for-loop-peeling.mlir +++ b/mlir/test/Dialect/SCF/for-loop-peeling.mlir @@ -1,22 +1,20 @@ // RUN: mlir-opt %s -for-loop-peeling -canonicalize -split-input-file | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0, s1, s2] -> (s0, s2 - (s2 - (s2 - s1) mod s0))> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - (s1 - s0) mod s2))> // CHECK: func @fully_dynamic_bounds( // CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index // CHECK: %[[C0_I32:.*]] = constant 0 : i32 // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]] // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]] // CHECK-SAME: step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[STEP]], %[[UB]]] -// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32 +// CHECK: %[[CAST:.*]] = index_cast %[[STEP]] : index to i32 // CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]] // CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) { -// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[STEP]], %[[LB]], %[[UB]]] +// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[LB]], %[[UB]], %[[STEP]]] // CHECK: %[[CAST2:.*]] = index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]] // CHECK: scf.yield %[[ADD2]] @@ -38,18 +36,16 @@ // ----- -// CHECK-DAG: #[[MAP:.*]] = affine_map<(d0) -> (4, -d0 + 17)> // CHECK: func @fully_static_bounds( // CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32 // CHECK-DAG: %[[C1_I32:.*]] = constant 1 : i32 +// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32 // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK-DAG: %[[C16:.*]] = constant 16 : index // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MINOP:.*]] = affine.min #[[MAP]](%[[IV]]) -// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32 -// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32 +// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[RESULT:.*]] = addi %[[LOOP]], %[[C1_I32]] : i32 @@ -73,24 +69,22 @@ // ----- // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)> // CHECK: func @dynamic_upper_bound( // CHECK-SAME: %[[UB:.*]]: index // CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32 +// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32 // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]] // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]] -// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32 -// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32 +// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]] // CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) { -// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]] +// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]] // CHECK: %[[CAST2:.*]] = index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]] // CHECK: scf.yield %[[ADD2]] @@ -116,23 +110,21 @@ // ----- // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)> // CHECK: func @no_loop_results( // CHECK-SAME: %[[UB:.*]]: index, %[[MEMREF:.*]]: memref +// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32 // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]] // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] { -// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]] // CHECK: %[[LOAD:.*]] = memref.load %[[MEMREF]][] -// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32 -// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[CAST]] : i32 +// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[C4_I32]] : i32 // CHECK: memref.store %[[ADD]], %[[MEMREF]] // CHECK: } // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]] // CHECK: scf.if %[[HAS_MORE]] { -// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]] +// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]] // CHECK: %[[LOAD2:.*]] = memref.load %[[MEMREF]][] // CHECK: %[[CAST2:.*]] = index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = addi %[[LOAD2]], %[[CAST2]] @@ -153,3 +145,75 @@ } return } + +// ----- + +// Test rewriting of affine.min ops. Make sure that more general cases than +// the ones above are successfully rewritten. Also make sure that the pattern +// does not rewrite affine.min ops that should not be rewritten. + +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1 - 1)> +// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (s0, -d0 + s1, s2)> +// CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - (s1 - s0) mod s2))> +// CHECK-DAG: #[[MAP5:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - (s1 - s0) mod s2) + 1)> +// CHECK-DAG: #[[MAP6:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - (s1 - s0) mod s2) - 1)> +// CHECK-DAG: #[[MAP7:.*]] = affine_map<()[s0, s1, s2, s3] -> (s0, s2 - (s2 - (s2 - s1) mod s0), s3)> +// CHECK: func @test_affine_min_rewrite( +// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index, +// CHECK-SAME: %[[MEMREF:.*]]: memref, %[[SOME_VAL:.*]]: index +// CHECK: scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] { +// CHECK: %[[RES2:.*]] = affine.apply #[[MAP1]]()[%[[STEP]]] +// CHECK: %[[RES3:.*]] = affine.min #[[MAP2]](%[[IV]])[%[[STEP]], %[[UB]]] +// CHECK: %[[RES4:.*]] = affine.min #map3(%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]] +// CHECK: memref.store %[[STEP]] +// CHECK: memref.store %[[STEP]] +// CHECK: memref.store %[[RES2]] +// CHECK: memref.store %[[RES3]] +// CHECK: memref.store %[[RES4]] +// CHECK: } +// CHECK: scf.if {{.*}} { +// CHECK: %[[RES_IF_0:.*]] = affine.apply #[[MAP4]]()[%[[LB]], %[[UB]], %[[STEP]]] +// CHECK: %[[RES_IF_1:.*]] = affine.apply #[[MAP5]]()[%[[LB]], %[[UB]], %[[STEP]]] +// CHECK: %[[RES_IF_2:.*]] = affine.apply #[[MAP5]]()[%[[LB]], %[[UB]], %[[STEP]]] +// CHECK: %[[RES_IF_3:.*]] = affine.apply #[[MAP6]]()[%[[LB]], %[[UB]], %[[STEP]]] +// CHECK: %[[RES_IF_4:.*]] = affine.min #[[MAP7]]()[%[[STEP]], %[[LB]], %[[UB]], %[[SOME_VAL]]] +// CHECK: memref.store %[[RES_IF_0]] +// CHECK: memref.store %[[RES_IF_1]] +// CHECK: memref.store %[[RES_IF_2]] +// CHECK: memref.store %[[RES_IF_3]] +// CHECK: memref.store %[[RES_IF_4]] +#map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> +#map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)> +#map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)> +#map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)> +#map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)> +func @test_affine_min_rewrite(%lb : index, %ub: index, + %step: index, %d : memref, + %some_val: index) { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %c3 = constant 3 : index + %c4 = constant 4 : index + scf.for %iv = %lb to %ub step %step { + // Most common case: Rewrite min(%ub - %iv, %step) to %step. + %m0 = affine.min #map0(%ub, %iv)[%step] + // Increase %ub - %iv a little bit, pattern should still apply. + %m1 = affine.min #map1(%ub, %iv)[%step] + // Rewrite min(%ub - %iv + 1, %step + 1) to %step + 1. + %m2 = affine.min #map2(%ub, %iv)[%step] + // min(%ub - %iv - 1, %step) cannot be simplified because %ub - %iv - 1 + // can be smaller than %step. (Can be simplified in if-statement.) + %m3 = affine.min #map3(%ub, %iv)[%step] + // min(%ub - %iv, %step, %some_val) cannot be simplified because the range + // of %some_val is unknown. + %m4 = affine.min #map4(%ub, %iv, %some_val)[%step] + memref.store %m0, %d[%c0] : memref + memref.store %m1, %d[%c1] : memref + memref.store %m2, %d[%c2] : memref + memref.store %m3, %d[%c3] : memref + memref.store %m4, %d[%c4] : memref + } + return +} diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1453,6 +1453,7 @@ includes = ["include"], deps = [ ":Affine", + ":Analysis", ":DialectUtils", ":IR", ":MemRefDialect",