diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h
--- a/mlir/include/mlir/Analysis/AffineStructures.h
+++ b/mlir/include/mlir/Analysis/AffineStructures.h
@@ -250,6 +250,12 @@
   void addSymbolId(unsigned pos);
   void addLocalId(unsigned pos);
   virtual unsigned addId(IdKind kind, unsigned pos);
+  /// Add an identifier of the specified kind at the end of the table. Return
+  /// the position of the added column. The coefficient column corresponding to
+  /// the added identifier is initialized to zero.
+  unsigned addDimId();
+  unsigned addSymbolId();
+  unsigned addLocalId();
 
   /// Composes an affine map whose dimensions and symbols match one to one with
   /// the dimensions and symbols of this FlatAffineConstraints. The results of
@@ -659,6 +665,12 @@
   using FlatAffineConstraints::addSymbolId;
   unsigned addId(IdKind kind, unsigned pos) override;
   unsigned addId(IdKind kind, unsigned pos, Value val);
+  /// Add an identifier of the specified kind at the end of the table. Return
+  /// the position of the added column. The coefficient column corresponding to
+  /// the added identifier is initialized to zero. `val` is the Value to
+  /// associate with the added identifier.
+  unsigned addDimId(Value val);
+  unsigned addSymbolId(Value val);
 
   /// Add the specified values as a dim or symbol id depending on its nature, if
   /// it already doesn't exist in the system. `val` has to be either a terminal
@@ -766,6 +778,18 @@
     return {values.data(), values.size()};
   }
 
+  inline ArrayRef<Optional<Value>> getMaybeDimValues() const {
+    return {values.data(), getNumDimIds()};
+  }
+
+  inline ArrayRef<Optional<Value>> getMaybeSymbolValues() const {
+    return {values.data() + getNumDimIds(), getNumSymbolIds()};
+  }
+
+  inline ArrayRef<Optional<Value>> getMaybeDimAndSymbolValues() const {
+    return {values.data(), getNumDimIds() + getNumSymbolIds()};
+  }
+
   /// Sets the Value associated with the pos^th identifier.
   inline void setValue(unsigned pos, Value val) {
     assert(pos < numIds && "invalid id position");
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h
--- a/mlir/include/mlir/Dialect/SCF/Transforms.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms.h
@@ -44,9 +44,10 @@
 /// by an scf.if for the last (partial) iteration (if any). This transformation
 /// is called "loop peeling".
 ///
-/// Other patterns can simplify/canonicalize operations in the body of the loop
-/// and the scf.if. This is beneficial for a wide range of transformations such
-/// as vectorization or loop tiling.
+/// This transformation is beneficial for a wide range of transformations such
+/// as vectorization or loop tiling: It enables additional canonicalizations
+/// inside the peeled loop body such as rewriting masked loads into unmasked
+/// loads.
 ///
 /// E.g., assuming a lower bound of 0 (for illustration purposes):
 /// ```
@@ -65,11 +66,22 @@
 /// }
 /// ```
 ///
-/// This function rewrites the given scf.for loop in-place and creates a new
-/// scf.if operation (returned via `ifOp`) for the last iteration.
+/// After loop peeling, this function tries to simplify/canonicalize affine.min
+/// operations in the body of the loop and the scf.if, taking advantage of the
+/// fact that every iteration of the peeled loop is a "full" iteration. This
+/// canonicalization is expected to enable further canonicalization
+/// opportunities through other patterns.
 ///
-/// TODO: Simplify affine.min ops inside the new loop/if statement.
-LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp); +/// The return value indicates whether the loop was rewritten or not. Loops are +/// not rewritten if: +/// * Loop step size is 1 or +/// * Loop bounds and step size are static, and step already divides the +/// iteration space evenly. +/// +/// Note: This function rewrites the given scf.for loop in-place and creates a +/// new scf.if operation for the last iteration. It replaces all uses of the +/// unpeeled loop with the results of the newly generated scf.if. +LogicalResult peelAndCanonicalizeForLoop(RewriterBase &rewriter, ForOp forOp); /// Tile a parallel loop of the form /// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) diff --git a/mlir/include/mlir/IR/AffineExpr.h b/mlir/include/mlir/IR/AffineExpr.h --- a/mlir/include/mlir/IR/AffineExpr.h +++ b/mlir/include/mlir/IR/AffineExpr.h @@ -143,12 +143,15 @@ /// `*this` and apply replace with `map` on its subexpressions. AffineExpr replace(const DenseMap &map) const; - /// Replace dims[0 .. numDims - 1] by dims[shift .. shift + numDims - 1]. - AffineExpr shiftDims(unsigned numDims, unsigned shift) const; - - /// Replace symbols[0 .. numSymbols - 1] by - /// symbols[shift .. shift + numSymbols - 1]. - AffineExpr shiftSymbols(unsigned numSymbols, unsigned shift) const; + /// Replace dims[offset ... numDims) + /// by dims[offset + shift ... shift + numDims). + AffineExpr shiftDims(unsigned numDims, unsigned shift, + unsigned offset = 0) const; + + /// Replace symbols[offset ... numSymbols) + /// by symbols[offset + shift ... shift + numSymbols). + AffineExpr shiftSymbols(unsigned numSymbols, unsigned shift, + unsigned offset = 0) const; AffineExpr operator+(int64_t v) const; AffineExpr operator+(AffineExpr other) const; diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -207,24 +207,28 @@ AffineMap replace(const DenseMap &map, unsigned numResultDims, unsigned numResultSyms) const; - /// Replace dims[0 .. numDims - 1] by dims[shift .. shift + numDims - 1]. - AffineMap shiftDims(unsigned shift) const { - return AffineMap::get( - getNumDims() + shift, getNumSymbols(), - llvm::to_vector<4>(llvm::map_range( - getResults(), - [&](AffineExpr e) { return e.shiftDims(getNumDims(), shift); })), - getContext()); + /// Replace dims[offset ... numDims) + /// by dims[offset + shift ... shift + numDims). + AffineMap shiftDims(unsigned shift, unsigned offset = 0) const { + assert(offset <= getNumDims()); + return AffineMap::get(getNumDims() + shift, getNumSymbols(), + llvm::to_vector<4>(llvm::map_range( + getResults(), + [&](AffineExpr e) { + return e.shiftDims(getNumDims(), shift, offset); + })), + getContext()); } - /// Replace symbols[0 .. numSymbols - 1] by - /// symbols[shift .. shift + numSymbols - 1]. - AffineMap shiftSymbols(unsigned shift) const { + /// Replace symbols[offset ... numSymbols) + /// by symbols[offset + shift ... shift + numSymbols). 
+ AffineMap shiftSymbols(unsigned shift, unsigned offset = 0) const { return AffineMap::get(getNumDims(), getNumSymbols() + shift, llvm::to_vector<4>(llvm::map_range( getResults(), [&](AffineExpr e) { - return e.shiftSymbols(getNumSymbols(), shift); + return e.shiftSymbols(getNumSymbols(), shift, + offset); })), getContext()); } diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -263,22 +263,52 @@ addId(IdKind::Local, pos); } +unsigned FlatAffineConstraints::addLocalId() { + unsigned pos = getNumLocalIds(); + addId(IdKind::Local, pos); + return pos; +} + void FlatAffineConstraints::addDimId(unsigned pos) { addId(IdKind::Dimension, pos); } +unsigned FlatAffineConstraints::addDimId() { + unsigned pos = getNumDimIds(); + addId(IdKind::Dimension, pos); + return pos; +} + void FlatAffineValueConstraints::addDimId(unsigned pos, Value val) { addId(IdKind::Dimension, pos, val); } +unsigned FlatAffineValueConstraints::addDimId(Value val) { + unsigned pos = getNumDimIds(); + addId(IdKind::Dimension, pos, val); + return pos; +} + void FlatAffineConstraints::addSymbolId(unsigned pos) { addId(IdKind::Symbol, pos); } +unsigned FlatAffineConstraints::addSymbolId() { + unsigned pos = getNumSymbolIds(); + addId(IdKind::Symbol, pos); + return pos; +} + void FlatAffineValueConstraints::addSymbolId(unsigned pos, Value val) { addId(IdKind::Symbol, pos, val); } +unsigned FlatAffineValueConstraints::addSymbolId(Value val) { + unsigned pos = getNumSymbolIds(); + addId(IdKind::Symbol, pos, val); + return pos; +} + unsigned FlatAffineConstraints::addId(IdKind kind, unsigned pos) { if (kind == IdKind::Dimension) assert(pos <= getNumDimIds()); diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -357,6 +357,9 @@ // *) It is a result of the dim op on a memref whose corresponding size is a // valid symbol. bool mlir::isValidSymbol(Value value) { + if (!value) + return false; + // The value must be an index type. if (!value.getType().isIndex()) return false; diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp --- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" +#include "mlir/Analysis/AffineStructures.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/SCF/Passes.h" #include "mlir/Dialect/SCF/SCF.h" @@ -97,8 +98,16 @@ /// Rewrite a for loop with bounds/step that potentially do not divide evenly /// into a for loop where the step divides the iteration space evenly, followed /// by an scf.if for the last (partial) iteration (if any). -LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp, - scf::IfOp &ifOp) { +/// +/// This function rewrites the given scf.for loop in-place and creates a new +/// scf.if operation for the last iteration. It replaces all uses of the +/// unpeeled loop with the results of the newly generated scf.if. +/// +/// The newly generated scf.if operation is returned via `ifOp`. The boundary +/// at which the loop is split (new upper bound) is returned via `splitBound`. +/// The return value indicates whether the loop was rewritten or not. 
+static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp,
+                                 Value &splitBound) {
   RewriterBase::InsertionGuard guard(b);
   auto lbInt = getConstantIntValue(forOp.lowerBound());
   auto ubInt = getConstantIntValue(forOp.upperBound());
@@ -117,7 +126,7 @@
   // New upper bound: %ub - (%ub - %lb) mod %step
   auto modMap = AffineMap::get(3, 0, {dim1 - ((dim1 - dim0) % dim2)});
   b.setInsertionPoint(forOp);
-  Value splitBound = b.createOrFold<AffineApplyOp>(
+  splitBound = b.createOrFold<AffineApplyOp>(
       loc, modMap,
       ValueRange{forOp.lowerBound(), forOp.upperBound(), forOp.step()});
 
@@ -153,6 +162,226 @@
   return success();
 }
 
+static void unpackOptionalValues(ArrayRef<Optional<Value>> source,
+                                 SmallVector<Value> &target) {
+  target = llvm::to_vector<4>(llvm::map_range(source, [](Optional<Value> val) {
+    return val.hasValue() ? *val : Value();
+  }));
+}
+
+/// Bound an identifier `pos` in a given FlatAffineValueConstraints with
+/// constraints drawn from an affine map. Before adding the constraint, the
+/// dimensions/symbols of the affine map are aligned with `constraints`.
+/// `operands` are the SSA Value operands used with the affine map.
+/// Note: This function adds a new symbol column to the `constraints` for each
+/// dimension/symbol that exists in the affine map but not in `constraints`.
+static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
+                                      FlatAffineConstraints::BoundType type,
+                                      unsigned pos, AffineMap map,
+                                      ValueRange operands) {
+  SmallVector<Value> dims, syms, newSyms;
+  unpackOptionalValues(constraints.getMaybeDimValues(), dims);
+  unpackOptionalValues(constraints.getMaybeSymbolValues(), syms);
+
+  AffineMap alignedMap =
+      alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
+  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
+    constraints.addSymbolId(constraints.getNumSymbolIds(), newSyms[i]);
+  return constraints.addBound(type, pos, alignedMap);
+}
+
+/// This function tries to canonicalize an affine.min operation by proving that
+/// its value is bounded by the same lower and upper bound. In that case, the
+/// operation can be folded away.
+///
+/// Bounds are computed by FlatAffineValueConstraints. Invariants required for
+/// finding/proving bounds should be supplied via `constraints`.
+///
+/// 1. Add dimensions for `minOp` and `minOpUb` (upper bound of `minOp`).
+/// 2. Compute an upper bound of `minOp` and bind it to `minOpUb`. SSA values
+///    that are used in `minOp` but are not part of `dims` are added as extra
+///    symbols to the constraint set.
+/// 3. For each result of `minOp`: Add result as a dimension `r_i`. Prove that
+///    r_i >= minOpUb. If this is the case, ub(minOp) == lb(minOp) and `minOp`
+///    can be replaced with that bound.
+///
+/// In summary, the following constraints are added throughout this function.
+/// Note: `invar` are dimensions added by the caller to express the invariants.
+///
+/// invar | minOp | minOpUb | r_i | extra syms... | const | eq/ineq
+/// ------+-------+---------+-----+---------------+-------+-------------------
+/// (various eq./ineq. constraining `invar`, added by the caller)
+///   ... |   0   |    0    |  0  |       0       |  ...  | ...
+/// ------+-------+---------+-----+---------------+-------+-------------------
+/// (various ineq. constraining `minOp` in terms of `minOp` operands (`invar`
+/// and extra `minOp` operands "extra syms" that are not in `invar`)).
+///   ... |  -1   |    0    |  0  |      ...      |  ...  | >= 0
+/// ------+-------+---------+-----+---------------+-------+-------------------
+/// (set `minOpUb` to `minOp` upper bound in terms of `invar` and extra syms)
+///   ... |   0   |   -1    |  0  |      ...      |  ...  | = 0
+/// ------+-------+---------+-----+---------------+-------+-------------------
+/// (for each `minOp` map result r_i: copy previous constraints, set r_i to
+/// corresponding map result, prove r_i >= minOpUb via contradiction)
+///   ... |   0   |    0    | -1  |      ...      |  ...  | = 0
+///     0 |   0   |    1    | -1  |       0       |  -1   | >= 0
+///
+static LogicalResult
+canonicalizeAffineMinOp(RewriterBase &rewriter, AffineMinOp minOp,
+                        FlatAffineValueConstraints constraints) {
+  RewriterBase::InsertionGuard guard(rewriter);
+  AffineMap minOpMap = minOp.getAffineMap();
+  unsigned numResults = minOpMap.getNumResults();
+
+  // Add a few extra dimensions.
+  unsigned dimMinOp = constraints.addDimId();   // `minOp`
+  unsigned dimMinOpUb = constraints.addDimId(); // `minOp` upper bound
+  unsigned resultDimStart = constraints.getNumDimIds();
+  for (unsigned i = 0; i < numResults; ++i)
+    constraints.addDimId();
+
+  // Add an inequality for each result expr_i of minOpMap: minOp <= expr_i
+  if (failed(alignAndAddBound(constraints, FlatAffineConstraints::UB, dimMinOp,
+                              minOpMap, minOp.operands())))
+    return failure();
+
+  // Try to compute an upper bound for minOp, expressed in terms of the other
+  // `dims` and extra symbols.
+  SmallVector<AffineMap> minOpValLb(1), minOpValUb(1);
+  constraints.getSliceBounds(dimMinOp, 1, minOp.getContext(), &minOpValLb,
+                             &minOpValUb);
+  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
+  // a TODO of `getSliceBounds` and not handled here.
+  if (!minOpValUb[0] || minOpValUb[0].getNumResults() != 1)
+    return failure(); // No or multiple upper bounds found.
+
+  // Add an equality: dimMinOpUb = minOpValUb[0]
+  // Add back dimension for minOp. (Was removed by `getSliceBounds`.)
+  AffineMap alignedUbMap = minOpValUb[0].shiftDims(/*shift=*/1,
+                                                   /*offset=*/dimMinOp);
+  if (failed(constraints.addBound(FlatAffineConstraints::EQ, dimMinOpUb,
+                                  alignedUbMap)))
+    return failure();
+
+  // If the constraint system is empty, there is an inconsistency. (E.g., this
+  // can happen if loop lb > ub.)
+  if (constraints.isEmpty())
+    return failure();
+
+  // Prove that each result of minOpMap has a lower bound that is equal to (or
+  // greater than) the upper bound of minOp (`dimMinOpUb`). In that case,
+  // minOp can be replaced with the bound. I.e., prove that for each result
+  // expr_i (represented by dimension r_i):
+  //
+  //   r_i >= minOpUb
+  //
+  // To prove this inequality, add its negation to the constraint set and prove
+  // that the constraint set is empty.
+  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
+    FlatAffineValueConstraints newConstr(constraints);
+
+    // Add an equality: r_i = expr_i
+    // Note: These equalities could have been added earlier and used to express
+    // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
+    // computes minOpUb in terms of r_i dims, which is not desired.
+    if (failed(alignAndAddBound(newConstr, FlatAffineConstraints::EQ, i,
+                                minOpMap.getSubMap({i - resultDimStart}),
+                                minOp.operands())))
+      return failure();
+
+    // Add inequality: r_i < minOpUb (equiv.: minOpUb - r_i - 1 >= 0)
+    SmallVector<int64_t, 8> ineq(newConstr.getNumCols(), 0);
+    ineq[dimMinOpUb] = 1;
+    ineq[i] = -1;
+    ineq[newConstr.getNumCols() - 1] = -1;
+    newConstr.addInequality(ineq);
+    if (!newConstr.isEmpty())
+      return failure();
+  }
+
+  // Lower and upper bound of `minOp` are equal. Replace `minOp` with its bound.
+  AffineMap newMap = alignedUbMap;
+  SmallVector<Value> newOperands;
+  unpackOptionalValues(constraints.getMaybeDimAndSymbolValues(), newOperands);
+  mlir::canonicalizeMapAndOperands(&newMap, &newOperands);
+  rewriter.setInsertionPoint(minOp);
+  rewriter.replaceOpWithNewOp<AffineApplyOp>(minOp, newMap, newOperands);
+  return success();
+}
+
+/// Try to simplify an affine.min operation `minOp` after loop peeling. This
+/// function detects affine.min operations such as (ub is the previous upper
+/// bound of the unpeeled loop):
+/// ```
+/// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
+/// %r = affine.min #map(%iv)[%step, %ub]
+/// ```
+/// and rewrites them into (in the case of the peeled loop):
+/// ```
+/// %r = %step
+/// ```
+/// affine.min operations inside the generated scf.if operation are rewritten
+/// in a similar way.
+///
+/// This function builds up a set of constraints, capable of proving that:
+/// * Inside the peeled loop: min(step, ub - iv) == step
+/// * Inside the scf.if operation: min(step, ub - iv) == ub - iv
+///
+/// Note: `ub` is the previous upper bound of the loop (before peeling).
+/// `insideLoop` must be true for affine.min ops inside the loop and false for
+/// affine.min ops inside the scf.if op.
+static LogicalResult rewritePeeledAffineOp(RewriterBase &rewriter,
+                                           AffineMinOp minOp, Value iv,
+                                           Value ub, Value step,
+                                           bool insideLoop) {
+  FlatAffineValueConstraints constraints;
+  constraints.addDimId(0, iv);
+  constraints.addDimId(1, ub);
+  constraints.addDimId(2, step);
+  if (auto constUb = getConstantIntValue(ub))
+    constraints.addBound(FlatAffineConstraints::EQ, 1, *constUb);
+  if (auto constStep = getConstantIntValue(step))
+    constraints.addBound(FlatAffineConstraints::EQ, 2, *constStep);
+
+  // Add loop peeling invariant. This is the main piece of knowledge that
+  // enables AffineMinOp simplification.
+  if (insideLoop) {
+    // ub - iv >= step (equiv.: -iv + ub - step + 0 >= 0)
+    // Intuitively: Inside the peeled loop, every iteration is a "full"
+    // iteration, i.e., step divides the iteration space `ub - lb` evenly.
+    constraints.addInequality({-1, 1, -1, 0});
+  } else {
+    // ub - iv < step (equiv.: iv - ub + step - 1 >= 0)
+    // Intuitively: `iv` is the split bound here, i.e., the iteration variable
+    // value of the very last iteration (in the unpeeled loop). At that point,
+    // there are fewer than `step` elements remaining. (Otherwise, the peeled
+    // loop would run for at least one more iteration.)
+    constraints.addInequality({1, -1, 1, -1});
+  }
+
+  return canonicalizeAffineMinOp(rewriter, minOp, constraints);
+}
+
+LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
+                                                    ForOp forOp) {
+  Value ub = forOp.upperBound();
+  scf::IfOp ifOp;
+  Value splitBound;
+  if (failed(peelForLoop(rewriter, forOp, ifOp, splitBound)))
+    return failure();
+
+  // Rewrite affine.min ops.
+ forOp.walk([&](AffineMinOp minOp) { + (void)rewritePeeledAffineOp(rewriter, minOp, forOp.getInductionVar(), ub, + forOp.step(), /*insideLoop=*/true); + }); + ifOp.walk([&](AffineMinOp minOp) { + (void)rewritePeeledAffineOp(rewriter, minOp, splitBound, ub, forOp.step(), + /*insideLoop=*/false); + }); + + return success(); +} + static constexpr char kPeeledLoopLabel[] = "__peeled_loop__"; namespace { @@ -163,15 +392,12 @@ PatternRewriter &rewriter) const override { if (forOp->hasAttr(kPeeledLoopLabel)) return failure(); - - scf::IfOp ifOp; - if (failed(peelForLoop(rewriter, forOp, ifOp))) + if (failed(peelAndCanonicalizeForLoop(rewriter, forOp))) return failure(); // Apply label, so that the same loop is not rewritten a second time. rewriter.updateRootInPlace(forOp, [&]() { forOp->setAttr(kPeeledLoopLabel, rewriter.getUnitAttr()); }); - return success(); } }; diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -101,19 +101,26 @@ return replaceDimsAndSymbols({}, symReplacements); } -/// Replace symbols[0 .. numDims - 1] by symbols[shift .. shift + numDims - 1]. -AffineExpr AffineExpr::shiftDims(unsigned numDims, unsigned shift) const { +/// Replace dims[offset ... numDims) +/// by dims[offset + shift ... shift + numDims). +AffineExpr AffineExpr::shiftDims(unsigned numDims, unsigned shift, + unsigned offset) const { SmallVector dims; - for (unsigned idx = 0; idx < numDims; ++idx) + for (unsigned idx = 0; idx < offset; ++idx) + dims.push_back(getAffineDimExpr(idx, getContext())); + for (unsigned idx = offset; idx < numDims; ++idx) dims.push_back(getAffineDimExpr(idx + shift, getContext())); return replaceDimsAndSymbols(dims, {}); } -/// Replace symbols[0 .. numSymbols - 1] by -/// symbols[shift .. shift + numSymbols - 1]. -AffineExpr AffineExpr::shiftSymbols(unsigned numSymbols, unsigned shift) const { +/// Replace symbols[offset ... numSymbols) +/// by symbols[offset + shift ... shift + numSymbols). 
+AffineExpr AffineExpr::shiftSymbols(unsigned numSymbols, unsigned shift, + unsigned offset) const { SmallVector symbols; - for (unsigned idx = 0; idx < numSymbols; ++idx) + for (unsigned idx = 0; idx < offset; ++idx) + symbols.push_back(getAffineSymbolExpr(idx, getContext())); + for (unsigned idx = offset; idx < numSymbols; ++idx) symbols.push_back(getAffineSymbolExpr(idx + shift, getContext())); return replaceDimsAndSymbols({}, symbols); } diff --git a/mlir/test/Dialect/SCF/for-loop-peeling.mlir b/mlir/test/Dialect/SCF/for-loop-peeling.mlir --- a/mlir/test/Dialect/SCF/for-loop-peeling.mlir +++ b/mlir/test/Dialect/SCF/for-loop-peeling.mlir @@ -1,22 +1,20 @@ // RUN: mlir-opt %s -for-loop-peeling -canonicalize -split-input-file | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0, s1, s2] -> (s0, s2 - (s2 - (s2 - s1) mod s0))> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)> // CHECK: func @fully_dynamic_bounds( // CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index // CHECK: %[[C0_I32:.*]] = constant 0 : i32 // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]] // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]] // CHECK-SAME: step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[STEP]], %[[UB]]] -// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32 +// CHECK: %[[CAST:.*]] = index_cast %[[STEP]] : index to i32 // CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]] // CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) { -// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[STEP]], %[[LB]], %[[UB]]] +// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]], %[[LB]], %[[STEP]]] // CHECK: %[[CAST2:.*]] = index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]] // CHECK: scf.yield %[[ADD2]] @@ -38,18 +36,16 @@ // ----- -// CHECK-DAG: #[[MAP:.*]] = affine_map<(d0) -> (4, -d0 + 17)> // CHECK: func @fully_static_bounds( // CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32 // CHECK-DAG: %[[C1_I32:.*]] = constant 1 : i32 +// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32 // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK-DAG: %[[C16:.*]] = constant 16 : index // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MINOP:.*]] = affine.min #[[MAP]](%[[IV]]) -// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32 -// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32 +// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[RESULT:.*]] = addi %[[LOOP]], %[[C1_I32]] : i32 @@ -73,24 +69,22 @@ // ----- // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)> // CHECK: func @dynamic_upper_bound( // CHECK-SAME: %[[UB:.*]]: index // CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32 +// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32 // CHECK-DAG: 
%[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]] // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { -// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]] -// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32 -// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32 +// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]] // CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) { -// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]] +// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]] // CHECK: %[[CAST2:.*]] = index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]] // CHECK: scf.yield %[[ADD2]] @@ -116,23 +110,21 @@ // ----- // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)> // CHECK: func @no_loop_results( // CHECK-SAME: %[[UB:.*]]: index, %[[MEMREF:.*]]: memref +// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32 // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]] // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] { -// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]] // CHECK: %[[LOAD:.*]] = memref.load %[[MEMREF]][] -// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32 -// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[CAST]] : i32 +// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[C4_I32]] : i32 // CHECK: memref.store %[[ADD]], %[[MEMREF]] // CHECK: } // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]] // CHECK: scf.if %[[HAS_MORE]] { -// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]] +// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]] // CHECK: %[[LOAD2:.*]] = memref.load %[[MEMREF]][] // CHECK: %[[CAST2:.*]] = index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = addi %[[LOAD2]], %[[CAST2]] @@ -153,3 +145,81 @@ } return } + +// ----- + +// Test rewriting of affine.min ops. Make sure that more general cases than +// the ones above are successfully rewritten. Also make sure that the pattern +// does not rewrite affine.min ops that should not be rewritten. 
+
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
+// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1 - 1)>
+// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (s0, -d0 + s1, s2)>
+// CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
+// CHECK-DAG: #[[MAP5:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 + 1)>
+// CHECK-DAG: #[[MAP6:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 - 1)>
+// CHECK-DAG: #[[MAP7:.*]] = affine_map<()[s0, s1, s2, s3] -> (s0, s2 - (s2 - (s2 - s1) mod s0), s3)>
+// CHECK: func @test_affine_min_rewrite(
+// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index,
+// CHECK-SAME: %[[MEMREF:.*]]: memref<?xindex>, %[[SOME_VAL:.*]]: index
+// CHECK: scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] {
+// (affine.min folded away)
+// CHECK: memref.store %[[STEP]]
+// (affine.min folded away)
+// CHECK: memref.store %[[STEP]]
+// CHECK: %[[RES2:.*]] = affine.apply #[[MAP1]]()[%[[STEP]]]
+// CHECK: memref.store %[[RES2]]
+// CHECK: %[[RES3:.*]] = affine.min #[[MAP2]](%[[IV]])[%[[STEP]], %[[UB]]]
+// CHECK: memref.store %[[RES3]]
+// CHECK: %[[RES4:.*]] = affine.min #[[MAP3]](%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
+// CHECK: memref.store %[[RES4]]
+// CHECK: }
+// CHECK: scf.if {{.*}} {
+// CHECK: %[[RES_IF_0:.*]] = affine.apply #[[MAP4]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: memref.store %[[RES_IF_0]]
+// CHECK: %[[RES_IF_1:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: memref.store %[[RES_IF_1]]
+// CHECK: %[[RES_IF_2:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: memref.store %[[RES_IF_2]]
+// CHECK: %[[RES_IF_3:.*]] = affine.apply #[[MAP6]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: memref.store %[[RES_IF_3]]
+// CHECK: %[[RES_IF_4:.*]] = affine.min #[[MAP7]]()[%[[STEP]], %[[LB]], %[[UB]], %[[SOME_VAL]]]
+// CHECK: memref.store %[[RES_IF_4]]
+#map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
+#map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)>
+#map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)>
+#map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)>
+#map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)>
+func @test_affine_min_rewrite(%lb : index, %ub: index,
+                              %step: index, %d : memref<?xindex>,
+                              %some_val: index) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %c3 = constant 3 : index
+  %c4 = constant 4 : index
+  scf.for %iv = %lb to %ub step %step {
+    // Most common case: Rewrite min(%ub - %iv, %step) to %step.
+    %m0 = affine.min #map0(%ub, %iv)[%step]
+    memref.store %m0, %d[%c0] : memref<?xindex>
+
+    // Increase %ub - %iv a little bit, pattern should still apply.
+    %m1 = affine.min #map1(%ub, %iv)[%step]
+    memref.store %m1, %d[%c1] : memref<?xindex>
+
+    // Rewrite min(%ub - %iv + 1, %step + 1) to %step + 1.
+    %m2 = affine.min #map2(%ub, %iv)[%step]
+    memref.store %m2, %d[%c2] : memref<?xindex>
+
+    // min(%ub - %iv - 1, %step) cannot be simplified because %ub - %iv - 1
+    // can be smaller than %step. (Can be simplified in if-statement.)
+    %m3 = affine.min #map3(%ub, %iv)[%step]
+    memref.store %m3, %d[%c3] : memref<?xindex>
+
+    // min(%ub - %iv, %step, %some_val) cannot be simplified because the range
+    // of %some_val is unknown.
+    %m4 = affine.min #map4(%ub, %iv, %some_val)[%step]
+    memref.store %m4, %d[%c4] : memref<?xindex>
+  }
+  return
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1479,6 +1479,7 @@
     includes = ["include"],
     deps = [
         ":Affine",
+        ":Analysis",
         ":DialectUtils",
         ":IR",
        ":MemRefDialect",
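The `offset` parameter added to `shiftDims`/`shiftSymbols` leaves identifiers below `offset` in place and renumbers only the remaining ones; `canonicalizeAffineMinOp` relies on this to re-add the `minOp` dimension to the bound map returned by `getSliceBounds`. A minimal sketch of the difference, assuming only an `MLIRContext` is at hand (the function name `shiftExample` is illustrative):

```
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/MLIRContext.h"

using namespace mlir;

void shiftExample(MLIRContext *ctx) {
  AffineExpr d0 = getAffineDimExpr(0, ctx);
  AffineExpr d1 = getAffineDimExpr(1, ctx);
  AffineExpr e = d0 + d1;
  // Previous behavior (offset = 0): every dim is shifted, d0 + d1 -> d2 + d3.
  AffineExpr shiftedAll = e.shiftDims(/*numDims=*/2, /*shift=*/2);
  // With an offset: dims below `offset` stay fixed, d0 + d1 -> d0 + d3.
  AffineExpr shiftedTail = e.shiftDims(/*numDims=*/2, /*shift=*/2, /*offset=*/1);
  (void)shiftedAll;
  (void)shiftedTail;
}
```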
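The position-less `addDimId`/`addSymbolId` overloads and the `getMaybeDimValues`-style accessors make it straightforward to append columns and read back the SSA values associated with them. A small sketch mirroring the constraint setup in `rewritePeeledAffineOp`; the helper name `buildPeelingConstraints` and the incoming `iv`/`ub`/`step` values are assumed for illustration:

```
#include "mlir/Analysis/AffineStructures.h"

using namespace mlir;

void buildPeelingConstraints(Value iv, Value ub, Value step) {
  FlatAffineValueConstraints constraints;
  // The new overloads append a column at the end and return its position.
  unsigned ivPos = constraints.addDimId(iv);     // column 0
  unsigned ubPos = constraints.addDimId(ub);     // column 1
  unsigned stepPos = constraints.addDimId(step); // column 2
  (void)ivPos; (void)ubPos; (void)stepPos;
  // Peeled-loop invariant: ub - iv >= step, i.e. -iv + ub - step >= 0.
  constraints.addInequality({-1, 1, -1, 0});
  // The Values attached to the dim columns can be read back; entries may be
  // None for columns that have no associated Value.
  ArrayRef<Optional<Value>> dimValues = constraints.getMaybeDimValues();
  (void)dimValues;
}
```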
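At its core, `canonicalizeAffineMinOp` proves `r_i >= minOpUb` by contradiction: the negated inequality is added to a copy of the constraint system, which must then be infeasible. A self-contained sketch of that idea with hard-coded columns; this is illustrative only (the helper name and the use of `assert` are not part of the patch):

```
#include <cassert>

#include "mlir/Analysis/AffineStructures.h"

using namespace mlir;

void proveFullIteration() {
  // Columns: d0 = iv, d1 = ub, d2 = step, d3 = r (the "ub - iv" result of the
  // affine.min), followed by the constant column.
  FlatAffineConstraints cst(/*numDims=*/4);
  // Peeled-loop invariant: ub - iv >= step.
  cst.addInequality({-1, 1, -1, 0, 0});
  // Bind r to the map result it represents: r == ub - iv.
  cst.addEquality({-1, 1, 0, -1, 0});
  // Negation of the claim r >= step, i.e. r <= step - 1.
  cst.addInequality({0, 0, 1, -1, -1});
  // The system is infeasible, so r >= step holds inside the peeled loop and
  // min(step, ub - iv) folds to step.
  assert(cst.isEmpty());
}
```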