diff --git a/mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h b/mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h
--- a/mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h
+++ b/mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h
@@ -16,12 +16,13 @@
 #include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
+#include "llvm/ADT/ArrayRef.h"
 
 namespace mlir {
 class AffineApplyOp;
-class AffineMap;
 class FlatAffineValueConstraints;
-struct LogicalResult;
+class Location;
+class OpBuilder;
 class Operation;
 class OpFoldResult;
 class RewriterBase;
@@ -44,11 +45,23 @@
                                 OpFoldResult &step);
 
 /// Populate the given constraint set with induction variable constraints of a
-/// "for" loop with the given range and step.
+/// "for" loop with the given range and step. The step is optional; if no step
+/// is provided, a step size of 1 is assumed.
 LogicalResult addLoopRangeConstraints(FlatAffineValueConstraints &cstr,
                                       Value iv, OpFoldResult lb,
                                       OpFoldResult ub, OpFoldResult step);
 
+/// Build a value that computes an upper bound of the result of the given
+/// AffineApplyOp without using any of the given loop induction variables. If
+/// no loop induction variables are specified, the op is made independent of
+/// all loop induction variables.
+///
+/// Return failure if no upper bound could be determined. `changed` is set to
+/// true if the returned bound differs from the given affine.apply op result.
+/// Also return failure if at least one of the given loop IVs is not actually
+/// a loop IV.
+FailureOr<Value> buildInductionVarIndependentUpperBound(
+    OpBuilder &b, Location loc, AffineApplyOp applyOp, bool *changed = nullptr,
+    std::optional<ArrayRef<Value>> ivs = std::nullopt);
+
 /// Try to canonicalize the given affine.min/max operation in the context of
 /// for `loops` with a known range.
 ///
diff --git a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
--- a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_TARGET_DEFINITIONS TensorTransformOps.td)
+mlir_tablegen(TensorTransformOps.h.inc -gen-op-decls)
+mlir_tablegen(TensorTransformOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRTensorTransformOpsIncGen)
+
+add_mlir_doc(TensorTransformOps TensorTransformOps Dialects/ -gen-op-doc)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
@@ -0,0 +1,30 @@
+//===- TensorTransformOps.h - Tensor transformation ops --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
+#define MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
+
+#include "mlir/Dialect/PDL/IR/PDLTypes.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
+#include "mlir/Dialect/Transform/IR/TransformTypes.h"
+#include "mlir/IR/OpImplementation.h"
+
+namespace mlir {
+class DialectRegistry;
+
+namespace tensor {
+class PadOp;
+
+void registerTransformDialectExtension(DialectRegistry &registry);
+} // namespace tensor
+} // namespace mlir
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc"
+
+#endif // MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -0,0 +1,66 @@
+//===- TensorTransformOps.td - Tensor transformation ops ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TENSOR_TRANSFORM_OPS
+#define TENSOR_TRANSFORM_OPS
+
+include "mlir/Dialect/PDL/IR/PDLTypes.td"
+include "mlir/Dialect/Transform/IR/TransformDialect.td"
+include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
+include "mlir/Dialect/Transform/IR/TransformTypes.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpBase.td"
+
+def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
+
+def MakeLoopIndependentOp
+    : Op<Transform_Dialect, "tensor.make_loop_independent",
+         [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+          TransformOpInterface, TransformEachOpTrait]> {
+  let description = [{
+    Rewrite the targeted ops such that their index-typed operands no longer
+    depend on any loop induction variable of the `num_loops` enclosing
+    `scf.for` loops. I.e., compute an upper bound that is independent of any
+    such loop IV for every tensor dimension. The transformed op could then be
+    hoisted from the `num_loops` enclosing loops. To preserve the original
+    semantics, a `tensor.extract_slice` op is inserted inside the loop.
+
+    Currently supported operations are:
+    - tensor.pad: Replaced by an upper bound padding, followed by a
+      tensor.extract_slice.
+
+    Note: Only index-typed operands that are affine.apply ops are taken into
+    account at the moment. Furthermore, only direct uses of `scf.for` induction
+    variables are eliminated.
+
+    #### Return modes
+
+    This operation fails if at least one induction variable could not be
+    eliminated. In case the targeted op is already independent of induction
+    variables, this transform succeeds and returns the unmodified target op.
+
+    Otherwise, the returned handle points to a subset of the produced ops:
+    - tensor.pad: The returned handle points to the tensor.extract_slice op.
+
+    This transform op consumes the target handle and produces a result handle.
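+
+    A rough usage sketch (mirroring the test case added in this patch; the
+    surrounding pipeline setup may differ):
+
+    ```mlir
+    transform.sequence failures(propagate) {
+    ^bb1(%arg1: !pdl.operation):
+      %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+          : (!pdl.operation) -> !pdl.operation
+      %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+    }
+    ```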
+  }];
+
+  let arguments = (ins PDL_Operation:$target, I64Attr:$num_loops);
+  let results = (outs PDL_Operation:$transformed);
+  let assemblyFormat = "$target attr-dict";
+
+  let extraClassDeclaration = [{
+    ::mlir::DiagnosedSilenceableFailure applyToOne(
+        ::mlir::tensor::PadOp target,
+        ::mlir::transform::ApplyToEachResultList &results,
+        ::mlir::transform::TransformState &state);
+  }];
+}
+
+#endif // TENSOR_TRANSFORM_OPS
+
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -15,6 +15,40 @@
 namespace mlir {
 namespace tensor {
 
+/// Build a new tensor::PadOp with low/high padding that is independent of all
+/// given SCF loop induction variables. If the op is already independent of
+/// loop IVs, the same PadOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for low/high padding could
+/// be found. Failure is also returned if at least one of the given IVs is not
+/// an SCF loop IV.
+///
+/// Note: This function takes into account only low/high padding values that
+/// are affine.apply ops that directly use a loop's IV.
+///
+/// Example:
+/// scf.for %iv = %lb to %ub step %step {
+///   %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
+///   %p = tensor.pad %t low[5] high[%high] ...
+///   ...
+/// }
+///
+/// The function builds IR such as:
+/// %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
+/// %p_hoistable = tensor.pad %t low[5] high[%high_new]
+/// %dim = tensor.dim %t, %c0
+/// %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+///     (%iv)[%ub, %dim]
+/// %slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
+///
+/// The slice is returned.
+///
+/// Note: Due to limitations in the FlatAffineValueConstraints, we over-
+/// approximate by assuming a step size of 1 for every loop.
+FailureOr<Value>
+buildInductionVarIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                               std::optional<ArrayRef<Value>> ivs = {});
+
 /// Populates `patterns` with patterns to wrap a tensor.pad op with an scf.if op
 /// to separate the cases where we don't need padding (all pad sizes are
 /// actually zeros) and where we indeed need padding.
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -64,6 +64,7 @@
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
 #include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
@@ -124,6 +125,7 @@
   linalg::registerTransformDialectExtension(registry);
   memref::registerTransformDialectExtension(registry);
   scf::registerTransformDialectExtension(registry);
+  tensor::registerTransformDialectExtension(registry);
   vector::registerTransformDialectExtension(registry);
 
   // Register all external models.
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
--- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
@@ -1010,7 +1010,7 @@
     unsigned offset, unsigned num, MLIRContext *context,
     SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps,
     bool getClosedUB) {
-  assert(num < getNumDimVars() && "invalid range");
+  assert(offset + num <= getNumDimVars() && "invalid range");
 
   // Basic simplification.
   normalizeConstraintsByGCD();
diff --git a/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp b/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp
--- a/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp
@@ -80,11 +80,18 @@
                                            OpFoldResult ub, OpFoldResult step) {
   Builder b(iv.getContext());
 
-  // IntegerPolyhedron does not support semi-affine expressions.
-  // Therefore, only constant step values are supported.
-  auto stepInt = getConstantIntValue(step);
-  if (!stepInt)
-    return failure();
+  int64_t stepInt;
+  if (!step) {
+    // No step given: Assume step size 1.
+    stepInt = 1;
+  } else if (auto maybeConstStep = getConstantIntValue(step)) {
+    stepInt = *maybeConstStep;
+  } else {
+    // Note: IntegerPolyhedron does not support semi-affine expressions.
+    // Therefore, only constant step values are supported. In case of non-const
+    // step sizes, we conservatively assume a step size of 1.
+    stepInt = 1;
+  }
 
   unsigned dimIv = cstr.appendDimVar(iv);
   auto lbv = lb.dyn_cast<Value>();
@@ -110,7 +117,13 @@
 
   // Upper bound
   AffineExpr ivUb;
-  if (lbInt && ubInt && (*lbInt + *stepInt >= *ubInt)) {
+  AffineExpr exprLb = lbInt
+                          ? b.getAffineConstantExpr(*lbInt)
+                          : b.getAffineSymbolExpr(symLb - cstr.getNumDimVars());
+  AffineExpr exprUb = ubInt
+                          ? b.getAffineConstantExpr(*ubInt)
+                          : b.getAffineSymbolExpr(symUb - cstr.getNumDimVars());
+  if (lbInt && ubInt && (*lbInt + stepInt >= *ubInt)) {
     // The loop has at most one iteration.
     // iv < lb + 1
     // TODO: Try to derive this constraint by simplifying the expression in
@@ -119,13 +132,7 @@
   } else {
     // The loop may have more than one iteration.
     // iv < lb + step * ((ub - lb - 1) floorDiv step) + 1
-    AffineExpr exprLb =
-        lbInt ? b.getAffineConstantExpr(*lbInt)
-              : b.getAffineSymbolExpr(symLb - cstr.getNumDimVars());
-    AffineExpr exprUb =
-        ubInt ? b.getAffineConstantExpr(*ubInt)
-              : b.getAffineSymbolExpr(symUb - cstr.getNumDimVars());
-    ivUb = exprLb + 1 + (*stepInt * ((exprUb - exprLb - 1).floorDiv(*stepInt)));
+    ivUb = exprLb + 1 + (stepInt * ((exprUb - exprLb - 1).floorDiv(stepInt)));
   }
   auto map = AffineMap::get(
       /*dimCount=*/cstr.getNumDimVars(),
@@ -134,6 +141,106 @@
   return cstr.addBound(IntegerPolyhedron::UB, dimIv, map);
 }
 
+static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
+                                 SmallVector<Value> &target) {
+  target =
+      llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
+        return val.has_value() ? *val : Value();
+      }));
+}
+
+/// Bound an identifier `pos` in a given FlatAffineValueConstraints with
+/// constraints drawn from an affine map. Before adding the constraint, the
+/// dimensions/symbols of the affine map are aligned with `constraints`.
+/// `operands` are the SSA Value operands used with the affine map.
+/// Note: This function adds a new symbol column to the `constraints` for each
+/// dimension/symbol that exists in the affine map but not in `constraints`.
+static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
+                                      IntegerPolyhedron::BoundType type,
+                                      unsigned pos, AffineMap map,
+                                      ValueRange operands) {
+  SmallVector<Value> dims, syms, newSyms;
+  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
+  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);
+
+  AffineMap alignedMap =
+      alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
+  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
+    constraints.appendSymbolVar(newSyms[i]);
+  return constraints.addBound(type, pos, alignedMap);
+}
+
+FailureOr<Value> scf::buildInductionVarIndependentUpperBound(
+    OpBuilder &b, Location loc, AffineApplyOp applyOp, bool *changed,
+    std::optional<ArrayRef<Value>> ivs) {
+  if (changed)
+    *changed = false;
+
+  // Build a constraint set for the enclosing loops.
+  FlatAffineValueConstraints cstr;
+  unsigned applyOpDim = cstr.appendDimVar();
+
+  SmallVector<Value> allIvs;
+  // Find all iteration variables among the operands and constrain them.
+  for (Value operand : applyOp->getOperands()) {
+    // Skip duplicate ivs.
+    if (llvm::is_contained(allIvs, operand))
+      continue;
+
+    // If `operand` is an iteration variable: Find the corresponding loop
+    // bounds and step.
+    OpFoldResult lb, ub, step;
+    Value iv = operand;
+    if (ivs.has_value()) {
+      // Check if this iv should be eliminated.
+      if (!llvm::is_contained(*ivs, iv))
+        continue;
+      // Fail if one of the given ivs is not actually a loop IV.
+      if (failed(matchForLikeLoop(operand, lb, ub, step)))
+        return failure();
+    } else {
+      if (failed(matchForLikeLoop(operand, lb, ub, step)))
+        continue;
+    }
+    allIvs.push_back(iv);
+
+    if (failed(addLoopRangeConstraints(cstr, iv, lb, ub,
+                                       /*step=*/OpFoldResult())))
+      return failure();
+  }
+  // Nothing to do if the op does not depend on any loop IV: return the
+  // original result with `changed` remaining false.
+  if (allIvs.empty())
+    return Value(applyOp.getResult());
+
+  // Add the affine map of the affine.apply op.
+  if (failed(alignAndAddBound(
+          cstr, presburger::IntegerPolyhedron::BoundType::EQ, applyOpDim,
+          applyOp.getAffineMap(), applyOp->getOperands())))
+    return failure();
+
+  // Project out all iteration variables.
+  for (Value iv : allIvs)
+    cstr.projectOut(iv);
+
+  // Compute an upper bound for the affine.apply op.
+  SmallVector<AffineMap> opLb(1), opUb(1);
+  cstr.getSliceBounds(applyOpDim, 1, applyOp->getContext(), &opLb, &opUb);
+  if (opUb.empty() || !opUb[0])
+    return failure();
+  assert(opUb[0].getNumResults() == 1 && "expected single result");
+
+  // Create new AffineApplyOp.
+  if (changed)
+    *changed = true;
+  // Turn the open bound into a closed bound.
+  AffineMap newMap = AffineMap::get(
+      opUb[0].getNumDims(), opUb[0].getNumSymbols(), opUb[0].getResult(0) - 1);
+  SmallVector<Value> newOperands;
+  for (auto maybeValue : cstr.getMaybeValues().drop_front())
+    newOperands.push_back(*maybeValue);
+  mlir::canonicalizeMapAndOperands(&newMap, &newOperands);
+  return b.create<AffineApplyOp>(loc, newMap, newOperands).getResult();
+}
+
 /// Canonicalize min/max operations in the context of for loops with a known
 /// range. Call `canonicalizeMinMaxOp` and add the following constraints to
 /// the constraint system (along with the missing dimensions):
diff --git a/mlir/lib/Dialect/Tensor/CMakeLists.txt b/mlir/lib/Dialect/Tensor/CMakeLists.txt
--- a/mlir/lib/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
 add_subdirectory(Utils)
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_mlir_dialect_library(MLIRTensorTransformOps
+  TensorTransformOps.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor/TransformOps
+
+  DEPENDS
+  MLIRTensorTransformOpsIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRAffineDialect
+  MLIRIR
+  MLIRPDLDialect
+  MLIRSCFDialect
+  MLIRTensorTransforms
+  MLIRTransformDialect
+)
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -0,0 +1,86 @@
+//===- TensorTransformOps.cpp - Implementation of tensor transform ops ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Transform/IR/TransformDialect.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
+#include "mlir/Dialect/Transform/IR/TransformUtils.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// MakeLoopIndependentOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure transform::MakeLoopIndependentOp::applyToOne(
+    tensor::PadOp target, transform::ApplyToEachResultList &results,
+    transform::TransformState &state) {
+  // Gather IVs.
+  SmallVector<Value> ivs;
+  Operation *nextOp = target;
+  for (uint64_t i = 0; i < getNumLoops(); ++i) {
+    nextOp = nextOp->getParentOfType<scf::ForOp>();
+    if (!nextOp) {
+      DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                         << "could not find " << i
+                                         << "-th enclosing loop";
+      diag.attachNote(target->getLoc()) << "target op";
+      return diag;
+    }
+    ivs.push_back(cast<scf::ForOp>(nextOp).getInductionVar());
+  }
+
+  // Rewrite IR.
+  IRRewriter rewriter(target->getContext());
+  FailureOr<Value> replacement =
+      tensor::buildInductionVarIndependentOp(rewriter, target, ivs);
+  if (failed(replacement)) {
+    DiagnosedSilenceableFailure diag =
+        emitSilenceableError() << "could not make target op loop-independent";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  rewriter.replaceOp(target, *replacement);
+  results.push_back(replacement->getDefiningOp());
+  return DiagnosedSilenceableFailure::success();
+}
+
+//===----------------------------------------------------------------------===//
+// Transform op registration
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TensorTransformDialectExtension
+    : public transform::TransformDialectExtension<
+          TensorTransformDialectExtension> {
+public:
+  using Base::Base;
+
+  void init() {
+    declareGeneratedDialect<AffineDialect>();
+    declareGeneratedDialect<tensor::TensorDialect>();
+
+    registerTransformOps<
+#define GET_OP_LIST
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+        >();
+  }
+};
+} // namespace
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+
+void mlir::tensor::registerTransformDialectExtension(
+    DialectRegistry &registry) {
+  registry.addExtensions<TensorTransformDialectExtension>();
+}
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -4,6 +4,7 @@
   EmptyOpPatterns.cpp
   ExtractSliceFromReshapeUtils.cpp
   FoldIntoPackAndUnpackPatterns.cpp
+  LoopTransforms.cpp
   MergeConsecutiveInsertExtractSlicePatterns.cpp
   ReshapePatterns.cpp
   SplitPaddingPatterns.cpp
@@ -26,6 +27,7 @@
   MLIRMemRefDialect
   MLIRPass
   MLIRSCFDialect
+  MLIRSCFUtils
   MLIRTensorDialect
   MLIRTilingInterface
   MLIRTransforms
diff --git a/mlir/lib/Dialect/Tensor/Transforms/LoopTransforms.cpp b/mlir/lib/Dialect/Tensor/Transforms/LoopTransforms.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Transforms/LoopTransforms.cpp
@@ -0,0 +1,107 @@
+//===- LoopTransforms.cpp - Transforms wrt. loops -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+// Compute upper bounds for low/high padding such that they are independent of
+// any SCF loop induction variables.
+FailureOr<Value>
+tensor::buildInductionVarIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                       std::optional<ArrayRef<Value>> ivs) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(padOp);
+  Location loc = padOp.getLoc();
+
+  // Non-constant padding not supported.
+  Value constantPadding = padOp.getConstantPaddingValue();
+  if (!constantPadding)
+    return failure();
+
+  // Try to compute upper bounds for the given values if they are affine.apply
+  // ops. If they are not affine.apply ops or if the affine.apply ops do not
+  // directly depend on loop IVs, simply store them in `result`.
+  bool foundUb = false;
+  auto computeUpperBounds = [&](ValueRange values,
+                                SmallVector<Value> &result) {
+    for (Value v : values) {
+      auto applyOp = v.getDefiningOp<AffineApplyOp>();
+      if (!applyOp) {
+        result.push_back(v);
+        continue;
+      }
+      bool changed;
+      auto ub = scf::buildInductionVarIndependentUpperBound(b, loc, applyOp,
+                                                            &changed, ivs);
+      if (failed(ub) || !changed) {
+        result.push_back(v);
+        continue;
+      }
+      result.push_back(*ub);
+      foundUb = true;
+    }
+  };
+
+  // Compute new low/high padding.
+  SmallVector<Value> newLow, newHigh;
+  computeUpperBounds(padOp.getLow(), newLow);
+  computeUpperBounds(padOp.getHigh(), newHigh);
+  // Return failure if no upper bound was computed. (This function would be a
+  // no-op.)
+  if (!foundUb)
+    return failure();
+  SmallVector<OpFoldResult> newMixedLow =
+      getMixedValues(padOp.getStaticLow(), newLow, b);
+  SmallVector<OpFoldResult> newMixedHigh =
+      getMixedValues(padOp.getStaticHigh(), newHigh, b);
+
+  // Create a new tensor::PadOp.
+  auto newPadOp = b.create<tensor::PadOp>(
+      loc, padOp.getResultType(), padOp.getSource(), newMixedLow, newMixedHigh,
+      constantPadding, padOp.getNofold(), /*attrs=*/ArrayRef<NamedAttribute>{});
+
+  // Create a tensor::ExtractSliceOp.
+  // Reify the result sizes of the old tensor::PadOp.
+  ReifiedRankedShapedTypeDims reifiedSizes;
+  ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+      dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
+  if (failed(reifyShapedTypeInterface.reifyResultShapes(b, reifiedSizes)))
+    return failure();
+  SmallVector<OpFoldResult> offsets, sizes, strides;
+  for (int64_t i = 0; i < padOp.getResultType().getRank(); ++i) {
+    // offset = ub(low_padding) - low_padding
+    OpFoldResult prevLow = padOp.getMixedLowPad()[i];
+    if (prevLow.is<Attribute>()) {
+      offsets.push_back(b.getIndexAttr(0));
+    } else {
+      offsets.push_back(
+          b.create<AffineApplyOp>(
+               loc, b.getAffineDimExpr(0) - b.getAffineDimExpr(1),
+               std::initializer_list<Value>{newMixedLow[i].get<Value>(),
+                                            prevLow.get<Value>()})
+              .getResult());
+    }
+    // size = reified result size
+    if (!padOp.getResultType().isDynamicDim(i)) {
+      sizes.push_back(b.getIndexAttr(padOp.getResultType().getDimSize(i)));
+    } else {
+      sizes.push_back(reifiedSizes[0][i]);
+    }
+    // stride = 1
+    strides.push_back(b.getIndexAttr(1));
+  }
+
+  return b.create<tensor::ExtractSliceOp>(loc, newPadOp, offsets, sizes,
+                                          strides)
+      .getResult();
+}
diff --git a/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
@@ -0,0 +1,125 @@
+// RUN: mlir-opt %s -allow-unregistered-dialect \
+// RUN:     -test-transform-dialect-interpreter -canonicalize \
+// RUN:     -split-input-file -verify-diagnostics | FileCheck %s
+
+// This is a test case where "high" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (-s0 + s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK-LABEL: func @make_pad_loop_independent_1(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[high:.*]] = affine.apply #[[$map]]()[%[[lb]], %[[ub]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[5] high[%[[high]]]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][0] [%[[size]]] [1]
+    %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[5] high[%high] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// This is a test case where "low" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (-s0 + s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK: #[[$map2:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
+// CHECK-LABEL: func @make_pad_loop_independent_2(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_2(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[low:.*]] = affine.apply #[[$map]]()[%[[lb]], %[[ub]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[%[[low]]] high[5]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[offset:.*]] = affine.apply #[[$map2]](%[[iv]])[%[[lb]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][%[[offset]]] [%[[size]]] [1]
+    %low = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[%low] high[5] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2 - 2)>
+// CHECK-LABEL: func @two_loops(
+func.func @two_loops(%lb: index, %ub: index, %step: index,
+                     %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      // CHECK: affine.apply #[[$map]]()[%{{.*}}]
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
+}
+
+// -----
+
+func.func @not_enough_loops(%lb: index, %ub: index, %step: index,
+                            %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      // expected-note@below {{target op}}
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  // expected-error@below {{could not find 2-th enclosing loop}}
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 3}
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5543,6 +5543,7 @@
         ":MemRefDialect",
        ":Pass",
        ":SCFDialect",
+        ":SCFUtils",
        ":TensorDialect",
        ":TensorPassIncGen",
        ":TilingInterface",
@@ -5551,6 +5552,56 @@
    ],
)
 
+td_library(
+    name = "TensorTransformOpsTdFiles",
+    srcs = [
+        "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    ],
+    includes = ["include"],
+    deps = [
+        ":PDLDialect",
+        ":TransformDialectTdFiles",
+    ],
+)
+
+gentbl_cc_library(
+    name = "TensorTransformOpsIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-decls"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc",
+        ),
+        (
+            ["-gen-op-defs"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    deps = [
+        ":TensorTransformOpsTdFiles",
+    ],
+)
+
+cc_library(
+    name = "TensorTransformOps",
+    srcs = glob(["lib/Dialect/Tensor/TransformOps/*.cpp"]),
+    hdrs = glob(["include/mlir/Dialect/Tensor/TransformOps/*.h"]),
+    includes = ["include"],
+    deps = [
+        ":AffineDialect",
+        ":IR",
+        ":PDLDialect",
+        ":SCFDialect",
+        ":TensorDialect",
+        ":TensorTransformOpsIncGen",
+        ":TensorTransforms",
+        ":TransformDialect",
+        "//llvm:Support",
+    ],
+)
+
 cc_library(
     name = "Rewrite",
     srcs = glob([
@@ -6980,6 +7031,7 @@
        ":TensorDialect",
        ":TensorInferTypeOpInterfaceImpl",
        ":TensorTilingInterfaceImpl",
+        ":TensorTransformOps",
        ":TensorTransforms",
        ":TosaDialect",
        ":TosaToLinalg",