diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -18,6 +18,7 @@ #include namespace mlir { + namespace func { class FuncOp; } // namespace func @@ -110,10 +111,6 @@ /// memory hierarchy. std::unique_ptr> createPipelineDataTransferPass(); -/// Populate patterns that expand affine index operations into more fundamental -/// operations (not necessarily restricted to Affine dialect). -void populateAffineExpandIndexOpsPatterns(RewritePatternSet &patterns); - /// Creates a pass to expand affine index operations into more fundamental /// operations (not necessarily restricted to Affine dialect). std::unique_ptr createAffineExpandIndexOpsPass(); diff --git a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h @@ -0,0 +1,47 @@ +//===- Transforms.h - Transforms Entrypoints ---------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines a set of transforms specific for the AffineOps +// dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H +#define MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H + +#include "mlir/Support/LogicalResult.h" + +namespace mlir { +class RewritePatternSet; +class RewriterBase; +class AffineApplyOp; + +/// Populate patterns that expand affine index operations into more fundamental +/// operations (not necessarily restricted to Affine dialect). +void populateAffineExpandIndexOpsPatterns(RewritePatternSet &patterns); + +/// Helper function to rewrite `op`'s affine map and reorder its operands such +/// that they are in increasing order of hoistability (i.e. the least hoistable) +/// operands come first in the operand list. +void reorderOperandsByHoistability(RewriterBase &rewriter, AffineApplyOp op); + +/// Split an "affine.apply" operation into 2 smaller ops, exhibiting +/// opportunities for CSE and LICM. +/// Return the sink AffineApplyOp on success or failure if the +/// Note that this can be currently undone by canonicalization which tries to +/// maximally compose chains of AffineApplyOps. +FailureOr decompose(RewriterBase &rewriter, AffineApplyOp op); + +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +} // namespace mlir + +#endif // MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOps.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOps.cpp --- a/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOps.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOps.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/Transforms/Transforms.h" #include "mlir/Dialect/Affine/Utils.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ AffineLoopNormalize.cpp AffineParallelize.cpp AffineScalarReplacement.cpp + DecomposeAffineOps.cpp LoopCoalescing.cpp LoopFusion.cpp LoopTiling.cpp diff --git a/mlir/lib/Dialect/Affine/Transforms/DecomposeAffineOps.cpp b/mlir/lib/Dialect/Affine/Transforms/DecomposeAffineOps.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Affine/Transforms/DecomposeAffineOps.cpp @@ -0,0 +1,156 @@ +//===- DecomposeAffineOps.cpp - Decompose affine ops into finer-grained ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements functionality to progressively decompose coarse-grained +// affine ops into finer-grained ops. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/Transforms/Transforms.h" +#include "mlir/Dialect/Affine/Utils.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/Support/Debug.h" +#include + +using namespace mlir; + +#define DEBUG_TYPE "decompose-affine-ops" +#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") +#define DBGSNL() (llvm::dbgs() << "\n") + +/// Count the number of invariant enclosing +static int64_t numEnclosingInvariantLoops(OpOperand &operand) { + int64_t count = 0; + Operation *currentOp = operand.getOwner(); + while (auto loopOp = currentOp->getParentOfType()) { + if (!loopOp.isDefinedOutsideOfLoop(operand.get())) + break; + currentOp = loopOp; + count++; + } + return count; +} + +/// Reorder operands by the number of loops above which the operand is defined. +/// This allows us to unambiguously +void mlir::reorderOperandsByHoistability(RewriterBase &rewriter, + AffineApplyOp op) { + SmallVector numInvariant = llvm::to_vector<4>( + llvm::map_range(op->getOpOperands(), [&](OpOperand &operand) { + return numEnclosingInvariantLoops(operand); + })); + + int64_t numOperands = op.getNumOperands(); + SmallVector operandPositions = + llvm::to_vector<4>(llvm::seq(0, numOperands)); + std::sort(operandPositions.begin(), operandPositions.end(), + [&numInvariant](size_t i1, size_t i2) { + return numInvariant[i1] < numInvariant[i2]; + }); + + SmallVector replacements(numOperands); + SmallVector operands(numOperands); + for (int64_t i = 0; i < numOperands; ++i) { + operands[i] = op.getOperand(operandPositions[i]); + replacements[operandPositions[i]] = getAffineSymbolExpr(i, op.getContext()); + } + + AffineMap map = op.getAffineMap(); + ArrayRef repls{replacements}; + map = map.replaceDimsAndSymbols(repls.take_front(map.getNumDims()), + repls.drop_front(map.getNumDims()), + /*numResultDims=*/0, + /*numResultSyms=*/numOperands); + map = AffineMap::get(0, numOperands, + simplifyAffineExpr(map.getResult(0), 0, numOperands), + op->getContext()); + + rewriter.startRootUpdate(op); + op.setMap(map); + op->setOperands(operands); + rewriter.finalizeRootUpdate(op); +} + +/// Build an affine.apply that is a subexpression `expr` of `originalOp`s affine +/// map and with the same operands. +/// Canonicalize the map and operands to deduplicate and drop dead operands +/// before returning but do not perform maximal composition of AffineApplyOp +/// which would defeat the purpose. +static AffineApplyOp createSubApply(RewriterBase &rewriter, + AffineApplyOp originalOp, AffineExpr expr) { + MLIRContext *ctx = originalOp->getContext(); + AffineMap m = originalOp.getAffineMap(); + auto rhsMap = AffineMap::get(m.getNumDims(), m.getNumSymbols(), expr, ctx); + SmallVector rhsOperands = originalOp->getOperands(); + canonicalizeMapAndOperands(&rhsMap, &rhsOperands); + return rewriter.create(originalOp.getLoc(), rhsMap, + rhsOperands); +} + +/// Split an "affine.apply" operation into 2 smaller ops, exhibiting +/// opportunities for CSE and LICM. +/// Note that this can be currently undone by canonicalization which tries to +/// maximally compose chains of AffineApplyOps. +FailureOr mlir::decompose(RewriterBase &rewriter, + AffineApplyOp op) { + AffineMap m = op.getAffineMap(); + AffineExpr exp = m.getResult(0); + auto binExpr = exp.dyn_cast(); + if (!binExpr) + return rewriter.notifyMatchFailure(op, "terminal affine.apply"); + + if (!binExpr.getLHS().isa() && + !binExpr.getRHS().isa()) + return rewriter.notifyMatchFailure(op, "terminal affine.apply"); + + bool supportedKind = ((binExpr.getKind() == AffineExprKind::Add) || + (binExpr.getKind() == AffineExprKind::Mul)); + if (!supportedKind) + return rewriter.notifyMatchFailure( + op, "only add or mul binary expr can be reassociated"); + + LLVM_DEBUG(DBGS() << "Start decomposeIntoFinerGrainedOps: " << op << "\n"); + + // Iteratively extract the RHS while the binary operation does not change. + // When done, we have an ordered list of affine.apply ops that we can + // reassociate. + MLIRContext *ctx = op->getContext(); + SmallVector rhsOps; + while (auto currentBinExpr = exp.dyn_cast()) { + if (currentBinExpr.getKind() != binExpr.getKind()) { + rhsOps.push_back(createSubApply(rewriter, op, currentBinExpr)); + LLVM_DEBUG(DBGS() << "--subapply: " << rhsOps.back() << "\n"); + break; + } + rhsOps.push_back(createSubApply(rewriter, op, currentBinExpr.getRHS())); + LLVM_DEBUG(DBGS() << "--subapply: " << rhsOps.back() << "\n"); + exp = currentBinExpr.getLHS(); + } + + // Merge back iteratively, thus achieving reassociation. + auto s0 = getAffineSymbolExpr(0, ctx); + auto s1 = getAffineSymbolExpr(1, ctx); + AffineMap binMap = AffineMap::get( + /*dimCount=*/0, /*symbolCount=*/2, + getAffineBinaryOpExpr(binExpr.getKind(), s0, s1), ctx); + AffineApplyOp rhs = rhsOps[0]; + for (int64_t i = 0, e = rhsOps.size(); i + 1 < e; ++i) { + rhs = rewriter.create(op.getLoc(), binMap, + ValueRange{rhs, rhsOps[i + 1]}); + LLVM_DEBUG(DBGS() << "--reassociate into: " << rhs << "\n"); + } + + rewriter.replaceOp(op, rhs.getResult()); + return rhs; +}