diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td @@ -621,11 +621,6 @@ /// Get the number of dimensions. unsigned getNumDims(); - operand_range getLowerBoundsOperands(); - operand_range getUpperBoundsOperands(); - - AffineValueMap getLowerBoundsValueMap(); - AffineValueMap getUpperBoundsValueMap(); AffineValueMap getRangesValueMap(); /// Get ranges as constants, may fail in dynamic case. @@ -636,6 +631,18 @@ MutableArrayRef getIVs() { return getBody()->getArguments(); } + + operand_range getLowerBoundsOperands(); + AffineValueMap getLowerBoundsValueMap(); + void setLowerBounds(ValueRange operands, AffineMap map); + void setLowerBoundsMap(AffineMap map); + + operand_range getUpperBoundsOperands(); + AffineValueMap getUpperBoundsValueMap(); + void setUpperBounds(ValueRange operands, AffineMap map); + void setUpperBoundsMap(AffineMap map); + + SmallVector getSteps(); void setSteps(ArrayRef newSteps); static StringRef getReductionsAttrName() { return "reductions"; } @@ -643,6 +650,8 @@ static StringRef getUpperBoundsMapAttrName() { return "upperBoundsMap"; } static StringRef getStepsAttrName() { return "steps"; } }]; + + let hasFolder = 1; } def AffinePrefetchOp : Affine_Op<"prefetch"> { diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h b/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h --- a/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h @@ -74,6 +74,10 @@ ArrayRef getOperands() const; AffineMap getAffineMap() const; + /// Attempts to canonicalize the map and operands. Return success if the map + /// and/or operands have been modified. + LogicalResult canonicalize(); + private: // A mutable affine map. MutableAffineMap map; diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -35,6 +35,9 @@ /// ops. std::unique_ptr> createAffineParallelizePass(); +/// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1. +std::unique_ptr> createAffineParallelNormalizePass(); + /// Performs packing (or explicit copying) of accessed memref regions into /// buffers in the specified faster memory space through either pointwise copies /// or DMA operations. diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -118,6 +118,12 @@ let constructor = "mlir::createAffineParallelizePass()"; } +def AffineParallelNormalize : FunctionPass<"affine-parallel-normalize"> { + let summary = "Normalize affine.parallel ops so that lower bounds are 0 and " + "steps are 1"; + let constructor = "mlir::createAffineParallelNormalizePass()"; +} + def SimplifyAffineStructures : FunctionPass<"simplify-affine-structures"> { let summary = "Simplify affine expressions in maps/sets and normalize " "memrefs"; diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h --- a/mlir/include/mlir/Dialect/Affine/Utils.h +++ b/mlir/include/mlir/Dialect/Affine/Utils.h @@ -43,6 +43,11 @@ llvm::DenseSet> &loops, ArrayRef vectorSizes, ArrayRef fastestVaryingPattern); +/// Normalize a affine.parallel op so that lower bounds are 0 and steps are 1. +/// As currently implemented, this transformation cannot fail and will return +/// early if the op is already in a normalized form. +void normalizeAffineParallel(AffineParallelOp op); + } // namespace mlir #endif // MLIR_DIALECT_AFFINE_UTILS_H diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -2505,9 +2505,58 @@ return OpBuilder(getBody(), std::prev(getBody()->end())); } +void AffineParallelOp::setLowerBounds(ValueRange lbOperands, AffineMap map) { + assert(lbOperands.size() == map.getNumInputs() && + "operands to map must match number of inputs"); + assert(map.getNumResults() >= 1 && "bounds map has at least one result"); + + auto ubOperands = getUpperBoundsOperands(); + + SmallVector newOperands(lbOperands); + newOperands.append(ubOperands.begin(), ubOperands.end()); + getOperation()->setOperands(newOperands); + + lowerBoundsMapAttr(AffineMapAttr::get(map)); +} + +void AffineParallelOp::setUpperBounds(ValueRange ubOperands, AffineMap map) { + assert(ubOperands.size() == map.getNumInputs() && + "operands to map must match number of inputs"); + assert(map.getNumResults() >= 1 && "bounds map has at least one result"); + + SmallVector newOperands(getLowerBoundsOperands()); + newOperands.append(ubOperands.begin(), ubOperands.end()); + getOperation()->setOperands(newOperands); + + upperBoundsMapAttr(AffineMapAttr::get(map)); +} + +void AffineParallelOp::setLowerBoundsMap(AffineMap map) { + AffineMap lbMap = lowerBoundsMap(); + assert(lbMap.getNumDims() == map.getNumDims() && + lbMap.getNumSymbols() == map.getNumSymbols()); + (void)lbMap; + lowerBoundsMapAttr(AffineMapAttr::get(map)); +} + +void AffineParallelOp::setUpperBoundsMap(AffineMap map) { + AffineMap ubMap = upperBoundsMap(); + assert(ubMap.getNumDims() == map.getNumDims() && + ubMap.getNumSymbols() == map.getNumSymbols()); + (void)ubMap; + upperBoundsMapAttr(AffineMapAttr::get(map)); +} + +SmallVector AffineParallelOp::getSteps() { + SmallVector result; + for (Attribute attr : steps()) { + result.push_back(attr.cast().getInt()); + } + return result; +} + void AffineParallelOp::setSteps(ArrayRef newSteps) { - assert(newSteps.size() == getNumDims() && "steps & num dims mismatch"); - setAttr(getStepsAttrName(), getBodyBuilder().getI64ArrayAttr(newSteps)); + stepsAttr(getBodyBuilder().getI64ArrayAttr(newSteps)); } static LogicalResult verify(AffineParallelOp op) { @@ -2541,6 +2590,41 @@ return success(); } +LogicalResult AffineValueMap::canonicalize() { + SmallVector newOperands{operands}; + auto newMap = getAffineMap(); + composeAffineMapAndOperands(&newMap, &newOperands); + if (newMap == getAffineMap() && newOperands == operands) + return failure(); + reset(newMap, newOperands); + return success(); +} + +/// Canonicalize the bounds of the given loop. +static LogicalResult canonicalizeLoopBounds(AffineParallelOp op) { + AffineValueMap lb = op.getLowerBoundsValueMap(); + bool lbCanonicalized = succeeded(lb.canonicalize()); + + AffineValueMap ub = op.getUpperBoundsValueMap(); + bool ubCanonicalized = succeeded(ub.canonicalize()); + + // Any canonicalization change always leads to updated map(s). + if (!lbCanonicalized && !ubCanonicalized) + return failure(); + + if (lbCanonicalized) + op.setLowerBounds(lb.getOperands(), lb.getAffineMap()); + if (ubCanonicalized) + op.setUpperBounds(ub.getOperands(), ub.getAffineMap()); + + return success(); +} + +LogicalResult AffineParallelOp::fold(ArrayRef operands, + SmallVectorImpl &results) { + return canonicalizeLoopBounds(*this); +} + static void print(OpAsmPrinter &p, AffineParallelOp op) { p << op.getOperationName() << " (" << op.getBody()->getArguments() << ") = ("; p.printAffineMapOfSSAIds(op.lowerBoundsMapAttr(), @@ -2549,13 +2633,8 @@ p.printAffineMapOfSSAIds(op.upperBoundsMapAttr(), op.getUpperBoundsOperands()); p << ')'; - SmallVector steps; - bool elideSteps = true; - for (auto attr : op.steps()) { - auto step = attr.cast().getInt(); - elideSteps &= (step == 1); - steps.push_back(step); - } + SmallVector steps = op.getSteps(); + bool elideSteps = llvm::all_of(steps, [](int64_t step) { return step == 1; }); if (!elideSteps) { p << " step ("; llvm::interleaveComma(steps, p); @@ -2641,7 +2720,7 @@ } // Parse optional clause of the form: `reduce ("addf", "maxf")`, where the - // quoted strings a member of the enum AtomicRMWKind. + // quoted strings are a member of the enum AtomicRMWKind. SmallVector reductions; if (succeeded(parser.parseOptionalKeyword("reduce"))) { if (parser.parseLParen()) diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelNormalize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelNormalize.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelNormalize.cpp @@ -0,0 +1,96 @@ +//===- AffineParallelNormalize.cpp - AffineParallelNormalize Pass ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a normalizer for affine parallel loops. +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/IR/AffineValueMap.h" +#include "mlir/Dialect/Affine/Passes.h" +#include "mlir/IR/PatternMatch.h" + +using namespace mlir; + +void normalizeAffineParallel(AffineParallelOp op) { + AffineMap lbMap = op.lowerBoundsMap(); + SmallVector steps = op.getSteps(); + // No need to do any work if the parallel op is already normalized. + bool isAlreadyNormalized = + llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) { + int64_t step = std::get<0>(tuple); + auto lbExpr = + std::get<1>(tuple).template dyn_cast(); + return lbExpr && lbExpr.getValue() == 0 && step == 1; + }); + if (isAlreadyNormalized) + return; + + AffineValueMap ranges = op.getRangesValueMap(); + auto builder = OpBuilder::atBlockBegin(op.getBody()); + auto zeroExpr = builder.getAffineConstantExpr(0); + SmallVector lbExprs; + SmallVector ubExprs; + for (unsigned i = 0, e = steps.size(); i < e; ++i) { + int64_t step = steps[i]; + + // Adjust the lower bound to be 0. + lbExprs.push_back(zeroExpr); + + // Adjust the upper bound expression: 'range / step'. + AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step); + ubExprs.push_back(ubExpr); + + // Adjust the corresponding IV: 'lb + i * step'. + BlockArgument iv = op.getBody()->getArgument(i); + AffineExpr lbExpr = lbMap.getResult(i); + unsigned nDims = lbMap.getNumDims(); + auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step; + auto map = AffineMap::get(/*dimCount=*/nDims + 1, + /*symbolCount=*/lbMap.getNumSymbols(), expr); + + // Use an 'affine.apply' op that will be simplified later in subsequent + // canonicalizations. + OperandRange lbOperands = op.getLowerBoundsOperands(); + OperandRange dimOperands = lbOperands.take_front(nDims); + OperandRange symbolOperands = lbOperands.drop_front(nDims); + SmallVector applyOperands{dimOperands}; + applyOperands.push_back(iv); + applyOperands.append(symbolOperands.begin(), symbolOperands.end()); + auto apply = builder.create(op.getLoc(), map, applyOperands); + iv.replaceAllUsesExcept(apply, SmallPtrSet{apply}); + } + + SmallVector newSteps(op.getNumDims(), 1); + op.setSteps(newSteps); + auto newLowerMap = AffineMap::get( + /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext()); + op.setLowerBounds({}, newLowerMap); + auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(), + ubExprs, op.getContext()); + op.setUpperBounds(ranges.getOperands(), newUpperMap); +} + +namespace { + +/// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1. +/// As currently implemented, this pass cannot fail, but it might skip over ops +/// that are already in a normalized form. +struct AffineParallelNormalizePass + : public AffineParallelNormalizeBase { + + void runOnFunction() override { getFunction().walk(normalizeAffineParallel); } +}; + +} // namespace + +std::unique_ptr> +mlir::createAffineParallelNormalizePass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt @@ -2,6 +2,7 @@ AffineDataCopyGeneration.cpp AffineLoopInvariantCodeMotion.cpp AffineParallelize.cpp + AffineParallelNormalize.cpp LoopTiling.cpp LoopUnroll.cpp LoopUnrollAndJam.cpp diff --git a/mlir/test/Dialect/Affine/affine-parallel-normalize.mlir b/mlir/test/Dialect/Affine/affine-parallel-normalize.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/Affine/affine-parallel-normalize.mlir @@ -0,0 +1,25 @@ +// RUN: mlir-opt %s -affine-parallel-normalize -split-input-file | FileCheck %s + +// Normalize steps to 1 and lower bounds to 0. + +// CHECK-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 * 3)> +// CHECK-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 * 2 + 1)> +// CHECK-DAG: [[$MAP2:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> + +// CHECK-LABEL: func @normalize_parallel() +func @normalize_parallel() { + %cst = constant 1.0 : f32 + %0 = alloc() : memref<2x4xf32> + // CHECK: affine.parallel (%[[i0:.*]], %[[j0:.*]]) = (0, 0) to (4, 2) + affine.parallel (%i, %j) = (0, 1) to (10, 5) step (3, 2) { + // CHECK: %[[i1:.*]] = affine.apply [[$MAP0]](%[[i0]]) + // CHECK: %[[j1:.*]] = affine.apply [[$MAP1]](%[[j0]]) + // CHECK: affine.parallel (%[[k0:.*]]) = (0) to (%[[j1]] - %[[i1]]) + affine.parallel (%k) = (%i) to (%j) { + // CHECK: %[[k1:.*]] = affine.apply [[$MAP2]](%[[i1]], %[[k0]]) + // CHECK: affine.store %{{.*}}, %{{.*}}[%[[i1]], %[[k1]]] : memref<2x4xf32> + affine.store %cst, %0[%i, %k] : memref<2x4xf32> + } + } + return +} diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir --- a/mlir/test/Dialect/Affine/canonicalize.mlir +++ b/mlir/test/Dialect/Affine/canonicalize.mlir @@ -604,3 +604,26 @@ } return } + +// ----- + +// Ensure affine.parallel bounds expressions are canonicalized. + +#map3 = affine_map<(d0) -> (d0 * 5)> + +// CHECK-LABEL: func @affine_parallel_const_bounds +func @affine_parallel_const_bounds() { + %cst = constant 1.0 : f32 + %c0 = constant 0 : index + %c4 = constant 4 : index + %0 = alloc() : memref<4xf32> + // CHECK: affine.parallel (%{{.*}}) = (0) to (4) + affine.parallel (%i) = (%c0) to (%c0 + %c4) { + %1 = affine.apply #map3(%i) + // CHECK: affine.parallel (%{{.*}}) = (0) to (%{{.*}} * 5) + affine.parallel (%j) = (%c0) to (%1) { + affine.store %cst, %0[%j] : memref<4xf32> + } + } + return +}