diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h @@ -17,7 +17,7 @@ namespace linalg { class PadTensorOp; -/// Mechanically hoist padding operations on tensors by `nLoops` into a new, +/// Mechanically hoist padding operations on tensors by `numLoops` into a new, /// generally larger tensor. This achieves packing of multiple padding ops into /// a larger tensor. On success, `padTensorOp` is replaced by the cloned version /// in the packing loop so the caller can continue reasoning about the padding diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp @@ -54,7 +54,7 @@ /// 7. There is no enclosing scf::ForOp that indexes the padded data. /// Other cases succeed and will trigger hoisting of the pad op. struct HoistingAnalysis { - HoistingAnalysis(PadTensorOp padTensorOp, int nLevels); + HoistingAnalysis(PadTensorOp padTensorOp, int numLoops); bool isValid() { return valid; } @@ -62,12 +62,6 @@ /// `backwardSlice`. FailureOr> getPackedTensorSizes(ImplicitLocOpBuilder &b); - /// The padTensorOp that needs to be hoisted. - PadTensorOp padTensorOp; - - /// The maximum number of immediately enclosing scf::ForOp to hoist over. - int nLevels; - /// The outermost loop, determined by `nLevels` above which `padTensorOp` will /// be hoisted. scf::ForOp outermostEnclosingForOp; @@ -81,9 +75,7 @@ /// 2. whose induction variable is used, directly or indirectly, in the /// computation of `padTensorOp`. /// The span of these loops determines the footprint of the packed tensor. - /// SmallSetVector packingLoops; - SetVector, DenseSet> - packingLoops; + SmallVector packingLoops; private: /// Returns the loops in `backwardSlice` used to index the padded data. The @@ -103,8 +95,8 @@ /// %padded_slice = linalg.pad_tensor %slice /// ``` /// getIndexingLoops(%padded_slice, %slice) returns [scf.for %i, scf.for %j] - SetVector getIndexingLoops(PadTensorOp padTensorOp, - tensor::ExtractSliceOp sliceOp); + SmallVector getIndexingLoops(PadTensorOp padTensorOp, + tensor::ExtractSliceOp sliceOp); /// Encodes whether the analysis is valid and hoisting can proceed. bool valid; @@ -148,10 +140,8 @@ } } -HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int nLevels) - : padTensorOp(padTensorOp), nLevels(nLevels), valid(false) { - AsmState state(padTensorOp->getParentOfType()); - (void)state; +HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) { + valid = false; // Bail on any use that isn't an input of a Linalg op. // Hoisting of inplace updates happens after vectorization. @@ -160,7 +150,7 @@ // Get at most nLevels of immediately enclosing loops. SmallVector reverseEnclosingLoops; - getAtMostNEnclosingLoops(padTensorOp, nLevels, reverseEnclosingLoops); + getAtMostNEnclosingLoops(padTensorOp, numLoops, reverseEnclosingLoops); if (reverseEnclosingLoops.empty()) { LLVM_DEBUG(DBGS() << "No immediately enclosing loop -> skip\n"); return; @@ -216,19 +206,20 @@ } // Search the loops found in `backwardSlice` used to index the padded data. - SetVector indexingLoops = getIndexingLoops(padTensorOp, sliceOp); + SmallVector indexingLoops = + getIndexingLoops(padTensorOp, sliceOp); // Add only the loops part of `indexingLoops` to the packing loops. All other // loops are not used to index the padded data and consequently access the // same data in every loop iteration. Adding them to the packing loops would // increase the cache footprint of the packed data by storing the same data // multiple times. - for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops)) { - if (indexingLoops.contains(forOp)) - packingLoops.insert(forOp); - } - assert(indexingLoops.size() == packingLoops.size() && + for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops)) + if (!indexingLoops.empty() && indexingLoops.back() == forOp) + packingLoops.push_back(indexingLoops.pop_back_val()); + assert(indexingLoops.empty() && "expect the all indexing loops are enclosing loops"); + if (packingLoops.empty()) { LLVM_DEBUG(DBGS() << "Cannot find a packing loop -> skip\n"); return; @@ -247,7 +238,7 @@ indexEdges.insert(operand); } -SetVector +SmallVector HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp, tensor::ExtractSliceOp sliceOp) { // Set of all values used for index computation. @@ -272,7 +263,7 @@ // After iterating `backwardSlice` we obtain: // indexEdges = [%i, %j, %ubi, %ubj] // indexingLoops = [scf.for %i, scf.for %j] - SetVector indexingLoops; + SmallVector indexingLoops; for (Operation *op : llvm::reverse(backwardSlice)) { // Add the index operands of `padTensorOp` and `sliceOp` to start the // exploration of the index computation. @@ -286,7 +277,7 @@ if (auto forOp = dyn_cast(op)) { if (indexEdges.contains(forOp.getInductionVar())) { addIndexOperandsToIndexEdges(op, indexEdges); - indexingLoops.insert(forOp); + indexingLoops.push_back(forOp); continue; } } @@ -442,7 +433,7 @@ // Iteratively try to fold the upper bounds into the constraints set. if (failed(foldUpperBoundsIntoConstraintsSet( - constraints, outermostEnclosingForOp, packingLoops.getArrayRef()))) + constraints, outermostEnclosingForOp, packingLoops))) return failure(); int nPackedLoops = packingLoops.size(); @@ -577,7 +568,7 @@ auto forOp = dyn_cast(op); assert(forOp && "Expected scf::ForOp when hoisting pad ops"); // Unused loop, just skip it. - if (!analysis.packingLoops.contains(forOp)) + if (!llvm::is_contained(analysis.packingLoops, forOp)) continue; auto clonedForOp =