diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h --- a/mlir/include/mlir/Dialect/Affine/Utils.h +++ b/mlir/include/mlir/Dialect/Affine/Utils.h @@ -13,12 +13,15 @@ #ifndef MLIR_DIALECT_AFFINE_UTILS_H #define MLIR_DIALECT_AFFINE_UTILS_H +#include "mlir/Support/LLVM.h" + namespace mlir { class AffineForOp; class AffineIfOp; class AffineParallelOp; struct LogicalResult; +class Operation; /// Replaces parallel affine.for op with 1-d affine.parallel op. /// mlir::isLoopParallel detect the parallel affine.for ops. @@ -31,6 +34,15 @@ /// significant code expansion in some cases. LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded = nullptr); +/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in +/// 'vectorSizes'. By default, each vectorization factor is applied +/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can +/// be optionally used to provide a different loop vectorization order. +void vectorizeAffineLoops( + Operation *parentOp, + llvm::DenseSet> &loops, + ArrayRef vectorSizes, ArrayRef fastestVaryingPattern); + } // namespace mlir #endif // MLIR_DIALECT_AFFINE_UTILS_H diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -18,6 +18,7 @@ #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/Passes.h" +#include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Dialect/Vector/VectorUtils.h" @@ -1198,25 +1199,38 @@ return signalPassFailure(); } - // Thread-safe RAII local context, BumpPtrAllocator freed on exit. 
- NestedPatternContext mlContext; - DenseSet parallelLoops; f.walk([&parallelLoops](AffineForOp loop) { if (isLoopParallel(loop)) parallelLoops.insert(loop); }); + vectorizeAffineLoops(f, parallelLoops, vectorSizes, fastestVaryingPattern); +} + +namespace mlir { + +/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in +/// 'vectorSizes'. By default, each vectorization factor is applied +/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can +/// be optionally used to provide a different loop vectorization order. +void vectorizeAffineLoops(Operation *parentOp, DenseSet &loops, + ArrayRef vectorSizes, + ArrayRef fastestVaryingPattern) { + // Thread-safe RAII local context, BumpPtrAllocator freed on exit. + NestedPatternContext mlContext; + for (auto &pat : - makePatterns(parallelLoops, vectorSizes.size(), fastestVaryingPattern)) { + makePatterns(loops, vectorSizes.size(), fastestVaryingPattern)) { LLVM_DEBUG(dbgs() << "\n******************************************"); LLVM_DEBUG(dbgs() << "\n******************************************"); - LLVM_DEBUG(dbgs() << "\n[early-vect] new pattern on Function\n"); - LLVM_DEBUG(f.print(dbgs())); + LLVM_DEBUG(dbgs() << "\n[early-vect] new pattern on parent op\n"); + LLVM_DEBUG(parentOp->print(dbgs())); + unsigned patternDepth = pat.getDepth(); SmallVector matches; - pat.match(f, &matches); + pat.match(parentOp, &matches); // Iterate over all the top-level matches and vectorize eagerly. // This automatically prunes intersecting matches. for (auto m : matches) { @@ -1239,9 +1253,11 @@ } std::unique_ptr> -mlir::createSuperVectorizePass(ArrayRef virtualVectorSize) { +createSuperVectorizePass(ArrayRef virtualVectorSize) { return std::make_unique(virtualVectorSize); } -std::unique_ptr> mlir::createSuperVectorizePass() { +std::unique_ptr> createSuperVectorizePass() { return std::make_unique(); } + +} // namespace mlir