diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -118,6 +118,11 @@
 def AffineParallelize : FunctionPass<"affine-parallelize"> {
   let summary = "Convert affine.for ops into 1-D affine.parallel";
   let constructor = "mlir::createAffineParallelizePass()";
+  let options = [
+    Option<"maxNested", "max-nested", "unsigned", /*default=*/"-1u",
+           "Maximum number of nested parallel loops to produce. "
+           "Defaults to unlimited (UINT_MAX).">,
+  ];
 }
 
 def AffineLoopNormalize : FunctionPass<"affine-loop-normalize"> {
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
@@ -36,13 +36,31 @@
 
 void AffineParallelize::runOnFunction() {
   FuncOp f = getFunction();
-  SmallVector<AffineForOp, 8> parallelizableLoops;
+
+  // The walker proceeds in post-order, but we need to process outer loops first
+  // to control the number of outer parallel loops, so push candidate loops to
+  // the front of a deque.
+  std::deque<AffineForOp> parallelizableLoops;
   f.walk([&](AffineForOp loop) {
     if (isLoopParallel(loop))
-      parallelizableLoops.push_back(loop);
+      parallelizableLoops.push_front(loop);
   });
-  for (AffineForOp loop : parallelizableLoops)
-    affineParallelize(loop);
+
+  for (AffineForOp loop : parallelizableLoops) {
+    // Count the affine.parallel ops already enclosing this loop, stopping at
+    // the nearest op with the AffineScope trait (or at the top of the IR).
+    unsigned numParentParallelOps = 0;
+    for (Operation *op = loop->getParentOp();
+         op != nullptr && !op->hasTrait<OpTrait::AffineScope>();
+         op = op->getParentOp()) {
+      if (isa<AffineParallelOp>(op))
+        ++numParentParallelOps;
+    }
+
+    // Only convert while fewer than maxNested parallel loops enclose this one.
+    if (numParentParallelOps < maxNested)
+      affineParallelize(loop);
+  }
 }
 
 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineParallelizePass() {
diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -1,4 +1,5 @@
 // RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize| FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize='max-nested=1' | FileCheck --check-prefix=MAX-NESTED %s
 
 // CHECK-LABEL:    func @reduce_window_max() {
 func @reduce_window_max() {
@@ -144,3 +145,18 @@
   }
   return
 }
+
+// MAX-NESTED-LABEL: @max_nested
+func @max_nested(%m: memref<?x?xf32>, %lb0: index, %lb1: index,
+                 %ub0: index, %ub1: index) {
+  // MAX-NESTED: affine.parallel
+  affine.for %i = affine_map<(d0) -> (d0)>(%lb0) to affine_map<(d0) -> (d0)>(%ub0) {
+    // MAX-NESTED: affine.for
+    affine.for %j = affine_map<(d0) -> (d0)>(%lb1) to affine_map<(d0) -> (d0)>(%ub1) {
+      affine.load %m[%i, %j] : memref<?x?xf32>
+    }
+  }
+  return
+}
+
+