diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -2033,10 +2033,14 @@ OpBuilder outsideBuilder(loops); Location loc = loops.getLoc(); + // Presort combined dimensions. + auto sortedDimensions = llvm::to_vector<3>(combinedDimensions); + for (auto &dims : sortedDimensions) + std::sort(dims.begin(), dims.end()); + // Normalize ParallelOp's iteration pattern. - SmallVector normalizedLowerBounds; - SmallVector normalizedSteps; - SmallVector normalizedUpperBounds; + SmallVector normalizedLowerBounds, normalizedSteps, + normalizedUpperBounds; for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) { OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody()); auto resultBounds = @@ -2050,14 +2054,12 @@ } // Combine iteration spaces. - SmallVector lowerBounds; - SmallVector steps; - SmallVector upperBounds; + SmallVector lowerBounds, upperBounds, steps; auto cst0 = outsideBuilder.create(loc, 0); auto cst1 = outsideBuilder.create(loc, 1); - for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) { + for (unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) { Value newUpperBound = outsideBuilder.create(loc, 1); - for (auto idx : combinedDimensions[i]) { + for (auto idx : sortedDimensions[i]) { newUpperBound = outsideBuilder.create(loc, newUpperBound, normalizedUpperBounds[idx]); } @@ -2079,7 +2081,7 @@ Value previous = ploopIVs[i]; unsigned numberCombinedDimensions = combinedDimensions[i].size(); // Iterate over all except the last induction value. - for (unsigned j = 0, e = numberCombinedDimensions - 1; j < e; ++j) { + for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) { unsigned idx = combinedDimensions[i][j]; // Determine the current induction value's current loop iteration @@ -2095,7 +2097,7 @@ } // The final induction value is just the remaining value. - unsigned idx = combinedDimensions[i][numberCombinedDimensions - 1]; + unsigned idx = combinedDimensions[i][0]; replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), previous, loops.region()); } diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir --- a/mlir/test/Transforms/parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize)' | FileCheck %s +// CHECK-LABEL: func @parallel_many_dims() { func @parallel_many_dims() { %c0 = constant 0 : index %c1 = constant 1 : index @@ -19,28 +20,27 @@ %c15 = constant 15 : index %c26 = constant 26 : index - scf.parallel (%i0, %i1, %i2, %i3, %i4) = (%c0, %c3, %c6, %c9, %c12) to (%c2, %c5, %c8, %c11, %c14) - step (%c1, %c4, %c7, %c10, %c13) { - %result = "magic.op"(%i0, %i1, %i2, %i3, %i4): (index, index, index, index, index) -> index + scf.parallel (%i0, %i1, %i2, %i3, %i4) = (%c0, %c3, %c6, %c9, %c12) + to (%c2, %c5, %c8, %c26, %c14) step (%c1, %c4, %c7, %c10, %c13) { + %result = "magic.op"(%i0, %i1, %i2, %i3, %i4) + : (index, index, index, index, index) -> index } return } -// CHECK-LABEL: func @parallel_many_dims() { -// CHECK: [[C6:%.*]] = constant 6 : index -// CHECK: [[C9:%.*]] = constant 9 : index -// CHECK: [[C10:%.*]] = constant 10 : index -// CHECK: [[C0:%.*]] = constant 0 : index -// CHECK: [[C1:%.*]] = constant 1 : index -// CHECK: [[C2:%.*]] = constant 2 : index -// CHECK: [[C3:%.*]] = constant 3 : index -// CHECK: [[C12:%.*]] = constant 12 : index -// CHECK: scf.parallel ([[NEW_I0:%.*]]) = ([[C0]]) to ([[C2]]) step ([[C1]]) { -// CHECK: [[I0:%.*]] = remi_signed [[NEW_I0]], [[C2]] : index -// CHECK: [[V0:%.*]] = divi_signed [[NEW_I0]], [[C2]] : index -// CHECK: [[V2:%.*]] = muli [[V0]], [[C10]] : index -// CHECK: [[I3:%.*]] = addi [[V2]], [[C9]] : index -// CHECK: "magic.op"([[I0]], [[C3]], [[C6]], [[I3]], [[C12]]) : (index, index, index, index, index) -> index -// CHECK: scf.yield -// CHECK-NEXT: } -// CHECK-NEXT: return +// CHECK: [[C3:%.*]] = constant 3 : index +// CHECK: [[C6:%.*]] = constant 6 : index +// CHECK: [[C9:%.*]] = constant 9 : index +// CHECK: [[C10:%.*]] = constant 10 : index +// CHECK: [[C4:%.*]] = constant 4 : index +// CHECK: [[C12:%.*]] = constant 12 : index +// CHECK: [[C0:%.*]] = constant 0 : index +// CHECK: [[C1:%.*]] = constant 1 : index +// CHECK: [[C2:%.*]] = constant 2 : index +// CHECK: scf.parallel ([[NEW_I0:%.*]]) = ([[C0]]) to ([[C4]]) step ([[C1]]) { +// CHECK: [[V0:%.*]] = remi_signed [[NEW_I0]], [[C2]] : index +// CHECK: [[I0:%.*]] = divi_signed [[NEW_I0]], [[C2]] : index +// CHECK: [[V2:%.*]] = muli [[V0]], [[C10]] : index +// CHECK: [[I3:%.*]] = addi [[V2]], [[C9]] : index +// CHECK: "magic.op"([[I0]], [[C3]], [[C6]], [[I3]], [[C12]]) : (index, index, index, index, index) -> index +// CHECK: scf.yield