diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -134,11 +134,46 @@
 void mlir::affineParallelize(AffineForOp forOp) {
   Location loc = forOp.getLoc();
   OpBuilder outsideBuilder(forOp);
+
+  // If a loop has a 'max' in the lower bound, emit it outside the parallel loop
+  // as it does not have implicit 'max' behavior.
+  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
+  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
+  AffineMap upperBoundMap = forOp.getUpperBoundMap();
+  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
+
+  bool needsMax = lowerBoundMap.getNumResults() > 1;
+  bool needsMin = upperBoundMap.getNumResults() > 1;
+  AffineMap identityMap;
+  if (needsMax || needsMin) {
+    // The affine.max/affine.min ops below are created right before the loop,
+    // so bail out unless the loop is directly nested in an AffineScope op;
+    // elsewhere their results are not valid bound operands for the new loop.
+    if (forOp->getParentOp() &&
+        !forOp->getParentOp()->hasTrait<OpTrait::AffineScope>())
+      return;
+
+    identityMap = AffineMap::getMultiDimIdentityMap(1, loc->getContext());
+  }
+  if (needsMax) {
+    auto maxOp = outsideBuilder.create<AffineMaxOp>(loc, lowerBoundMap,
+                                                    lowerBoundOperands);
+    lowerBoundMap = identityMap;
+    lowerBoundOperands = maxOp->getResults();
+  }
+
+  // Same for the upper bound.
+  if (needsMin) {
+    auto minOp = outsideBuilder.create<AffineMinOp>(loc, upperBoundMap,
+                                                    upperBoundOperands);
+    upperBoundMap = identityMap;
+    upperBoundOperands = minOp->getResults();
+  }
+
   // Creating empty 1-D affine.parallel op.
   AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
-      loc, llvm::None, llvm::None, forOp.getLowerBoundMap(),
-      forOp.getLowerBoundOperands(), forOp.getUpperBoundMap(),
-      forOp.getUpperBoundOperands());
+      loc, llvm::None, llvm::None, lowerBoundMap, lowerBoundOperands,
+      upperBoundMap, upperBoundOperands);
   // Steal the body of the old affine for op and erase it.
   newPloop.region().takeBody(forOp.region());
   forOp.erase();
diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -114,3 +114,33 @@
   }
   return
 }
+
+// CHECK-LABEL: for_with_minmax
+func @for_with_minmax(%m: memref<?xf32>, %lb0: index, %lb1: index,
+                      %ub0: index, %ub1: index) {
+  // CHECK: %[[lb:.*]] = affine.max
+  // CHECK: %[[ub:.*]] = affine.min
+  // CHECK: affine.parallel (%{{.*}}) = (%[[lb]]) to (%[[ub]])
+  affine.for %i = max affine_map<(d0, d1) -> (d0, d1)>(%lb0, %lb1)
+          to min affine_map<(d0, d1) -> (d0, d1)>(%ub0, %ub1) {
+    affine.load %m[%i] : memref<?xf32>
+  }
+  return
+}
+
+// CHECK-LABEL: nested_for_with_minmax
+func @nested_for_with_minmax(%m: memref<?xf32>, %lb0: index,
+                             %ub0: index, %ub1: index) {
+  // CHECK: affine.parallel
+  affine.for %j = 0 to 10 {
+    // Cannot parallelize the inner loop because we would need to compute
+    // affine.max for its lower bound inside the loop, and that is not (yet)
+    // considered as a valid affine dimension.
+    // CHECK: affine.for
+    affine.for %i = max affine_map<(d0, d1) -> (d0, d1)>(%lb0, %j)
+            to min affine_map<(d0, d1) -> (d0, d1)>(%ub0, %ub1) {
+      affine.load %m[%i] : memref<?xf32>
+    }
+  }
+  return
+}