diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h --- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h @@ -24,6 +24,7 @@ class AffineCondition; class AffineForOp; class AffineIfOp; +class AffineParallelOp; class AffineMap; class AffineValueMap; class IntegerSet; @@ -141,6 +142,13 @@ // TODO: add support for non-unit strides. LogicalResult addAffineForOpDomain(AffineForOp forOp); + /// Add constraints (lower and upper bounds) for the specified + /// 'affine.parallel' operation's Value using IR information stored in its + /// bound maps. Returns failure for the yet unimplemented/unsupported cases. + /// Asserts if the Value corresponding to the 'affine.parallel' operation + /// isn't found in the constraint system. + LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp); + /// Adds constraints (lower and upper bounds) for each loop in the loop nest /// described by the bound maps `lbMaps` and `ubMaps` of a computation slice. /// Every pair (`lbMaps[i]`, `ubMaps[i]`) describes the bounds of a loop in diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h @@ -450,6 +450,11 @@ void extractForInductionVars(ArrayRef forInsts, SmallVectorImpl *ivs); +/// Extracts the induction variables from a list of either AffineForOp or +/// AffineParallelOp and places them in the output argument `ivs`. +void extractInductionVars(ArrayRef affineOps, + SmallVectorImpl &ivs); + /// Builds a perfect nest of affine.for loops, i.e., each loop except the /// innermost one contains only another loop and a terminator. The loops iterate /// from "lbs" to "ubs" with "steps". The body of the innermost loop is diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp --- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp @@ -240,27 +240,36 @@ LogicalResult mlir::getIndexSet(MutableArrayRef ops, FlatAffineValueConstraints *domain) { SmallVector indices; - SmallVector forOps; + SmallVector loopOps; + size_t numDims = 0; for (Operation *op : ops) { - if (!isa(op)) { - // TODO: Support affine.parallel ops. - LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if ops"); + if (!isa(op)) { + LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/" + "parallel ops"); return failure(); } - if (AffineForOp forOp = dyn_cast(op)) - forOps.push_back(forOp); + if (AffineForOp forOp = dyn_cast(op)) { + loopOps.push_back(forOp); + // An AffineForOp retains only 1 induction variable. + numDims += 1; + } else if (AffineParallelOp parallelOp = dyn_cast(op)) { + loopOps.push_back(parallelOp); + numDims += parallelOp.getNumDims(); + } } - extractForInductionVars(forOps, &indices); - // Reset while associated Values in 'indices' to the domain. - domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices); + extractInductionVars(loopOps, indices); + // Reset while associating Values in 'indices' to the domain. + domain->reset(numDims, /*numSymbols=*/0, /*numLocals=*/0, indices); for (Operation *op : ops) { // Add constraints from forOp's bounds. if (AffineForOp forOp = dyn_cast(op)) { if (failed(domain->addAffineForOpDomain(forOp))) return failure(); - } else if (AffineIfOp ifOp = dyn_cast(op)) { + } else if (auto ifOp = dyn_cast(op)) { domain->addAffineIfOpDomain(ifOp); - } + } else if (auto parallelOp = dyn_cast(op)) + if (failed(domain->addAffineParallelOpDomain(parallelOp))) + return failure(); } return success(); } @@ -594,6 +603,12 @@ if (srcAccess.memref != dstAccess.memref) return DependenceResult::NoDependence; + // TODO: Support affine.parallel which does not specify the ordering. + auto srcParent = srcAccess.opInst->getParentOfType(); + auto dstParent = dstAccess.opInst->getParentOfType(); + if (srcParent || dstParent) + return DependenceResult::Failure; + // Return 'NoDependence' if one of these accesses is not an // AffineWriteOpInterface. if (!allowRAR && !isa(srcAccess.opInst) && diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp --- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp @@ -639,6 +639,33 @@ forOp.getUpperBoundOperands()); } +LogicalResult FlatAffineValueConstraints::addAffineParallelOpDomain( + AffineParallelOp parallelOp) { + size_t ivPos = 0; + for (auto iv : parallelOp.getIVs()) { + unsigned pos; + if (!findVar(iv, &pos)) { + assert(false && "variable expected for the IV value"); + return failure(); + } + + AffineMap lowerBound = parallelOp.getLowerBoundMap(ivPos); + if (lowerBound.isConstant()) + addBound(BoundType::LB, pos, lowerBound.getSingleConstantResult()); + else if (failed(addBound(BoundType::LB, pos, lowerBound, + parallelOp.getLowerBoundsOperands()))) + return failure(); + + auto upperBound = parallelOp.getUpperBoundMap(ivPos); + if (upperBound.isConstant()) + addBound(BoundType::UB, pos, upperBound.getSingleConstantResult()); + else if (failed(addBound(BoundType::UB, pos, upperBound, + parallelOp.getUpperBoundsOperands()))) + return failure(); + } + return success(); +} + LogicalResult FlatAffineValueConstraints::addDomainFromSliceMaps(ArrayRef lbMaps, ArrayRef ubMaps, diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -2318,6 +2318,19 @@ ivs->push_back(forInst.getInductionVar()); } +void mlir::extractInductionVars(ArrayRef affineOps, + SmallVectorImpl &ivs) { + ivs.reserve(affineOps.size()); + for (Operation *op : affineOps) { + // Add constraints from forOp's bounds. + if (auto forOp = dyn_cast(op)) + ivs.push_back(forOp.getInductionVar()); + else if (auto parallelOp = dyn_cast(op)) + for (size_t i = 0; i < parallelOp.getBody()->getNumArguments(); i++) + ivs.push_back(parallelOp.getBody()->getArgument(i)); + } +} + /// Builds an affine loop nest, using "loopCreatorFn" to create individual loop /// operations. template diff --git a/mlir/test/Dialect/Affine/scalrep.mlir b/mlir/test/Dialect/Affine/scalrep.mlir --- a/mlir/test/Dialect/Affine/scalrep.mlir +++ b/mlir/test/Dialect/Affine/scalrep.mlir @@ -788,3 +788,61 @@ } return %A : memref<1xf32> } + +// CHECK-LABEL: func @parallel_store_load() { +func.func @parallel_store_load() { + %cf7 = arith.constant 7.0 : f32 + %m = memref.alloc() : memref<10xf32> + affine.parallel (%i0) = (0) to (10) { + affine.store %cf7, %m[%i0] : memref<10xf32> + %v0 = affine.load %m[%i0] : memref<10xf32> + %v1 = arith.addf %v0, %v0 : f32 + } + memref.dealloc %m : memref<10xf32> + return +// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32 +// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (10) { +// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: return +} + +func.func @non_constant_parallel_store_load(%N : index) { + %cf7 = arith.constant 7.0 : f32 + %m = memref.alloc() : memref<10xf32> + affine.parallel (%i0) = (0) to (%N) { + affine.store %cf7, %m[%i0] : memref<10xf32> + %v0 = affine.load %m[%i0] : memref<10xf32> + %v1 = arith.addf %v0, %v0 : f32 + } + memref.dealloc %m : memref<10xf32> + return +} +// CHECK: func.func @non_constant_parallel_store_load(%[[ARG0:.*]]: index) { +// CHECK-NEXT: %[[C7:.*]] = arith.constant 7.000000e+00 : f32 +// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (%[[ARG0]]) { +// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: return + +// CHECK-LABEL: func @parallel_surrounding_for() { +func.func @parallel_surrounding_for() { + %cf7 = arith.constant 7.0 : f32 + %m = memref.alloc() : memref<10x10xf32> + affine.parallel (%i0) = (0) to (10) { + affine.for %i1 = 0 to 10 { + affine.store %cf7, %m[%i0,%i1] : memref<10x10xf32> + %v0 = affine.load %m[%i0,%i1] : memref<10x10xf32> + %v1 = arith.addf %v0, %v0 : f32 + } + } + memref.dealloc %m : memref<10x10xf32> + return +// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32 +// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (10) { +// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { +// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: return +} diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir --- a/mlir/test/Transforms/memref-dependence-check.mlir +++ b/mlir/test/Transforms/memref-dependence-check.mlir @@ -1064,3 +1064,19 @@ return } + +// ----- +// CHECK-LABEL: func @parallel_dependence_check_failure() { +func.func @parallel_dependence_check_failure() { + %0 = memref.alloc() : memref<10xf32> + %cst = arith.constant 7.000000e+00 : f32 + affine.parallel (%i0) = (0) to (10) { + // expected-error @+1 {{dependence check failed}} + affine.store %cst, %0[%i0] : memref<10xf32> + } + affine.parallel (%i1) = (0) to (10) { + // expected-error @+1 {{dependence check failed}} + %1 = affine.load %0[%i1] : memref<10xf32> + } + return +} diff --git a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp --- a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp +++ b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp @@ -86,15 +86,18 @@ DependenceResult result = checkMemrefAccessDependence( srcAccess, dstAccess, d, &dependenceConstraints, &dependenceComponents); - assert(result.value != DependenceResult::Failure); - bool ret = hasDependence(result); - // TODO: Print dependence type (i.e. RAW, etc) and print - // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance - // vectors from ([1, 1], [3, 3]) to (1, 3). - srcOpInst->emitRemark("dependence from ") - << i << " to " << j << " at depth " << d << " = " - << getDirectionVectorStr(ret, numCommonLoops, d, - dependenceComponents); + if (result.value == DependenceResult::Failure) { + srcOpInst->emitError("dependence check failed"); + } else { + bool ret = hasDependence(result); + // TODO: Print dependence type (i.e. RAW, etc) and print + // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance + // vectors from ([1, 1], [3, 3]) to (1, 3). + srcOpInst->emitRemark("dependence from ") + << i << " to " << j << " at depth " << d << " = " + << getDirectionVectorStr(ret, numCommonLoops, d, + dependenceComponents); + } } } }