diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
--- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
@@ -24,6 +24,7 @@
 class AffineCondition;
 class AffineForOp;
 class AffineIfOp;
+class AffineParallelOp;
 class AffineMap;
 class AffineValueMap;
 class IntegerSet;
@@ -141,6 +142,12 @@
   // TODO: add support for non-unit strides.
   LogicalResult addAffineForOpDomain(AffineForOp forOp);
 
+  /// Adds constraints (lower and upper bounds) for each induction variable of
+  /// the specified 'affine.parallel' operation using the constant bounds stored
+  /// in its bound maps. Returns failure for the yet unsupported cases.
+  /// TODO: Support non-constant lower/upper bounds.
+  LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp);
+
   /// Adds constraints (lower and upper bounds) for each loop in the loop nest
   /// described by the bound maps `lbMaps` and `ubMaps` of a computation slice.
   /// Every pair (`lbMaps[i]`, `ubMaps[i]`) describes the bounds of a loop in
diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -450,6 +450,11 @@
 void extractForInductionVars(ArrayRef<AffineForOp> forInsts,
                              SmallVectorImpl<Value> *ivs);
 
+/// Extracts the induction variables from a list of either AffineForOp or
+/// AffineParallelOp and places them in the output argument `ivs`.
+void extractInductionVars(ArrayRef<Operation *> operations,
+                          SmallVectorImpl<Value> *ivs);
+
 /// Builds a perfect nest of affine.for loops, i.e., each loop except the
 /// innermost one contains only another loop and a terminator. The loops iterate
 /// from "lbs" to "ubs" with "steps". The body of the innermost loop is
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
--- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
@@ -240,19 +240,25 @@
 LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
                                 FlatAffineValueConstraints *domain) {
   SmallVector<Value, 4> indices;
-  SmallVector<AffineForOp, 8> forOps;
+  SmallVector<Operation *, 8> loopOps;
+  size_t numDims = 0;
   for (Operation *op : ops) {
-    if (!isa<AffineForOp, AffineIfOp>(op)) {
-      // TODO: Support affine.parallel ops.
-      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if ops");
+    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
+      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
+                                 "parallel ops");
       return failure();
     }
-    if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
-      forOps.push_back(forOp);
+    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
+      loopOps.push_back(forOp);
+      numDims += 1; // An AffineForOp retains only 1 induction variable.
+    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
+      loopOps.push_back(parallelOp);
+      numDims += parallelOp.getNumDims();
+    }
   }
-  extractForInductionVars(forOps, &indices);
+  extractInductionVars(loopOps, &indices);
   // Reset while associated Values in 'indices' to the domain.
-  domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
+  domain->reset(numDims, /*numSymbols=*/0, /*numLocals=*/0, indices);
   for (Operation *op : ops) {
     // Add constraints from forOp's bounds.
     if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
@@ -260,6 +266,9 @@
         return failure();
     } else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
       domain->addAffineIfOpDomain(ifOp);
+    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
+      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
+        return failure();
     }
   }
   return success();
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
--- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
@@ -639,6 +639,41 @@
                   forOp.getUpperBoundOperands());
 }
 
+/// Adds the constant lower and upper bounds of every induction variable of
+/// `parallelOp` to this constraint system. Returns failure if a bound is not a
+/// single constant.
+// TODO: Support non-constant upper/lower bounds.
+LogicalResult FlatAffineValueConstraints::addAffineParallelOpDomain(
+    AffineParallelOp parallelOp) {
+  // `ivPos` selects the bound maps of the IV being processed; `pos` is the
+  // position of that IV in this constraint system.
+  size_t ivPos = 0;
+  for (Value iv : parallelOp.getIVs()) {
+    unsigned pos;
+    if (!findVar(iv, &pos)) {
+      assert(false && "Value not found");
+      return failure();
+    }
+
+    // A bound with several results (a max/min of expressions) is not a single
+    // constant even if each of its results is.
+    AffineMap lowerBound = parallelOp.getLowerBoundMap(ivPos);
+    if (!lowerBound.isSingleConstant())
+      return failure();
+    addBound(BoundType::LB, pos, lowerBound.getSingleConstantResult());
+
+    // The upper bound of affine.parallel is exclusive while the constraint
+    // added here is inclusive, hence the `- 1`.
+    AffineMap upperBound = parallelOp.getUpperBoundMap(ivPos);
+    if (!upperBound.isSingleConstant())
+      return failure();
+    addBound(BoundType::UB, pos, upperBound.getSingleConstantResult() - 1);
+
+    ++ivPos;
+  }
+  return success();
+}
+
 LogicalResult
 FlatAffineValueConstraints::addDomainFromSliceMaps(ArrayRef<AffineMap> lbMaps,
                                                    ArrayRef<AffineMap> ubMaps,
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -2321,6 +2321,20 @@
     ivs->push_back(forInst.getInductionVar());
 }
 
+void mlir::extractInductionVars(ArrayRef<Operation *> operations,
+                                SmallVectorImpl<Value> *ivs) {
+  // Lower bound on the result size: affine.parallel may define several IVs.
+  ivs->reserve(operations.size());
+  for (Operation *op : operations) {
+    if (auto forOp = dyn_cast<AffineForOp>(op)) {
+      ivs->push_back(forOp.getInductionVar());
+    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op)) {
+      for (Value iv : parallelOp.getIVs())
+        ivs->push_back(iv);
+    }
+  }
+}
+
 /// Builds an affine loop nest, using "loopCreatorFn" to create individual loop
 /// operations.
 template <typename BoundListTy, typename LoopCreatorTy>
diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir
--- a/mlir/test/Transforms/memref-dependence-check.mlir
+++ b/mlir/test/Transforms/memref-dependence-check.mlir
@@ -1064,3 +1064,138 @@
 
   return
 }
+
+// -----
+
+// CHECK-LABEL: func @dependent_store_load_in_parallel() {
+func.func @dependent_store_load_in_parallel() {
+  %0 = memref.alloc() : memref<10xf32>
+  %cst = arith.constant 7.000000e+00 : f32
+  affine.parallel (%i0) = (0) to (10) {
+    affine.store %cst, %0[%i0] : memref<10xf32>
+    // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 0 to 1 at depth 1 = true}}
+    %1 = affine.load %0[%i0] : memref<10xf32>
+    // expected-remark@above {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 1 to 1 at depth 1 = false}}
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @different_memref_in_parallel() {
+func.func @different_memref_in_parallel() {
+  %m0 = memref.alloc() : memref<10xf32>
+  %m1 = memref.alloc() : memref<10xf32>
+  %cst = arith.constant 7.000000e+00 : f32
+  affine.parallel (%i0) = (0) to (10) {
+    affine.store %cst, %m0[%i0] : memref<10xf32>
+    // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 0 to 1 at depth 1 = false}}
+    %1 = affine.load %m1[%i0] : memref<10xf32>
+    // expected-remark@above {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 1 to 1 at depth 1 = false}}
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @dependent_parallels() {
+func.func @dependent_parallels() {
+  %0 = memref.alloc() : memref<10xf32>
+  %cst = arith.constant 7.000000e+00 : f32
+  // The store (0) precedes the load (1), so a dependence exists from 0 to 1
+  // but not from 1 to 0.
+  affine.parallel (%i0) = (0) to (10) {
+    affine.store %cst, %0[%i0] : memref<10xf32>
+    // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 0 to 1 at depth 1 = true}}
+  }
+  affine.parallel (%i1) = (0) to (10) {
+    %1 = affine.load %0[%i1] : memref<10xf32>
+    // expected-remark@above {{dependence from 1 to 1 at depth 1 = false}}
+    // expected-remark@above {{dependence from 1 to 0 at depth 1 = false}}
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @parallel_store_load_func_symbol(%arg0: index) {
+func.func @parallel_store_load_func_symbol(%arg0: index) {
+  %m = memref.alloc() : memref<100xf32>
+  %c7 = arith.constant 7.0 : f32
+  %c10 = arith.constant 10 : index
+  affine.parallel (%i0) = (0) to (10) {
+    %a0 = affine.apply affine_map<(d0) -> (d0)> (%arg0)
+    affine.store %c7, %m[%a0] : memref<100xf32>
+    // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 0 to 1 at depth 1 = true}}
+    %a1 = affine.apply affine_map<(d0) -> (d0)> (%arg0)
+    %v0 = affine.load %m[%a1] : memref<100xf32>
+    // expected-remark@above {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 1 to 1 at depth 1 = false}}
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @two_dim_parallel() {
+func.func @two_dim_parallel() {
+  %m = memref.alloc() : memref<10x10xf32>
+  %c7 = arith.constant 7.0 : f32
+  affine.parallel (%i0, %i1) = (0, 0) to (10, 10) {
+    %a00 = affine.apply affine_map<(d0, d1) -> (d0)> (%i0, %i1)
+    %a01 = affine.apply affine_map<(d0, d1) -> (d1)> (%i0, %i1)
+    affine.store %c7, %m[%a00, %a01] : memref<10x10xf32>
+    // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 0 to 1 at depth 1 = true}}
+    %a10 = affine.apply affine_map<(d0, d1) -> (d0 - 2)> (%i0, %i1)
+    %a11 = affine.apply affine_map<(d0, d1) -> (d1)> (%i0, %i1)
+    %v0 = affine.load %m[%a10, %a11] : memref<10x10xf32>
+    // expected-remark@above {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 1 to 1 at depth 1 = false}}
+  }
+  return
+}
+
+// -----
+
+// The upper bound of affine.parallel is exclusive: %i0 never reaches 10, so
+// the store and the load cannot touch the same element.
+// CHECK-LABEL: func @parallel_exclusive_upper_bound() {
+func.func @parallel_exclusive_upper_bound() {
+  %m = memref.alloc() : memref<100xf32>
+  %c7 = arith.constant 7.0 : f32
+  affine.parallel (%i0) = (0) to (10) {
+    affine.store %c7, %m[%i0] : memref<100xf32>
+    // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 0 to 1 at depth 1 = false}}
+    %v0 = affine.load %m[%i0 + 10] : memref<100xf32>
+    // expected-remark@above {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 1 to 1 at depth 1 = false}}
+  }
+  return
+}
+
+// -----
+
+// Each induction variable is constrained by its own bounds: %i1 ranges over
+// [0, 10), so the element stored at (i0, i1) is loaded at (i0, i1 + 5).
+// CHECK-LABEL: func @two_dim_parallel_distinct_bounds() {
+func.func @two_dim_parallel_distinct_bounds() {
+  %m = memref.alloc() : memref<10x10xf32>
+  %c7 = arith.constant 7.0 : f32
+  affine.parallel (%i0, %i1) = (0, 0) to (2, 10) {
+    affine.store %c7, %m[%i0, %i1] : memref<10x10xf32>
+    // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 0 to 1 at depth 1 = true}}
+    %v0 = affine.load %m[%i0, %i1 - 5] : memref<10x10xf32>
+    // expected-remark@above {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-remark@above {{dependence from 1 to 1 at depth 1 = false}}
+  }
+  return
+}