diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h --- a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h +++ b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h @@ -34,10 +34,9 @@ class Operation; class Value; -/// Populates 'loops' with IVs of the loops surrounding 'op' ordered from -/// the outermost 'affine.for' operation to the innermost one. -// TODO: handle 'affine.if' ops. -void getLoopIVs(Operation &op, SmallVectorImpl *loops); +/// Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered +/// from the outermost 'affine.for' operation to the innermost one. +void getAffineForIVs(Operation &op, SmallVectorImpl *loops); /// Populates 'ops' with affine operations enclosing `op` ordered from outermost /// to innermost. affine.for, affine.if, or affine.parallel ops comprise such diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h @@ -437,9 +437,9 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h.inc" namespace mlir { -/// Returns true if the provided value is the induction variable of a +/// Returns true if the provided value is the induction variable of an /// AffineForOp. -bool isForInductionVar(Value val); +bool isAffineForInductionVar(Value val); /// Returns the loop parent of an induction variable. If the provided value is /// not an induction variable, then return nullptr. diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp --- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp @@ -286,6 +286,14 @@ return getIndexSet(ops, indexSet); } +/// Returns true if `val` is an induction variable of an affine.parallel op. 
+static bool isAffineParallelInductionVar(Value val) { + auto ivArg = val.dyn_cast<BlockArgument>(); + // A detached block has no parent op; treat its arguments as non-IVs. + return ivArg && + isa_and_nonnull<AffineParallelOp>(ivArg.getOwner()->getParentOp()); +} + // Returns the number of outer loop common to 'src/dstDomain'. // Loops common to 'src/dst' domains are added to 'commonLoops' if non-null. static unsigned @@ -297,8 +305,10 @@ std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars()); unsigned numCommonLoops = 0; for (unsigned i = 0; i < minNumLoops; ++i) { - if (!isForInductionVar(srcDomain.getValue(i)) || - !isForInductionVar(dstDomain.getValue(i)) || + if ((!isAffineForInductionVar(srcDomain.getValue(i)) && + !isAffineParallelInductionVar(srcDomain.getValue(i))) || + (!isAffineForInductionVar(dstDomain.getValue(i)) && + !isAffineParallelInductionVar(dstDomain.getValue(i))) || srcDomain.getValue(i) != dstDomain.getValue(i)) break; if (commonLoops != nullptr) @@ -603,12 +613,6 @@ if (srcAccess.memref != dstAccess.memref) return DependenceResult::NoDependence; - // TODO: Support affine.parallel which does not specify the ordering. - auto srcParent = srcAccess.opInst->getParentOfType(); - auto dstParent = dstAccess.opInst->getParentOfType(); - if (srcParent || dstParent) - return DependenceResult::Failure; - // Return 'NoDependence' if one of these accesses is not an // AffineWriteOpInterface. if (!allowRAR && !isa(srcAccess.opInst) && diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp --- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp @@ -576,7 +576,7 @@ return; // Caller is expected to fully compose map/operands if necessary. - assert((isTopLevelValue(val) || isForInductionVar(val)) && + assert((isTopLevelValue(val) || isAffineForInductionVar(val)) && "non-terminal symbol / loop IV expected"); // Outer loop IVs could be used in forOp's bounds. 
if (auto loop = getForInductionVarOwner(val)) { diff --git a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp --- a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp @@ -160,7 +160,7 @@ /// Returns false in cases with more than one AffineApplyOp, this is /// conservative. static bool isAccessIndexInvariant(Value iv, Value index) { - assert(isForInductionVar(iv) && "iv must be a AffineForOp"); + assert(isAffineForInductionVar(iv) && "iv must be a AffineForOp"); assert(index.getType().isa() && "index must be of IndexType"); SmallVector affineApplyOps; getReachableAffineApplyOps({index}, affineApplyOps); diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp --- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp @@ -31,9 +31,7 @@ using llvm::SmallDenseMap; -/// Populates 'loops' with IVs of the loops surrounding 'op' ordered from -/// the outermost 'affine.for' operation to the innermost one. -void mlir::getLoopIVs(Operation &op, SmallVectorImpl *loops) { +void mlir::getAffineForIVs(Operation &op, SmallVectorImpl *loops) { auto *currOp = op.getParentOp(); AffineForOp currAffineForOp; // Traverse up the hierarchy collecting all 'affine.for' operation while @@ -460,7 +458,7 @@ // 0-d memrefs. if (rank == 0) { SmallVector ivs; - getLoopIVs(*op, &ivs); + getAffineForIVs(*op, &ivs); assert(loopDepth <= ivs.size() && "invalid 'loopDepth'"); // The first 'loopDepth' IVs are symbols for this region. ivs.resize(loopDepth); @@ -554,7 +552,7 @@ // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which // this memref region is symbolic. 
SmallVector enclosingIVs; - getLoopIVs(*op, &enclosingIVs); + getAffineForIVs(*op, &enclosingIVs); assert(loopDepth <= enclosingIVs.size() && "invalid loop depth"); enclosingIVs.resize(loopDepth); SmallVector vars; @@ -763,7 +761,7 @@ for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) { auto value = cst->getValue(i); if (ivs.count(value) == 0) { - assert(isForInductionVar(value)); + assert(isAffineForInductionVar(value)); auto loop = getForInductionVarOwner(value); if (failed(cst->addAffineForOpDomain(loop))) return failure(); @@ -782,7 +780,7 @@ std::vector> loops(numOps); unsigned loopDepthLimit = std::numeric_limits::max(); for (unsigned i = 0; i < numOps; ++i) { - getLoopIVs(*ops[i], &loops[i]); + getAffineForIVs(*ops[i], &loops[i]); loopDepthLimit = std::min(loopDepthLimit, static_cast(loops[i].size())); } @@ -1046,12 +1044,12 @@ bool isBackwardSlice, ComputationSliceState *sliceState) { // Get loop nest surrounding src operation. SmallVector srcLoopIVs; - getLoopIVs(*depSourceOp, &srcLoopIVs); + getAffineForIVs(*depSourceOp, &srcLoopIVs); unsigned numSrcLoopIVs = srcLoopIVs.size(); // Get loop nest surrounding dst operation. SmallVector dstLoopIVs; - getLoopIVs(*depSinkOp, &dstLoopIVs); + getAffineForIVs(*depSinkOp, &dstLoopIVs); unsigned numDstLoopIVs = dstLoopIVs.size(); assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) || @@ -1160,12 +1158,12 @@ ComputationSliceState *sliceState) { // Get loop nest surrounding src operation. SmallVector srcLoopIVs; - getLoopIVs(*srcOpInst, &srcLoopIVs); + getAffineForIVs(*srcOpInst, &srcLoopIVs); unsigned numSrcLoopIVs = srcLoopIVs.size(); // Get loop nest surrounding dst operation. 
SmallVector dstLoopIVs; - getLoopIVs(*dstOpInst, &dstLoopIVs); + getAffineForIVs(*dstOpInst, &dstLoopIVs); unsigned dstLoopIVsSize = dstLoopIVs.size(); if (dstLoopDepth > dstLoopIVsSize) { dstOpInst->emitError("invalid destination loop depth"); @@ -1188,7 +1186,7 @@ getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody()); // Get loop nest surrounding 'sliceInst'. SmallVector sliceSurroundingLoops; - getLoopIVs(*sliceInst, &sliceSurroundingLoops); + getAffineForIVs(*sliceInst, &sliceSurroundingLoops); // Sanity check. unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size(); @@ -1264,17 +1262,34 @@ [](AffineExpr e) { return e == 0; }); } +/// Populates 'loops' with IVs of the surrounding affine.for and affine.parallel +/// ops ordered from the outermost one to the innermost. +static void getAffineIVs(Operation &op, SmallVectorImpl<Value> &loops) { + auto *currOp = op.getParentOp(); + // Walk up the parent chain collecting IVs innermost-first, skipping ops + // other than 'affine.for' / 'affine.parallel' (e.g. 'affine.if'). A parallel + // op's IVs are appended reversed so the final reverse keeps their order. + while (currOp) { + if (auto forOp = dyn_cast<AffineForOp>(currOp)) + loops.push_back(forOp.getInductionVar()); + else if (auto parOp = dyn_cast<AffineParallelOp>(currOp)) + llvm::append_range(loops, llvm::reverse(parOp.getIVs())); + currOp = currOp->getParentOp(); + } + std::reverse(loops.begin(), loops.end()); +} + /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB', /// where each lists loops from outer-most to inner-most in loop nest. 
unsigned mlir::getNumCommonSurroundingLoops(Operation &a, Operation &b) { - SmallVector loopsA, loopsB; - getLoopIVs(a, &loopsA); - getLoopIVs(b, &loopsB); + SmallVector loopsA, loopsB; + getAffineIVs(a, loopsA); + getAffineIVs(b, loopsB); unsigned minNumLoops = std::min(loopsA.size(), loopsB.size()); unsigned numCommonLoops = 0; for (unsigned i = 0; i < minNumLoops; ++i) { - if (loopsA[i].getOperation() != loopsB[i].getOperation()) + if (loopsA[i] != loopsB[i]) break; ++numCommonLoops; } diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -2298,7 +2298,7 @@ /// Returns true if the provided value is the induction variable of a /// AffineForOp. -bool mlir::isForInductionVar(Value val) { +bool mlir::isAffineForInductionVar(Value val) { return getForInductionVarOwner(val) != AffineForOp(); } diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp --- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp @@ -116,7 +116,7 @@ isa(op))) { if (&op != user) { SmallVector userIVs; - getLoopIVs(*user, &userIVs); + getAffineForIVs(*user, &userIVs); // Check that userIVs don't contain the for loop around the op. 
if (llvm::is_contained(userIVs, getForInductionVarOwner(indVar))) return false; diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp --- a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp @@ -864,7 +864,7 @@ block) continue; SmallVector loops; - getLoopIVs(*user, &loops); + getAffineForIVs(*user, &loops); if (loops.empty()) continue; assert(forToNodeMap.count(loops[0]) > 0 && "missing mapping"); @@ -1140,7 +1140,7 @@ // Compute cost of sliced and unsliced src loop nest. SmallVector srcLoopIVs; - getLoopIVs(*srcOpInst, &srcLoopIVs); + getAffineForIVs(*srcOpInst, &srcLoopIVs); // Walk src loop nest and collect stats. LoopNestStats srcLoopNestStats; @@ -1790,7 +1790,7 @@ dstNode->getLoadOpsForMemref(memref, &dstLoadOpInsts); SmallVector dstLoopIVs; - getLoopIVs(*dstLoadOpInsts[0], &dstLoopIVs); + getAffineForIVs(*dstLoadOpInsts[0], &dstLoopIVs); unsigned dstLoopDepthTest = dstLoopIVs.size(); auto sibAffineForOp = cast(sibNode->op); @@ -1897,7 +1897,7 @@ continue; // Gather loops surrounding 'use'. SmallVector loops; - getLoopIVs(*user, &loops); + getAffineForIVs(*user, &loops); // Skip 'use' if it is not within a loop nest. if (loops.empty()) continue; diff --git a/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp --- a/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp @@ -109,7 +109,7 @@ for (Operation *user : value.getUsers()) { SmallVector loops; // Check if any loop in loop nest surrounding 'user' is 'opB'. - getLoopIVs(*user, &loops); + getAffineForIVs(*user, &loops); if (llvm::is_contained(loops, cast(opB))) { lastDepOp = opX; return WalkResult::interrupt(); @@ -605,7 +605,7 @@ SmallVector loops; // Check if any loop in loop nest surrounding 'user' is // 'insertPointParent'. 
- getLoopIVs(*user, &loops); + getAffineForIVs(*user, &loops); if (llvm::is_contained(loops, cast(insertPointParent))) { if (auto forOp = dyn_cast_or_null(user->getParentOp())) { diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -1882,7 +1882,7 @@ cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(), &symbols); SmallVector enclosingFors; - getLoopIVs(*block.begin(), &enclosingFors); + getAffineForIVs(*block.begin(), &enclosingFors); // Walk up loop parents till we find an IV on which this region is // symbolic/variant. auto it = enclosingFors.rbegin(); @@ -2355,7 +2355,7 @@ // Just get the first numSymbols IVs, which the memref region is parametric // on. SmallVector ivs; - getLoopIVs(*op, &ivs); + getAffineForIVs(*op, &ivs); ivs.resize(numParamLoopIVs); SmallVector symbols; extractForInductionVars(ivs, &symbols); diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -428,7 +428,7 @@ // canonicalization is missing composition of affine.applys into it. 
assert(llvm::all_of(ifOp.getOperands(), [](Value v) { - return isTopLevelValue(v) || isForInductionVar(v); + return isTopLevelValue(v) || isAffineForInductionVar(v); }) && "operands not composed"); diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir --- a/mlir/test/Transforms/memref-dependence-check.mlir +++ b/mlir/test/Transforms/memref-dependence-check.mlir @@ -1066,17 +1066,21 @@ } // ----- -// CHECK-LABEL: func @parallel_dependence_check_failure() { -func.func @parallel_dependence_check_failure() { +// CHECK-LABEL: func @dependent_parallel() { +func.func @dependent_parallel() { %0 = memref.alloc() : memref<10xf32> %cst = arith.constant 7.000000e+00 : f32 affine.parallel (%i0) = (0) to (10) { - // expected-error @+1 {{dependence check failed}} affine.store %cst, %0[%i0] : memref<10xf32> + // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}} + // expected-remark@above {{dependence from 0 to 1 at depth 1 = true}} + // expected-remark@above {{dependence from 0 to 0 at depth 2 = false}} } affine.parallel (%i1) = (0) to (10) { - // expected-error @+1 {{dependence check failed}} %1 = affine.load %0[%i1] : memref<10xf32> + // expected-remark@above {{dependence from 1 to 0 at depth 1 = false}} + // expected-remark@above {{dependence from 1 to 1 at depth 1 = false}} + // expected-remark@above {{dependence from 1 to 1 at depth 2 = false}} } return }