Index: lib/Analysis/SyncDependenceAnalysis.cpp =================================================================== --- lib/Analysis/SyncDependenceAnalysis.cpp +++ lib/Analysis/SyncDependenceAnalysis.cpp @@ -197,6 +197,7 @@ // a join of at least two definitions if (ItLastDef->second != &DefBlock) { + // do we know this join already? if (!JoinBlocks->insert(&SuccBlock).second) return; @@ -218,13 +219,9 @@ template std::unique_ptr computeJoinPoints(const BasicBlock &RootBlock, - SuccessorIterable NodeSuccessors, const Loop *ParentLoop) { - assert(JoinBlocks); + SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) { - // immediate post dominator (no join block beyond that block) - const auto *PdNode = PDT.getNode(const_cast(&RootBlock)); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; + assert(JoinBlocks); // bootstrap with branch targets for (const auto *SuccBlock : NodeSuccessors) { @@ -340,13 +337,23 @@ // already available in cache? auto ItCached = CachedLoopExitJoins.find(&Loop); - if (ItCached != CachedLoopExitJoins.end()) + if (ItCached != CachedLoopExitJoins.end()) { return *ItCached->second; + } + + // do not propagate beyond the immediate post dominator of the loop + const auto *PdNode = PDT.getNode(const_cast(Loop.getHeader())); + const auto *IpdNode = PdNode->getIDom(); + const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; + while (PdBoundBlock && Loop.contains(PdBoundBlock)) { + IpdNode = IpdNode->getIDom(); + PdBoundBlock = IpdNode ? 
IpdNode->getBlock() : nullptr; + } // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; auto JoinBlocks = Propagator.computeJoinPoints( - *Loop.getHeader(), LoopExits, Loop.getParentLoop()); + *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock); auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks)); assert(ItInserted.second); @@ -365,11 +372,16 @@ if (ItCached != CachedBranchJoins.end()) return *ItCached->second; + // do not propagate beyond the immediate post dominator of the branch + const auto *PdNode = PDT.getNode(const_cast(Term.getParent())); + const auto *IpdNode = PdNode->getIDom(); + const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; + // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; const auto &TermBlock = *Term.getParent(); auto JoinBlocks = Propagator.computeJoinPoints( - TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock)); + TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock); auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks)); assert(ItInserted.second); Index: test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll =================================================================== --- test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll +++ test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll @@ -21,6 +21,43 @@ ret void } +define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 { +; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd' +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %cond.var = icmp slt i32 %tid, 0 +; CHECK: DIVERGENT: %cond.var = icmp + %cond.uni = icmp slt i32 %n, 0 +; CHECK-NOT: DIVERGENT: %cond.uni = icmp + br label %for.header +for.header: + br i1 %cond.var, label %A, label %B +A: + br label %C +B: + br label %C +C: + br i1 %cond.uni, label %E, label %D +D: + br i1 %cond.var, 
label %for.header, label %F + +E: + %e.lcssa.uni = phi i32 [ 0, %C ] +; CHECK-NOT: DIVERGENT: %e.lcssa.uni = phi i32 + br label %G + +F: + %f.lcssa.uni = phi i32 [ 1, %D ] +; CHECK-NOT: DIVERGENT: %f.lcssa.uni = phi i32 + br label %G + +G: + %g.join.var = phi i32 [ %e.lcssa.uni, %E ], [ %f.lcssa.uni, %F ] +; CHECK: DIVERGENT: %g.join.var = phi i32 + ret void +} + + declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone }