diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -82,7 +82,8 @@
   void print(raw_ostream &OS, const Module *) const;
 
 private:
-  bool updateTerminator(const Instruction &Term) const;
+  /// Not const: consumes the DeferredTerminators entry for \p Term, if any.
+  bool updateTerminator(const Instruction &Term);
   bool updatePHINode(const PHINode &Phi) const;
 
   /// \brief Computes whether \p Inst is divergent based on the
@@ -171,6 +172,11 @@
   // Detected/marked divergent values.
   DenseSet<const Value *> DivergentValues;
 
+  // Terminators that have an operand defined inside a divergent loop. They are
+  // queued on the worklist rather than marked divergent directly, and
+  // updateTerminator() consumes the entry when it reports them as divergent.
+  DenseSet<const Instruction *> DeferredTerminators;
+
   // Internal worklist for divergence propagation.
   std::vector<const Instruction *> Worklist;
 };
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -107,7 +107,12 @@
   UniformOverrides.insert(&UniVal);
 }
 
-bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const {
+bool DivergenceAnalysis::updateTerminator(const Instruction &Term) {
+  // A terminator recorded in DeferredTerminators is reported as divergent
+  // unconditionally, ahead of the successor-count check. erase() returns true
+  // iff the entry was present, so one lookup both tests and consumes it.
+  if (DeferredTerminators.erase(&Term))
+    return true;
   if (Term.getNumSuccessors() <= 1)
     return false;
   if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
@@ -233,8 +238,17 @@
         if (!OpInst)
           continue;
         if (DivLoop->contains(OpInst->getParent())) {
-          markDivergent(I);
-          pushUsers(I);
+          if (I.isTerminator()) {
+            // Defer terminators instead of marking them here: record and
+            // queue them so updateTerminator() returns true for them.
+            // NOTE(review): the worklist loop is not part of this hunk;
+            // confirm it calls updateTerminator() for queued terminators.
+            DeferredTerminators.insert(&I);
+            Worklist.push_back(&I);
+          } else {
+            markDivergent(I);
+            pushUsers(I);
+          }
           break;
         }
       }
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
@@ -0,0 +1,52 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; The branch in bb4 uses %i2, a phi defined inside the loop (bb1..bb3) whose
+; exit depends on the workitem id. Verify that the phi in the join block bb6
+; and the branch that uses it are reported as divergent.
+
+; CHECK: bb6:
+; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ]
+; CHECK: DIVERGENT: br i1 %.126.i355.i, label %bb7, label %bb8
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+define protected amdgpu_kernel void @_Z23krnl_GPUITSFitterKerneli() {
+bb0:
+  %i4 = call i32 @llvm.amdgcn.workitem.id.x()
+  %i5 = icmp eq i32 %i4, -1
+  br label %bb1
+
+bb1: ; preds = %bb3, %bb0
+  %lsr.iv = phi i32 [ %i1, %bb3 ], [ 7, %bb0 ]
+  br i1 %i5, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %i14 = icmp eq i32 %lsr.iv.next, 0
+  br label %bb3
+
+bb3: ; preds = %bb2, %bb1
+  %i1 = phi i32 [ %lsr.iv.next, %bb2 ], [ 0, %bb1 ]
+  %i2 = phi i1 [ false, %bb2 ], [ true, %bb1 ]
+  %i3 = phi i1 [ %i14, %bb2 ], [ true, %bb1 ]
+  br i1 %i3, label %bb4, label %bb1
+
+bb4: ; preds = %bb3
+  br i1 %i2, label %bb5, label %bb6
+
+bb5: ; preds = %bb4
+  br label %bb6
+
+bb6: ; preds = %bb5, %bb4
+  %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ]
+  br i1 %.126.i355.i, label %bb7, label %bb8
+
+bb7: ; preds = %bb6
+  br label %bb8
+
+bb8: ; preds = %bb7, %bb6
+  ret void
+}
+
+attributes #0 = { nounwind readnone speculatable }