diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -295,14 +295,11 @@ // push non-divergent phi nodes in JoinBlock to the worklist pushPHINodes(JoinBlock); - // JoinBlock is a divergent loop exit - if (BranchLoop && !BranchLoop->contains(&JoinBlock)) { - return true; - } - // disjoint-paths divergent at JoinBlock markBlockJoinDivergent(JoinBlock); - return false; + + // JoinBlock is a divergent loop exit + return BranchLoop && !BranchLoop->contains(&JoinBlock); } void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) { diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll @@ -0,0 +1,36 @@ +; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s + +; CHECK: bb3: +; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ] +; CHECK: DIVERGENT: br i1 %Guard.bb4, label %bb4, label %bb5 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +define protected amdgpu_kernel void @test() { +bb0: + %tid.x = call i32 @llvm.amdgcn.workitem.id.x() + %i5 = icmp eq i32 %tid.x, -1 + br label %bb1 + +bb1: ; preds = %bb2, %bb0 + %lsr.iv = phi i32 [ 7, %bb0 ], [ %lsr.iv.next, %bb2 ] + br i1 %i5, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + %i14 = icmp eq i32 %lsr.iv.next, 0 + br i1 %i14, label %bb3, label %bb1 + +bb3: ; preds = %bb2, %bb1 + %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ] + br i1 %Guard.bb4, label %bb4, label %bb5 + +bb4: ; preds = %bb3 + br label %bb5 + +bb5: ; preds = %bb3, %bb4 + ret void +} + +attributes #0 = { nounwind readnone speculatable } diff 
--git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll @@ -0,0 +1,28 @@ +; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; XFAIL: * + +; https://bugs.llvm.org/show_bug.cgi?id=46372 + +; CHECK: bb2: +; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ] + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +define protected amdgpu_kernel void @test2(i1 %uni) { +bb0: + %tid.x = call i32 @llvm.amdgcn.workitem.id.x() + %i5 = icmp eq i32 %tid.x, -1 + br i1 %uni, label %bb1, label %bb2 + +bb1: ; preds = %bb1, %bb0 + %lsr.iv = phi i32 [ 7, %bb0 ], [ %lsr.iv.next, %bb1 ] + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + br i1 %i5, label %bb2, label %bb1 + +bb2: ; preds = %bb1, %bb0 + %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ] + ret void +} + +attributes #0 = { nounwind readnone speculatable }