diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -784,6 +784,9 @@
     return false;
   }
 
+  if (isAlwaysUniform(I))
+    return false;
+
   return markDefsDivergent(I);
 }
 
@@ -952,10 +955,6 @@
     if (I.isTerminator())
       break;
 
-    // Mark this as divergent. We don't check if the instruction is
-    // always uniform. In a cycle where the thread convergence is not
-    // statically known, the instruction is not statically converged,
-    // and its outputs cannot be statically uniform.
    if (markDivergent(I))
       Worklist.push_back(&I);
   }
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -48,13 +48,10 @@
 void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers(
     const Value *V) {
   for (const auto *User : V->users()) {
-    const auto *UserInstr = dyn_cast<Instruction>(User);
-    if (!UserInstr)
-      continue;
-    if (isAlwaysUniform(*UserInstr))
-      continue;
-    if (markDivergent(*UserInstr)) {
-      Worklist.push_back(UserInstr);
+    if (const auto *UserInstr = dyn_cast<Instruction>(User)) {
+      if (markDivergent(*UserInstr)) {
+        Worklist.push_back(UserInstr);
+      }
     }
   }
 }
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -75,8 +75,6 @@
     Register Reg) {
   const auto &RegInfo = F.getRegInfo();
   for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
-    if (isAlwaysUniform(UserInstr))
-      continue;
     if (markDivergent(UserInstr))
       Worklist.push_back(&UserInstr);
   }
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll
@@ -569,6 +569,37 @@
   ret void
 }
 
+define amdgpu_kernel void @always_uniform() {
+; CHECK-LABEL: UniformityInfo for function 'always_uniform':
+; CHECK: CYCLES ASSSUMED DIVERGENT:
+; CHECK:   depth=1: entries(bb2 bb3)
+
+bb:
+  %inst = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
+  %inst1 = icmp ugt i32 %inst, 0
+  br i1 %inst1, label %bb3, label %bb2
+; CHECK: DIVERGENT: %inst = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
+; CHECK: DIVERGENT: %inst1 = icmp ugt i32 %inst, 0
+; CHECK: DIVERGENT: br i1 %inst1, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb3, %bb
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %inst4 = tail call i64 @llvm.amdgcn.icmp.i64.i16(i16 0, i16 0, i32 0)
+  %inst5 = trunc i64 %inst4 to i32
+  %inst6 = and i32 0, %inst5
+  br label %bb2
+; CHECK-LABEL: BLOCK bb3
+; CHECK-NOT: DIVERGENT: {{.*}} call i64 @llvm.amdgcn.icmp.i64.i16
+; CHECK: DIVERGENT: %inst5 = trunc i64 %inst4 to i32
+; CHECK: DIVERGENT: %inst6 = and i32 0, %inst5
+}
+
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32)
+
+declare i64 @llvm.amdgcn.icmp.i64.i16(i16, i16, i32 immarg)
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }