diff --git a/llvm/docs/ConvergenceAndUniformity.rst b/llvm/docs/ConvergenceAndUniformity.rst --- a/llvm/docs/ConvergenceAndUniformity.rst +++ b/llvm/docs/ConvergenceAndUniformity.rst @@ -393,15 +393,19 @@ 1. The semantics of the instruction may specify the output to be uniform. -2. Otherwise, if it is a PHI node, its output is uniform if and only - if for every pair of converged dynamic instances produced by all - threads in ``S``: +2. Otherwise, the output is divergent if the static instance is not + :ref:`m-converged <convergence-m-converged>`. +3. Otherwise, if the static instance is m-converged: - a. Both instances choose the same output from converged - dynamic instances, and, - b. That output is uniform for all threads in ``S``. -3. Otherwise, the output is uniform if and only if the input - operands are uniform for all threads in ``S``. + 1. If it is a PHI node, its output is uniform if and only + if for every pair of converged dynamic instances produced by all + threads in ``S``: + + a. Both instances choose the same output from converged + dynamic instances, and, + b. That output is uniform for all threads in ``S``. + 2. Otherwise, the output is uniform if and only if the input + operands are uniform for all threads in ``S``. Divergent Cycle Exits --------------------- @@ -433,6 +437,8 @@ those static instances whose convergence is independent of the cycle hierarchy: +.. _convergence-m-converged: + **m-converged static instances:** A static instance ``X`` is *m-converged* for a given CFG if and only @@ -474,9 +480,8 @@ if the whole CFG is reducible, then all nodes in the CFG are m-converged. -If a static instance is not m-converged, then every output is assumed -to be divergent. Otherwise, for an m-converged static instance, the -uniformity of each output is determined using the criteria +The uniformity of each output of a static instance +is determined using the criteria :ref:`described earlier <convergence-uniformity>`. 
The discovery of divergent outputs may cause their uses (including branches) to also become divergent. The analysis propagates this divergence until a diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h --- a/llvm/include/llvm/ADT/GenericUniformityImpl.h +++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h @@ -784,6 +784,9 @@ return false; } + if (isAlwaysUniform(I)) + return false; + return markDefsDivergent(I); } @@ -952,10 +955,6 @@ if (I.isTerminator()) break; - // Mark this as divergent. We don't check if the instruction is - // always uniform. In a cycle where the thread convergence is not - // statically known, the instruction is not statically converged, - // and its outputs cannot be statically uniform. if (markDivergent(I)) Worklist.push_back(&I); } diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp --- a/llvm/lib/Analysis/UniformityAnalysis.cpp +++ b/llvm/lib/Analysis/UniformityAnalysis.cpp @@ -48,13 +48,10 @@ void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers( const Value *V) { for (const auto *User : V->users()) { - const auto *UserInstr = dyn_cast<const Instruction>(User); - if (!UserInstr) - continue; - if (isAlwaysUniform(*UserInstr)) - continue; - if (markDivergent(*UserInstr)) { - Worklist.push_back(UserInstr); + if (const auto *UserInstr = dyn_cast<const Instruction>(User)) { + if (markDivergent(*UserInstr)) { + Worklist.push_back(UserInstr); + } } } } diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp --- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp @@ -75,8 +75,6 @@ Register Reg) { const auto &RegInfo = F.getRegInfo(); for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) { - if (isAlwaysUniform(UserInstr)) - continue; if (markDivergent(UserInstr)) Worklist.push_back(&UserInstr); } diff --git 
a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll @@ -569,6 +569,37 @@ ret void } +define amdgpu_kernel void @always_uniform() { +; CHECK-LABEL: UniformityInfo for function 'always_uniform': +; CHECK: CYCLES ASSSUMED DIVERGENT: +; CHECK: depth=1: entries(bb2 bb3) + +bb: + %inst = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0) + %inst1 = icmp ugt i32 %inst, 0 + br i1 %inst1, label %bb3, label %bb2 +; CHECK: DIVERGENT: %inst = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0) +; CHECK: DIVERGENT: %inst1 = icmp ugt i32 %inst, 0 +; CHECK: DIVERGENT: br i1 %inst1, label %bb3, label %bb2 + +bb2: ; preds = %bb3, %bb + br label %bb3 + +bb3: ; preds = %bb2, %bb + %inst4 = tail call i64 @llvm.amdgcn.icmp.i64.i16(i16 0, i16 0, i32 0) + %inst5 = trunc i64 %inst4 to i32 + %inst6 = and i32 0, %inst5 + br label %bb2 +; CHECK-LABEL: BLOCK bb3 +; CHECK-NOT: DIVERGENT: {{.*}} call i64 @llvm.amdgcn.icmp.i64.i16 +; CHECK: DIVERGENT: %inst5 = trunc i64 %inst4 to i32 +; CHECK: DIVERGENT: %inst6 = and i32 0, %inst5 +} + +declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) + +declare i64 @llvm.amdgcn.icmp.i64.i16(i16, i16, i32 immarg) + declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone }