Index: lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -117,14 +117,10 @@
 }
 
 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
-  if (I.isUnconditional())
-    return;
-
-  Value *Cond = I.getCondition();
-  if (!DA->isUniform(Cond))
-    return;
-
-  setUniformMetadata(I.getParent()->getTerminator());
+  // Query the divergence of the branch itself rather than of its condition:
+  // the analysis can mark a branch divergent even when its condition is not.
+  if (DA->isUniform(&I))
+    setUniformMetadata(I.getParent()->getTerminator());
 }
 
 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Index: lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
===================================================================
--- lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -155,7 +155,7 @@
-/// Is the branch condition uniform or did the StructurizeCFG pass
-/// consider it as such?
+/// Is the branch uniform according to the divergence analysis, or did the
+/// StructurizeCFG pass consider it as such?
 bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
-  return DA->isUniform(T->getCondition()) ||
+  return DA->isUniform(T) ||
          T->getMetadata("structurizecfg.uniform") != nullptr;
 }
 
Index: test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+
+; This module creates a divergent branch. The branch is marked as divergent by
+; the divergence analysis but the condition is not. This test ensures that the
+; divergence of the branch is tested, not its condition, so that the branch is
+; correctly emitted as divergent.
+
+; CHECK-LABEL: {{^}}main:
+; CHECK: v_sqrt_f32_e32
+; CHECK-NEXT: BB0_{{[0-9]+}}:
+; CHECK-NEXT: s_or_b64 exec, exec
+define amdgpu_ps void @main(i32, float) {
+start:
+  %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
+  br label %loop
+
+loop:
+  %v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ]
+  %v2 = icmp ugt i32 %v1, 31
+  br i1 %v2, label %if1, label %endif1
+
+if1:
+  %v3 = call float @llvm.sqrt.f32(float %v0)
+  br label %endloop
+
+endif1:
+  %v4 = fcmp ogt float %v0, 0.000000e+00
+  br i1 %v4, label %endloop, label %endif2
+
+endif2:
+  %v5 = add i32 %v1, 1
+  br label %loop
+
+endloop:
+  %v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ]
+  call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
+  ret void
+}
+
+declare float @llvm.sqrt.f32(float) #1
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
+declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }