diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp --- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp +++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp @@ -88,10 +88,9 @@ for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { // When a uniform loop is inside non-uniform control flow, the branch - // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken - // when EXEC = 0. We should skip the loop lest it becomes infinite. - if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ || - I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) + // leaving the loop might never be taken when EXEC = 0. + // Hence we should retain cbranch out of the loop lest it become infinite. + if (I->isConditionalBranch()) return true; if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -32,6 +32,7 @@ ; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec ; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3] +; CHECK-NEXT: s_cbranch_execz BB0_6 ; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec @@ -49,7 +50,7 @@ ; CHECK-NEXT: s_add_i32 s0, s0, 1 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 ; CHECK-NEXT: s_branch BB0_1 -; CHECK-NEXT: ; %bb.6: ; %Flow2 +; CHECK-NEXT: BB0_6: ; %Flow2 ; CHECK-NEXT: s_or_b64 exec, exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -13,6 +13,7 @@ ; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0 ; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc ; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]] +; SI-NEXT: s_cbranch_execz [[FLOW_BB:BB[0-9]+_[0-9]+]] ; SI-NEXT: ; %bb.{{[0-9]+}}: ; %LeafBlock3 ; SI: s_mov_b64 s[{{[0-9]:[0-9]}}], -1 @@ -20,7 +21,7 @@ ; SI-NEXT: s_cbranch_execnz ; v_mov should be after exec modification -; SI: ; %bb.{{[0-9]+}}: +; SI: [[FLOW_BB]]: ; SI-NEXT: s_or_saveexec_b64 [[SAVE3:s\[[0-9]+:[0-9]+\]]], [[SAVE2]] ; SI-NEXT: s_xor_b64 exec, exec, [[SAVE3]] ;