diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp --- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp +++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp @@ -37,6 +37,7 @@ class SIRemoveShortExecBranches : public MachineFunctionPass { private: const SIInstrInfo *TII = nullptr; + bool getBlockDestinations(MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB, MachineBasicBlock *&FalseMBB, @@ -88,10 +89,10 @@ for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { // When a uniform loop is inside non-uniform control flow, the branch - // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken - // when EXEC = 0. We should skip the loop lest it becomes infinite. - if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ || - I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) + // leaving the loop might never be taken when EXEC = 0. + // Hence we should retain cbranch out of the loop lest it become infinite. + if (I->isConditionalBranch() && + I->getOpcode() != AMDGPU::S_CBRANCH_EXECNZ) return true; if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -32,6 +32,7 @@ ; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec ; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3] +; CHECK-NEXT: s_cbranch_execz BB0_6 ; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec @@ -49,7 +50,7 @@ ; CHECK-NEXT: s_add_i32 s0, s0, 1 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 ; CHECK-NEXT: s_branch BB0_1 -; CHECK-NEXT: ; %bb.6: ; %Flow2 +; CHECK-NEXT: BB0_6: ; %Flow2 ; CHECK-NEXT: s_or_b64 exec, exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]