Index: lib/Target/AMDGPU/SIInsertSkips.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertSkips.cpp
+++ lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -251,6 +251,7 @@
        BI != BE; BI = NextBB) {
     NextBB = std::next(BI);
     MachineBasicBlock &MBB = *BI;
+    bool HaveSkipBlock = false;
 
     if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
       // Reached convergence point for last divergent branch.
@@ -280,6 +281,13 @@
         // FIXME: Shouldn't this be handled by BranchFolding?
-        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
+        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
           MI.eraseFromParent();
+        } else if (HaveSkipBlock) {
+          // A skip block has been inserted right after the current block for
+          // the preceding kill. Remove the unconditional branch so that this
+          // block falls through into the skip block; the skip branch emitted
+          // for the kill jumps over it when the exec mask is non-zero.
+          MI.eraseFromParent();
+        }
         break;
       }
       case AMDGPU::SI_KILL_TERMINATOR: {
@@ -288,9 +296,9 @@
 
         if (ExecBranchStack.empty()) {
           if (skipIfDead(MI, *NextBB)) {
+            HaveSkipBlock = true;
             NextBB = std::next(BI);
             BE = MF.end();
-            Next = MBB.end();
           }
         } else {
           HaveKill = true;
Index: test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir
@@ -0,0 +1,40 @@
+# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold=1 %s -o - | FileCheck %s
+
+--- |
+  define amdgpu_ps void @kill_uncond_branch() {
+    ret void
+  }
+...
+---
+
+# CHECK-LABEL: name: kill_uncond_branch
+
+# CHECK-LABEL: bb.0:
+# CHECK: S_CBRANCH_VCCNZ %bb.1, implicit %vcc
+
+# CHECK-LABEL: bb.1:
+# CHECK: V_CMPX_LE_F32_e32
+# CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit %exec
+
+# CHECK-LABEL: bb.3:
+# CHECK: EXP_DONE
+# CHECK: S_ENDPGM
+
+# CHECK-LABEL: bb.2:
+# CHECK: S_ENDPGM
+
+name: kill_uncond_branch
+
+body: |
+  bb.0:
+    successors: %bb.1
+    S_CBRANCH_VCCNZ %bb.1, implicit %vcc
+
+  bb.1:
+    successors: %bb.2
+    %vgpr0 = V_MOV_B32_e32 0, implicit %exec
+    SI_KILL_TERMINATOR %vgpr0, implicit-def %exec, implicit-def %vcc, implicit %exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM