Index: lib/Target/AMDGPU/SIInsertSkips.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertSkips.cpp
+++ lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -251,6 +251,7 @@
        BI != BE; BI = NextBB) {
     NextBB = std::next(BI);
     MachineBasicBlock &MBB = *BI;
+    bool HaveSkipBlock = false;
 
     if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
       // Reached convergence point for last divergent branch.
@@ -280,6 +281,13 @@
         // FIXME: Shouldn't this be handled by BranchFolding?
-        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
+        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
           MI.eraseFromParent();
+        } else if (HaveSkipBlock) {
+          // A skip block has been inserted after the current one: remove
+          // this unconditional branch so that the two instructions
+          // performing the kill are skipped if the exec mask is non-zero.
+          // (else-if: MI must never be erased twice.)
+          MI.eraseFromParent();
+        }
         break;
       }
       case AMDGPU::SI_KILL_TERMINATOR: {
@@ -288,9 +296,9 @@
 
         if (ExecBranchStack.empty()) {
           if (skipIfDead(MI, *NextBB)) {
+            HaveSkipBlock = true;
             NextBB = std::next(BI);
             BE = MF.end();
-            Next = MBB.end();
           }
         } else {
           HaveKill = true;