Index: lib/Target/AMDGPU/SILowerControlFlow.cpp
===================================================================
--- lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -125,29 +125,44 @@
   return new SILowerControlFlow();
 }
 
+static bool opcodeEmitsNoInsts(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::KILL:
+  case TargetOpcode::BUNDLE:
+  case TargetOpcode::CFI_INSTRUCTION:
+  case TargetOpcode::EH_LABEL:
+  case TargetOpcode::GC_LABEL:
+  case TargetOpcode::DBG_VALUE:
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
                                     MachineBasicBlock *To) {
   unsigned NumInstr = 0;
+  MachineFunction *MF = From->getParent();
 
-  for (MachineFunction::iterator MBBI = MachineFunction::iterator(From),
-       ToI = MachineFunction::iterator(To); MBBI != ToI; ++MBBI) {
-
+  for (MachineFunction::iterator MBBI(From), ToI(To), End = MF->end();
+       MBBI != End && MBBI != ToI; ++MBBI) {
     MachineBasicBlock &MBB = *MBBI;
 
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          NumInstr < SkipThreshold && I != E; ++I) {
+      if (opcodeEmitsNoInsts(I->getOpcode()))
+        continue;
 
-      if (I->isBundle() || !I->isBundled()) {
-        // When a uniform loop is inside non-uniform control flow, the branch
-        // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
-        // when EXEC = 0. We should skip the loop lest it becomes infinite.
-        if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ)
-          return true;
+      // When a uniform loop is inside non-uniform control flow, the branch
+      // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
+      // when EXEC = 0. We should skip the loop lest it becomes infinite.
+      if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ)
+        return true;
 
-      if (++NumInstr >= SkipThreshold)
-        return true;
-      }
+      if (++NumInstr >= SkipThreshold)
+        return true;
     }
   }
 
   return false;
Index: test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -0,0 +1,56 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
+; GCN: v_cmp_eq_i32
+; GCN: s_and_saveexec_b64
+; GCN: s_xor_b64
+; GCN: s_branch BB0_1
+
+; GCN: s_or_b64 exec, exec
+; GCN: s_endpgm
+
+; GCN: ds_write_b32
+; GCN: s_waitcnt
+define void @lower_control_flow_unreachable_terminator() #0 {
+bb:
+  %tmp15 = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %tmp63 = icmp eq i32 %tmp15, 32
+  br i1 %tmp63, label %bb64, label %bb68
+
+bb64:
+  store volatile i32 0, i32 addrspace(3)* undef, align 4
+  unreachable
+
+bb68:
+  ret void
+}
+
+; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
+; GCN: v_cmp_eq_i32
+; GCN: s_and_saveexec_b64
+; GCN: s_xor_b64
+; GCN: s_endpgm
+
+; GCN: s_or_b64 exec, exec
+; GCN: ds_write_b32
+; GCN: s_waitcnt
+define void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
+bb:
+  %tmp15 = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %tmp63 = icmp eq i32 %tmp15, 32
+  br i1 %tmp63, label %bb68, label %bb64
+
+bb68:
+  ret void
+
+bb64:
+  store volatile i32 0, i32 addrspace(3)* undef, align 4
+  unreachable
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.y() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }