diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1640,13 +1640,15 @@ // TODO: Could insert earlier and schedule more liberally with operations // that only use caller preserved registers. MachineBasicBlock &EntryBB = MF.front(); + MachineBasicBlock::iterator I = EntryBB.begin(); + for (MachineBasicBlock::iterator E = EntryBB.end(); + I != E && (I->isPHI() || I->isCFIInstruction()); ++I) + ; + BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0); if (ST->hasVscnt()) - BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), - TII->get(AMDGPU::S_WAITCNT_VSCNT)) - .addReg(AMDGPU::SGPR_NULL, RegState::Undef) - .addImm(0); - BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); + BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT)) + .addReg(AMDGPU::SGPR_NULL, RegState::Undef) + .addImm(0); Modified = true; } diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-skip-cfi.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-cfi.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-cfi.mir @@ -0,0 +1,23 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck %s + +--- | + + define void @skip_cfi() { + ret void + } + +... +--- + +# Ensure we insert waitcnts after any CFI at the start of non-kernel functions + +# CHECK-LABEL: name: skip_cfi{{$}} +# CHECK: frame-setup CFI_INSTRUCTION +# CHECK: S_WAITCNT + +name: skip_cfi +machineFunctionInfo: +body: | + bb.0: + frame-setup CFI_INSTRUCTION undefined $sgpr0 +...