Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -393,9 +393,8 @@
   addPass(createSILowerControlFlowPass(), false);
 
   const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
-  if (ST.debuggerInsertNops()) {
+  if (ST.debuggerInsertNops())
     addPass(createSIInsertNopsPass(), false);
-  }
 }
 
 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
Index: lib/Target/AMDGPU/SIInsertNopsPass.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertNopsPass.cpp
+++ lib/Target/AMDGPU/SIInsertNopsPass.cpp
@@ -55,9 +55,8 @@
 
 bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) {
   // Skip machine functions without debug info.
-  if (!MF.getMMI().hasDebugInfo()) {
+  if (!MF.getMMI().hasDebugInfo())
     return false;
-  }
 
   // Target instruction info.
   const SIInstrInfo *TII =
@@ -69,16 +68,16 @@
   // Insert nop instruction before first isa instruction of each high level
   // source statement and collect last isa instruction for each high level
   // source statement.
-  for (auto MBB = MF.begin(); MBB != MF.end(); ++MBB) {
-    for (auto MI = MBB->begin(); MI != MBB->end(); ++MI) {
-      if (MI->isDebugValue() || !MI->getDebugLoc()) {
+  for (auto &MBB : MF) {
+    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
+      if (MI->isDebugValue() || !MI->getDebugLoc())
         continue;
-      }
+
       auto DL = MI->getDebugLoc();
       auto CL = DL.getLine();
       auto LineToInstEntry = LineToInst.find(CL);
       if (LineToInstEntry == LineToInst.end()) {
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
+        BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
           .addImm(0);
         LineToInst.insert(std::make_pair(CL, MI));
       } else {
@@ -88,16 +87,15 @@
   }
   // Insert nop instruction after last isa instruction of each high level source
   // statement.
-  for (auto LineToInstEntry = LineToInst.begin();
-       LineToInstEntry != LineToInst.end(); ++LineToInstEntry) {
-    auto MBB = LineToInstEntry->second->getParent();
-    auto DL = LineToInstEntry->second->getDebugLoc();
-    MachineBasicBlock::iterator MI = LineToInstEntry->second;
-    ++MI;
-    if (MI != MBB->end()) {
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
+  for (auto const &LineToInstEntry : LineToInst) {
+    auto MBB = LineToInstEntry.second->getParent();
+    auto DL = LineToInstEntry.second->getDebugLoc();
+    MachineBasicBlock::iterator MI = LineToInstEntry.second;
+    // Pass the advanced iterator itself rather than *(++MI): the next
+    // position may be MBB->end(), which must never be dereferenced.
+    if (MI->getOpcode() != AMDGPU::S_ENDPGM)
+      BuildMI(*MBB, ++MI, DL, TII->get(AMDGPU::S_NOP))
         .addImm(0);
-    }
   }
   // Insert nop instruction before prologue.
   MachineBasicBlock &MBB = MF.front();
Index: test/CodeGen/AMDGPU/debugger_insert_nops.ll
===================================================================
--- test/CodeGen/AMDGPU/debugger_insert_nops.ll
+++ test/CodeGen/AMDGPU/debugger_insert_nops.ll
@@ -2,13 +2,17 @@
 
 ; CHECK: debugger_insert_nops.cl:2:3
 ; CHECK-NEXT: s_nop 0
-; CHECK: debugger_insert_nops.cl:3:3
+; CHECK: s_nop 0
+; CHECK-NEXT: debugger_insert_nops.cl:3:3
 ; CHECK-NEXT: s_nop 0
-; CHECK: debugger_insert_nops.cl:4:3
+; CHECK: s_nop 0
+; CHECK-NEXT: debugger_insert_nops.cl:4:3
 ; CHECK-NEXT: s_nop 0
-; CHECK: debugger_insert_nops.cl:5:3
+; CHECK: s_nop 0
+; CHECK-NEXT: debugger_insert_nops.cl:5:3
 ; CHECK-NEXT: s_nop 0
-; CHECK: debugger_insert_nops.cl:6:1
+; CHECK: s_nop 0
+; CHECK-NEXT: debugger_insert_nops.cl:6:1
 ; CHECK-NEXT: s_nop 0
 ; CHECK-NEXT: s_endpgm
 