Index: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h @@ -49,7 +49,7 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass(); FunctionPass *createSIFixSGPRCopiesPass(); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); -FunctionPass *createSIInsertNopsPass(); +FunctionPass *createSIDebuggerInsertNopsPass(); FunctionPass *createSIInsertWaitsPass(); ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C); @@ -98,8 +98,8 @@ void initializeSIAnnotateControlFlowPass(PassRegistry&); extern char &SIAnnotateControlFlowPassID; -void initializeSIInsertNopsPass(PassRegistry&); -extern char &SIInsertNopsID; +void initializeSIDebuggerInsertNopsPass(PassRegistry&); +extern char &SIDebuggerInsertNopsID; void initializeSIInsertWaitsPass(PassRegistry&); extern char &SIInsertWaitsID; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -55,7 +55,7 @@ initializeAMDGPUAnnotateUniformValuesPass(*PR); initializeAMDGPUPromoteAllocaPass(*PR); initializeSIAnnotateControlFlowPass(*PR); - initializeSIInsertNopsPass(*PR); + initializeSIDebuggerInsertNopsPass(*PR); initializeSIInsertWaitsPass(*PR); initializeSIWholeQuadModePass(*PR); initializeSILowerControlFlowPass(*PR); @@ -396,7 +396,7 @@ addPass(createSIInsertWaitsPass(), false); addPass(createSIShrinkInstructionsPass()); addPass(createSILowerControlFlowPass(), false); - addPass(createSIInsertNopsPass(), false); + addPass(createSIDebuggerInsertNopsPass(), false); } TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { Index: llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- 
llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt +++ llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt @@ -61,11 +61,11 @@ R600RegisterInfo.cpp R600TextureIntrinsicsReplacer.cpp SIAnnotateControlFlow.cpp + SIDebuggerInsertNops.cpp SIFixControlFlowLiveIntervals.cpp SIFixSGPRCopies.cpp SIFoldOperands.cpp SIFrameLowering.cpp - SIInsertNopsPass.cpp SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp Index: llvm/trunk/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp @@ -0,0 +1,112 @@ +//===--- SIDebuggerInsertNops.cpp - Inserts nops for debugger usage -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Inserts two nop instructions for each high level source statement for +/// debugger usage. +/// +/// Tools, such as debugger, need to pause execution based on user input (i.e. +/// breakpoint). In order to do this, two nop instructions are inserted for each +/// high level source statement: one before first isa instruction of high level +/// source statement, and one after last isa instruction of high level source +/// statement. Further, debugger may replace nop instructions with trap +/// instructions based on user input. 
+// +//===----------------------------------------------------------------------===// + +#include "SIInstrInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "si-debugger-insert-nops" +#define PASS_NAME "SI Debugger Insert Nops" + +namespace { + +class SIDebuggerInsertNops : public MachineFunctionPass { +public: + static char ID; + + SIDebuggerInsertNops() : MachineFunctionPass(ID) { } + const char *getPassName() const override { return PASS_NAME; } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // anonymous namespace + +INITIALIZE_PASS(SIDebuggerInsertNops, DEBUG_TYPE, PASS_NAME, false, false) + +char SIDebuggerInsertNops::ID = 0; +char &llvm::SIDebuggerInsertNopsID = SIDebuggerInsertNops::ID; + +FunctionPass *llvm::createSIDebuggerInsertNopsPass() { + return new SIDebuggerInsertNops(); +} + +bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) { + // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not + // specified. + const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); + if (!ST.debuggerInsertNops()) + return false; + + // Skip machine functions without debug info. + if (!MF.getMMI().hasDebugInfo()) + return false; + + // Target instruction info. + const SIInstrInfo *TII = + static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + // Mapping from high level source statement line number to last corresponding + // isa instruction. + DenseMap<unsigned, MachineBasicBlock::iterator> LineToInst; + // Insert nop instruction before first isa instruction of each high level + // source statement and collect last isa instruction for each high level + // source statement. 
+ for (auto &MBB : MF) { + for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { + if (MI->isDebugValue() || !MI->getDebugLoc()) + continue; + + auto DL = MI->getDebugLoc(); + auto CL = DL.getLine(); + auto LineToInstEntry = LineToInst.find(CL); + if (LineToInstEntry == LineToInst.end()) { + BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP)) + .addImm(0); + LineToInst.insert(std::make_pair(CL, MI)); + } else { + LineToInstEntry->second = MI; + } + } + } + // Insert nop instruction after last isa instruction of each high level source + // statement. + for (auto const &LineToInstEntry : LineToInst) { + auto MBB = LineToInstEntry.second->getParent(); + auto DL = LineToInstEntry.second->getDebugLoc(); + MachineBasicBlock::iterator MI = LineToInstEntry.second; + if (MI->getOpcode() != AMDGPU::S_ENDPGM) + BuildMI(*MBB, *(++MI), DL, TII->get(AMDGPU::S_NOP)) + .addImm(0); + } + // Insert nop instruction before prologue. + MachineBasicBlock &MBB = MF.front(); + MachineInstr &MI = MBB.front(); + BuildMI(MBB, MI, DebugLoc(), TII->get(AMDGPU::S_NOP)) + .addImm(0); + + return true; +} Index: llvm/trunk/lib/Target/AMDGPU/SIInsertNopsPass.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInsertNopsPass.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInsertNopsPass.cpp @@ -1,110 +0,0 @@ -//===--- SIInsertNopsPass.cpp - Use predicates for control flow -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief Insert two nop instructions for each high level source statement. -/// -/// Tools, such as debugger, need to pause execution based on user input (i.e. -/// breakpoint). 
In order to do this, two nop instructions are inserted for each -/// high level source statement: one before first isa instruction of high level -/// source statement, and one after last isa instruction of high level source -/// statement. Further, debugger may replace nop instructions with trap -/// instructions based on user input. -// -//===----------------------------------------------------------------------===// - -#include "SIInstrInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -using namespace llvm; - -#define DEBUG_TYPE "si-insert-nops" -#define PASS_NAME "SI Insert Nops" - -namespace { - -class SIInsertNops : public MachineFunctionPass { -public: - static char ID; - - SIInsertNops() : MachineFunctionPass(ID) { } - const char *getPassName() const override { return PASS_NAME; } - - bool runOnMachineFunction(MachineFunction &MF) override; -}; - -} // anonymous namespace - -INITIALIZE_PASS(SIInsertNops, DEBUG_TYPE, PASS_NAME, false, false) - -char SIInsertNops::ID = 0; -char &llvm::SIInsertNopsID = SIInsertNops::ID; - -FunctionPass *llvm::createSIInsertNopsPass() { - return new SIInsertNops(); -} - -bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) { - // Skip this pass if debugger-insert-nops feature is not enabled. - const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); - if (!ST.debuggerInsertNops()) - return false; - - // Skip machine functions without debug info. - if (!MF.getMMI().hasDebugInfo()) - return false; - - // Target instruction info. - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); - - // Mapping from high level source statement line number to last corresponding - // isa instruction. 
- DenseMap<unsigned, MachineBasicBlock::iterator> LineToInst; - // Insert nop instruction before first isa instruction of each high level - // source statement and collect last isa instruction for each high level - // source statement. - for (auto &MBB : MF) { - for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { - if (MI->isDebugValue() || !MI->getDebugLoc()) - continue; - - auto DL = MI->getDebugLoc(); - auto CL = DL.getLine(); - auto LineToInstEntry = LineToInst.find(CL); - if (LineToInstEntry == LineToInst.end()) { - BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP)) - .addImm(0); - LineToInst.insert(std::make_pair(CL, MI)); - } else { - LineToInstEntry->second = MI; - } - } - } - // Insert nop instruction after last isa instruction of each high level source - // statement. - for (auto const &LineToInstEntry : LineToInst) { - auto MBB = LineToInstEntry.second->getParent(); - auto DL = LineToInstEntry.second->getDebugLoc(); - MachineBasicBlock::iterator MI = LineToInstEntry.second; - if (MI->getOpcode() != AMDGPU::S_ENDPGM) - BuildMI(*MBB, *(++MI), DL, TII->get(AMDGPU::S_NOP)) - .addImm(0); - } - // Insert nop instruction before prologue. - MachineBasicBlock &MBB = MF.front(); - MachineInstr &MI = MBB.front(); - BuildMI(MBB, MI, DebugLoc(), TII->get(AMDGPU::S_NOP)) - .addImm(0); - - return true; -}