Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -49,6 +49,7 @@
 FunctionPass *createSIFixSGPRCopiesPass();
 FunctionPass *createSIFixSGPRLiveRangesPass();
 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createSIInsertNopsPass();
 FunctionPass *createSIInsertWaitsPass();
 
 ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C);
@@ -96,6 +97,9 @@
 void initializeSIAnnotateControlFlowPass(PassRegistry&);
 extern char &SIAnnotateControlFlowPassID;
 
+void initializeSIInsertNopsPass(PassRegistry&);
+extern char &SIInsertNopsID;
+
 void initializeSIInsertWaitsPass(PassRegistry&);
 extern char &SIInsertWaitsID;
 
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Transforms/IPO.h"
@@ -54,6 +55,7 @@
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
   initializeAMDGPUPromoteAllocaPass(*PR);
   initializeSIAnnotateControlFlowPass(*PR);
+  initializeSIInsertNopsPass(*PR);
   initializeSIInsertWaitsPass(*PR);
   initializeSILowerControlFlowPass(*PR);
 }
@@ -145,6 +147,12 @@
 //===----------------------------------------------------------------------===//
 
 namespace {
+
+cl::opt<bool> InsertNops(
+  "amdgpu-insert-nops",
+  cl::desc("Insert two nop instructions for each high level source statement"),
+  cl::init(false));
+
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
   AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
@@ -364,6 +372,9 @@
 void GCNPassConfig::addPreEmitPass() {
   addPass(createSIInsertWaitsPass(), false);
   addPass(createSILowerControlFlowPass(), false);
+  if (InsertNops) {
+    addPass(createSIInsertNopsPass(), false);
+  }
 }
 
 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
Index: lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -50,6 +50,7 @@
   SIFixSGPRLiveRanges.cpp
   SIFoldOperands.cpp
   SIFrameLowering.cpp
+  SIInsertNopsPass.cpp
   SIInsertWaits.cpp
   SIInstrInfo.cpp
   SIISelLowering.cpp
Index: lib/Target/AMDGPU/SIInsertNopsPass.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/SIInsertNopsPass.cpp
@@ -0,0 +1,110 @@
+//===--- SIInsertNopsPass.cpp - Insert nops for source statements ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert S_NOP instructions around every high level source statement.
+///
+/// Tools, such as a debugger, need to pause execution based on user input
+/// (e.g. a breakpoint). In order to do this, two S_NOP instructions are
+/// inserted for each high level source statement: one before the first isa
+/// instruction of the statement, and one after the last isa instruction of
+/// the statement. One more S_NOP is inserted at the start of the function.
+/// A debugger may then replace S_NOP instructions with S_TRAP instructions
+/// based on user input.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "si-insert-nops"
+#define PASS_NAME "SI Insert Nops"
+
+namespace {
+
+/// Machine function pass that places S_NOP instructions around the
+/// instructions of each source line.
+class SIInsertNops : public MachineFunctionPass {
+public:
+  static char ID;
+
+  SIInsertNops() : MachineFunctionPass(ID) { }
+  const char *getPassName() const override { return PASS_NAME; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS(SIInsertNops, DEBUG_TYPE, PASS_NAME, false, false)
+
+char SIInsertNops::ID = 0;
+char &llvm::SIInsertNopsID = SIInsertNops::ID;
+
+FunctionPass *llvm::createSIInsertNopsPass() {
+  return new SIInsertNops();
+}
+
+/// Insert S_NOP instructions into \p MF: one before the first instruction seen
+/// for each source line, one after the last instruction seen for each source
+/// line, and one at the start of the entry block.
+///
+/// \returns true, since an S_NOP is always inserted into the entry block.
+bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) {
+  const SIInstrInfo *TII =
+    static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  // Maps a source line number to the last instruction seen for that line.
+  DenseMap<unsigned, MachineBasicBlock::iterator> LineToInst;
+  for (auto MBB = MF.begin(); MBB != MF.end(); ++MBB) {
+    for (auto MI = MBB->begin(); MI != MBB->end(); ++MI) {
+      // Only instructions that carry a source location are considered.
+      if (MI->isDebugValue() || !MI->getDebugLoc()) {
+        continue;
+      }
+      auto DL = MI->getDebugLoc();
+      auto CL = DL.getLine();
+      auto LineToInstEntry = LineToInst.find(CL);
+      if (LineToInstEntry == LineToInst.end()) {
+        // First instruction seen for this line: put an S_NOP in front of it.
+        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
+          .addImm(0);
+        LineToInst.insert(std::make_pair(CL, MI));
+      } else {
+        LineToInstEntry->second = MI;
+      }
+    }
+  }
+
+  // Put an S_NOP after the last instruction recorded for each line.
+  for (auto LineToInstEntry = LineToInst.begin();
+       LineToInstEntry != LineToInst.end(); ++LineToInstEntry) {
+    auto MBB = LineToInstEntry->second->getParent();
+    auto DL = LineToInstEntry->second->getDebugLoc();
+    MachineBasicBlock::iterator MI = LineToInstEntry->second;
+    ++MI;
+    // Nothing is inserted when that instruction is the last one in its block.
+    if (MI != MBB->end()) {
+      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
+        .addImm(0);
+    }
+  }
+
+  // Put an S_NOP at the very start of the function. The iterator overload of
+  // BuildMI is used so that an empty entry block is handled as well.
+  MachineBasicBlock &MBB = MF.front();
+  BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AMDGPU::S_NOP))
+    .addImm(0);
+
+  return true;
+}