Index: lib/Target/AMDGPU/AMDGPU.h =================================================================== --- lib/Target/AMDGPU/AMDGPU.h +++ lib/Target/AMDGPU/AMDGPU.h @@ -49,6 +49,7 @@ FunctionPass *createSIFixSGPRCopiesPass(); FunctionPass *createSIFixSGPRLiveRangesPass(); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); +FunctionPass *createSIInsertNopsPass(); FunctionPass *createSIInsertWaitsPass(); ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C); @@ -96,6 +97,9 @@ void initializeSIAnnotateControlFlowPass(PassRegistry&); extern char &SIAnnotateControlFlowPassID; +void initializeSIInsertNopsPass(PassRegistry&); +extern char &SIInsertNopsID; + void initializeSIInsertWaitsPass(PassRegistry&); extern char &SIInsertWaitsID; Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_os_ostream.h" #include "llvm/Transforms/IPO.h" @@ -54,6 +55,7 @@ initializeAMDGPUAnnotateUniformValuesPass(*PR); initializeAMDGPUPromoteAllocaPass(*PR); initializeSIAnnotateControlFlowPass(*PR); + initializeSIInsertNopsPass(*PR); initializeSIInsertWaitsPass(*PR); initializeSILowerControlFlowPass(*PR); } @@ -145,6 +147,12 @@ //===----------------------------------------------------------------------===// namespace { + +cl::opt InsertNops( + "amdgpu-insert-nops", + cl::desc("Insert two nop instructions for each high level source statement"), + cl::init(false)); + class AMDGPUPassConfig : public TargetPassConfig { public: AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) @@ -364,6 +372,9 @@ void GCNPassConfig::addPreEmitPass() { addPass(createSIInsertWaitsPass(), false); addPass(createSILowerControlFlowPass(), false); + if (InsertNops) { + addPass(createSIInsertNopsPass(), false); + } } TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { Index: lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/CMakeLists.txt +++ lib/Target/AMDGPU/CMakeLists.txt @@ -50,6 +50,7 @@ SIFixSGPRLiveRanges.cpp SIFoldOperands.cpp SIFrameLowering.cpp + SIInsertNopsPass.cpp SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp Index: lib/Target/AMDGPU/SIInsertNopsPass.cpp =================================================================== --- /dev/null +++ lib/Target/AMDGPU/SIInsertNopsPass.cpp @@ -0,0 +1,96 @@ +//===--- SIInsertNopsPass.cpp - Use predicates for control flow -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Insert two S_NOP instructions for every high level source statement. +/// +/// Tools, such as debugger, need to pause execution based on user input (i.e. +/// breakpoint). In order to do this, two S_NOP instructions are inserted for +/// each high level source statement: one before first isa instruction of high +/// level source statement, and one after last isa instruction of high level +/// source statement. Further, debugger may replace S_NOP instructions with +/// S_TRAP instructions based on user input. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define DEBUG_TYPE "si-insert-nops" +#define PASS_NAME "SI Insert Nops" + +namespace { + +class SIInsertNops : public MachineFunctionPass { +public: + static char ID; + + SIInsertNops() : MachineFunctionPass(ID) { } + const char *getPassName() const override { return PASS_NAME; } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // anonymous namespace + +INITIALIZE_PASS_BEGIN(SIInsertNops, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_END(SIInsertNops, DEBUG_TYPE, PASS_NAME, false, false) + +char SIInsertNops::ID = 0; +char &llvm::SIInsertNopsID = SIInsertNops::ID; + +FunctionPass *llvm::createSIInsertNopsPass() { + return new SIInsertNops(); +} + +bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) { + const SIInstrInfo *TII = + static_cast(MF.getSubtarget().getInstrInfo()); + + DenseMap L2I; + for (auto MBB = MF.begin(); MBB != MF.end(); ++MBB) { + for (auto MI = MBB->begin(); MI != MBB->end(); ++MI) { + if (!MI->isDebugValue() && MI->getDebugLoc()) { + auto DL = MI->getDebugLoc(); + auto CL = DL.getLine(); + auto L2IEntry = L2I.find(CL); + if (L2IEntry == L2I.end()) { + BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_NOP)).addImm(0); + L2I.insert(std::make_pair(CL, MI)); + } else { + L2IEntry->second = MI; + } + } + } + } + for (auto L2IEntry = L2I.begin(); L2IEntry != L2I.end(); ++L2IEntry) { + auto MIParent = L2IEntry->second->getParent(); + auto DL = L2IEntry->second->getDebugLoc(); + auto MI = L2IEntry->second; ++MI; + if (MI != MIParent->end()) { + BuildMI(*MIParent, *MI, DL, TII->get(AMDGPU::S_NOP)).addImm(0); + } + } + BuildMI(MF.front(), MF.front().front(), DebugLoc(), TII->get(AMDGPU::S_NOP)) + .addImm(0); + + for (auto &MBB : MF) { + for (auto &MI : MBB) { + MI.print(errs()); + } + } + + return false; +}