Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -83,6 +83,8 @@
 ModulePass *createAMDGPUAlwaysInlinePass();
 ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
 FunctionPass *createAMDGPUAnnotateUniformValues();
+FunctionPass *createAMDGPUInsertDebugNopsPass();
+FunctionPass *createAMDGPULowerDebugNopsPass();
 
 void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
 extern char &SIFixControlFlowLiveIntervalsID;
@@ -93,6 +95,12 @@
 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
 extern char &AMDGPUAnnotateUniformValuesPassID;
 
+void initializeAMDGPUInsertDebugNopsPass(PassRegistry&);
+extern char &AMDGPUInsertDebugNopsID;
+
+void initializeAMDGPULowerDebugNopsPass(PassRegistry&);
+extern char &AMDGPULowerDebugNopsID;
+
 void initializeSIAnnotateControlFlowPass(PassRegistry&);
 extern char &SIAnnotateControlFlowPassID;
 
Index: lib/Target/AMDGPU/AMDGPUInsertNopsPass.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUInsertNopsPass.cpp
@@ -0,0 +1,162 @@
+//===-- AMDGPUInsertNopsPass.cpp - Insert and Lower Debug Nops ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// These passes insert an S_NOP instruction for each high level source
+/// statement. AMDGPUInsertDebugNops pass inserts DEBUG_NOP pseudo instructions
+/// before register allocation. AMDGPULowerDebugNops pass lowers DEBUG_NOP
+/// instructions to S_NOP instructions before machine code is emitted.
+///
+/// S_NOP for each high level source statement is needed for tools (e.g.
+/// debugger, profiler), which overwrite S_NOPs with S_TRAPs as they see fit.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Insert Debug Nops Pass
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdgpu-insert-debug-nops"
+#define PASS_NAME "AMDGPU Insert Debug Nops"
+
+namespace {
+
+/// Machine function pass that puts a DEBUG_NOP pseudo instruction in front of
+/// the first instruction encountered after each change of source line.
+class AMDGPUInsertDebugNops : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPUInsertDebugNops() : MachineFunctionPass(ID) { }
+  const char *getPassName() const override { return PASS_NAME; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(
+  AMDGPUInsertDebugNops, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_END(
+  AMDGPUInsertDebugNops, DEBUG_TYPE, PASS_NAME, false, false)
+
+char AMDGPUInsertDebugNops::ID = 0;
+char &llvm::AMDGPUInsertDebugNopsID = AMDGPUInsertDebugNops::ID;
+
+FunctionPass *llvm::createAMDGPUInsertDebugNopsPass() {
+  return new AMDGPUInsertDebugNops();
+}
+
+/// Visit every instruction of \p MF and emit a DEBUG_NOP in front of each
+/// instruction that is not a debug value, has a debug location, and whose
+/// line number differs from the line of the last nop that was emitted.
+/// \returns true if at least one DEBUG_NOP was inserted.
+bool AMDGPUInsertDebugNops::runOnMachineFunction(MachineFunction &MF) {
+  const AMDGPUInstrInfo *TII =
+    static_cast<const AMDGPUInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  // Line of the most recently emitted nop; it persists across basic blocks.
+  unsigned prevLine = 0;
+  bool modified = false;
+  for (auto &CMB : MF) {
+    for (auto &CMI : CMB) {
+      if (!CMI.isDebugValue() && CMI.getDebugLoc() &&
+          prevLine != CMI.getDebugLoc().getLine()) {
+        BuildMI(CMB, CMI, CMI.getDebugLoc(), TII->get(AMDGPU::DEBUG_NOP));
+        prevLine = CMI.getDebugLoc().getLine();
+        modified = true;
+      }
+    }
+  }
+
+  return modified;
+}
+
+#undef DEBUG_TYPE
+#undef PASS_NAME
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Lower Debug Nops Pass
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdgpu-lower-debug-nops"
+#define PASS_NAME "AMDGPU Lower Debug Nops"
+
+namespace {
+
+/// Machine function pass that replaces every instruction carrying the
+/// AMDGPU_FLAG_DEBUG_NOP target flag with an S_NOP whose immediate is 0.
+class AMDGPULowerDebugNops : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPULowerDebugNops() : MachineFunctionPass(ID) { }
+  const char *getPassName() const override { return PASS_NAME; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(
+  AMDGPULowerDebugNops, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_END(
+  AMDGPULowerDebugNops, DEBUG_TYPE, PASS_NAME, false, false)
+
+char AMDGPULowerDebugNops::ID = 0;
+char &llvm::AMDGPULowerDebugNopsID = AMDGPULowerDebugNops::ID;
+
+FunctionPass *llvm::createAMDGPULowerDebugNopsPass() {
+  return new AMDGPULowerDebugNops();
+}
+
+/// Replace each DEBUG_NOP-flagged instruction in \p MF with an S_NOP
+/// (immediate 0) at the same position and with the same debug location. If
+/// anything was replaced, one more S_NOP without a debug location is placed
+/// at the very start of the first basic block.
+/// \returns true if at least one instruction was replaced.
+bool AMDGPULowerDebugNops::runOnMachineFunction(MachineFunction &MF) {
+  const AMDGPUInstrInfo *TII =
+    static_cast<const AMDGPUInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  bool modified = false;
+  for (auto &CMB : MF) {
+    auto CMI = CMB.begin();
+    while (CMI != CMB.end()) {
+      if (TII->get(CMI->getOpcode()).TSFlags & AMDGPU_FLAG_DEBUG_NOP) {
+        BuildMI(CMB, *CMI, CMI->getDebugLoc(), TII->get(AMDGPU::S_NOP))
+          .addImm(0);
+        // erase() yields the iterator to continue from, so no ++ here.
+        CMI = CMB.erase(CMI);
+        modified = true;
+      } else {
+        ++CMI;
+      }
+    }
+  }
+
+  if (modified) {
+    // Insert at begin() rather than in front of front(): begin() is a valid
+    // insertion point even when the first block holds no instructions.
+    BuildMI(MF.front(), MF.front().begin(), DebugLoc(), TII->get(AMDGPU::S_NOP))
+      .addImm(0);
+  }
+
+  return modified;
+}
+
+#undef DEBUG_TYPE
+#undef PASS_NAME
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -94,5 +94,6 @@
 
 #define AMDGPU_FLAG_REGISTER_LOAD  (UINT64_C(1) << 63)
 #define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
+#define AMDGPU_FLAG_DEBUG_NOP      (UINT64_C(1) << 61)
 
 #endif
Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -15,6 +15,7 @@
 class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
   field bit isRegisterLoad = 0;
   field bit isRegisterStore = 0;
+  field bit isDebugNop = 0;
 
   let Namespace = "AMDGPU";
   let OutOperandList = outs;
@@ -25,6 +26,7 @@
 
   let TSFlags{63} = isRegisterLoad;
   let TSFlags{62} = isRegisterStore;
+  let TSFlags{61} = isDebugNop;
 }
 
 class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
@@ -461,6 +463,14 @@
 }
 }
 
+// Placeholder emitted by AMDGPUInsertDebugNops and rewritten to S_NOP by
+// AMDGPULowerDebugNops. NOTE(review): it is marked isTerminator yet is inserted
+// in the middle of basic blocks -- confirm this is intended.
+def DEBUG_NOP : AMDGPUShaderInst<(outs), (ins), "DEBUG_NOP", []> {
+  let isDebugNop = 1;
+  let isTerminator = 1;
+}
+
 } // End isCodeGenOnly = 1, isPseudo = 1
 
 /* Generic helper patterns for intrinsics */
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Transforms/IPO.h"
@@ -52,6 +53,8 @@
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
+  initializeAMDGPUInsertDebugNopsPass(*PR);
+  initializeAMDGPULowerDebugNopsPass(*PR);
   initializeAMDGPUPromoteAllocaPass(*PR);
   initializeSIAnnotateControlFlowPass(*PR);
   initializeSIInsertWaitsPass(*PR);
@@ -145,6 +148,12 @@
 //===----------------------------------------------------------------------===//
 
 namespace {
+
+cl::opt<bool> InsertNops(
+  "amdgpu-insert-nops",
+  cl::desc("Insert nop instruction for each high level source statement"),
+  cl::init(false));
+
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
   AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
@@ -175,6 +184,8 @@
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addGCPasses() override;
+  void addPreRegAlloc() override;
+  void addPreEmitPass() override;
 };
 
 class R600PassConfig : public AMDGPUPassConfig {
@@ -257,6 +268,22 @@
   return false;
 }
 
+/// Schedule the DEBUG_NOP insertion pass when -amdgpu-insert-nops is set.
+/// The R600 and GCN overrides call this before adding their own passes.
+void AMDGPUPassConfig::addPreRegAlloc() {
+  if (InsertNops) {
+    addPass(createAMDGPUInsertDebugNopsPass());
+  }
+}
+
+/// Schedule the DEBUG_NOP lowering pass when -amdgpu-insert-nops is set.
+/// The R600 and GCN overrides call this before adding their own passes.
+void AMDGPUPassConfig::addPreEmitPass() {
+  if (InsertNops) {
+    addPass(createAMDGPULowerDebugNopsPass());
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // R600 Pass Setup
 //===----------------------------------------------------------------------===//
@@ -268,6 +295,7 @@
 }
 
 void R600PassConfig::addPreRegAlloc() {
+  AMDGPUPassConfig::addPreRegAlloc();
   addPass(createR600VectorRegMerger(*TM));
 }
 
@@ -280,6 +308,7 @@
 }
 
 void R600PassConfig::addPreEmitPass() {
+  AMDGPUPassConfig::addPreEmitPass();
   addPass(createAMDGPUCFGStructurizerPass(), false);
   addPass(createR600ExpandSpecialInstrsPass(*TM), false);
   addPass(&FinalizeMachineBundlesID, false);
@@ -319,6 +348,8 @@
 }
 
 void GCNPassConfig::addPreRegAlloc() {
+  AMDGPUPassConfig::addPreRegAlloc();
+
   const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
 
   // This needs to be run directly before register allocation because
@@ -362,6 +393,8 @@
 }
 
 void GCNPassConfig::addPreEmitPass() {
+  AMDGPUPassConfig::addPreEmitPass();
+
   addPass(createSIInsertWaitsPass(), false);
   addPass(createSILowerControlFlowPass(), false);
 }
Index: lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -20,6 +20,7 @@
   AMDGPUAsmPrinter.cpp
   AMDGPUFrameLowering.cpp
   AMDGPUTargetObjectFile.cpp
+  AMDGPUInsertNopsPass.cpp
   AMDGPUIntrinsicInfo.cpp
   AMDGPUISelDAGToDAG.cpp
   AMDGPUMCInstLower.cpp