Index: lib/Target/AMDGPU/AMDGPU.h =================================================================== --- lib/Target/AMDGPU/AMDGPU.h +++ lib/Target/AMDGPU/AMDGPU.h @@ -50,7 +50,6 @@ FunctionPass *createSIOptimizeExecMaskingPreRAPass(); FunctionPass *createSIFixSGPRCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); -FunctionPass *createSIDebuggerInsertNopsPass(); FunctionPass *createSIInsertWaitcntsPass(); FunctionPass *createSIFixWWMLivenessPass(); FunctionPass *createSIFormMemoryClausesPass(); @@ -196,9 +195,6 @@ void initializeSIMemoryLegalizerPass(PassRegistry&); extern char &SIMemoryLegalizerID; -void initializeSIDebuggerInsertNopsPass(PassRegistry&); -extern char &SIDebuggerInsertNopsID; - void initializeSIModeRegisterPass(PassRegistry&); extern char &SIModeRegisterID; Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -578,24 +578,6 @@ FeatureXNACK, FeatureCodeObjectV3]>; -//===----------------------------------------------------------------------===// -// Debugger related subtarget features. 
-//===----------------------------------------------------------------------===// - -def FeatureDebuggerInsertNops : SubtargetFeature< - "amdgpu-debugger-insert-nops", - "DebuggerInsertNops", - "true", - "Insert one nop instruction for each high level source statement" ->; - -def FeatureDebuggerEmitPrologue : SubtargetFeature< - "amdgpu-debugger-emit-prologue", - "DebuggerEmitPrologue", - "true", - "Emit debugger prologue" ->; - //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -487,15 +487,6 @@ OutStreamer->emitRawComment( " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false); - if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) { - OutStreamer->emitRawComment( - " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" + - Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false); - OutStreamer->emitRawComment( - " DebuggerPrivateSegmentBufferSGPR: s" + - Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false); - } - OutStreamer->emitRawComment( " COMPUTE_PGM_RSRC2:USER_SGPR: " + Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false); @@ -828,8 +819,6 @@ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - const SIInstrInfo *TII = STM.getInstrInfo(); - const SIRegisterInfo *RI = &TII->getRegisterInfo(); // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be @@ -921,16 +910,6 @@ ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( &STM, ProgInfo.NumVGPRsForWavesPerEU); - // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and - // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" - // attribute was requested. 
- if (STM.debuggerEmitPrologue()) { - ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR = - RI->getHWRegIndex(MFI->getScratchWaveOffsetReg()); - ProgInfo.DebuggerPrivateSegmentBufferSGPR = - RI->getHWRegIndex(MFI->getScratchRSrcReg()); - } - // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode // register. ProgInfo.FloatMode = getFPMode(MF); @@ -1184,9 +1163,6 @@ if (MFI->hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; - if (STM.debuggerSupported()) - Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED; - if (STM.isXNACKEnabled()) Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; @@ -1201,13 +1177,6 @@ // 2^n. The minimum alignment is 2^4 = 16. Out.kernarg_segment_alignment = std::max((size_t)4, countTrailingZeros(MaxKernArgAlign)); - - if (STM.debuggerEmitPrologue()) { - Out.debug_wavefront_private_segment_offset_sgpr = - CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; - Out.debug_private_segment_buffer_sgpr = - CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR; - } } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, Index: lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -239,23 +239,7 @@ Kernel::DebugProps::Metadata MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { - const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); - HSAMD::Kernel::DebugProps::Metadata HSADebugProps; - - if (!STM.debuggerSupported()) - return HSADebugProps; - - HSADebugProps.mDebuggerABIVersion.push_back(1); - HSADebugProps.mDebuggerABIVersion.push_back(0); - - if (STM.debuggerEmitPrologue()) { - HSADebugProps.mPrivateSegmentBufferSGPR = - ProgramInfo.DebuggerPrivateSegmentBufferSGPR; - HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR = 
ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; - } - - return HSADebugProps; + return HSAMD::Kernel::DebugProps::Metadata(); } void MetadataStreamerV2::emitVersion() { Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -295,8 +295,6 @@ bool HasApertureRegs; bool EnableXNACK; bool TrapHandler; - bool DebuggerInsertNops; - bool DebuggerEmitPrologue; // Used as options. bool EnableHugePrivateBuffer; @@ -791,18 +789,6 @@ return EnableSIScheduler; } - bool debuggerSupported() const { - return debuggerInsertNops() && debuggerEmitPrologue(); - } - - bool debuggerInsertNops() const { - return DebuggerInsertNops; - } - - bool debuggerEmitPrologue() const { - return DebuggerEmitPrologue; - } - bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; } Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -174,8 +174,6 @@ HasApertureRegs(false), EnableXNACK(false), TrapHandler(false), - DebuggerInsertNops(false), - DebuggerEmitPrologue(false), EnableHugePrivateBuffer(false), EnableLoadStoreOpt(false), Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -200,7 +200,6 @@ initializeSILowerControlFlowPass(*PR); initializeSIInsertSkipsPass(*PR); initializeSIMemoryLegalizerPass(*PR); - initializeSIDebuggerInsertNopsPass(*PR); initializeSIOptimizeExecMaskingPass(*PR); initializeSIFixWWMLivenessPass(*PR); initializeSIFormMemoryClausesPass(*PR); @@ -918,7 +917,6 @@ addPass(&PostRAHazardRecognizerID); addPass(&SIInsertSkipsPassID); - addPass(createSIDebuggerInsertNopsPass()); addPass(&BranchRelaxationPassID); } Index: 
lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -77,8 +77,6 @@ AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureAutoWaitcntBeforeBarrier, - AMDGPU::FeatureDebuggerEmitPrologue, - AMDGPU::FeatureDebuggerInsertNops, // Property of the kernel/environment which can't actually differ. AMDGPU::FeatureSGPRInitBug, Index: lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/CMakeLists.txt +++ lib/Target/AMDGPU/CMakeLists.txt @@ -92,7 +92,6 @@ R600RegisterInfo.cpp SIAddIMGInit.cpp SIAnnotateControlFlow.cpp - SIDebuggerInsertNops.cpp SIFixSGPRCopies.cpp SIFixupVectorISel.cpp SIFixVGPRCopies.cpp Index: lib/Target/AMDGPU/SIDebuggerInsertNops.cpp =================================================================== --- lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//===--- SIDebuggerInsertNops.cpp - Inserts nops for debugger usage -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Inserts one nop instruction for each high level source statement for -/// debugger usage. -/// -/// Tools, such as a debugger, need to pause execution based on user input (i.e. -/// breakpoint). In order to do this, one nop instruction is inserted before the -/// first isa instruction of each high level source statement. Further, the -/// debugger may replace nop instructions with trap instructions based on user -/// input. 
-// -//===----------------------------------------------------------------------===// - -#include "AMDGPUSubtarget.h" -#include "SIInstrInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -using namespace llvm; - -#define DEBUG_TYPE "si-debugger-insert-nops" -#define PASS_NAME "SI Debugger Insert Nops" - -namespace { - -class SIDebuggerInsertNops : public MachineFunctionPass { -public: - static char ID; - - SIDebuggerInsertNops() : MachineFunctionPass(ID) { } - StringRef getPassName() const override { return PASS_NAME; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; -}; - -} // anonymous namespace - -INITIALIZE_PASS(SIDebuggerInsertNops, DEBUG_TYPE, PASS_NAME, false, false) - -char SIDebuggerInsertNops::ID = 0; -char &llvm::SIDebuggerInsertNopsID = SIDebuggerInsertNops::ID; - -FunctionPass *llvm::createSIDebuggerInsertNopsPass() { - return new SIDebuggerInsertNops(); -} - -bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) { - // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not - // specified. - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - if (!ST.debuggerInsertNops()) - return false; - - // Skip machine functions without debug info. - if (!MF.getMMI().hasDebugInfo()) - return false; - - // Target instruction info. - const SIInstrInfo *TII = ST.getInstrInfo(); - - // Set containing line numbers that have nop inserted. - DenseSet<unsigned> NopInserted; - - for (auto &MBB : MF) { - for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { - // Skip debug instructions and instructions without location. 
- if (MI->isDebugInstr() || !MI->getDebugLoc()) - continue; - - // Insert nop instruction if line number does not have nop inserted. - auto DL = MI->getDebugLoc(); - if (NopInserted.find(DL.getLine()) == NopInserted.end()) { - BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP)) - .addImm(0); - NopInserted.insert(DL.getLine()); - } - } - } - - return true; -} Index: lib/Target/AMDGPU/SIFrameLowering.h =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.h +++ lib/Target/AMDGPU/SIFrameLowering.h @@ -65,9 +65,6 @@ SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - /// Emits debugger prologue. - void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, Index: lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.cpp +++ lib/Target/AMDGPU/SIFrameLowering.cpp @@ -217,12 +217,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was - // specified. - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - if (ST.debuggerEmitPrologue()) - emitDebuggerPrologue(MF, MBB); - assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); @@ -233,6 +227,7 @@ // FIXME: We should be cleaning up these unused SGPR spill frame indices // somewhere. 
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -778,47 +773,6 @@ return MBB.erase(I); } -void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - const SIInstrInfo *TII = ST.getInstrInfo(); - const SIRegisterInfo *TRI = &TII->getRegisterInfo(); - const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - - MachineBasicBlock::iterator I = MBB.begin(); - DebugLoc DL; - - // For each dimension: - for (unsigned i = 0; i < 3; ++i) { - // Get work group ID SGPR, and make it live-in again. - unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i); - MF.getRegInfo().addLiveIn(WorkGroupIDSGPR); - MBB.addLiveIn(WorkGroupIDSGPR); - - // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in - // order to spill it to scratch. - unsigned WorkGroupIDVGPR = - MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR) - .addReg(WorkGroupIDSGPR); - - // Spill work group ID. - int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i); - TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false, - WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI); - - // Get work item ID VGPR, and make it live-in again. - unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i); - MF.getRegInfo().addLiveIn(WorkItemIDVGPR); - MBB.addLiveIn(WorkItemIDVGPR); - - // Spill work item ID. - int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i); - TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false, - WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI); - } -} - bool SIFrameLowering::hasFP(const MachineFunction &MF) const { // All stack operations are relative to the frame offset SGPR. // TODO: Still want to eliminate sometimes. 
Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -174,8 +174,6 @@ unsigned isCFIntrinsic(const SDNode *Intr) const; - void createDebuggerPrologueStackObjects(MachineFunction &MF) const; - /// \returns True if fixup needs to be emitted for given global value \p GV, /// false otherwise. bool shouldEmitFixup(const GlobalValue *GV) const; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1862,7 +1862,6 @@ const Function &Fn = MF.getFunction(); FunctionType *FType = MF.getFunction().getFunctionType(); SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) { DiagnosticInfoUnsupported NoGraphicsHSA( @@ -1871,11 +1870,6 @@ return DAG.getEntryNode(); } - // Create stack objects that are used for emitting debugger prologue if - // "amdgpu-debugger-emit-prologue" attribute was specified. - if (ST.debuggerEmitPrologue()) - createDebuggerPrologueStackObjects(MF); - SmallVector<ISD::InputArg, 9> Splits; SmallVector<CCValAssign, 16> ArgLocs; BitVector Skipped(Ins.size()); @@ -3962,32 +3956,6 @@ return 0; } -void SITargetLowering::createDebuggerPrologueStackObjects( - MachineFunction &MF) const { - // Create stack objects that are used for emitting debugger prologue. 
- // - // Debugger prologue writes work group IDs and work item IDs to scratch memory - // at fixed location in the following format: - // offset 0: work group ID x - // offset 4: work group ID y - // offset 8: work group ID z - // offset 16: work item ID x - // offset 20: work item ID y - // offset 24: work item ID z - SIMachineFunctionInfo *Info = MF.getInfo(); - int ObjectIdx = 0; - - // For each dimension: - for (unsigned i = 0; i < 3; ++i) { - // Create fixed stack object for work group ID. - ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4, true); - Info->setDebuggerWorkGroupIDStackObjectIndex(i, ObjectIdx); - // Create fixed stack object for work item ID. - ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4 + 16, true); - Info->setDebuggerWorkItemIDStackObjectIndex(i, ObjectIdx); - } -} - bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { const Triple &TT = getTargetMachine().getTargetTriple(); return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -123,12 +123,6 @@ // unit. Minimum - first, maximum - second. std::pair WavesPerEU = {0, 0}; - // Stack object indices for work group IDs. - std::array DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}}; - - // Stack object indices for work item IDs. - std::array DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; - DenseMap> BufferPSVs; DenseMap::max() if the register is not - // used or not known. - uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR = - std::numeric_limits::max(); - - // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire - // kernel execution, or std::numeric_limits::max() if the register - // is not used or not known. 
- uint16_t DebuggerPrivateSegmentBufferSGPR = - std::numeric_limits::max(); - // Whether there is recursion, dynamic allocas, indirect calls or some other // reason there may be statically unknown stack usage. bool DynamicCallStack = false; Index: test/CodeGen/AMDGPU/debugger-emit-prologue.ll =================================================================== --- test/CodeGen/AMDGPU/debugger-emit-prologue.ll +++ /dev/null @@ -1,81 +0,0 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-code-object-v3 -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR -target datalayout = "A5" - -; CHECK: debug_wavefront_private_segment_offset_sgpr = [[SOFF:[0-9]+]] -; CHECK: debug_private_segment_buffer_sgpr = [[SREG:[0-9]+]] - -; CHECK: v_mov_b32_e32 [[WGIDX:v[0-9]+]], s{{[0-9]+}} -; CHECK: buffer_store_dword [[WGIDX]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] -; CHECK: buffer_store_dword v0, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:16 - -; CHECK: v_mov_b32_e32 [[WGIDY:v[0-9]+]], s{{[0-9]+}} -; CHECK: buffer_store_dword [[WGIDY]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:4 -; CHECK: buffer_store_dword v1, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:20 - -; CHECK: v_mov_b32_e32 [[WGIDZ:v[0-9]+]], s{{[0-9]+}} -; CHECK: buffer_store_dword [[WGIDZ]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:8 -; CHECK: buffer_store_dword v2, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:24 - -; CHECK: DebuggerWavefrontPrivateSegmentOffsetSGPR: s[[SOFF]] -; CHECK: DebuggerPrivateSegmentBufferSGPR: s[[SREG]] - -; NOATTR-NOT: DebuggerWavefrontPrivateSegmentOffsetSGPR -; NOATTR-NOT: DebuggerPrivateSegmentBufferSGPR - -; Function Attrs: nounwind -define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !12 { -entry: - %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %A, i32 addrspace(1)* 
addrspace(5)* %A.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !17, metadata !18), !dbg !19 - %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !20 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20 - store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21 - %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !22 - %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22 - store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23 - %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !24 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24 - store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25 - ret void, !dbg !26 -} - -; Function Attrs: nounwind readnone -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone } - -!llvm.dbg.cu = !{!0} -!opencl.kernels = !{!3} -!llvm.module.flags = !{!9, !10} -!llvm.ident = !{!11} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 269772)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing") -!2 = !{} -!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8} -!4 = !{!"kernel_arg_addr_space", i32 1} -!5 = !{!"kernel_arg_access_qual", !"none"} -!6 = !{!"kernel_arg_type", !"int addrspace(5)*"} -!7 = !{!"kernel_arg_base_type", !"int addrspace(5)*"} -!8 = 
!{!"kernel_arg_type_qual", !""} -!9 = !{i32 2, !"Dwarf Version", i32 2} -!10 = !{i32 2, !"Debug Info Version", i32 3} -!11 = !{!"clang version 3.9.0 (trunk 269772)"} -!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) -!13 = !DISubroutineType(types: !14) -!14 = !{null, !15} -!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32) -!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15) -!18 = !DIExpression() -!19 = !DILocation(line: 1, column: 30, scope: !12) -!20 = !DILocation(line: 2, column: 3, scope: !12) -!21 = !DILocation(line: 2, column: 8, scope: !12) -!22 = !DILocation(line: 3, column: 3, scope: !12) -!23 = !DILocation(line: 3, column: 8, scope: !12) -!24 = !DILocation(line: 4, column: 3, scope: !12) -!25 = !DILocation(line: 4, column: 8, scope: !12) -!26 = !DILocation(line: 5, column: 1, scope: !12) Index: test/CodeGen/AMDGPU/debugger-insert-nops.ll =================================================================== --- test/CodeGen/AMDGPU/debugger-insert-nops.ll +++ /dev/null @@ -1,80 +0,0 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP -target datalayout = "A5" - -; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each. 
- -; Check that each line appears at least once -; CHECK-DAG: test01.cl:2:3 -; CHECK-DAG: test01.cl:3:3 -; CHECK-DAG: test01.cl:4:3 - - -; Check that each of each of the lines consists of the line output, followed by "s_nop 0" -; CHECKNOP: test01.cl:{{[234]}}:3 -; CHECKNOP-NEXT: s_nop 0 -; CHECKNOP: test01.cl:{{[234]}}:3 -; CHECKNOP-NEXT: s_nop 0 -; CHECKNOP: test01.cl:{{[234]}}:3 -; CHECKNOP-NEXT: s_nop 0 - -; CHECK: test01.cl:5:{{[0-9]+}} -; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: s_endpgm - -; Function Attrs: nounwind -define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !12 { -entry: - %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !17, metadata !18), !dbg !19 - %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !20 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20 - store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !20 - %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !22 - %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22 - store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23 - %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !24 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24 - store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25 - ret void, !dbg !26 -} - -; Function Attrs: nounwind readnone -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } - -!llvm.dbg.cu = !{!0} -!opencl.kernels = !{!3} -!llvm.module.flags = !{!9, !10} -!llvm.ident = !{!11} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, 
runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing") -!2 = !{} -!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8} -!4 = !{!"kernel_arg_addr_space", i32 1} -!5 = !{!"kernel_arg_access_qual", !"none"} -!6 = !{!"kernel_arg_type", !"int addrspace(5)*"} -!7 = !{!"kernel_arg_base_type", !"int addrspace(5)*"} -!8 = !{!"kernel_arg_type_qual", !""} -!9 = !{i32 2, !"Dwarf Version", i32 2} -!10 = !{i32 2, !"Debug Info Version", i32 3} -!11 = !{!"clang version 3.9.0 (trunk 268929)"} -!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) -!13 = !DISubroutineType(types: !14) -!14 = !{null, !15} -!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32) -!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15) -!18 = !DIExpression() -!19 = !DILocation(line: 1, column: 30, scope: !12) -!20 = !DILocation(line: 2, column: 3, scope: !12) -!21 = !DILocation(line: 2, column: 8, scope: !12) -!22 = !DILocation(line: 3, column: 3, scope: !12) -!23 = !DILocation(line: 3, column: 8, scope: !12) -!24 = !DILocation(line: 4, column: 3, scope: !12) -!25 = !DILocation(line: 4, column: 8, scope: !12) -!26 = !DILocation(line: 5, column: 1, scope: !12) Index: test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll +++ /dev/null @@ -1,67 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 
--check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s -target datalayout = "A5" - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] - -; CHECK: Kernels: -; CHECK: - Name: test -; CHECK: SymbolName: 'test@kd' -; CHECK: DebugProps: -; CHECK: DebuggerABIVersion: [ 1, 0 ] -; CHECK: PrivateSegmentBufferSGPR: 0 -; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11 -define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 { -entry: - %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !16, metadata !17), !dbg !18 - %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !19 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 0, !dbg !19 - store i32 777, i32 addrspace(1)* %arrayidx, align 4, !dbg !20 - %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !21 - %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i64 1, !dbg !21 - store i32 888, i32 addrspace(1)* %arrayidx1, align 4, !dbg !22 - %2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !23 - %arrayidx2 = getelementptr 
inbounds i32, i32 addrspace(1)* %2, i64 2, !dbg !23 - store i32 999, i32 addrspace(1)* %arrayidx2, align 4, !dbg !24 - ret void, !dbg !25 -} - -attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="gfx800" "target-features"="+16-bit-insts,-code-object-v3,+amdgpu-debugger-emit-prologue,+amdgpu-debugger-insert-nops,+amdgpu-debugger-reserve-regs,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.dbg.cu = !{!0} -!opencl.ocl.version = !{!3} -!llvm.module.flags = !{!4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "code-object-metadata-kernel-debug-props.cl", directory: "/some/random/directory") -!2 = !{} -!3 = !{i32 1, i32 0} -!4 = !{i32 2, !"Dwarf Version", i32 2} -!5 = !{i32 2, !"Debug Info Version", i32 3} -!6 = !{!"clang version 5.0.0"} -!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{null, !10} -!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) -!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!12 = !{i32 1} -!13 = !{!"none"} -!14 = !{!"int addrspace(5)*"} -!15 = !{!""} -!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) -!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef) -!18 = !DILocation(line: 1, column: 30, 
scope: !7) -!19 = !DILocation(line: 2, column: 3, scope: !7) -!20 = !DILocation(line: 2, column: 8, scope: !7) -!21 = !DILocation(line: 3, column: 3, scope: !7) -!22 = !DILocation(line: 3, column: 8, scope: !7) -!23 = !DILocation(line: 4, column: 3, scope: !7) -!24 = !DILocation(line: 4, column: 8, scope: !7) -!25 = !DILocation(line: 5, column: 1, scope: !7) Index: test/CodeGen/AMDGPU/syncscopes.ll =================================================================== --- test/CodeGen/AMDGPU/syncscopes.ll +++ test/CodeGen/AMDGPU/syncscopes.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-after=si-insert-skips < %s | FileCheck --check-prefix=GCN %s ; GCN-LABEL: name: syncscopes ; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out)