Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -328,11 +328,11 @@ "Insert two nop instructions for each high level source statement" >; -def FeatureDebuggerReserveTrapRegs : SubtargetFeature< - "amdgpu-debugger-reserve-trap-regs", - "DebuggerReserveTrapVGPRs", +def FeatureDebuggerReserveRegs : SubtargetFeature< + "amdgpu-debugger-reserve-regs", + "DebuggerReserveRegs", "true", - "Reserve VGPRs for trap handler usage" + "Reserve registers for debugger usage" >; //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -435,12 +435,13 @@ MaxSGPR += ExtraSGPRs; - // Update necessary Reserved* fields and max VGPRs used if - // "amdgpu-debugger-reserve-trap-regs" attribute was specified. - if (STM.debuggerReserveTrapVGPRs()) { + // Record first reserved register and reserved register count fields, and + // update max register counts if "amdgpu-debugger-reserve-regs" attribute was + // specified. + if (STM.debuggerReserveRegs()) { ProgInfo.ReservedVGPRFirst = MaxVGPR + 1; - ProgInfo.ReservedVGPRCount = MFI->getDebuggerReserveTrapVGPRCount(); - MaxVGPR += MFI->getDebuggerReserveTrapVGPRCount(); + ProgInfo.ReservedVGPRCount = MFI->getDebuggerReservedVGPRCount(); + MaxVGPR += MFI->getDebuggerReservedVGPRCount(); } // We found the maximum register index. 
They start at 0, so add one to get the Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -96,7 +96,7 @@ unsigned IsaVersion; bool EnableSIScheduler; bool DebuggerInsertNops; - bool DebuggerReserveTrapVGPRs; + bool DebuggerReserveRegs; std::unique_ptr FrameLowering; std::unique_ptr TLInfo; @@ -310,8 +310,8 @@ return DebuggerInsertNops; } - bool debuggerReserveTrapVGPRs() const { - return DebuggerReserveTrapVGPRs; + bool debuggerReserveRegs() const { + return DebuggerReserveRegs; } bool dumpCode() const { Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -98,7 +98,7 @@ LDSBankCount(0), IsaVersion(ISAVersion0_0_0), EnableSIScheduler(false), - DebuggerInsertNops(false), DebuggerReserveTrapVGPRs(false), + DebuggerInsertNops(false), DebuggerReserveRegs(false), FrameLowering(nullptr), GISel(), InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -62,8 +62,8 @@ unsigned MaximumWorkGroupSize; - // Number of reserved VGPRs for trap handler usage. - unsigned DebuggerReserveTrapVGPRCount; + // Number of reserved VGPRs for debugger usage. + unsigned DebuggerReservedVGPRCount; public: // FIXME: Make private @@ -329,8 +329,9 @@ ReturnsVoid = Value; } - unsigned getDebuggerReserveTrapVGPRCount() const { - return DebuggerReserveTrapVGPRCount; + /// \returns Number of reserved VGPRs for debugger usage. 
+ unsigned getDebuggerReservedVGPRCount() const { + return DebuggerReservedVGPRCount; } unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const; Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -14,6 +14,7 @@ #include "SIInstrInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -49,7 +50,7 @@ PSInputAddr(0), ReturnsVoid(true), MaximumWorkGroupSize(0), - DebuggerReserveTrapVGPRCount(0), + DebuggerReservedVGPRCount(0), LDSWaveSpillSize(0), PSInputEna(0), NumUserSGPRs(0), @@ -134,8 +135,15 @@ else MaximumWorkGroupSize = ST.getWavefrontSize(); - if (ST.debuggerReserveTrapVGPRs()) - DebuggerReserveTrapVGPRCount = 4; + const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine(); + if (ST.debuggerReserveRegs()) { + if (!MF.getMMI().hasDebugInfo()) + report_fatal_error("amdgpu-debugger-reserve-regs only allowed with -g"); + if (TM.getOptLevel() != CodeGenOpt::None) + report_fatal_error("amdgpu-debugger-reserve-regs only allowed with -O0"); + + DebuggerReservedVGPRCount = 4; + } } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -193,12 +193,12 @@ assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg)); } - // Reserve VGPRs for trap handler usage if "amdgpu-debugger-reserve-trap-regs" + // Reserve registers for debugger usage if "amdgpu-debugger-reserve-regs" // attribute was specified. 
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); - if (ST.debuggerReserveTrapVGPRs()) { + if (ST.debuggerReserveRegs()) { unsigned ReservedVGPRFirst = - MaxWorkGroupVGPRCount - MFI->getDebuggerReserveTrapVGPRCount(); + MaxWorkGroupVGPRCount - MFI->getDebuggerReservedVGPRCount(); for (unsigned i = ReservedVGPRFirst; i < MaxWorkGroupVGPRCount; ++i) { unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); Index: test/CodeGen/AMDGPU/debugger-no-g.ll =================================================================== --- test/CodeGen/AMDGPU/debugger-no-g.ll +++ test/CodeGen/AMDGPU/debugger-no-g.ll @@ -0,0 +1,32 @@ +; RUN: not llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR0 %s +; ERROR0: LLVM ERROR: amdgpu-debugger-reserve-regs only allowed with -g + +; Function Attrs: nounwind +define void @test(i32 addrspace(1)* %A) #0 { +entry: + %A.addr = alloca i32 addrspace(1)*, align 4 + store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4 + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0 + store i32 1, i32 addrspace(1)* %arrayidx, align 4 + %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1 + store i32 2, i32 addrspace(1)* %arrayidx1, align 4 + %2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2 + store i32 3, i32 addrspace(1)* %arrayidx2, align 4 + ret void +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" 
} + +!opencl.kernels = !{!0} +!llvm.ident = !{!6} + +!0 = !{void (i32 addrspace(1)*)* @test, !1, !2, !3, !4, !5} +!1 = !{!"kernel_arg_addr_space", i32 1} +!2 = !{!"kernel_arg_access_qual", !"none"} +!3 = !{!"kernel_arg_type", !"int*"} +!4 = !{!"kernel_arg_base_type", !"int*"} +!5 = !{!"kernel_arg_type_qual", !""} +!6 = !{!"clang version 3.9.0 (trunk 269067)"} Index: test/CodeGen/AMDGPU/debugger-reserve-regs.ll =================================================================== --- test/CodeGen/AMDGPU/debugger-reserve-regs.ll +++ test/CodeGen/AMDGPU/debugger-reserve-regs.ll @@ -1,8 +1,12 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-trap-regs -verify-machineinstrs < %s | FileCheck %s +; RUN: not llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR0 %s +; ERROR0: LLVM ERROR: amdgpu-debugger-reserve-regs only allowed with -O0 +; RUN: not llc -O2 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR1 %s +; ERROR1: LLVM ERROR: amdgpu-debugger-reserve-regs only allowed with -O0 + +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s ; CHECK: reserved_vgpr_first = {{[0-9]+}} ; CHECK-NEXT: reserved_vgpr_count = 4 - ; CHECK: ReservedVGPRFirst: {{[0-9]+}} ; CHECK-NEXT: ReservedVGPRCount: 4