Index: docs/AMDGPUUsage.rst =================================================================== --- docs/AMDGPUUsage.rst +++ docs/AMDGPUUsage.rst @@ -3788,14 +3788,33 @@ ``queue_ptr`` terminated and its associated queue put into the error state. - ``llvm.debugtrap`` ``s_trap 0x03`` ``SGPR0-1``: If debugger not - ``queue_ptr`` installed handled - same as ``llvm.trap``. - debugger breakpoint ``s_trap 0x07`` Reserved for debugger + ``llvm.debugtrap`` ``s_trap 0x03`` - If debugger not + installed then + behaves as a + no-operation. The + trap handler is + entered and + immediately returns + to continue + execution of the + wavefront. + - If the debugger is + installed, causes + the debug trap to be + reported by the + debugger and the + wavefront is put in + the halt state until + resumed by the + debugger. + reserved ``s_trap 0x04`` Reserved. + reserved ``s_trap 0x05`` Reserved. + reserved ``s_trap 0x06`` Reserved. + debugger breakpoint ``s_trap 0x07`` Reserved for debugger breakpoints. - debugger ``s_trap 0x08`` Reserved for debugger. - debugger ``s_trap 0xfe`` Reserved for debugger. - debugger ``s_trap 0xff`` Reserved for debugger. + reserved ``s_trap 0x08`` Reserved. + reserved ``s_trap 0xfe`` Reserved. + reserved ``s_trap 0xff`` Reserved. =================== =============== =============== ======================= AMDPAL Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -85,6 +85,7 @@ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const; SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -3333,8 +3333,9 @@ case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); case ISD::TRAP: - case ISD::DEBUGTRAP: return lowerTRAP(Op, DAG); + case ISD::DEBUGTRAP: + return lowerDEBUGTRAP(Op, DAG); } return SDValue(); } @@ -3995,40 +3996,37 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); - MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); - unsigned TrapID = Op.getOpcode() == ISD::DEBUGTRAP ? - SISubtarget::TrapIDLLVMDebugTrap : SISubtarget::TrapIDLLVMTrap; - - if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && - Subtarget->isTrapHandlerEnabled()) { - SIMachineFunctionInfo *Info = MF.getInfo(); - unsigned UserSGPR = Info->getQueuePtrUserSGPR(); - assert(UserSGPR != AMDGPU::NoRegister); - - SDValue QueuePtr = CreateLiveInRegister( - DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); - - SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); - - SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, - QueuePtr, SDValue()); + if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa || + !Subtarget->isTrapHandlerEnabled()) + return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); - SDValue Ops[] = { - ToReg, - DAG.getTargetConstant(TrapID, SL, MVT::i16), - SGPR01, - ToReg.getValue(1) - }; + MachineFunction &MF = DAG.getMachineFunction(); + SIMachineFunctionInfo *Info = MF.getInfo(); + unsigned UserSGPR = Info->getQueuePtrUserSGPR(); + assert(UserSGPR != AMDGPU::NoRegister); + SDValue QueuePtr = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); + SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); + SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, + QueuePtr, SDValue()); + SDValue Ops[] = { + ToReg, + DAG.getTargetConstant(SISubtarget::TrapIDLLVMTrap, SL, MVT::i16), + SGPR01, + ToReg.getValue(1) + }; + return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); +} - return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); - } +SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Chain = Op.getOperand(0); + MachineFunction &MF = DAG.getMachineFunction(); - switch (TrapID) { - case SISubtarget::TrapIDLLVMTrap: - return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); - case SISubtarget::TrapIDLLVMDebugTrap: { + if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa || + !Subtarget->isTrapHandlerEnabled()) { DiagnosticInfoUnsupported NoTrap(MF.getFunction(), "debugtrap handler not supported", Op.getDebugLoc(), @@ -4037,11 +4035,12 @@ Ctx.diagnose(NoTrap); return Chain; } - default: - llvm_unreachable("unsupported trap handler type!"); - } - return Chain; + SDValue Ops[] = { + Chain, + DAG.getTargetConstant(SISubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16) + }; + return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); } SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, Index: test/CodeGen/AMDGPU/trap.ll =================================================================== --- test/CodeGen/AMDGPU/trap.ll +++ test/CodeGen/AMDGPU/trap.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s @@ -15,15 +15,15 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s declare void @llvm.trap() #0 -declare void @llvm.debugtrap() #0 +declare void @llvm.debugtrap() #1 ; MESA-TRAP: .section .AMDGPU.config ; MESA-TRAP: .long 47180 -; MESA-TRAP-NEXT: .long 204 +; MESA-TRAP-NEXT: .long 208 ; NOMESA-TRAP: .section .AMDGPU.config ; NOMESA-TRAP: .long 47180 -; NOMESA-TRAP-NEXT: .long 140 +; NOMESA-TRAP-NEXT: .long 144 ; GCN-LABEL: {{^}}hsa_trap: ; HSA-TRAP: enable_trap_handler = 1 @@ -38,24 +38,27 @@ ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @hsa_trap() { +define amdgpu_kernel void @hsa_trap(i32 addrspace(1)* nocapture readonly %arg0) { + store volatile i32 1, i32 addrspace(1)* %arg0 call void @llvm.trap() + unreachable + store volatile i32 2, i32 addrspace(1)* %arg0 ret void } ; MESA-TRAP: .section .AMDGPU.config ; MESA-TRAP: .long 47180 -; MESA-TRAP-NEXT: .long 204 +; MESA-TRAP-NEXT: .long 208 ; NOMESA-TRAP: .section .AMDGPU.config ; NOMESA-TRAP: .long 47180 -; NOMESA-TRAP-NEXT: .long 140 +; NOMESA-TRAP-NEXT: .long 144 -; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (): debugtrap handler not supported +; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported ; GCN-LABEL: {{^}}hsa_debugtrap: ; HSA-TRAP: enable_trap_handler = 1 -; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] ; HSA-TRAP: s_trap 3 +; HSA-TRAP: flat_store_dword v[0:1], v3 ; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction ; NO-HSA-TRAP: enable_trap_handler = 0 @@ -64,8 +67,10 @@ ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @hsa_debugtrap() { +define amdgpu_kernel void @hsa_debugtrap(i32 addrspace(1)* nocapture readonly %arg0) { + store volatile i32 1, i32 addrspace(1)* %arg0 call void @llvm.debugtrap() + store volatile i32 2, i32 addrspace(1)* %arg0 ret void } @@ -75,8 +80,11 @@ ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-HSA-TRAP: s_endpgm ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @trap() { +define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { + store volatile i32 1, i32 addrspace(1)* %arg0 call void @llvm.trap() + unreachable + store volatile i32 2, i32 addrspace(1)* %arg0 ret void } @@ -84,10 +92,10 @@ ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 -; HSA: BB{{[0-9]_[0-9]+]]: ; %trap +; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap ; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] ; HSA-TRAP-NEXT: s_trap 2 -define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr #1 { +define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr { entry: %tmp29 = load volatile i32, i32 addrspace(1)* %arg0 %cmp = icmp eq i32 %tmp29, -1 @@ -98,7 +106,9 @@ unreachable ret: + store volatile i32 3, i32 addrspace(1)* %arg0 ret void } attributes #0 = { nounwind noreturn } +attributes #1 = { nounwind }