Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -191,7 +191,8 @@
     { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
     { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
     { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
-    { "llvm.trap", "amdgpu-queue-ptr" }
+    { "llvm.trap", "amdgpu-queue-ptr" },
+    { "llvm.debugtrap", "amdgpu-queue-ptr" }
   };
 
   // TODO: We should not add the attributes if the known compile time workgroup
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -247,6 +247,9 @@
     OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
                                 Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
                                 false);
+    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
+                                Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),
+                                false);
     OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
                                 Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
                                 false);
@@ -635,6 +638,7 @@
   ProgInfo.ComputePGMRSrc2 =
       S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
       S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
+      S_00B84C_TRAP_HANDLER(MFI->hasTrapHandler()) |
       S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
       S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
       S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -66,6 +66,12 @@
     ISAVersion8_1_0,
   };
 
+  /// Trap handler ABI values returned by getTrapHandlerAbi().
+  enum TrapHandlerAbi {
+    TrapHandlerAbiNone = 0,
+    TrapHandlerAbiHsa = 1
+  };
+
 protected:
   // Basic subtarget description.
   Triple TargetTriple;
@@ -257,6 +263,11 @@
     return CaymanISA;
   }
 
+  /// \returns TrapHandlerAbiHsa on amdhsa, TrapHandlerAbiNone otherwise.
+  TrapHandlerAbi getTrapHandlerAbi() const {
+    return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
+  }
+
   bool isPromoteAllocaEnabled() const {
     return EnablePromoteAlloca;
   }
Index: lib/Target/AMDGPU/AMDKernelCodeT.h
===================================================================
--- lib/Target/AMDGPU/AMDKernelCodeT.h
+++ lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -195,7 +195,7 @@
 
   AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23,
   AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9,
-  AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED2_SHIFT
+  AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED2_SHIFT,
 };
 
 /// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -300,6 +300,9 @@
 #define   S_00B84C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B84C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B84C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B84C_TRAP_HANDLER(x)                                    (((x) & 0x1) << 6)
+#define   G_00B84C_TRAP_HANDLER(x)                                    (((x) >> 6) & 0x1)
+#define   C_00B84C_TRAP_HANDLER                                       0xFFFFFFBF
 #define   S_00B84C_TGID_X_EN(x)                                       (((x) & 0x1) << 7)
 #define   G_00B84C_TGID_X_EN(x)                                       (((x) >> 7) & 0x1)
 #define   C_00B84C_TGID_X_EN                                          0xFFFFFF7F
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1786,24 +1786,36 @@
   }
 
   switch (MI.getOpcode()) {
-  case AMDGPU::S_TRAP_PSEUDO: {
-    DebugLoc DL = MI.getDebugLoc();
-    BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
-      .addImm(1);
-
-    MachineFunction *MF = BB->getParent();
-    SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
-    unsigned UserSGPR = Info->getQueuePtrUserSGPR();
-    assert(UserSGPR != AMDGPU::NoRegister);
-
-    if (!BB->isLiveIn(UserSGPR))
-      BB->addLiveIn(UserSGPR);
-
-    BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
-      .addReg(UserSGPR);
-    BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)).addImm(0x1)
-      .addReg(AMDGPU::VGPR0, RegState::Implicit)
-      .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
+  case AMDGPU::S_TRAP_PSEUDO: {
+    const DebugLoc &DL = MI.getDebugLoc();
+    MachineFunction *MF = BB->getParent();
+
+    if (Subtarget->getTrapHandlerAbi()) {
+      // Hand 1 in VGPR0 and the queue pointer in SGPR0_SGPR1 to s_trap 1.
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
+        .addImm(1);
+
+      SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+      unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+      assert(UserSGPR != AMDGPU::NoRegister);
+
+      if (!BB->isLiveIn(UserSGPR))
+        BB->addLiveIn(UserSGPR);
+
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
+        .addReg(UserSGPR);
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)).addImm(0x1)
+        .addReg(AMDGPU::VGPR0, RegState::Implicit)
+        .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
+    } else {
+      // No trap handler ABI: report the trap and emit s_endpgm instead.
+      DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
+                                       "trap handler not supported",
+                                       DL, DS_Error);
+      MF->getFunction()->getContext().diagnose(NoTrap);
+
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
+    }
 
     MI.eraseFromParent();
     return BB;
Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -151,6 +151,7 @@
   bool PrivateSegmentBuffer : 1;
   bool DispatchPtr : 1;
   bool QueuePtr : 1;
+  bool TrapHandler : 1;
   bool KernargSegmentPtr : 1;
   bool DispatchID : 1;
   bool FlatScratchInit : 1;
@@ -257,6 +258,10 @@
     return DispatchPtr;
   }
 
+  bool hasTrapHandler() const {
+    return TrapHandler;
+  }
+
   bool hasQueuePtr() const {
     return QueuePtr;
   }
Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -64,6 +64,7 @@
     PrivateSegmentBuffer(false),
     DispatchPtr(false),
     QueuePtr(false),
+    TrapHandler(false),
     KernargSegmentPtr(false),
     DispatchID(false),
     FlatScratchInit(false),
@@ -132,6 +133,9 @@
     PrivateMemoryInputPtr = true;
   }
 
+  if (ST.getTrapHandlerAbi())
+    TrapHandler = true;
+
   // We don't need to worry about accessing spills with flat instructions.
   // TODO: On VI where we must use flat for global, we should be able to omit
   // this if it is never used for generic access.
Index: lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
===================================================================
--- lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -87,7 +87,7 @@
 // TODO: cdbg_user
 COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
 COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
-// TODO: enable_trap_handler
+COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER),
 COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
 COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
 COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),
Index: test/CodeGen/AMDGPU/trap.ll
===================================================================
--- test/CodeGen/AMDGPU/trap.ll
+++ test/CodeGen/AMDGPU/trap.ll
@@ -1,11 +1,34 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s
+; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s
 
 declare void @llvm.trap() #0
+declare void @llvm.debugtrap() #0
 
-; GCN-LABEL: {{^}}trap:
-; GCN: v_mov_b32_e32 v0, 1
-; GCN: s_mov_b64 s[0:1], s[4:5]
-; GCN: s_trap 1
+; HSA-LABEL: {{^}}hsa_trap:
+; HSA: enable_trap_handler = 1
+; HSA: v_mov_b32_e32 v0, 1
+; HSA: s_mov_b64 s[0:1], s[4:5]
+; HSA: s_trap 1
+define void @hsa_trap() {
+  call void @llvm.trap()
+  ret void
+}
+
+; HSA-LABEL: {{^}}hsa_debugtrap:
+; HSA: enable_trap_handler = 1
+; HSA: v_mov_b32_e32 v0, 1
+; HSA: s_mov_b64 s[0:1], s[4:5]
+; HSA: s_trap 1
+define void @hsa_debugtrap() {
+  call void @llvm.debugtrap()
+  ret void
+}
+
+; For non-HSA path
+; ERROR: error: <unknown>:0:0: in function trap void (): trap handler not supported
+; ERROR-LABEL: {{^}}trap:
+; ERROR: s_endpgm
+; ERROR-NEXT: s_endpgm
 define void @trap() {
   call void @llvm.trap()
   ret void