Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -191,7 +191,8 @@ { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }, { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }, - { "llvm.trap", "amdgpu-queue-ptr" } + { "llvm.trap", "amdgpu-queue-ptr" }, + { "llvm.debugtrap", "amdgpu-queue-ptr" } }; // TODO: We should not add the attributes if the known compile time workgroup Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -739,6 +739,9 @@ if (MFI->hasDispatchPtr()) header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + if (MFI->hasTrapHandlerPtr()) + header.code_properties |= AMD_CODE_PROPERTY_IS_TRAP_HANDLER_SUPPORTED; + if (MFI->hasQueuePtr()) header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; Index: lib/Target/AMDGPU/AMDKernelCodeT.h =================================================================== --- lib/Target/AMDGPU/AMDKernelCodeT.h +++ lib/Target/AMDGPU/AMDKernelCodeT.h @@ -195,7 +195,11 @@ AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23, AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9, - AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED2_SHIFT + AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED2_SHIFT, + + AMD_CODE_PROPERTY_IS_TRAP_HANDLER_SUPPORTED_SHIFT = 24, + AMD_CODE_PROPERTY_IS_TRAP_HANDLER_SUPPORTED_WIDTH = 1, + AMD_CODE_PROPERTY_IS_TRAP_HANDLER_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_TRAP_HANDLER_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_TRAP_HANDLER_SUPPORTED_SHIFT }; /// @brief The hsa_ext_control_directives_t specifies the values 
for the HSAIL Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -272,7 +272,11 @@ // On SI this is s_memtime and s_memrealtime on VI. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); - setOperationAction(ISD::TRAP, MVT::Other, Legal); + + if (Subtarget->isAmdHsaOS()) + setOperationAction(ISD::TRAP, MVT::Other, Legal); + else + setOperationAction(ISD::TRAP, MVT::Other, Custom); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -1786,24 +1790,25 @@ } switch (MI.getOpcode()) { - case AMDGPU::S_TRAP_PSEUDO: { - DebugLoc DL = MI.getDebugLoc(); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0) - .addImm(1); + case AMDGPU::S_TRAP_PSEUDO: { + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0) + .addImm(1); MachineFunction *MF = BB->getParent(); SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); unsigned UserSGPR = Info->getQueuePtrUserSGPR(); + Info->enableTrapHandler(); assert(UserSGPR != AMDGPU::NoRegister); if (!BB->isLiveIn(UserSGPR)) BB->addLiveIn(UserSGPR); BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1) - .addReg(UserSGPR); + .addReg(UserSGPR); BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)).addImm(0x1) - .addReg(AMDGPU::VGPR0, RegState::Implicit) - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit); + .addReg(AMDGPU::VGPR0, RegState::Implicit) + .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit); MI.eraseFromParent(); return BB; @@ -1978,6 +1983,8 @@ return lowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::TRAP: + return lowerTRAP(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); } @@ -2451,6 +2458,23 @@ MachineMemOperand::MOInvariant); } +SDValue SITargetLowering::lowerTRAP(SDValue Op, + 
SelectionDAG &DAG) const { + const MachineFunction &MF = DAG.getMachineFunction(); + DiagnosticInfoUnsupported NoTrap(*MF.getFunction(), + "trap handler not supported", + Op.getDebugLoc(), + DS_Error); + DAG.getContext()->diagnose(NoTrap); + + // Emit s_endpgm. + + // FIXME: This should really be selected to s_trap, but that requires + // setting up the trap handler for it to do anything. + return DAG.getNode(AMDGPUISD::ENDPGM, SDLoc(Op), MVT::Other, + Op.getOperand(0)); +} + SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const { // We can't use S_MOV_B32 directly, because there is no way to specify m0 as Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -151,6 +151,7 @@ bool PrivateSegmentBuffer : 1; bool DispatchPtr : 1; bool QueuePtr : 1; + bool TrapHandlerPtr : 1; bool KernargSegmentPtr : 1; bool DispatchID : 1; bool FlatScratchInit : 1; @@ -249,6 +250,10 @@ PrivateSegmentWaveByteOffsetSystemSGPR = Reg; } + void enableTrapHandler() { + TrapHandlerPtr = true; + } + bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; } @@ -257,6 +262,10 @@ return DispatchPtr; } + bool hasTrapHandlerPtr() const { + return TrapHandlerPtr; + } + bool hasQueuePtr() const { return QueuePtr; } Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -64,6 +64,7 @@ PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), + TrapHandlerPtr(false), KernargSegmentPtr(false), DispatchID(false), FlatScratchInit(false), Index: lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h +++ 
lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h @@ -87,7 +87,6 @@ // TODO: cdbg_user COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN), COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR), -// TODO: enable_trap_handler COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN), COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN), COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN), @@ -113,6 +112,7 @@ CODEPROP(is_dynamic_callstack, IS_DYNAMIC_CALLSTACK), CODEPROP(is_debug_enabled, IS_DEBUG_SUPPORTED), CODEPROP(is_xnack_enabled, IS_XNACK_SUPPORTED), +CODEPROP(is_trap_handler_supported, IS_TRAP_HANDLER_SUPPORTED), FIELD(workitem_private_segment_byte_size), FIELD(workgroup_group_segment_byte_size), Index: test/CodeGen/AMDGPU/trap.ll =================================================================== --- test/CodeGen/AMDGPU/trap.ll +++ test/CodeGen/AMDGPU/trap.ll @@ -1,11 +1,34 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s +; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s declare void @llvm.trap() #0 +declare void @llvm.debugtrap() #0 -; GCN-LABEL: {{^}}trap: -; GCN: v_mov_b32_e32 v0, 1 -; GCN: s_mov_b64 s[0:1], s[4:5] -; GCN: s_trap 1 +; HSA-LABEL: {{^}}hsa_trap: +; HSA: is_trap_handler_supported = 1 +; HSA: v_mov_b32_e32 v0, 1 +; HSA: s_mov_b64 s[0:1], s[4:5] +; HSA: s_trap 1 +define void @hsa_trap() { + call void @llvm.trap() + ret void +} + +; HSA-LABEL: {{^}}hsa_debugtrap: +; HSA: is_trap_handler_supported = 1 +; HSA: v_mov_b32_e32 v0, 1 +; HSA: s_mov_b64 s[0:1], s[4:5] +; HSA: s_trap 1 +define void @hsa_debugtrap() { + call void @llvm.debugtrap() + ret void +} + +; For non-HSA path +; ERROR: error: <unknown>:0:0: in function trap void (): trap handler 
not supported +; ERROR-LABEL: {{^}}trap: +; ERROR: s_endpgm +; ERROR-NEXT: s_endpgm define void @trap() { call void @llvm.trap() ret void