Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -67,6 +67,12 @@ "Support unaligned global loads and stores" >; +def FeatureTrapHandler: SubtargetFeature<"trap-handler", + "TrapHandler", + "true", + "Trap handler support" +>; + def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access", "UnalignedScratchAccess", "true", Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -191,7 +191,8 @@ { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }, { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }, - { "llvm.trap", "amdgpu-queue-ptr" } + { "llvm.trap", "amdgpu-queue-ptr" }, + { "llvm.debugtrap", "amdgpu-queue-ptr" } }; // TODO: We should not add the attributes if the known compile time workgroup Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -247,6 +247,9 @@ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " + Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " + + Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)), + false); OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " + Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)), false); @@ -635,6 +638,7 @@ ProgInfo.ComputePGMRSrc2 = S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) | S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) | + S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) | S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) | S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) | S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) | Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -66,6 +66,20 @@ ISAVersion8_1_0, }; + enum TrapHandlerAbi { + TrapHandlerAbiNone = 0, + TrapHandlerAbiHsa = 1 + }; + + enum TrapCode { + TrapCodeLLVMTrap = 1, + TrapCodeLLVMDebugTrap = 2 + }; + + enum TrapRegValues { + TrapCodeLLVMTrapRegValue = 1 + }; + protected: // Basic subtarget description. Triple TargetTriple; @@ -87,6 +101,7 @@ bool FlatForGlobal; bool NoAddr64; bool UnalignedScratchAccess; + bool TrapHandler; bool UnalignedBufferAccess; bool EnableXNACK; bool DebuggerInsertNops; @@ -257,6 +272,10 @@ return CaymanISA; } + TrapHandlerAbi getTrapHandlerAbi() const { + return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; + } + bool isPromoteAllocaEnabled() const { return EnablePromoteAlloca; } @@ -309,6 +328,10 @@ return UnalignedScratchAccess; } + bool isTrapHandlerEnabled() const { + return TrapHandler; + } + bool isXNACKEnabled() const { return EnableXNACK; } @@ -488,7 +511,8 @@ enum { // The closed Vulkan driver sets 96, which limits the wave count to 8 but // doesn't spill SGPRs as much as when 80 is set. - FIXED_SGPR_COUNT_FOR_INIT_BUG = 96 + FIXED_SGPR_COUNT_FOR_INIT_BUG = 96, + TRAP_HANDLER_SGPR_COUNT = 16 }; private: Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -43,7 +43,7 @@ SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-for-global,+unaligned-buffer-access,"; + FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; FullFS += FS; @@ -84,6 +84,7 @@ FlatForGlobal(false), NoAddr64(false), UnalignedScratchAccess(false), + TrapHandler(false), UnalignedBufferAccess(false), EnableXNACK(false), Index: lib/Target/AMDGPU/SIDefines.h =================================================================== --- lib/Target/AMDGPU/SIDefines.h +++ lib/Target/AMDGPU/SIDefines.h @@ -300,6 +300,9 @@ #define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1) #define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F) #define C_00B84C_USER_SGPR 0xFFFFFFC1 +#define S_00B84C_TRAP_HANDLER(x) (((x) & 0x1) << 6) +#define G_00B84C_TRAP_HANDLER(x) (((x) >> 6) & 0x1) +#define C_00B84C_TRAP_HANDLER 0xFFFFFFBF #define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7) #define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1) #define C_00B84C_TGID_X_EN 0xFFFFFF7F @@ -388,6 +391,8 @@ #define R_SPILLED_SGPRS 0x4 #define R_SPILLED_VGPRS 0x8 +#define TRAP_V0 0x1 + } // End namespace llvm #endif Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -273,6 +273,7 @@ // On SI this is s_memtime and s_memrealtime on VI. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -1786,24 +1787,41 @@ } switch (MI.getOpcode()) { - case AMDGPU::S_TRAP_PSEUDO: { - DebugLoc DL = MI.getDebugLoc(); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0) - .addImm(1); - - MachineFunction *MF = BB->getParent(); - SIMachineFunctionInfo *Info = MF->getInfo(); - unsigned UserSGPR = Info->getQueuePtrUserSGPR(); - assert(UserSGPR != AMDGPU::NoRegister); - - if (!BB->isLiveIn(UserSGPR)) - BB->addLiveIn(UserSGPR); - - BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1) - .addReg(UserSGPR); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)).addImm(0x1) - .addReg(AMDGPU::VGPR0, RegState::Implicit) - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit); + case AMDGPU::S_TRAP_PSEUDO: { + const DebugLoc &DL = MI.getDebugLoc(); + const int TrapType = MI.getOperand(0).getImm(); + + if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && + Subtarget->isTrapHandlerEnabled()) { + BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0) + .addImm(TRAP_V0); + + MachineFunction *MF = BB->getParent(); + SIMachineFunctionInfo *Info = MF->getInfo(); + unsigned UserSGPR = Info->getQueuePtrUserSGPR(); + assert(UserSGPR != AMDGPU::NoRegister); + + if (!BB->isLiveIn(UserSGPR)) + BB->addLiveIn(UserSGPR); + + BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1) + .addReg(UserSGPR); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)) + .addImm(TrapType) + .addReg(AMDGPU::VGPR0, RegState::Implicit) + .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit); + } else { + if (TrapType == SISubtarget::TrapCodeLLVMDebugTrap) { + DiagnosticInfoUnsupported NoTrap(*MF->getFunction(), + "debugtrap handler not supported", + DL, + DS_Warning); + + LLVMContext &C = MF->getFunction()->getContext(); + C.diagnose(NoTrap); + } + BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM)); + } MI.eraseFromParent(); return BB; Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -617,6 +617,11 @@ int NONE = 0; } +def TRAPTYPE { + int LLVM_TRAP = 1; + int LLVM_DEBUG_TRAP = 2; +} + //===----------------------------------------------------------------------===// // // SI Instruction multiclass helpers. Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -111,8 +111,7 @@ (ins VSrc_b64:$src0)>; } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] -def S_TRAP_PSEUDO : VPseudoInstSI <(outs), (ins), - [(trap)]> { +def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> { let hasSideEffects = 1; let SALU = 1; let usesCustomInserter = 1; @@ -390,6 +389,15 @@ } // End SubtargetPredicate = isGCN let Predicates = [isGCN] in { +def :Pat< + (trap), + (S_TRAP_PSEUDO TRAPTYPE.LLVM_TRAP) +>; + +def :Pat< + (debugtrap), + (S_TRAP_PSEUDO TRAPTYPE.LLVM_DEBUG_TRAP) +>; def : Pat< (int_amdgcn_else i64:$src, bb:$target), Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1242,8 +1242,12 @@ // Compute maximum number of SGPRs function can use using default/requested // minimum number of waves per execution unit. std::pair WavesPerEU = MFI.getWavesPerEU(); - unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false); - unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true); + int TrapHandlerSGPRsReserved = + ST.isTrapHandlerEnabled() ? SISubtarget::TRAP_HANDLER_SGPR_COUNT : 0; + unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false) + - TrapHandlerSGPRsReserved; + unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true) + - TrapHandlerSGPRsReserved; // Check if maximum number of SGPRs was explicitly requested using // "amdgpu-num-sgpr" attribute. Index: lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h +++ lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h @@ -87,7 +87,7 @@ // TODO: cdbg_user COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN), COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR), -// TODO: enable_trap_handler +COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER), COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN), COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN), COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN), Index: test/CodeGen/AMDGPU/trap.ll =================================================================== --- test/CodeGen/AMDGPU/trap.ll +++ test/CodeGen/AMDGPU/trap.ll @@ -1,11 +1,71 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s + +; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s + +; enable trap handler feature +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=RSRC2-BIT %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s + +; disable trap handler feature +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NO-RSRC2-BIT %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s + +; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN-WARNING %s declare void @llvm.trap() #0 +declare void @llvm.debugtrap() #0 + +; GCN-LABEL: {{^}}hsa_trap: +; HSA-TRAP: enable_trap_handler = 1 +; HSA-TRAP: v_mov_b32_e32 v0, 1 +; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP: s_trap 1 +; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 1 + +; for llvm.trap in hsa path without ABI, direct generate s_endpgm instruction without any warning information +; NO-HSA-TRAP: enable_trap_handler = 0 +; NO-HSA-TRAP: s_endpgm +; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0 + +; TRAP-BIT: enable_trap_handler = 1 +; NO-TRAP-BIT: enable_trap_handler = 0 +; NO-MESA-TRAP: s_endpgm +; RSRC2-BIT: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 1 +; NO-RSRC2-BIT: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0 +define void @hsa_trap() { + call void @llvm.trap() + ret void +} + +; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (): debugtrap handler not supported +; GCN-LABEL: {{^}}hsa_debugtrap: +; HSA-TRAP: enable_trap_handler = 1 +; HSA-TRAP: v_mov_b32_e32 v0, 1 +; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP: s_trap 2 +; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 1 + +; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction +; NO-HSA-TRAP: enable_trap_handler = 0 +; NO-HSA-TRAP: s_endpgm +; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0 + +; TRAP-BIT: enable_trap_handler = 1 +; NO-TRAP-BIT: enable_trap_handler = 0 +; NO-MESA-TRAP: s_endpgm +define void @hsa_debugtrap() { + call void @llvm.debugtrap() + ret void +} +; For non-HSA path ; GCN-LABEL: {{^}}trap: -; GCN: v_mov_b32_e32 v0, 1 -; GCN: s_mov_b64 s[0:1], s[4:5] -; GCN: s_trap 1 +; TRAP-BIT: enable_trap_handler = 1 +; NO-TRAP-BIT: enable_trap_handler = 0 +; NO-HSA-TRAP: s_endpgm +; NO-MESA-TRAP: s_endpgm define void @trap() { call void @llvm.trap() ret void