Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -67,6 +67,12 @@
   "Support unaligned global loads and stores"
 >;
 
+def FeatureTrapHandler : SubtargetFeature<"trap-handler",
+  "TrapHandler",
+  "true",
+  "Trap handler support"
+>;
+
 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
   "UnalignedScratchAccess",
   "true",
Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -191,7 +191,8 @@
     { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
     { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
     { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
-	{ "llvm.trap", "amdgpu-queue-ptr" }
+    { "llvm.trap", "amdgpu-queue-ptr" },
+    { "llvm.debugtrap", "amdgpu-queue-ptr" }
   };
 
   // TODO: We should not add the attributes if the known compile time workgroup
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -247,6 +247,9 @@
       OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
                                   Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
                                   false);
+      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
+                                  Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),
+                                  false);
       OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
                                   Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
                                   false);
@@ -635,6 +638,7 @@
   ProgInfo.ComputePGMRSrc2 =
       S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
       S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
+      S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) |
       S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
       S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
       S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -66,6 +66,22 @@
     ISAVersion8_1_0,
   };
 
+  enum TrapHandlerAbi {
+    TrapHandlerAbiNone = 0, // Reported when the target OS is not amdhsa.
+    TrapHandlerAbiHsa = 1   // Reported when the target OS is amdhsa.
+  };
+
+  enum TrapCode {
+    TrapCodeBreakPoint = 0,    // NOTE(review): unused in this patch - confirm.
+    TrapCodeLLVMTrap = 1,      // llvm.trap; same value as TRAPTYPE.LLVM_TRAP.
+    TrapCodeLLVMDebugTrap = 2, // llvm.debugtrap; TRAPTYPE.LLVM_DEBUG_TRAP.
+    TrapCodeHSADebugTrap = 3   // NOTE(review): unused in this patch - confirm.
+  };
+
+  enum TrapRegValues {
+    TrapCodeLLVMTrapRegValue = 1 // NOTE(review): no user in this patch - confirm.
+  };
+
 protected:
   // Basic subtarget description.
   Triple TargetTriple;
@@ -87,6 +103,7 @@
   bool FlatForGlobal;
   bool NoAddr64;
   bool UnalignedScratchAccess;
+  bool TrapHandler;
   bool UnalignedBufferAccess;
   bool EnableXNACK;
   bool DebuggerInsertNops;
@@ -257,6 +274,10 @@
     return CaymanISA;
   }
 
+  TrapHandlerAbi getTrapHandlerAbi() const {
+    return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
+  }
+
   bool isPromoteAllocaEnabled() const {
     return EnablePromoteAlloca;
   }
@@ -309,6 +330,10 @@
     return UnalignedScratchAccess;
   }
 
+  bool isTrapHandlerEnabled() const {
+    return TrapHandler;
+  }
+
   bool isXNACKEnabled() const {
     return EnableXNACK;
   }
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -43,7 +43,7 @@
 
   SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
-    FullFS += "+flat-for-global,+unaligned-buffer-access,";
+    FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
 
   FullFS += FS;
 
@@ -84,6 +84,7 @@
     FlatForGlobal(false),
     NoAddr64(false),
     UnalignedScratchAccess(false),
+    TrapHandler(false),
     UnalignedBufferAccess(false),
 
     EnableXNACK(false),
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -300,6 +300,9 @@
 #define   S_00B84C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B84C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B84C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B84C_TRAP_HANDLER(x)                                    (((x) & 0x1) << 6)
+#define   G_00B84C_TRAP_HANDLER(x)                                    (((x) >> 6) & 0x1)
+#define   C_00B84C_TRAP_HANDLER                                       0xFFFFFFBF
 #define   S_00B84C_TGID_X_EN(x)                                       (((x) & 0x1) << 7)
 #define   G_00B84C_TGID_X_EN(x)                                       (((x) >> 7) & 0x1)
 #define   C_00B84C_TGID_X_EN                                          0xFFFFFF7F
@@ -387,7 +390,6 @@
 
 #define R_SPILLED_SGPRS         0x4
 #define R_SPILLED_VGPRS         0x8
-
 } // End namespace llvm
 
 #endif
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -273,6 +273,7 @@
   // On SI this is s_memtime and s_memrealtime on VI.
   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
+  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
 
   setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
   setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -1786,24 +1787,44 @@
   }
 
   switch (MI.getOpcode()) {
-   case AMDGPU::S_TRAP_PSEUDO: {
-	DebugLoc DL = MI.getDebugLoc();
-	BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
-     .addImm(1);
-
-    MachineFunction *MF = BB->getParent();
-    SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
-    unsigned UserSGPR = Info->getQueuePtrUserSGPR();
-    assert(UserSGPR != AMDGPU::NoRegister);
-
-    if (!BB->isLiveIn(UserSGPR))
-      BB->addLiveIn(UserSGPR);
-
-    BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
-     .addReg(UserSGPR);
-    BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)).addImm(0x1)
-     .addReg(AMDGPU::VGPR0, RegState::Implicit)
-     .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
+  case AMDGPU::S_TRAP_PSEUDO: {
+    // Expand the trap pseudo. Operand 0 is the trap type immediate (compared
+    // against SISubtarget::TrapCode below).
+    const DebugLoc &DL = MI.getDebugLoc();
+    const int TrapType = MI.getOperand(0).getImm();
+    MachineFunction *MF = BB->getParent(); // Used by both branches below.
+
+    if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
+        Subtarget->isTrapHandlerEnabled()) {
+      // HSA ABI with the trap handler enabled: copy the queue pointer user
+      // SGPR into s[0:1] and emit s_trap with the trap type as immediate.
+      SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+      unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+      assert(UserSGPR != AMDGPU::NoRegister);
+
+      if (!BB->isLiveIn(UserSGPR))
+        BB->addLiveIn(UserSGPR);
+
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
+        .addReg(UserSGPR);
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP))
+        .addImm(TrapType)
+        .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
+    } else if (TrapType == SISubtarget::TrapCodeLLVMDebugTrap) {
+      // No usable trap handler: warn and lower llvm.debugtrap to s_nop 0.
+      DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
+                                       "debugtrap handler not supported",
+                                       DL,
+                                       DS_Warning);
+      LLVMContext &C = MF->getFunction()->getContext();
+      C.diagnose(NoTrap);
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP)).addImm(0);
+    } else {
+      // No usable trap handler: lower llvm.trap to s_endpgm. Any other trap
+      // type is a bug; without assertions it is also lowered to s_endpgm
+      // rather than being dropped silently.
+      assert(TrapType == SISubtarget::TrapCodeLLVMTrap &&
+             "unsupported trap type");
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
+    }
 
     MI.eraseFromParent();
     return BB;
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -617,6 +617,11 @@
   int NONE = 0;
 }
 
+def TRAPTYPE {
+  int LLVM_TRAP = 1;        // llvm.trap; same value as TrapCodeLLVMTrap.
+  int LLVM_DEBUG_TRAP = 2;  // llvm.debugtrap; same as TrapCodeLLVMDebugTrap.
+}
+
 //===----------------------------------------------------------------------===//
 //
 // SI Instruction multiclass helpers.
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -111,8 +111,7 @@
                                       (ins VSrc_b64:$src0)>;
 } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
 
-def S_TRAP_PSEUDO : VPseudoInstSI <(outs), (ins),
-  [(trap)]> {
+def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> {
   let hasSideEffects = 1;
   let SALU = 1;
   let usesCustomInserter = 1;
@@ -390,6 +389,15 @@
 } // End SubtargetPredicate = isGCN
 
 let Predicates = [isGCN] in {
+def : Pat<
+  (trap),
+  (S_TRAP_PSEUDO TRAPTYPE.LLVM_TRAP)
+>;
+
+def : Pat<
+  (debugtrap),
+  (S_TRAP_PSEUDO TRAPTYPE.LLVM_DEBUG_TRAP)
+>;
 
 def : Pat<
   (int_amdgcn_else i64:$src, bb:$target),
Index: lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
===================================================================
--- lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -87,7 +87,7 @@
 // TODO: cdbg_user
 COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
 COMPPGM2(user_sgpr_count,                 compute_pgm_rsrc2_user_sgpr,      USER_SGPR),
-// TODO: enable_trap_handler
+COMPPGM2(enable_trap_handler,             compute_pgm_rsrc2_trap_handler,   TRAP_HANDLER),
 COMPPGM2(enable_sgpr_workgroup_id_x,      compute_pgm_rsrc2_tgid_x_en,      TGID_X_EN),
 COMPPGM2(enable_sgpr_workgroup_id_y,      compute_pgm_rsrc2_tgid_y_en,      TGID_Y_EN),
 COMPPGM2(enable_sgpr_workgroup_id_z,      compute_pgm_rsrc2_tgid_z_en,      TGID_Z_EN),
Index: test/CodeGen/AMDGPU/fneg-fabs.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -6,7 +6,7 @@
 ; CI: v_cvt_f32_f16_e32
 ; CI: v_sub_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|
 
-; VI-NOT: and
+; VI-NOT: _and
 ; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|
 define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
   %fabs = call half @llvm.fabs.f16(half %x)
@@ -22,9 +22,9 @@
 ; CI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
 ; CI: v_cvt_f16_f32_e32
 
-; VI-NOT: and
+; VI-NOT: _and
 ; VI: v_mul_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
-; VI-NOT: and
+; VI-NOT: _and
 define void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
   %fabs = call half @llvm.fabs.f16(half %x)
   %fsub = fsub half -0.000000e+00, %fabs
Index: test/CodeGen/AMDGPU/trap.ll
===================================================================
--- test/CodeGen/AMDGPU/trap.ll
+++ test/CodeGen/AMDGPU/trap.ll
@@ -1,11 +1,80 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s
+
+; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
+
+; enable trap handler feature
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s
+
+; disable trap handler feature
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s
+
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
 
 declare void @llvm.trap() #0
+declare void @llvm.debugtrap() #0
+
+; MESA-TRAP: .section .AMDGPU.config
+; MESA-TRAP:  .long   47180
+; MESA-TRAP-NEXT: .long   208
+
+; NOMESA-TRAP: .section .AMDGPU.config
+; NOMESA-TRAP:  .long   47180
+; NOMESA-TRAP-NEXT: .long   144
+
+; GCN-LABEL: {{^}}hsa_trap:
+; HSA-TRAP: enable_trap_handler = 1
+; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP: s_trap 1
+
+; For llvm.trap on the HSA path with the trap handler disabled, s_endpgm is generated directly, without any warning.
+; NO-HSA-TRAP: enable_trap_handler = 0
+; NO-HSA-TRAP: s_endpgm
+; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
+
+; TRAP-BIT: enable_trap_handler = 1
+; NO-TRAP-BIT: enable_trap_handler = 0
+; NO-MESA-TRAP: s_endpgm
+define void @hsa_trap() {
+  call void @llvm.trap()
+  ret void
+}
+
+; MESA-TRAP: .section .AMDGPU.config
+; MESA-TRAP:  .long   47180
+; MESA-TRAP-NEXT: .long   208
+
+; NOMESA-TRAP: .section .AMDGPU.config
+; NOMESA-TRAP:  .long   47180
+; NOMESA-TRAP-NEXT: .long   144
+
+; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (): debugtrap handler not supported
+; GCN-LABEL: {{^}}hsa_debugtrap:
+; HSA-TRAP: enable_trap_handler = 1
+; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP: s_trap 2
+
+; For llvm.debugtrap on the HSA path with the trap handler disabled, a warning is generated and the debugtrap itself is lowered to s_nop 0.
+; NO-HSA-TRAP: enable_trap_handler = 0
+; NO-HSA-TRAP: s_endpgm
+
+; TRAP-BIT: enable_trap_handler = 1
+; NO-TRAP-BIT: enable_trap_handler = 0
+; NO-MESA-TRAP: s_endpgm
+define void @hsa_debugtrap() {
+  call void @llvm.debugtrap()
+  ret void
+}
 
+; For non-HSA path
 ; GCN-LABEL: {{^}}trap:
-; GCN: v_mov_b32_e32 v0, 1
-; GCN: s_mov_b64 s[0:1], s[4:5]
-; GCN: s_trap 1
+; TRAP-BIT: enable_trap_handler = 1
+; NO-TRAP-BIT: enable_trap_handler = 0
+; NO-HSA-TRAP: s_endpgm
+; NO-MESA-TRAP: s_endpgm
 define void @trap() {
   call void @llvm.trap()
   ret void