Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -104,6 +104,11 @@
 // Instruction Intrinsics
 //===----------------------------------------------------------------------===//
 
+def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
+  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
+  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+
 def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
   Intrinsic<[], [], [IntrConvergent]>;
 
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -303,6 +303,7 @@
   /// Pointer to the start of the shader's constant data.
   CONST_DATA_PTR,
   SENDMSG,
+  SENDMSGHALT,
   INTERP_MOV,
   INTERP_P1,
   INTERP_P2,
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2740,6 +2740,7 @@
   NODE_NAME_CASE(KILL)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(SENDMSG)
+  NODE_NAME_CASE(SENDMSGHALT)
   NODE_NAME_CASE(INTERP_MOV)
   NODE_NAME_CASE(INTERP_P1)
   NODE_NAME_CASE(INTERP_P2)
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -246,6 +246,10 @@
   SDTypeProfile<0, 1, [SDTCisInt<0>]>,
   [SDNPHasChain, SDNPInGlue]>;
 
+def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
+  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+  [SDNPHasChain, SDNPInGlue]>;
+
 def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
   SDTypeProfile<1, 3, [SDTCisFP<0>]>,
   [SDNPInGlue]>;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2295,12 +2295,18 @@
   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 
   switch (IntrinsicID) {
-  case AMDGPUIntrinsic::SI_sendmsg: {
+  case Intrinsic::amdgcn_s_sendmsg: {
     Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
     SDValue Glue = Chain.getValue(1);
     return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
                        Op.getOperand(2), Glue);
   }
+  case Intrinsic::amdgcn_s_sendmsghalt: {
+    Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+    SDValue Glue = Chain.getValue(1);
+    return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
+                       Op.getOperand(2), Glue);
+  }
   case AMDGPUIntrinsic::SI_tbuffer_store: {
     SDValue Ops[] = {
       Chain,
Index: lib/Target/AMDGPU/SIInsertWaits.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertWaits.cpp
+++ lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -491,7 +491,7 @@
     return;
 
   // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
-  if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
+  if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
     BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
     LastInstWritesM0 = false;
     return;
@@ -591,7 +591,8 @@
       // but we also want to wait for any other outstanding transfers before
       // signalling other hardware blocks
       if (I->getOpcode() == AMDGPU::S_BARRIER ||
-          I->getOpcode() == AMDGPU::S_SENDMSG)
+          I->getOpcode() == AMDGPU::S_SENDMSG ||
+          I->getOpcode() == AMDGPU::S_SENDMSGHALT)
         Required = LastIssued;
       else
         Required = handleOperands(*I);
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -508,9 +508,11 @@
 def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
   [(AMDGPUsendmsg (i32 imm:$simm16))]
 >;
-} // End Uses = [EXEC, M0]
-
-def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
+def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
+  [(AMDGPUsendmsghalt (i32 imm:$simm16))]
+>;
+} // End Uses = [EXEC, M0]
+
 def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
 def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
   let simm16 = 0;
Index: lib/Target/AMDGPU/SIIntrinsics.td
===================================================================
--- lib/Target/AMDGPU/SIIntrinsics.td
+++ lib/Target/AMDGPU/SIIntrinsics.td
@@ -51,8 +51,6 @@
                        llvm_i32_ty],   // tfe(imm)
                       [IntrReadMem, IntrArgMemOnly]>;
 
-  def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
-
   // Fully-flexible SAMPLE instruction.
   class SampleRaw : Intrinsic <
     [llvm_v4f32_ty],   // vdata(VGPR)
Index: test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
+++ test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
@@ -8,10 +8,22 @@
 ; GCN-NEXT: s_endpgm
 
 define amdgpu_gs void @main(i32 inreg %a) #0 {
-  call void @llvm.SI.sendmsg(i32 3, i32 %a)
+  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %a)
   ret void
 }
 
-declare void @llvm.SI.sendmsg(i32, i32) #0
+; GCN-LABEL: {{^}}main_halt:
+; GCN: s_mov_b32 m0, s0
+; VI-NEXT: s_nop 0
+; GCN-NEXT: s_sendmsghalt sendmsg(MSG_INTERRUPT)
+; GCN-NEXT: s_endpgm
+
+define amdgpu_gs void @main_halt(i32 inreg %a) #0 {
+  call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %a)
+  ret void
+}
+
+declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
+declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
 
 attributes #0 = { nounwind }
Index: test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll
+++ test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll
@@ -4,21 +4,35 @@
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: s_mov_b32 m0, 0
 ; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT)
 ; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
 ; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1)
 ; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
 ; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
+; CHECK: s_sendmsghalt sendmsg(MSG_INTERRUPT)
+; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT, 0)
+; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_CUT, 1)
+; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
+; CHECK: s_sendmsghalt sendmsg(MSG_GS_DONE, GS_OP_NOP)
 
 define void @main() {
 main_body:
-  call void @llvm.SI.sendmsg(i32 34, i32 0);
-  call void @llvm.SI.sendmsg(i32 274, i32 0);
-  call void @llvm.SI.sendmsg(i32 562, i32 0);
-  call void @llvm.SI.sendmsg(i32 3, i32 0);
+  call void @llvm.amdgcn.s.sendmsg(i32 1, i32 0);
+  call void @llvm.amdgcn.s.sendmsg(i32 34, i32 0);
+  call void @llvm.amdgcn.s.sendmsg(i32 274, i32 0);
+  call void @llvm.amdgcn.s.sendmsg(i32 562, i32 0);
+  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0);
+
+  call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 0);
+  call void @llvm.amdgcn.s.sendmsghalt(i32 34, i32 0);
+  call void @llvm.amdgcn.s.sendmsghalt(i32 274, i32 0);
+  call void @llvm.amdgcn.s.sendmsghalt(i32 562, i32 0);
+  call void @llvm.amdgcn.s.sendmsghalt(i32 3, i32 0);
   ret void
 }
 
 ; Function Attrs: nounwind
-declare void @llvm.SI.sendmsg(i32, i32) #0
+declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
+declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
 
 attributes #0 = { nounwind }
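
Usage sketch (not part of the patch): a minimal IR example of declaring and calling the new intrinsics, mirroring the updated tests above. The function name @sendmsg_example and the %stream argument are illustrative only. The first operand is the message immediate as exercised by the tests (1 = MSG_INTERRUPT, 3 = MSG_GS_DONE with GS_OP_NOP); the second operand is copied to m0 before the send, and s_sendmsghalt also halts the wave after sending.

declare void @llvm.amdgcn.s.sendmsg(i32, i32)
declare void @llvm.amdgcn.s.sendmsghalt(i32, i32)

define amdgpu_gs void @sendmsg_example(i32 inreg %stream) {
  ; sendmsg(MSG_GS_DONE, GS_OP_NOP), with %stream placed in m0
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %stream)
  ; sendmsghalt(MSG_INTERRUPT); the wave halts once the message is sent
  call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %stream)
  ret void
}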