# Reviewer notes (free text ahead of the first `diff --git` header; not part of any hunk).
#
# What this patch does, per file:
#   * IntrinsicsAMDGPU.td  - declares int_amdgcn_wait_event_export_ready: no operands,
#     no results, attributes IntrNoMem, IntrHasSideEffects, IntrWillReturn.
#   * SIISelLowering.cpp   - adds a switch case for that intrinsic which builds an
#     AMDGPU::S_WAIT_EVENT machine node (result type MVT::Other) from a target
#     constant 0 and the incoming Chain.
#     NOTE(review): presumably simm16 == 0 is the "export ready" encoding - confirm
#     against the ISA documentation.
#   * SOPInstructions.td   - sets hasSideEffects = 1 on the S_WAIT_EVENT pseudo.
#     NOTE(review): presumably so the wait is not treated as removable or freely
#     reorderable - confirm intent with the author.
#   * llvm.amdgcn.wait.event.ll - new test: runs llc for gfx1100 and checks that the
#     intrinsic call is emitted as `s_wait_event 0x0`.
#
# NOTE(review): this patch arrived flattened onto two physical lines. The line breaks
# and indentation below were restored by convention. Blank *context* lines could not
# be recovered, so the @@ line counts of the first three hunks cannot be confirmed
# from here - regenerate the patch from the tree before applying. The new-file hunk
# does account for all 15 added lines once the split `; GCN-LABEL:` line is rejoined.
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2067,6 +2067,9 @@
 def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU;
 def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU;
+def int_amdgcn_wait_event_export_ready :
+  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
+>;
 //===----------------------------------------------------------------------===//
 // Deep learning intrinsics.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8372,6 +8372,10 @@
   case Intrinsic::amdgcn_end_cf:
     return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
                                       Op->getOperand(2), Chain), 0);
+  case Intrinsic::amdgcn_wait_event_export_ready:
+    return SDValue(DAG.getMachineNode(AMDGPU::S_WAIT_EVENT, DL, MVT::Other,
+                                      DAG.getTargetConstant(0, DL, MVT::i32),
+                                      Chain), 0);
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1388,7 +1388,9 @@
 let SubtargetPredicate = isGFX11Plus in {
   def S_WAIT_EVENT : SOPP_Pseudo<"s_wait_event", (ins s16imm:$simm16),
-                                 "$simm16">;
+                                 "$simm16"> {
+                                   let hasSideEffects = 1;
+                                 }
   def S_DELAY_ALU : SOPP_Pseudo<"s_delay_alu", (ins DELAY_FLAG:$simm16),
                                 "$simm16">;
 } // End SubtargetPredicate = isGFX11Plus
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wait.event.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wait.event.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wait.event.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test_wait_event:
+; GCN: s_wait_event 0x0
+
+define amdgpu_ps void @test_wait_event() #0 {
+entry:
+  call void @llvm.amdgcn.wait.event.export.ready() #1
+  ret void
+}
+
+declare void @llvm.amdgcn.wait.event.export.ready() #0
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }