# Front-end and IR declaration for the s_wait_event "export ready" operation.
#
#  * BuiltinsAMDGPU.def: declares __builtin_amdgcn_s_wait_event_export_ready
#    with type string "v", attribute string "n", and required feature
#    "gfx11-insts".
#  * builtins-amdgcn-gfx11.cl: adds a test that calls the builtin with no
#    arguments and checks for a
#    `call void @llvm.amdgcn.s.wait.event.export.ready` in the output.
#  * IntrinsicsAMDGPU.td: defines int_amdgcn_s_wait_event_export_ready, tied to
#    the builtin through ClangBuiltin<>, with empty result and operand lists
#    and the properties IntrNoMem, IntrHasSideEffects, IntrWillReturn.
#
# NOTE(review): the line breaks of this patch had been collapsed. Indentation
# and blank context lines below are reconstructed so that every hunk matches
# its @@ line counts; verify them against the target files before applying.
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -261,6 +261,7 @@
 
 // TODO: This is a no-op in wave32. Should the builtin require wavefrontsize64?
 TARGET_BUILTIN(__builtin_amdgcn_permlane64, "UiUi", "nc", "gfx11-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wait_event_export_ready, "v", "n", "gfx11-insts")
 
 //===----------------------------------------------------------------------===//
 // WMMA builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -37,3 +37,9 @@
 void test_permlane64(global uint* out, uint a) {
   *out = __builtin_amdgcn_permlane64(a);
 }
+
+// CHECK-LABEL: @test_s_wait_event_export_ready
+// CHECK: call void @llvm.amdgcn.s.wait.event.export.ready
+void test_s_wait_event_export_ready() {
+  __builtin_amdgcn_s_wait_event_export_ready();
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2067,6 +2067,10 @@
 
 def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU;
 def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU;
+def int_amdgcn_s_wait_event_export_ready :
+  ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
+  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
+>;
 
 //===----------------------------------------------------------------------===//
 // Deep learning intrinsics.
# Back-end selection and codegen test for the s_wait_event "export ready"
# intrinsic.
#
#  * SOPInstructions.td, first hunk: S_WAIT_EVENT gains a body that sets
#    hasSideEffects = 1.
#  * SOPInstructions.td, second hunk: a new GCNPat selects
#    int_amdgcn_s_wait_event_export_ready to S_WAIT_EVENT with the immediate
#    (i16 0).
#  * llvm.amdgcn.s.wait.event.ll (new file): runs llc for gfx1100 with
#    -global-isel=0 and with -global-isel, and expects `s_wait_event 0x0` in
#    both outputs.
#
# NOTE(review): the new GCNPat is added roughly 40 lines after the
# `let SubtargetPredicate = isGFX11Plus in { ... }` block that defines
# S_WAIT_EVENT is closed, so it appears to sit outside that predicate. Confirm
# whether the pattern needs its own SubtargetPredicate; the enclosing scope at
# that location is not visible in this patch.
# NOTE(review): the line breaks of this patch had been collapsed. Indentation
# and blank context lines below are reconstructed so that every hunk matches
# its @@ line counts; verify them against the target files before applying.
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1388,7 +1388,9 @@
 
 let SubtargetPredicate = isGFX11Plus in {
   def S_WAIT_EVENT : SOPP_Pseudo<"s_wait_event", (ins s16imm:$simm16),
-                                 "$simm16">;
+                                 "$simm16"> {
+                                   let hasSideEffects = 1;
+                                 }
   def S_DELAY_ALU : SOPP_Pseudo<"s_delay_alu", (ins DELAY_FLAG:$simm16),
                                 "$simm16">;
 } // End SubtargetPredicate = isGFX11Plus
@@ -1430,6 +1432,10 @@
   (S_SEXT_I32_I16 $src)
 >;
 
+def : GCNPat <
+  (int_amdgcn_s_wait_event_export_ready),
+    (S_WAIT_EVENT (i16 0))
+>;
 
 //===----------------------------------------------------------------------===//
 // SOP2 Patterns
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
@@ -0,0 +1,15 @@
+; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test_wait_event:
+; GCN: s_wait_event 0x0
+
+define amdgpu_ps void @test_wait_event() #0 {
+entry:
+  call void @llvm.amdgcn.s.wait.event.export.ready() #0
+  ret void
+}
+
+declare void @llvm.amdgcn.s.wait.event.export.ready() #0
+
+attributes #0 = { nounwind }