Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -281,6 +281,8 @@
 TARGET_BUILTIN(__builtin_amdgcn_s_sendmsg_rtn, "UiUIi", "n", "gfx11-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_sendmsg_rtnl, "UWiUIi", "n", "gfx11-insts")
 
+TARGET_BUILTIN(__builtin_amdgcn_ds_bvh_stack_rtn, "V2UiUiUiV4UiIi", "n", "gfx11-insts")
+
 //===----------------------------------------------------------------------===//
 // Special builtins.
 //===----------------------------------------------------------------------===//
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -16897,6 +16897,24 @@
                                   RayInverseDir, TextureDescr});
   }
 
+  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
+    // Forward every builtin operand to the intrinsic unchanged.
+    SmallVector<Value *, 4> Args;
+    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i)
+      Args.push_back(EmitScalarExpr(E->getArg(i)));
+
+    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
+    Value *Call = Builder.CreateCall(F, Args);
+    // The intrinsic returns a { i32, i32 } struct, but the builtin is declared
+    // to return a two-element vector, so repack the two fields.
+    Value *Rtn = Builder.CreateExtractValue(Call, 0);
+    Value *A = Builder.CreateExtractValue(Call, 1);
+    llvm::Type *RetTy = ConvertType(E->getType());
+    Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
+                                            (uint64_t)0);
+    return Builder.CreateInsertElement(I0, A, 1);
+  }
+
   case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
   case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
   case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11-err.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11-err.cl
@@ -0,0 +1,11 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -verify -S -emit-llvm -o - %s
+
+typedef unsigned int uint;
+typedef uint uint2 __attribute__((ext_vector_type(2)));
+typedef uint uint4 __attribute__((ext_vector_type(4)));
+
+kernel void builtins_amdgcn_bvh_err(global uint2* out, uint addr, uint data, uint4 data1, uint offset) {
+  *out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, offset); // expected-error {{'__builtin_amdgcn_ds_bvh_stack_rtn' must be a constant integer}}
+}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -6,6 +6,8 @@
 
 typedef unsigned int uint;
 typedef unsigned long ulong;
+typedef uint uint2 __attribute__((ext_vector_type(2)));
+typedef uint uint4 __attribute__((ext_vector_type(4)));
 
 // CHECK-LABEL: @test_s_sendmsg_rtn(
 // CHECK: call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
@@ -18,3 +20,14 @@
 void test_s_sendmsg_rtnl(global ulong* out) {
   *out = __builtin_amdgcn_s_sendmsg_rtnl(0);
 }
+
+// CHECK-LABEL: @test_ds_bvh_stack_rtn(
+// CHECK: %0 = tail call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
+// CHECK: %1 = extractvalue { i32, i32 } %0, 0
+// CHECK: %2 = extractvalue { i32, i32 } %0, 1
+// CHECK: %3 = insertelement <2 x i32> poison, i32 %1, i64 0
+// CHECK: %4 = insertelement <2 x i32> %3, i32 %2, i64 1
+void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
+{
+  *out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
+}