Index: include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- include/clang/Basic/BuiltinsAMDGPU.def +++ include/clang/Basic/BuiltinsAMDGPU.def @@ -98,6 +98,8 @@ BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n") +BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n") +BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n") //===----------------------------------------------------------------------===// // CI+ only builtins. Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -12478,6 +12478,14 @@ case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + case AMDGPU::BI__builtin_amdgcn_ds_append: + case AMDGPU::BI__builtin_amdgcn_ds_consume: { + Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? + Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); + return Builder.CreateCall(F, { Src0, Builder.getFalse() }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); Index: test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- test/CodeGenOpenCL/builtins-amdgcn.cl +++ test/CodeGenOpenCL/builtins-amdgcn.cl @@ -536,6 +536,18 @@ *out = __builtin_amdgcn_s_getpc(); } +// CHECK-LABEL: @test_ds_append_lds( +// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_append_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_append(ptr); +} + +// CHECK-LABEL: @test_ds_consume_lds( +// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_consume_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_consume(ptr); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }