Index: include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -93,9 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
-BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
-BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*fiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*fiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*fiIiIb", "n")
 
 //===----------------------------------------------------------------------===//
 // VI+ only builtins.
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9860,6 +9860,29 @@
     CI->setConvergent();
     return CI;
   }
+  case AMDGPU::BI__builtin_amdgcn_ds_fadd:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmin:
+  case AMDGPU::BI__builtin_amdgcn_ds_fmax: {
+    llvm::SmallVector<llvm::Value *, 5> Args;
+    for (unsigned I = 0; I != 5; ++I)
+      Args.push_back(EmitScalarExpr(E->getArg(I)));
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    case AMDGPU::BI__builtin_amdgcn_ds_fadd:
+      ID = Intrinsic::amdgcn_ds_fadd;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_ds_fmin:
+      ID = Intrinsic::amdgcn_ds_fmin;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_ds_fmax:
+      ID = Intrinsic::amdgcn_ds_fmax;
+      break;
+    default:
+      llvm_unreachable("Unknown BuiltinID");
+    }
+    Value *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, Args);
+  }
 
   // amdgcn workitem
   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===================================================================
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -91,18 +91,18 @@
 
 // CHECK-LABEL: @test_ds_fadd
 // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fadd(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fadd(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmin
 // CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmin(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fmin(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
 }
 
 // CHECK-LABEL: @test_ds_fmax
 // CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
-void test_ds_fmax(__attribute__((address_space(3))) float *out, float src) {
+void test_ds_fmax(local float *out, float src) {
   *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
 }