diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -214,7 +214,7 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "gfx90a-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts") diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -206,6 +206,7 @@ Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["gfx11-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; break; case GK_GFX1036: case GK_GFX1035: @@ -264,6 +265,7 @@ case GK_GFX90A: Features["gfx90a-insts"] = true; Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; [[fallthrough]]; case GK_GFX908: Features["dot3-insts"] = true; @@ -271,6 +273,7 @@ Features["dot5-insts"] = true; Features["dot6-insts"] = true; Features["mai-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; [[fallthrough]]; case GK_GFX906: Features["dl-insts"] = true; diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl @@ -11,7 +11,6 @@ float *fp_rtn; double *rtn; *half_rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}} - *fp_rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmax_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmin_f64' needs target feature gfx90a-insts}} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -43,3 +43,9 @@ void test_s_wait_event_export_ready() { __builtin_amdgcn_s_wait_event_export_ready(); } + +// CHECK-LABEL: @test_global_add_f32 +// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +void test_global_add_f32(float *rtn, global float *addr, float x) { + *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx908.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx908.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx908.cl @@ -0,0 +1,11 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=IR +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -o - %s | FileCheck %s --check-prefix=GFX908 + +// IR-LABEL: @test_global_add_f32 +// IR: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +// GFX908-LABEL: test_global_add_f32 +// GFX908: global_atomic_add_f32 +void test_global_add_f32(float *rtn, global float *addr, float x) { + *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); +}