diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -214,8 +214,8 @@
 TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
 
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts")
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -4477,6 +4477,21 @@
 
 bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
                                           CallExpr *TheCall) {
+
+  if (BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16 ||
+      BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32) {
+    auto TargetID = Context.getTargetInfo().getTargetID();
+    if (!TargetID || TargetID->find("gfx908") == std::string::npos)
+      return false;
+
+    // GFX908/MI100 global atomic f32/f16 fadd instructions do not return the
+    // old value from memory. Overriding the builtin's return type to 'void'
+    // forces clang to emit an error if any caller tries to use the return
+    // value of the builtin.
+    TheCall->setType(Context.VoidTy);
+    return false;
+  }
+
   // position of memory order and scope arguments in the builtin
   unsigned OrderIndex, ScopeIndex;
   switch (BuiltinID) {
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -70,11 +70,11 @@
 // GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX908: "target-features"="+16-bit-insts,+atomic-fadd-no-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-no-rtn-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-no-rtn-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
 // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
 // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -verify -S -emit-llvm -o - %s
+
+typedef half __attribute__((ext_vector_type(2))) half2;
+
+half2 test_global_fadd_f16(__global half2 *addrh2, half2 xh2) {
+  half2 *rtn;
+  // Statement, assignment, and return uses must each be rejected on gfx900.
+  __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}}
+  *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}}
+  return __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl
@@ -0,0 +1,10 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -verify -S -emit-llvm -o - %s
+
+float test_global_fadd_f32(__global float *addr, float x) {
+  float *rtn;
+  // Statement, assignment, and return uses must each be rejected on gfx900.
+  __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}}
+  *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}}
+  return __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
@@ -2,21 +2,12 @@
 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
 // RUN:   -verify -S -o - %s
 
-typedef half __attribute__((ext_vector_type(2))) half2;
-
-void test_global_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
-                      __global float *addrf, float xf,
-                      __global double *addr, double x) {
-  half2 *half_rtn;
-  float *fp_rtn;
+void test_global_fadd(__global double *addr, double x) {
   double *rtn;
-  *half_rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}}
-  *fp_rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmax_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmin_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmin_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmax_f64' needs target feature gfx90a-insts}}
-  __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}}
 }
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl
@@ -0,0 +1,40 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
+// RUN:   -verify -S -o - %s
+
+typedef half __attribute__((ext_vector_type(2))) half2;
+
+half2 func1(half2 x); // expected-note{{passing argument to parameter 'x' here}}
+float func2(float x); // expected-note{{passing argument to parameter 'x' here}}
+
+half2 test_global_fadd_v2f16(__global half2 *addrh2, half2 xh2) {
+  half2 *rtn1;
+  // gfx908: the call is typed 'void', so each value use below must be rejected.
+  half2 *rtn2 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{initializing '__generic half2 *__private' with an expression of incompatible type 'void'}}
+  *rtn1 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{assigning to '__generic half2' (vector of 2 'half' values) from incompatible type 'void'}}
+  *rtn1 = *rtn1 + __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{cannot convert between vector and non-scalar values ('half2' (vector of 2 'half' values) and 'void')}}
+  func1(__builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2)); // expected-error{{passing 'void' to parameter of incompatible type 'half2' (vector of 2 'half' values)}}
+  // Same uses with plain (non-pointer) half2 destinations.
+  half2 rtn3;
+  half2 rtn4 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{initializing '__private half2' (vector of 2 'half' values) with an expression of incompatible type 'void'}}
+  rtn3 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{assigning to '__private half2' (vector of 2 'half' values) from incompatible type 'void'}}
+  rtn3 = rtn3 + __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{cannot convert between vector and non-scalar values ('half2' (vector of 2 'half' values) and 'void')}}
+  // Returning the call result must be rejected as well.
+  return __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{returning 'void' from a function with incompatible result type 'half2' (vector of 2 'half' values)}}
+}
+
+float test_global_fadd_f32(__global float *addrf, float xf) {
+  float *rtn1;
+  // gfx908: the call is typed 'void', so each value use below must be rejected.
+  float *rtn2 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{initializing '__generic float *__private' with an expression of incompatible type 'void'}}
+  *rtn1 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{assigning to '__generic float' from incompatible type 'void'}}
+  *rtn1 = *rtn1 + __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{invalid operands to binary expression ('__generic float' and 'void')}}
+  func2(__builtin_amdgcn_global_atomic_fadd_f32(addrf, xf)); // expected-error{{passing 'void' to parameter of incompatible type 'float'}}
+  // Same uses with plain (non-pointer) float destinations.
+  float rtn3;
+  float rtn4 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{initializing '__private float' with an expression of incompatible type 'void'}}
+  rtn3 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{assigning to '__private float' from incompatible type 'void'}}
+  rtn3 = rtn3 + __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{invalid operands to binary expression ('__private float' and 'void')}}
+  // Returning the call result must be rejected as well.
+  return __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{returning 'void' from a function with incompatible result type 'float'}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
+// RUN:   %s -S -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
+
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
+// RUN:   -S -o - %s | FileCheck -check-prefix=GFX908 %s
+
+// REQUIRES: amdgpu-registered-target
+
+typedef half __attribute__((ext_vector_type(2))) half2;
+
+// CHECK-LABEL: test_global_add_half2
+// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %0, <2 x half> %1)
+// GFX908-LABEL:  test_global_add_half2
+// GFX908:  global_atomic_pk_add_f16 v[0:1], v2, off
+half2 test_global_add_half2(__global half2 *addr, half2 x) {
+  __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x); // Value is discarded; only the emitted call and instruction are verified.
+  return *addr;
+}
+
+// CHECK-LABEL: test_global_add_float
+// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %0, float %1)
+// GFX908-LABEL:  test_global_add_float
+// GFX908:  global_atomic_add_f32 v[0:1], v2, off
+float test_global_add_float(__global float *addr, float x) {
+  __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // Value is discarded; only the emitted call and instruction are verified.
+  return *addr;
+}
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -340,6 +340,7 @@
       Features["dot5-insts"] = true;
       Features["dot6-insts"] = true;
       Features["mai-insts"] = true;
+      Features["atomic-fadd-no-rtn-insts"] = true;
       [[fallthrough]];
     case GK_GFX906:
       Features["dl-insts"] = true;