diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -196,6 +196,19 @@
 
 TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
 
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "gfx908-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts")
+
+TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f64, "dd*0d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmin_f64, "dd*0d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-insts")
+
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts")
+
 //===----------------------------------------------------------------------===//
 // Deep learning builtins.
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -16201,6 +16201,78 @@
     Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
     return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
   }
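+  // Lower the global/flat fp atomic builtins to the corresponding AMDGPU
+  // intrinsics, selecting the overload from the value and pointer types.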
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
+    Intrinsic::ID IID;
+    llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
+    switch (BuiltinID) {
+    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
+      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
+      IID = Intrinsic::amdgcn_global_atomic_fadd;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
+      ArgTy = llvm::FixedVectorType::get(
+          llvm::Type::getHalfTy(getLLVMContext()), 2);
+      IID = Intrinsic::amdgcn_global_atomic_fadd;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
+      IID = Intrinsic::amdgcn_global_atomic_fadd;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+      IID = Intrinsic::amdgcn_global_atomic_fmin;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+      IID = Intrinsic::amdgcn_global_atomic_fmax;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
+      IID = Intrinsic::amdgcn_flat_atomic_fadd;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
+      IID = Intrinsic::amdgcn_flat_atomic_fmin;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
+      IID = Intrinsic::amdgcn_flat_atomic_fmax;
+      break;
+    }
+    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
+    llvm::Function *F =
+        CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
+    return Builder.CreateCall(F, {Addr, Val});
+  }
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: {
+    Intrinsic::ID IID;
+    llvm::Type *ArgTy;
+    switch (BuiltinID) {
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
+      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
+      IID = Intrinsic::amdgcn_ds_fadd;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
+      ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
+      IID = Intrinsic::amdgcn_ds_fadd;
+      break;
+    }
+    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
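+    // llvm.amdgcn.ds.fadd takes extra i32 ordering, i32 scope and i1
+    // isVolatile operands that the builtin does not expose; pass zero/false.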
+    llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
+        llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
+    llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
+        llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
+    llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
+    return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
+  }
   case AMDGPU::BI__builtin_amdgcn_read_exec: {
     CallInst *CI = cast<CallInst>(
       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx1030.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx1030.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx1030.cl
@@ -0,0 +1,14 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1030 \
+// RUN:   %s -S -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1030 \
+// RUN:   -S -o - %s | FileCheck -check-prefix=GFX1030 %s
+
+// CHECK-LABEL: test_ds_addf_local
+// CHECK: call float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* %{{.*}}, float %{{.*}},
+// GFX1030-LABEL:  test_ds_addf_local$local
+// GFX1030:  ds_add_rtn_f32
+void test_ds_addf_local(__local float *addr, float x){
+  float *rtn;
+  *rtn = __builtin_amdgcn_ds_atomic_fadd_f32(addr, x);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -0,0 +1,107 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN:   %s -S -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
+
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN:   -S -o - %s | FileCheck -check-prefix=GFX90A %s
+
+typedef half __attribute__((ext_vector_type(2))) half2;
+
+// CHECK-LABEL: test_global_add
+// CHECK: call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_global_add$local
+// GFX90A:  global_atomic_add_f64
+void test_global_add(__global double *addr, double x) {
+  double *rtn;
+  *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x);
+}
+
+// CHECK-LABEL: test_global_global_min
+// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_global_global_min$local
+// GFX90A:  global_atomic_min_f64
+void test_global_global_min(__global double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x);
+}
+
+// CHECK-LABEL: test_global_max
+// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_global_max$local
+// GFX90A:  global_atomic_max_f64
+void test_global_max(__global double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x);
+}
+
+// CHECK-LABEL: test_flat_add_local
+// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3f64.f64(double addrspace(3)* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_flat_add_local$local
+// GFX90A:  ds_add_rtn_f64
+void test_flat_add_local(__local double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x);
+}
+
+// CHECK-LABEL: test_flat_global_add
+// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_flat_global_add$local
+// GFX90A:  global_atomic_add_f64
+void test_flat_global_add(__global double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x);
+}
+
+// CHECK-LABEL: test_flat_min_constant
+// CHECK: call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_flat_min_constant$local
+// GFX90A:  flat_atomic_min_f64
+void test_flat_min_constant(__generic double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x);
+}
+
+// CHECK-LABEL: test_flat_global_min
+// CHECK: call double @llvm.amdgcn.flat.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_flat_global_min$local
+// GFX90A:  global_atomic_min_f64
+void test_flat_global_min(__global double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x);
+}
+
+// CHECK-LABEL: test_flat_max_constant
+// CHECK: call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_flat_max_constant$local
+// GFX90A:  flat_atomic_max_f64
+void test_flat_max_constant(__generic double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x);
+}
+
+// CHECK-LABEL: test_flat_global_max
+// CHECK: call double @llvm.amdgcn.flat.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %{{.*}}, double %{{.*}})
+// GFX90A-LABEL:  test_flat_global_max$local
+// GFX90A:  global_atomic_max_f64
+void test_flat_global_max(__global double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x);
+}
+
+// CHECK-LABEL: test_ds_add_local
+// CHECK: call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %{{.*}}, double %{{.*}},
+// GFX90A-LABEL:  test_ds_add_local$local
+// GFX90A:  ds_add_rtn_f64
+void test_ds_add_local(__local double *addr, double x){
+  double *rtn;
+  *rtn = __builtin_amdgcn_ds_atomic_fadd_f64(addr, x);
+}
+
+// CHECK-LABEL: test_ds_addf_local
+// CHECK: call float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* %{{.*}}, float %{{.*}},
+// GFX90A-LABEL:  test_ds_addf_local$local
+// GFX90A:  ds_add_rtn_f32
+void test_ds_addf_local(__local float *addr, float x){
+  float *rtn;
+  *rtn = __builtin_amdgcn_ds_atomic_fadd_f32(addr, x);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-unsupported-gfx908.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-unsupported-gfx908.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-unsupported-gfx908.cl
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
+// RUN:   -verify -S -o - %s
+
+void test_global_add(__global double *addr, double x) {
+  double *rtn;
+  *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f64' needs target feature gfx90a-insts}}
+}