diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -231,6 +231,7 @@
 TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst")
 TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts")
 
 //===----------------------------------------------------------------------===//
 // Deep learning builtins.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17213,7 +17213,8 @@
     return Builder.CreateCall(F, {Addr, Val});
   }
   case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: {
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
     Intrinsic::ID IID;
     llvm::Type *ArgTy;
     switch (BuiltinID) {
@@ -17225,6 +17226,11 @@
       ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
       IID = Intrinsic::amdgcn_ds_fadd;
       break;
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
+      ArgTy = llvm::FixedVectorType::get(
+          llvm::Type::getHalfTy(getLLVMContext()), 2);
+      IID = Intrinsic::amdgcn_ds_fadd;
+      break;
     }
     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
     llvm::Value *Val = EmitScalarExpr(E->getArg(1));
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
@@ -15,4 +15,5 @@
   __builtin_amdgcn_global_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2bf16' needs target feature atomic-global-pk-add-bf16-inst}}
   __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}}
   __builtin_amdgcn_ds_atomic_fadd_v2bf16(addrs2l, xs2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2bf16' needs target feature atomic-ds-pk-add-16-insts}}
+  __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}}
 }
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
@@ -4,9 +4,9 @@
 
 typedef half __attribute__((ext_vector_type(2))) half2;
 
-void test_global_add_2f16(__global half2 *addrh2, half2 xh2,
-                          __global float *addrf, float xf,
-                          __global double *addr, double x) {
+void test_global_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
+                      __global float *addrf, float xf,
+                      __global double *addr, double x) {
   half2 *half_rtn;
   float *fp_rtn;
   double *rtn;
@@ -18,4 +18,5 @@
   *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmin_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmax_f64' needs target feature gfx90a-insts}}
+  __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}}
 }
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
@@ -6,7 +6,7 @@
 typedef half  __attribute__((ext_vector_type(2))) half2;
 typedef short __attribute__((ext_vector_type(2))) short2;
 
-void test_atomic_fadd(__global half2 *addrh2, half2 xh2,
+void test_atomic_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
                       __global short2 *addrs2, __local short2 *addrs2l, short2 xs2,
                       __global float *addrf, float xf) {
   __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx940-insts}}
@@ -14,4 +14,5 @@
   __builtin_amdgcn_flat_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2bf16' needs target feature atomic-flat-pk-add-16-insts}}
   __builtin_amdgcn_global_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2bf16' needs target feature atomic-global-pk-add-bf16-inst}}
   __builtin_amdgcn_ds_atomic_fadd_v2bf16(addrs2l, xs2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2bf16' needs target feature atomic-ds-pk-add-16-insts}}
+  __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}}
 }
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
@@ -48,3 +48,19 @@
 short2 test_local_add_2bf16(__local short2 *addr, short2 x) {
   return __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x);
 }
+
+// CHECK-LABEL: test_local_add_2f16
+// CHECK: call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %{{.*}}, <2 x half> %
+// GFX940-LABEL:  test_local_add_2f16
+// GFX940: ds_pk_add_rtn_f16
+half2 test_local_add_2f16(__local half2 *addr, half2 x) {
+  return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
+}
+
+// CHECK-LABEL: test_local_add_2f16_noret
+// CHECK: call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %{{.*}}, <2 x half> %
+// GFX940-LABEL:  test_local_add_2f16_noret
+// GFX940: ds_pk_add_f16
+void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
+  __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
+}