diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1234,10 +1234,13 @@
 static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
   switch (Intrinsic) {
   case Intrinsic::aarch64_sve_fmul:
+  case Intrinsic::aarch64_sve_fmul_u:
     return Instruction::BinaryOps::FMul;
   case Intrinsic::aarch64_sve_fadd:
+  case Intrinsic::aarch64_sve_fadd_u:
     return Instruction::BinaryOps::FAdd;
   case Intrinsic::aarch64_sve_fsub:
+  case Intrinsic::aarch64_sve_fsub_u:
     return Instruction::BinaryOps::FSub;
   default:
     return Instruction::BinaryOpsEnd;
@@ -1675,14 +1678,20 @@
     return instCombineSVEPTest(IC, II);
   case Intrinsic::aarch64_sve_mul:
   case Intrinsic::aarch64_sve_fmul:
+  case Intrinsic::aarch64_sve_fmul_u:
     return instCombineSVEVectorMul(IC, II);
   case Intrinsic::aarch64_sve_fadd:
   case Intrinsic::aarch64_sve_add:
     return instCombineSVEVectorAdd(IC, II);
-  case Intrinsic::aarch64_sve_fadd_u:
-    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
-                                             Intrinsic::aarch64_sve_fmla_u>(
-        IC, II, true);
+  case Intrinsic::aarch64_sve_fadd_u: {
+    auto Replacement = instCombineSVEVectorBinOp(IC, II);
+    if (Replacement == std::nullopt)
+      return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+                                               Intrinsic::aarch64_sve_fmla_u>(
+          IC, II, true);
+
+    return Replacement;
+  }
   case Intrinsic::aarch64_sve_add_u:
     return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                              Intrinsic::aarch64_sve_mla_u>(
@@ -1690,10 +1699,14 @@
   case Intrinsic::aarch64_sve_fsub:
   case Intrinsic::aarch64_sve_sub:
     return instCombineSVEVectorSub(IC, II);
-  case Intrinsic::aarch64_sve_fsub_u:
-    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
-                                             Intrinsic::aarch64_sve_fmls_u>(
-        IC, II, true);
+  case Intrinsic::aarch64_sve_fsub_u: {
+    auto Replacement = instCombineSVEVectorBinOp(IC, II);
+    if (Replacement == std::nullopt)
+      return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+                                               Intrinsic::aarch64_sve_fmls_u>(
+          IC, II, true);
+    return Replacement;
+  }
   case Intrinsic::aarch64_sve_sub_u:
     return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                              Intrinsic::aarch64_sve_mls_u>(
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
@@ -6,7 +6,7 @@
 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
 
-; SVE intrinsics fmul and fadd should be replaced with regular fmul and fadd
+; SVE intrinsics fmul, fmul_u, fadd, fadd_u, fsub and fsub_u should be replaced with regular fmul, fadd and fsub.
 declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fmul_intrinsic_half
@@ -37,6 +37,36 @@
   ret <vscale x 2 x double> %2
 }
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmul_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fmul_u_intrinsic_half
+; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmul_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fmul_u_intrinsic_float
+; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmul_u_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fmul_u_intrinsic_double
+; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fadd_intrinsic_half
@@ -67,6 +97,36 @@
   ret <vscale x 2 x double> %2
 }
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fadd_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fadd_u_intrinsic_half
+; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fadd_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fadd_u_intrinsic_float
+; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fadd_u_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fadd_u_intrinsic_double
+; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fsub_intrinsic_half
@@ -87,7 +147,6 @@
   ret <vscale x 4 x float> %2
 }
 
-
 declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @replace_fsub_intrinsic_double
@@ -117,4 +176,44 @@
   ret <vscale x 2 x double> %2
 }
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fsub_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fsub_u_intrinsic_half
+; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fsub_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fsub_u_intrinsic_float
+; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 2 x double> @no_replace_on_non_ptrue_all_u(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @no_replace_on_non_ptrue_all_u
+; CHECK-NEXT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT: %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+; CHECK-NEXT: ret <vscale x 2 x double> %2
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fsub_u_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fsub_u_intrinsic_no_fast_flag
+; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
 attributes #0 = { "target-features"="+sve" }
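
Illustrative note, not part of the patch: the fold exercised by these tests fires only when the governing predicate is a ptrue with the all pattern (i32 31), and any fast-math flags on the intrinsic call carry over to the resulting IR binop. A minimal before/after sketch mirroring the fsub_u tests above (the value names %p, %r, %a and %b are placeholders):

  ; before instcombine
  %p = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %r = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %b)

  ; after instcombine
  %r = fsub fast <vscale x 2 x double> %a, %b

With a non-all ptrue pattern, such as the i32 5 used in @no_replace_on_non_ptrue_all_u, the intrinsic call is left as is.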