diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -517,6 +517,8 @@
     SDValue visitFSUBForFMACombine(SDNode *N);
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 
+    SDValue visitVPFADDForVPFMACombine(SDNode *N);
+
     SDValue XformToShuffleWithZero(SDNode *N);
     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                     const SDLoc &DL,
@@ -23030,6 +23032,49 @@
   return SDValue();
 }
 
+/// Try to perform VP_FMA combining on a given VP_FADD node.
+SDValue DAGCombiner::visitVPFADDForVPFMACombine(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue Mask = N->getOperand(2);
+  SDValue VL = N->getOperand(3);
+  EVT VT = N->getValueType(0);
+  SDLoc SL(N);
+
+  const TargetOptions &Options = DAG.getTarget().Options;
+
+  bool HasFMA =
+      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
+      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::VP_FMA, VT));
+
+  if (!HasFMA)
+    return SDValue();
+
+  bool AllowFusionGlobally =
+      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+
+  // Is the node a VP_FMUL and contractable either due to global flags or
+  // SDNodeFlags.
+  auto isContractableVPFMUL = [AllowFusionGlobally](SDValue N) {
+    if (N.getOpcode() != ISD::VP_FMUL)
+      return false;
+    return AllowFusionGlobally || N->getFlags().hasAllowContract();
+  };
+
+  // fold (vp_fadd (vp_fmul x, y), z) -> (vp_fma x, y, z)
+  if (isContractableVPFMUL(N0) && N0->hasOneUse())
+    return DAG.getNode(ISD::VP_FMA, SL, VT, N0.getOperand(0), N0.getOperand(1),
+                       N1, Mask, VL);
+
+  // fold (vp_fadd x, (vp_fmul y, z)) -> (vp_fma y, z, x)
+  // Note: Commutes VP_FADD operands.
+  if (isContractableVPFMUL(N1) && N1->hasOneUse())
+    return DAG.getNode(ISD::VP_FMA, SL, VT, N1.getOperand(0), N1.getOperand(1),
+                       N0, Mask, VL);
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitVPOp(SDNode *N) {
   // VP operations in which all vector elements are disabled - either by
   // determining that the mask is all false or that the EVL is 0 - can be
@@ -23042,8 +23087,13 @@
       ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
 
   // This is the only generic VP combine we support for now.
-  if (!AreAllEltsDisabled)
+  if (!AreAllEltsDisabled) {
+    switch (N->getOpcode()) {
+    case ISD::VP_FADD:
+      return visitVPFADDForVPFMACombine(N);
+    }
     return SDValue();
+  }
 
   // Binary operations can be replaced by UNDEF.
   if (ISD::isVPBinaryOp(N->getOpcode()))
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
+declare <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
+
+define <vscale x 1 x double> @test1(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 %vl) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %1 = call fast <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
+  %2 = call fast <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %1, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 %vl)
+  ret <vscale x 1 x double> %2
+}
+
+define <vscale x 1 x double> @test2(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 %vl) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %1 = call fast <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
+  %2 = call fast <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %z, <vscale x 1 x double> %1, <vscale x 1 x i1> %m, i32 %vl)
+  ret <vscale x 1 x double> %2
+}
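
Note on the contractability guard: isContractableVPFMUL lets the fold fire only when the vp.fmul carries the contract (or fast) flag, or when fusion is allowed globally via TargetOptions (llc -fp-contract=fast or -enable-unsafe-fp-math). Both tests above use `call fast`, so as a minimal sketch of the negative case, reusing the declarations above (the function name @no_contract is hypothetical, not part of the patch):

; Hypothetical negative case (not in the patch): the calls carry no fast-math
; flags, so isContractableVPFMUL returns false and the vp.fmul/vp.fadd pair
; should stay unfused unless llc is run with -fp-contract=fast.
define <vscale x 1 x double> @no_contract(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 %vl) {
  %1 = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
  %2 = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %1, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 %vl)
  ret <vscale x 1 x double> %2
}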