diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1406,6 +1406,7 @@ SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineADDESUBE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1360,6 +1360,9 @@ // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR}); + if (Subtarget.isISA3_0() && Subtarget.isPPC64()) { + setTargetDAGCombine({ISD::ADDE, ISD::SUBE}); + } if (Subtarget.hasFPCVT()) setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC}); @@ -15105,6 +15108,9 @@ default: break; case ISD::ADD: return combineADD(N, DCI); + case ISD::ADDE: + case ISD::SUBE: + return combineADDESUBE(N, DCI); case ISD::SHL: return combineSHL(N, DCI); case ISD::SRA: @@ -17134,6 +17140,150 @@ return SDValue(); } +// Look for multiply-add opportunities. The pattern is marked by glued +// ADDC/ADDE, SUBC/SUBE pairs with multiply and multiply high nodes on one side, +// and constant zero or splatted sign bit together with the other node on the +// other side. 
+// +// Pattern 1/2: +// (a MUL b) [ADD|SUB]C (c) carry-out --> (MADDLD a, b, [+|-]c) +// | ==> +// (a MULHS b) [ADD|SUB]E (SRA c, 63) carry-in <--- (MADDHD a, b, [+|-]c) +// +// Pattern 3: +// (a MUL b) ADDC (c) carry-out --> (MADDLD a, b, c) +// | ==> +// (a MULHU b) ADDE (0) carry-in <--- (MADDHDU a, b, c) +// +// Pattern 4: +// (c) SUBC (a MUL b) carry-out --> (MADDLD -a, b, c) +// | ==> +// (SRA c, 63) SUBE (a MULHS b) carry-in <--- (MADDHD -a, b, c) +SDValue PPCTargetLowering::combineADDESUBE(SDNode *N, + DAGCombinerInfo &DCI) const { + if (!Subtarget.isISA3_0() || !Subtarget.isPPC64()) + return SDValue(); + + if (N->getOpcode() != ISD::ADDE && N->getOpcode() != ISD::SUBE) + return SDValue(); + + SDNode *ASEN = N; + SDNode *ASCN = N->getOperand(2).getNode(); + + if (ASEN->getValueType(0) != MVT::i64 || ASCN->getValueType(0) != MVT::i64) + return SDValue(); + + // Check it should be ADDC/ADDE or SUBC/SUBE pair glued together. + if ((ASEN->getOpcode() == ISD::ADDE && ASCN->getOpcode() != ISD::ADDC) || + (ASEN->getOpcode() == ISD::SUBE && ASCN->getOpcode() != ISD::SUBC)) + return SDValue(); + + SDValue ASCOp0 = ASCN->getOperand(0); + SDValue ASCOp1 = ASCN->getOperand(1); + SDValue ASEOp0 = ASEN->getOperand(0); + SDValue ASEOp1 = ASEN->getOperand(1); + + // Check that two operands should be from different nodes. 
+  if (ASCOp0.getNode() == ASCOp1.getNode() ||
+      ASEOp0.getNode() == ASEOp1.getNode())
+    return SDValue();
+
+  SDValue MHOp;
+  SDValue ASHOp;
+  SDValue MLOp;
+  SDValue ASLOp;
+  int MHIdx = -1;
+  int MLIdx = -1;
+  if (ASEOp0->getOpcode() == ISD::MULHS || ASEOp0->getOpcode() == ISD::MULHU) {
+    MHOp = ASEOp0;
+    ASHOp = ASEOp1;
+    MHIdx = 0;
+  } else if (ASEOp1->getOpcode() == ISD::MULHS ||
+             ASEOp1->getOpcode() == ISD::MULHU) {
+    MHOp = ASEOp1;
+    ASHOp = ASEOp0;
+    MHIdx = 1;
+  } else
+    return SDValue();
+
+  if (ASCOp0->getOpcode() == ISD::MUL) {
+    MLOp = ASCOp0;
+    ASLOp = ASCOp1;
+    MLIdx = 0;
+  } else if (ASCOp1->getOpcode() == ISD::MUL) {
+    MLOp = ASCOp1;
+    ASLOp = ASCOp0;
+    MLIdx = 1;
+  } else
+    return SDValue();
+
+  // Node c cannot be (a MULH[S|U] b) or [ADD|SUB]E node.
+  if (ASLOp.getNode() == MHOp.getNode() || ASLOp.getNode() == ASEN)
+    return SDValue();
+
+  // SUBE unsigned is not worth the effort.
+  if (ASEN->getOpcode() == ISD::SUBE && MHOp->getOpcode() == ISD::MULHU)
+    return SDValue();
+
+  // SUBC/SUBE signed must have multiply nodes on the same side.
+  if (ASEN->getOpcode() == ISD::SUBE && MHOp->getOpcode() == ISD::MULHS &&
+      MHIdx != MLIdx)
+    return SDValue();
+
+  // Check (SRA c, 63) for signed.
+  if (MHOp->getOpcode() == ISD::MULHS) {
+    if (ASHOp->getOpcode() != ISD::SRA)
+      return SDValue();
+    if (auto *CN = dyn_cast<ConstantSDNode>(ASHOp.getOperand(1)))
+      if (CN->getZExtValue() != 63)
+        return SDValue();
+    if (ASLOp.getNode() != ASHOp.getOperand(0).getNode())
+      return SDValue();
+  }
+
+  // Check constant zero for unsigned.
+  if (MHOp->getOpcode() == ISD::MULHU) {
+    auto *CN = dyn_cast<ConstantSDNode>(ASHOp);
+    if (!CN || CN->getZExtValue() != 0)
+      return SDValue();
+  }
+
+  // Check that multiply and multiply high nodes have the same pair of operands.
+ if (!(((MHOp.getOperand(0).getNode() == MLOp.getOperand(0).getNode()) && + (MHOp.getOperand(1).getNode() == MLOp.getOperand(1).getNode())) || + ((MHOp.getOperand(0).getNode() == MLOp.getOperand(1).getNode()) && + (MHOp.getOperand(1).getNode() == MLOp.getOperand(0).getNode())))) + return SDValue(); + + SDValue Op0 = MHOp.getOperand(0); + SDValue Op1 = MHOp.getOperand(1); + SDValue Op2 = ASLOp; + + // Move the minus sign for SUBE. + if (ASEN->getOpcode() == ISD::SUBE) { + if (MHIdx == 0) + Op2 = DCI.DAG.getNode( + ISD::SUB, SDLoc(Op2.getNode()), MVT::i64, + DCI.DAG.getConstant(0, SDLoc(Op2.getNode()), MVT::i64), Op2); + else + Op0 = DCI.DAG.getNode( + ISD::SUB, SDLoc(Op0.getNode()), MVT::i64, + DCI.DAG.getConstant(0, SDLoc(Op0.getNode()), MVT::i64), Op0); + } + + SDValue NHNode = + BuildIntrinsicOp(MHOp->getOpcode() == ISD::MULHS ? Intrinsic::ppc_maddhd + : Intrinsic::ppc_maddhdu, + Op0, Op1, Op2, DCI.DAG, SDLoc(ASEN)); + DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(ASEN, 0), NHNode); + + SDValue NLNode = BuildIntrinsicOp(Intrinsic::ppc_maddld, Op0, Op1, Op2, + DCI.DAG, SDLoc(ASCN)); + DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(ASCN, 0), NLNode); + + return SDValue(ASEN, 0); +} + // Detect TRUNCATE operations on bitcasts of float128 values. // What we are looking for here is the situtation where we extract a subset // of bits from a 128 bit float. 
diff --git a/llvm/test/CodeGen/PowerPC/adde-sube-int128-madd.ll b/llvm/test/CodeGen/PowerPC/adde-sube-int128-madd.ll
--- a/llvm/test/CodeGen/PowerPC/adde-sube-int128-madd.ll
+++ b/llvm/test/CodeGen/PowerPC/adde-sube-int128-madd.ll
@@ -4,11 +4,9 @@
 define i128 @adde_int128(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
 ; CHECK-P9-LABEL: adde_int128:
 ; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mulld 6, 4, 3
-; CHECK-P9-NEXT: mulhd 4, 4, 3
-; CHECK-P9-NEXT: sradi 7, 5, 63
-; CHECK-P9-NEXT: addc 3, 6, 5
-; CHECK-P9-NEXT: adde 4, 4, 7
+; CHECK-P9-NEXT: maddld 6, 4, 3, 5
+; CHECK-P9-NEXT: maddhd 4, 4, 3, 5
+; CHECK-P9-NEXT: mr 3, 6
 ; CHECK-P9-NEXT: blr
 entry:
 %conv = sext i64 %a to i128
@@ -22,10 +20,9 @@
 define i128 @adde_unsigned_int128(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
 ; CHECK-P9-LABEL: adde_unsigned_int128:
 ; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mulld 6, 4, 3
-; CHECK-P9-NEXT: mulhdu 4, 4, 3
-; CHECK-P9-NEXT: addc 3, 6, 5
-; CHECK-P9-NEXT: addze 4, 4
+; CHECK-P9-NEXT: maddld 6, 4, 3, 5
+; CHECK-P9-NEXT: maddhdu 4, 4, 3, 5
+; CHECK-P9-NEXT: mr 3, 6
 ; CHECK-P9-NEXT: blr
 entry:
 %conv = zext i64 %a to i128
@@ -39,11 +36,10 @@
 define i128 @sube_int128_AxBmC(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
 ; CHECK-P9-LABEL: sube_int128_AxBmC:
 ; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mulld 6, 4, 3
-; CHECK-P9-NEXT: mulhd 4, 4, 3
-; CHECK-P9-NEXT: sradi 7, 5, 63
-; CHECK-P9-NEXT: subc 3, 6, 5
-; CHECK-P9-NEXT: subfe 4, 7, 4
+; CHECK-P9-NEXT: neg 6, 5
+; CHECK-P9-NEXT: maddld 5, 4, 3, 6
+; CHECK-P9-NEXT: maddhd 4, 4, 3, 6
+; CHECK-P9-NEXT: mr 3, 5
 ; CHECK-P9-NEXT: blr
 entry:
 %conv = sext i64 %a to i128
@@ -57,11 +53,10 @@
 define i128 @sube_int128_CmAxB(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
 ; CHECK-P9-LABEL: sube_int128_CmAxB:
 ; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mulld 7, 4, 3
-; CHECK-P9-NEXT: mulhd 4, 4, 3
-; CHECK-P9-NEXT: sradi 6, 5, 63
-; CHECK-P9-NEXT: subc 3, 5, 7
-; CHECK-P9-NEXT: subfe 4, 4, 6
+; CHECK-P9-NEXT: neg 4, 4
+; CHECK-P9-NEXT: maddld 6, 4, 3, 5
+; CHECK-P9-NEXT: maddhd 4, 4, 3, 5
+; CHECK-P9-NEXT: mr 3, 6
 ; CHECK-P9-NEXT: blr
 entry:
 %conv = sext i64 %c to i128
diff --git a/llvm/test/CodeGen/PowerPC/mulld.ll b/llvm/test/CodeGen/PowerPC/mulld.ll
--- a/llvm/test/CodeGen/PowerPC/mulld.ll
+++ b/llvm/test/CodeGen/PowerPC/mulld.ll
@@ -10,10 +10,9 @@
 define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* nocapture readonly %b) {
 ; CHECK-LABEL: bn_mul_comba8:
 ; CHECK: mulhdu
-; CHECK-NEXT: mulld
-; CHECK: mulhdu
-; CHECK: mulld
-; CHECK-NEXT: mulhdu
+; CHECK: maddhdu
+; CHECK-NEXT: maddld
+; CHECK: maddhdu
 ; CHECK-ITIN-LABEL: bn_mul_comba8: