diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -100,6 +100,11 @@
 BUILTIN(__builtin_altivec_vmulosh, "V4SiV8SsV8Ss", "")
 BUILTIN(__builtin_altivec_vmulouw, "V2ULLiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vmulosw, "V2SLLiV4SiV4Si", "")
+BUILTIN(__builtin_altivec_vmuleud, "V1ULLLiV2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vmulesd, "V1SLLLiV2SLLiV2SLLi", "")
+BUILTIN(__builtin_altivec_vmuloud, "V1ULLLiV2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vmulosd, "V1SLLLiV2SLLiV2SLLi", "")
+BUILTIN(__builtin_altivec_vmsumcud, "V1ULLLiV2ULLiV2ULLiV1ULLLi", "")
 BUILTIN(__builtin_altivec_vnmsubfp, "V4fV4fV4fV4f", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -5487,6 +5487,16 @@
   return __builtin_altivec_vmsumuhm(__a, __b, __c);
 }
 
+/* vec_msumc */
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_msumc(vector unsigned long long __a, vector unsigned long long __b,
+          vector unsigned __int128 __c) {
+  return __builtin_altivec_vmsumcud(__a, __b, __c);
+}
+#endif
+
 /* vec_vmsummbm */
 
 static __inline__ vector int __attribute__((__always_inline__))
@@ -5713,6 +5723,26 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mule(vector signed long long __a, vector signed long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vmulosd(__a, __b);
+#else
+  return __builtin_altivec_vmulesd(__a, __b);
+#endif
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_mule(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vmuloud(__a, __b);
+#else
+  return __builtin_altivec_vmuleud(__a, __b);
+#endif
+}
+#endif
+
 /* vec_vmulesb */
 
 static __inline__ vector short __attribute__((__always_inline__))
@@ -5839,6 +5869,26 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mulo(vector signed long long __a, vector signed long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vmulesd(__a, __b);
+#else
+  return __builtin_altivec_vmulosd(__a, __b);
+#endif
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_mulo(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vmuleud(__a, __b);
+#else
+  return __builtin_altivec_vmuloud(__a, __b);
+#endif
+}
+#endif
+
 /* vec_vmulosb */
 
 static __inline__ vector short __attribute__((__always_inline__))
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -928,6 +928,44 @@
   return vec_test_lsbb_all_zeros(vuca);
 }
 
+vector unsigned __int128 test_vec_mule_u128(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vmuleud(<2 x i64>
+  // CHECK-BE-NEXT: ret <1 x i128>
+  // CHECK-LE: @llvm.ppc.altivec.vmuloud(<2 x i64>
+  // CHECK-LE-NEXT: ret <1 x i128>
+  return vec_mule(vulla, vullb);
+}
+
+vector signed __int128 test_vec_mule_s128(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vmulesd(<2 x i64>
+  // CHECK-BE-NEXT: ret <1 x i128>
+  // CHECK-LE: @llvm.ppc.altivec.vmulosd(<2 x i64>
+  // CHECK-LE-NEXT: ret <1 x i128>
+  return vec_mule(vslla, vsllb);
+}
+
+vector unsigned __int128 test_vec_mulo_u128(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vmuloud(<2 x i64>
+  // CHECK-BE-NEXT: ret <1 x i128>
+  // CHECK-LE: @llvm.ppc.altivec.vmuleud(<2 x i64>
+  // CHECK-LE-NEXT: ret <1 x i128>
+  return vec_mulo(vulla, vullb);
+}
+
+vector signed __int128 test_vec_mulo_s128(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vmulosd(<2 x i64>
+  // CHECK-BE-NEXT: ret <1 x i128>
+  // CHECK-LE: @llvm.ppc.altivec.vmulesd(<2 x i64>
+  // CHECK-LE-NEXT: ret <1 x i128>
+  return vec_mulo(vslla, vsllb);
+}
+
+vector unsigned __int128 test_vec_msumc_u128(void) {
+  // CHECK: @llvm.ppc.altivec.vmsumcud(<2 x i64>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_msumc(vulla, vullb, vui128a);
+}
+
 vector signed __int128 test_vec_xl_sext_i8(void) {
   // CHECK: load i8
   // CHECK: sext i8
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -193,6 +193,13 @@
                           [llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
                           [IntrNoMem]>;
 
+/// PowerPC_Vec_QDD_Intrinsic - A PowerPC intrinsic that takes two v2i64
+/// vectors and returns one v1i128. These intrinsics have no side effects.
+class PowerPC_Vec_QDD_Intrinsic<string GCCIntSuffix>
+  : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+                          [llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                          [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC VSX Intrinsic Class Definitions.
 //
@@ -673,6 +680,9 @@
   def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                        llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">,
+            Intrinsic<[llvm_v1i128_ty],
+                      [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>;
 
   // Vector Multiply Instructions.
   def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
                       [IntrNoMem]>;
@@ -684,6 +694,7 @@
   def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">,
             Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
                       [IntrNoMem]>;
+  def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">;
   def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
                       [IntrNoMem]>;
@@ -693,6 +704,7 @@
   def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">,
             Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
                       [IntrNoMem]>;
+  def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">;
   def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
                       [IntrNoMem]>;
@@ -703,6 +715,7 @@
   def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">,
             Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
                       [IntrNoMem]>;
+  def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">;
   def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
                       [IntrNoMem]>;
@@ -712,6 +725,7 @@
   def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">,
             Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
                       [IntrNoMem]>;
+  def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">;
 
   // Vector Sum Instructions.
   def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1256,16 +1256,25 @@
   }
   def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulesd $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
+                                                                    v2i64:$vB))]>;
   def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmuleud $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
+                                                                    v2i64:$vB))]>;
   def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulosd $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
+                                                                    v2i64:$vB))]>;
   def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>;
-  def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD),
-                           (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
-                           "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>;
+                         "vmuloud $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA,
+                                                                    v2i64:$vB))]>;
+  def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+                           "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral,
+                           [(set v1i128:$vD, (int_ppc_altivec_vmsumcud
+                                                  v2i64:$vA, v2i64:$vB, v1i128:$vC))]>;
   def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>;
   def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vdivuq $vD, $vA, $vB", IIC_VecGeneral, []>;
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
--- a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -10,6 +10,7 @@
 ; This includes the low order and high order versions of vector multiply.
 ; The low order version operates on doublewords, whereas the high order version
 ; operates on signed and unsigned words and doublewords.
+; This file also includes 128-bit vector multiply instructions.
 
 define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmulld:
@@ -122,3 +123,54 @@
   %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %mulh
 }
+
+declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone
+
+define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuleud:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmuleud v2, v2, v3
+; CHECK-NEXT: blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuloud:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmuloud v2, v2, v3
+; CHECK-NEXT: blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulesd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulesd v2, v2, v3
+; CHECK-NEXT: blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulosd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulosd v2, v2, v3
+; CHECK-NEXT: blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
+; CHECK-LABEL: test_vmsumcud:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmsumcud v2, v2, v3, v4
+; CHECK-NEXT: blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
+  ret <1 x i128> %tmp
+}
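Usage sketch (not part of the patch): the altivec.h overloads added above can be driven from plain C once the compiler targets Power10. The helper names and the build line in the comment are hypothetical; the sketch assumes a Clang carrying this patch, invoked with a Power10 CPU (e.g. -mcpu=pwr10) so that __POWER10_VECTOR__ is defined and the vector __int128 overloads are visible.

/* mul128_example.c - illustrative sketch only, not part of the patch.
 * Assumed build line: clang -O2 -mcpu=pwr10 -c mul128_example.c
 */
#include <altivec.h>

/* vec_mule: 128-bit product of the even-numbered doubleword elements of a and
 * b; the header picks vmuleud or vmuloud so element numbering is consistent on
 * big- and little-endian targets. */
vector unsigned __int128 mule_u64(vector unsigned long long a,
                                  vector unsigned long long b) {
  return vec_mule(a, b);
}

/* vec_mulo: 128-bit product of the odd-numbered doubleword elements. */
vector signed __int128 mulo_s64(vector signed long long a,
                                vector signed long long b) {
  return vec_mulo(a, b);
}

/* vec_msumc: maps to vmsumcud (Vector Multiply-Sum & write Carry-out Unsigned
 * Doubleword); two doubleword vectors plus a 128-bit addend, yielding a
 * 128-bit result. */
vector unsigned __int128 msumc_u64(vector unsigned long long a,
                                   vector unsigned long long b,
                                   vector unsigned __int128 c) {
  return vec_msumc(a, b, c);
}

The even/odd swap under __LITTLE_ENDIAN__ in the header mirrors what the existing byte, halfword, and word overloads of vec_mule/vec_mulo already do, which is why caller code like this never has to test endianness itself.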