Index: include/clang/Basic/BuiltinsPPC.def
===================================================================
--- include/clang/Basic/BuiltinsPPC.def
+++ include/clang/Basic/BuiltinsPPC.def
@@ -279,6 +279,39 @@
 BUILTIN(__builtin_vsx_xvcmpgtdp, "V2ULLiV2dV2d", "")
 BUILTIN(__builtin_vsx_xvcmpgtsp, "V4UiV4fV4f", "")
 
+BUILTIN(__builtin_vsx_xvrdpim, "V2dV2d", "")
+BUILTIN(__builtin_vsx_xvrspim, "V4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvrdpi, "V2dV2d", "")
+BUILTIN(__builtin_vsx_xvrspi, "V4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvrdpic, "V2dV2d", "")
+BUILTIN(__builtin_vsx_xvrspic, "V4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvrdpiz, "V2dV2d", "")
+BUILTIN(__builtin_vsx_xvrspiz, "V4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvmaddadp, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_vsx_xvmaddasp, "V4fV4fV4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvmsubadp, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_vsx_xvmsubasp, "V4fV4fV4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvmuldp, "V2dV2dV2d", "")
+BUILTIN(__builtin_vsx_xvmulsp, "V4fV4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvnmaddadp, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_vsx_xvnmaddasp, "V4fV4fV4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvnmsubadp, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_vsx_xvnmsubasp, "V4fV4fV4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvrsqrtedp, "V2dV2d", "")
+BUILTIN(__builtin_vsx_xvrsqrtesp, "V4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvsqrtdp, "V2dV2d", "")
+BUILTIN(__builtin_vsx_xvsqrtsp, "V4fV4f", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -6560,14 +6560,85 @@
     llvm::Function *F = CGM.getIntrinsic(ID);
     return Builder.CreateCall(F, Ops, "");
   }
+  // Square root
+  case PPC::BI__builtin_vsx_xvsqrtsp:
+  case PPC::BI__builtin_vsx_xvsqrtdp:
+  {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    ID = Intrinsic::sqrt;
+    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
+    return Builder.CreateCall(F, X);
+  }
+  // Rounding/truncation
   case PPC::BI__builtin_vsx_xvrspip:
   case PPC::BI__builtin_vsx_xvrdpip:
+  case PPC::BI__builtin_vsx_xvrdpim:
+  case PPC::BI__builtin_vsx_xvrspim:
+  case PPC::BI__builtin_vsx_xvrdpi:
+  case PPC::BI__builtin_vsx_xvrspi:
+  case PPC::BI__builtin_vsx_xvrdpic:
+  case PPC::BI__builtin_vsx_xvrspic:
+  case PPC::BI__builtin_vsx_xvrdpiz:
+  case PPC::BI__builtin_vsx_xvrspiz:
+  {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *X = EmitScalarExpr(E->getArg(0));
-    ID = Intrinsic::ceil;
+    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
+        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
+      ID = Intrinsic::floor;
+    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
+             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
+      ID = Intrinsic::round;
+    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
+             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
+      ID = Intrinsic::nearbyint;
+    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
+             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
+      ID = Intrinsic::ceil;
+    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
+             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
+      ID = Intrinsic::trunc;
     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
     return Builder.CreateCall(F, X);
   }
+  // FMA variations
+  case PPC::BI__builtin_vsx_xvmaddadp:
+  case PPC::BI__builtin_vsx_xvmaddasp:
+  case PPC::BI__builtin_vsx_xvnmaddadp:
+  case PPC::BI__builtin_vsx_xvnmaddasp:
+  case PPC::BI__builtin_vsx_xvmsubadp:
+  case PPC::BI__builtin_vsx_xvmsubasp:
+  case PPC::BI__builtin_vsx_xvnmsubadp:
+  case PPC::BI__builtin_vsx_xvnmsubasp:
+  {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    Value *Z = EmitScalarExpr(E->getArg(2));
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+    switch (BuiltinID)
+    {
+    case PPC::BI__builtin_vsx_xvmaddadp:
+    case PPC::BI__builtin_vsx_xvmaddasp:
+      return Builder.CreateCall(F, {X, Y, Z});
+    case PPC::BI__builtin_vsx_xvnmaddadp:
+    case PPC::BI__builtin_vsx_xvnmaddasp:
+      return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
+    case PPC::BI__builtin_vsx_xvmsubadp:
+    case PPC::BI__builtin_vsx_xvmsubasp:
+      return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+    case PPC::BI__builtin_vsx_xvnmsubadp:
+    case PPC::BI__builtin_vsx_xvnmsubasp:
+      Value *FsubRes =
+        Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+      return Builder.CreateFSub(Zero, FsubRes, "sub");
+    }
+    llvm_unreachable("Unknown FMA operation");
+    return nullptr; // Suppress no-return warning
+  }
+  }
 }
 
 // Emit an intrinsic that has 1 float or double.
Index: lib/Headers/altivec.h
===================================================================
--- lib/Headers/altivec.h
+++ lib/Headers/altivec.h
@@ -1857,11 +1857,20 @@
 
 /* vec_floor */
 
-static vector float __attribute__((__always_inline__))
-vec_floor(vector float __a) {
+static vector float __ATTRS_o_ai vec_floor(vector float __a) {
+#ifdef __VSX__
+  return __builtin_vsx_xvrspim(__a);
+#else
   return __builtin_altivec_vrfim(__a);
+#endif
 }
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_floor(vector double __a) {
+  return __builtin_vsx_xvrdpim(__a);
+}
+#endif
+
 /* vec_vrfim */
 
 static vector float __attribute__((__always_inline__))
@@ -2532,10 +2541,21 @@
 
 /* vec_madd */
 
-static vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
 vec_madd(vector float __a, vector float __b, vector float __c) {
+#ifdef __VSX__
+  return __builtin_vsx_xvmaddasp(__a, __b, __c);
+#else
   return __builtin_altivec_vmaddfp(__a, __b, __c);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_madd(vector double __a, vector double __b, vector double __c) {
+  return __builtin_vsx_xvmaddadp(__a, __b, __c);
 }
+#endif
 
 /* vec_vmaddfp */
 
@@ -2559,6 +2579,20 @@
   return __builtin_altivec_vmhaddshs(__a, __b, __c);
 }
 
+/* vec_msub */
+
+#ifdef __VSX__
+static vector float __ATTRS_o_ai
+vec_msub(vector float __a, vector float __b, vector float __c) {
+  return __builtin_vsx_xvmsubasp(__a, __b, __c);
+}
+
+static vector double __ATTRS_o_ai
+vec_msub(vector double __a, vector double __b, vector double __c) {
+  return __builtin_vsx_xvmsubadp(__a, __b, __c);
+}
+#endif
+
 /* vec_max */
 
 static vector signed char __ATTRS_o_ai vec_max(vector signed char __a,
@@ -3627,6 +3661,18 @@
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
+/* vec_mul */
+static vector float __ATTRS_o_ai vec_mul(vector float __a, vector float __b) {
+  return __a * __b;
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_mul(vector double __a, vector double __b) {
+  return __a * __b;
+}
+#endif
+
 /* The vmulos* and vmules* instructions have a big endian bias, so
    we must reverse the meaning of "even" and "odd" for little endian.
 */
@@ -3832,12 +3878,37 @@
 #endif
 }
 
+/* vec_nmadd */
+
+#ifdef __VSX__
+static vector float __ATTRS_o_ai
+vec_nmadd(vector float __a, vector float __b, vector float __c) {
+  return __builtin_vsx_xvnmaddasp(__a, __b, __c);
+}
+
+static vector double __ATTRS_o_ai
+vec_nmadd(vector double __a, vector double __b, vector double __c) {
+  return __builtin_vsx_xvnmaddadp(__a, __b, __c);
+}
+#endif
+
 /* vec_nmsub */
 
-static vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
 vec_nmsub(vector float __a, vector float __b, vector float __c) {
+#ifdef __VSX__
+  return __builtin_vsx_xvnmsubasp(__a, __b, __c);
+#else
   return __builtin_altivec_vnmsubfp(__a, __b, __c);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_nmsub(vector double __a, vector double __b, vector double __c) {
+  return __builtin_vsx_xvnmsubadp(__a, __b, __c);
 }
+#endif
 
 /* vec_vnmsubfp */
 
@@ -3899,6 +3970,15 @@
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_nor(vector double __a, vector double __b) {
+  vector unsigned long long __res =
+    ~((vector unsigned long long)__a | (vector unsigned long long)__b);
+  return (vector double)__res;
+}
+#endif
+
 /* vec_vnor */
 
 static vector signed char __ATTRS_o_ai vec_vnor(vector signed char __a,
@@ -4091,6 +4171,12 @@
 }
 
 #ifdef __VSX__
+static vector double __ATTRS_o_ai vec_or(vector double __a, vector double __b) {
+  vector unsigned long long __res =
+    (vector unsigned long long)__a | (vector unsigned long long)__b;
+  return (vector double)__res;
+}
+
 static vector signed long long __ATTRS_o_ai
 vec_or(vector signed long long __a, vector signed long long __b) {
   return __a | __b;
@@ -5187,6 +5273,30 @@
   return __builtin_altivec_vrfin(__a);
 }
 
+#ifdef __VSX__
+/* vec_rint */
+
+static vector float __ATTRS_o_ai
+vec_rint(vector float __a) {
+  return __builtin_vsx_xvrspic(__a);
+}
+
+static vector double __ATTRS_o_ai
+vec_rint(vector double __a) {
+  return __builtin_vsx_xvrdpic(__a);
+}
+
+/* vec_nearbyint */
+
+static vector float __ATTRS_o_ai vec_nearbyint(vector float __a) {
+  return __builtin_vsx_xvrspi(__a);
+}
+
+static vector double __ATTRS_o_ai vec_nearbyint(vector double __a) {
+  return __builtin_vsx_xvrdpi(__a);
+}
+#endif
+
 /* vec_vrfin */
 
 static vector float __attribute__((__always_inline__))
@@ -5194,16 +5304,38 @@
   return __builtin_altivec_vrfin(__a);
 }
 
+/* vec_sqrt */
+
+#ifdef __VSX__
+static vector float __ATTRS_o_ai vec_sqrt(vector float __a) {
+  return __builtin_vsx_xvsqrtsp(__a);
+}
+
+static vector double __ATTRS_o_ai vec_sqrt(vector double __a) {
+  return __builtin_vsx_xvsqrtdp(__a);
+}
+#endif
+
 /* vec_rsqrte */
 
-static __vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
 vec_rsqrte(vector float __a) {
+#ifdef __VSX__
+  return __builtin_vsx_xvrsqrtesp(__a);
+#else
   return __builtin_altivec_vrsqrtefp(__a);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_rsqrte(vector double __a) {
+  return __builtin_vsx_xvrsqrtedp(__a);
 }
+#endif
 
 /* vec_vrsqrtefp */
 
-static __vector float __attribute__((__always_inline__))
+static vector float __attribute__((__always_inline__))
 vec_vrsqrtefp(vector float __a) {
   return __builtin_altivec_vrsqrtefp(__a);
 }
@@ -5331,6 +5463,22 @@
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b,
+                                          vector bool long long __c) {
+  vector long long __res = ((vector long long)__a & ~(vector long long)__c) |
+                           ((vector long long)__b & (vector long long)__c);
+  return (vector double)__res;
+}
+
+static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b,
+                                          vector unsigned long long __c) {
+  vector long long __res = ((vector long long)__a & ~(vector long long)__c) |
+                           ((vector long long)__b & (vector long long)__c);
+  return (vector double)__res;
+}
+#endif
+
 /* vec_vsel */
 
 static vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a,
@@ -7910,6 +8058,13 @@
   return __a - __b;
 }
 
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_sub(vector double __a, vector double __b) {
+  return __a - __b;
+}
+#endif
+
 /* vec_vsububm */
 
 #define __builtin_altivec_vsububm vec_vsububm
@@ -8401,10 +8556,20 @@
 
 /* vec_trunc */
 
-static vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
 vec_trunc(vector float __a) {
+#ifdef __VSX__
+  return __builtin_vsx_xvrspiz(__a);
+#else
   return __builtin_altivec_vrfiz(__a);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_trunc(vector double __a) {
+  return __builtin_vsx_xvrdpiz(__a);
 }
+#endif
 
 /* vec_vrfiz */
 
@@ -8895,6 +9060,24 @@
                                                    vector bool long long __b) {
   return __a ^ __b;
 }
+
+static vector double __ATTRS_o_ai
+vec_xor(vector double __a, vector double __b) {
+  return (vector double)((vector unsigned long long)__a ^
+                         (vector unsigned long long)__b);
+}
+
+static vector double __ATTRS_o_ai
+vec_xor(vector double __a, vector bool long long __b) {
+  return (vector double)((vector unsigned long long)__a ^
+                         (vector unsigned long long) __b);
+}
+
+static vector double __ATTRS_o_ai
+vec_xor(vector bool long long __a, vector double __b) {
+  return (vector double)((vector unsigned long long)__a ^
+                         (vector unsigned long long)__b);
+}
 #endif
 
 /* vec_vxor */
 
@@ -10848,7 +11031,7 @@
 
 /* vec_all_nan */
 
-static int __attribute__((__always_inline__)) vec_all_nan(vector float __a) {
+static int __ATTRS_o_ai vec_all_nan(vector float __a) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __a);
 }
 
@@ -11022,35 +11205,35 @@
 /* vec_all_nge */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_all_nge(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __a, __b);
 }
 
 /* vec_all_ngt */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_all_ngt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __a, __b);
 }
 
 /* vec_all_nle */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_all_nle(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __b, __a);
 }
 
 /* vec_all_nlt */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_all_nlt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __b, __a);
 }
 
 /* vec_all_numeric */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_all_numeric(vector float __a) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __a);
 }
 
@@ -11865,7 +12048,7 @@
 
 /* vec_any_nan */
 
-static int __attribute__((__always_inline__)) vec_any_nan(vector float __a) {
+static int __ATTRS_o_ai vec_any_nan(vector float __a) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __a);
 }
 
@@ -12039,35 +12222,35 @@
 /* vec_any_nge */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_any_nge(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __a, __b);
 }
 
 /* vec_any_ngt */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_any_ngt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __a, __b);
 }
 
 /* vec_any_nle */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_any_nle(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __b, __a);
 }
 
 /* vec_any_nlt */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_any_nlt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __b, __a);
 }
 
 /* vec_any_numeric */
 
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
 vec_any_numeric(vector float __a) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __a);
 }
Index: test/CodeGen/builtins-ppc-vsx.c
===================================================================
--- test/CodeGen/builtins-ppc-vsx.c
+++ test/CodeGen/builtins-ppc-vsx.c
@@ -33,81 +33,81 @@
   res_vd = vec_and(vbll, vd);
 // CHECK: and <2 x i64>
-// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
 
   res_vd = vec_and(vd, vbll);
 // CHECK: and <2 x i64>
-// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
 
   res_vd = vec_and(vd, vd);
 // CHECK: and <2 x i64>
-// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
 
   dummy();
 // CHECK: call void @dummy()
 
   res_vd = vec_andc(vbll, vd);
-// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
-// CHECK: xor <2 x i64> %{{[0-9]*}},
+// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64>
-// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
 
   dummy();
 // CHECK: call void @dummy()
 
   res_vd = vec_andc(vd, vbll);
-// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
-// CHECK: xor <2 x i64> %{{[0-9]*}},
+// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64>
-// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
 
   dummy();
 // CHECK: call void @dummy()
 
   res_vd = vec_andc(vd, vd);
-// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
-// CHECK: xor <2 x i64> %{{[0-9]*}},
+// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64>
-// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
 
   dummy();
 // CHECK: call void @dummy()
 
   res_vd = vec_ceil(vd);
-// CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}})
+// CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]+}})
 
   res_vf = vec_ceil(vf);
-// CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}})
+// CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]+}})
 
   res_vbll = vec_cmpeq(vd, vd);
-// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
 
   res_vbi = vec_cmpeq(vf, vf);
-// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
 
   res_vbll = vec_cmpge(vd, vd);
-// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
 
   res_vbi = vec_cmpge(vf, vf);
-// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
 
   res_vbll = vec_cmpgt(vd, vd);
-// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
 
   res_vbi = vec_cmpgt(vf, vf);
-// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
 
   res_vbll = vec_cmple(vd, vd);
-// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
 
   res_vbi = vec_cmple(vf, vf);
-// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
 
   res_vbll = vec_cmplt(vd, vd);
-// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
 
   res_vbi = vec_cmplt(vf, vf);
-// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
 
   /* vec_div */
   res_vf = vec_div(vf, vf);
@@ -274,6 +274,56 @@
 // CHECK: xor <2 x i64>
 // CHECK: and <2 x i64>
 
+  res_vf = vec_floor(vf);
+// CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_floor(vd);
+// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}})
+
+  res_vf = vec_madd(vf, vf, vf);
+// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+
+  res_vd = vec_madd(vd, vd, vd);
+// CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+
+  res_vf = vec_msub(vf, vf, vf);
+// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
+
+  res_vd = vec_msub(vd, vd, vd);
+// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
+// CHECK-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
+
+  res_vf = vec_mul(vf, vf);
+// CHECK: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}}
+
+  res_vd = vec_mul(vd, vd);
+// CHECK: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+
+  res_vf = vec_nearbyint(vf);
+// CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_nearbyint(vd);
+// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}})
+
+  res_vf = vec_nmadd(vf, vf, vf);
+// CHECK: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+// CHECK-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]]
+
+  res_vd = vec_nmadd(vd, vd, vd);
+// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+// CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]
+
+  res_vf = vec_nmsub(vf, vf, vf);
+// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
+// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+
+  res_vd = vec_nmsub(vd, vd, vd);
+// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
+// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
+// CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]
+
   /* vec_nor */
   res_vsll = vec_nor(vsll, vsll);
 // CHECK: or <2 x i64>
@@ -287,6 +337,11 @@
 // CHECK: or <2 x i64>
 // CHECK: xor <2 x i64>
 
+  res_vd = vec_nor(vd, vd);
+// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: [[OR:[0-9]+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-NEXT: xor <2 x i64> %or.i.[[OR]],
+
   /* vec_or */
   res_vsll = vec_or(vsll, vsll);
 // CHECK: or <2 x i64>
@@ -309,6 +364,57 @@
   res_vbll = vec_or(vbll, vbll);
 // CHECK: or <2 x i64>
 
+  res_vd = vec_or(vd, vd);
+// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+
+  res_vf = vec_rint(vf);
+// CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_rint(vd);
+// CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}})
+
+  res_vf = vec_rsqrte(vf);
+// CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_rsqrte(vd);
+// CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_sel(vd, vd, vbll);
+// CHECK: xor <2 x i64> %{{[0-9]+}},
+// CHECK: and <2 x i64> %{{[0-9]+}},
+// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: or <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_sel(vd, vd, vull);
+// CHECK: xor <2 x i64> %{{[0-9]+}},
+// CHECK: and <2 x i64> %{{[0-9]+}},
+// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: or <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
+
+  res_vf = vec_sqrt(vf);
+// CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_sqrt(vd);
+// CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}})
+
+  res_vd = vec_sub(vd, vd);
+// CHECK: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+
+  res_vf = vec_trunc(vf);
+// CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_trunc(vd);
+// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}})
+
   /* vec_vor */
   res_vsll = vec_vor(vsll, vsll);
 // CHECK: or <2 x i64>
@@ -353,6 +459,27 @@
   res_vbll = vec_xor(vbll, vbll);
 // CHECK: xor <2 x i64>
 
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_xor(vd, vd);
+// CHECK: [[X1:[0-9]+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: bitcast <2 x i64> %xor.i.[[X1]] to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_xor(vd, vbll);
+// CHECK: [[X1:[0-9]+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: bitcast <2 x i64> %xor.i.[[X1]] to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_xor(vbll, vd);
+// CHECK: [[X1:[0-9]+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: bitcast <2 x i64> %xor.i.[[X1]] to <2 x double>
+
   /* vec_vxor */
   res_vsll = vec_vxor(vsll, vsll);
 // CHECK: xor <2 x i64>