Index: include/clang/Basic/BuiltinsPPC.def =================================================================== --- include/clang/Basic/BuiltinsPPC.def +++ include/clang/Basic/BuiltinsPPC.def @@ -279,6 +279,39 @@ BUILTIN(__builtin_vsx_xvcmpgtdp, "V2ULLiV2dV2d", "") BUILTIN(__builtin_vsx_xvcmpgtsp, "V4UiV4fV4f", "") +BUILTIN(__builtin_vsx_xvrdpim, "V2dV2d", "") +BUILTIN(__builtin_vsx_xvrspim, "V4fV4f", "") + +BUILTIN(__builtin_vsx_xvrdpi, "V2dV2d", "") +BUILTIN(__builtin_vsx_xvrspi, "V4fV4f", "") + +BUILTIN(__builtin_vsx_xvrdpic, "V2dV2d", "") +BUILTIN(__builtin_vsx_xvrspic, "V4fV4f", "") + +BUILTIN(__builtin_vsx_xvrdpiz, "V2dV2d", "") +BUILTIN(__builtin_vsx_xvrspiz, "V4fV4f", "") + +BUILTIN(__builtin_vsx_xvmaddadp, "V2dV2dV2dV2d", "") +BUILTIN(__builtin_vsx_xvmaddasp, "V4fV4fV4fV4f", "") + +BUILTIN(__builtin_vsx_xvmsubadp, "V2dV2dV2dV2d", "") +BUILTIN(__builtin_vsx_xvmsubasp, "V4fV4fV4fV4f", "") + +BUILTIN(__builtin_vsx_xvmuldp, "V2dV2dV2d", "") +BUILTIN(__builtin_vsx_xvmulsp, "V4fV4fV4f", "") + +BUILTIN(__builtin_vsx_xvnmaddadp, "V2dV2dV2dV2d", "") +BUILTIN(__builtin_vsx_xvnmaddasp, "V4fV4fV4fV4f", "") + +BUILTIN(__builtin_vsx_xvnmsubadp, "V2dV2dV2dV2d", "") +BUILTIN(__builtin_vsx_xvnmsubasp, "V4fV4fV4fV4f", "") + +BUILTIN(__builtin_vsx_xvrsqrtedp, "V2dV2d", "") +BUILTIN(__builtin_vsx_xvrsqrtesp, "V4fV4f", "") + +BUILTIN(__builtin_vsx_xvsqrtdp, "V2dV2d", "") +BUILTIN(__builtin_vsx_xvsqrtsp, "V4fV4f", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -6637,14 +6637,83 @@ llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); } + // Square root + case PPC::BI__builtin_vsx_xvsqrtsp: + case PPC::BI__builtin_vsx_xvsqrtdp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + ID = Intrinsic::sqrt; + llvm::Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, X); + } + // Rounding/truncation case PPC::BI__builtin_vsx_xvrspip: case PPC::BI__builtin_vsx_xvrdpip: + case PPC::BI__builtin_vsx_xvrdpim: + case PPC::BI__builtin_vsx_xvrspim: + case PPC::BI__builtin_vsx_xvrdpi: + case PPC::BI__builtin_vsx_xvrspi: + case PPC::BI__builtin_vsx_xvrdpic: + case PPC::BI__builtin_vsx_xvrspic: + case PPC::BI__builtin_vsx_xvrdpiz: + case PPC::BI__builtin_vsx_xvrspiz: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); - ID = Intrinsic::ceil; + if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || + BuiltinID == PPC::BI__builtin_vsx_xvrspim) + ID = Intrinsic::floor; + else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || + BuiltinID == PPC::BI__builtin_vsx_xvrspi) + ID = Intrinsic::round; + else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || + BuiltinID == PPC::BI__builtin_vsx_xvrspic) + ID = Intrinsic::nearbyint; + else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || + BuiltinID == PPC::BI__builtin_vsx_xvrspip) + ID = Intrinsic::ceil; + else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || + BuiltinID == PPC::BI__builtin_vsx_xvrspiz) + ID = Intrinsic::trunc; llvm::Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } + // FMA variations + case PPC::BI__builtin_vsx_xvmaddadp: + case PPC::BI__builtin_vsx_xvmaddasp: + case PPC::BI__builtin_vsx_xvnmaddadp: + case PPC::BI__builtin_vsx_xvnmaddasp: + case PPC::BI__builtin_vsx_xvmsubadp: + case PPC::BI__builtin_vsx_xvmsubasp: + case PPC::BI__builtin_vsx_xvnmsubadp: + case PPC::BI__builtin_vsx_xvnmsubasp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + switch (BuiltinID) { + case PPC::BI__builtin_vsx_xvmaddadp: + case PPC::BI__builtin_vsx_xvmaddasp: + return Builder.CreateCall(F, {X, Y, Z}); + case PPC::BI__builtin_vsx_xvnmaddadp: + case PPC::BI__builtin_vsx_xvnmaddasp: + return Builder.CreateFSub(Zero, + Builder.CreateCall(F, {X, Y, Z}), "sub"); + case PPC::BI__builtin_vsx_xvmsubadp: + case PPC::BI__builtin_vsx_xvmsubasp: + return Builder.CreateCall(F, + {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); + case PPC::BI__builtin_vsx_xvnmsubadp: + case PPC::BI__builtin_vsx_xvnmsubasp: + Value *FsubRes = + Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); + return Builder.CreateFSub(Zero, FsubRes, "sub"); + } + llvm_unreachable("Unknown FMA operation"); + return nullptr; // Suppress no-return warning + } + } } // Emit an intrinsic that has 1 float or double. Index: lib/Headers/altivec.h =================================================================== --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -1857,11 +1857,20 @@ /* vec_floor */ -static vector float __attribute__((__always_inline__)) -vec_floor(vector float __a) { +static vector float __ATTRS_o_ai vec_floor(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvrspim(__a); +#else return __builtin_altivec_vrfim(__a); +#endif } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_floor(vector double __a) { + return __builtin_vsx_xvrdpim(__a); +} +#endif + /* vec_vrfim */ static vector float __attribute__((__always_inline__)) @@ -2532,10 +2541,21 @@ /* vec_madd */ -static vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_madd(vector float __a, vector float __b, vector float __c) { +#ifdef __VSX__ + return __builtin_vsx_xvmaddasp(__a, __b, __c); +#else return __builtin_altivec_vmaddfp(__a, __b, __c); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_madd(vector double __a, vector double __b, vector double __c) { + return __builtin_vsx_xvmaddadp(__a, __b, __c); } +#endif /* vec_vmaddfp */ @@ -2559,6 +2579,20 @@ return __builtin_altivec_vmhaddshs(__a, __b, __c); } +/* vec_msub */ + +#ifdef __VSX__ +static vector float __ATTRS_o_ai +vec_msub(vector float __a, vector float __b, vector float __c) { + return __builtin_vsx_xvmsubasp(__a, __b, __c); +} + +static vector double __ATTRS_o_ai +vec_msub(vector double __a, vector double __b, vector double __c) { + return __builtin_vsx_xvmsubadp(__a, __b, __c); +} +#endif + /* vec_max */ static vector signed char __ATTRS_o_ai vec_max(vector signed char __a, @@ -3627,6 +3661,18 @@ __builtin_altivec_mtvscr((vector int)__a); } +/* vec_mul */ +static vector float __ATTRS_o_ai vec_mul(vector float __a, vector float __b) { + __a * __b; +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_mul(vector double __a, vector double __b) { + __a * __b; +} +#endif + /* The vmulos* and vmules* instructions have a big endian bias, so we must reverse the meaning of "even" and "odd" for little endian. */ @@ -3832,12 +3878,37 @@ #endif } +/* vec_nmadd */ + +#ifdef __VSX__ +static vector float __ATTRS_o_ai +vec_nmadd(vector float __a, vector float __b, vector float __c) { + return __builtin_vsx_xvnmaddasp(__a, __b, __c); +} + +static vector double __ATTRS_o_ai +vec_nmadd(vector double __a, vector double __b, vector double __c) { + return __builtin_vsx_xvnmaddadp(__a, __b, __c); +} +#endif + /* vec_nmsub */ -static vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_nmsub(vector float __a, vector float __b, vector float __c) { +#ifdef __VSX__ + return __builtin_vsx_xvnmsubasp(__a, __b, __c); +#else return __builtin_altivec_vnmsubfp(__a, __b, __c); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_nmsub(vector double __a, vector double __b, vector double __c) { + return __builtin_vsx_xvnmsubadp(__a, __b, __c); } +#endif /* vec_vnmsubfp */ @@ -3899,6 +3970,15 @@ return (vector float)__res; } +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_nor(vector double __a, vector double __b) { + vector unsigned long long __res = + ~((vector unsigned long long)__a | (vector unsigned long long)__b); + return (vector double)__res; +} +#endif + /* vec_vnor */ static vector signed char __ATTRS_o_ai vec_vnor(vector signed char __a, @@ -4091,6 +4171,12 @@ } #ifdef __VSX__ +static vector double __ATTRS_o_ai vec_or(vector double __a, vector double __b) { + vector unsigned long long __res = + (vector unsigned long long)__a | (vector unsigned long long)__b; + return (vector double)__res; +} + static vector signed long long __ATTRS_o_ai vec_or(vector signed long long __a, vector signed long long __b) { return __a | __b; @@ -5187,6 +5273,30 @@ return __builtin_altivec_vrfin(__a); } +#ifdef __VSX__ +/* vec_rint */ + +static vector float __ATTRS_o_ai +vec_rint(vector float __a) { + return __builtin_vsx_xvrspic(__a); +} + +static vector double __ATTRS_o_ai +vec_rint(vector double __a) { + return __builtin_vsx_xvrdpic(__a); +} + +/* vec_nearbyint */ + +static vector float __ATTRS_o_ai vec_nearbyint(vector float __a) { + return __builtin_vsx_xvrspi(__a); +} + +static vector double __ATTRS_o_ai vec_nearbyint(vector double __a) { + return __builtin_vsx_xvrdpi(__a); +} +#endif + /* vec_vrfin */ static vector float __attribute__((__always_inline__)) @@ -5194,13 +5304,35 @@ return __builtin_altivec_vrfin(__a); } +/* vec_sqrt */ + +#ifdef __VSX__ +static vector float __ATTRS_o_ai vec_sqrt(vector float __a) { + return __builtin_vsx_xvsqrtsp(__a); +} + +static vector double __ATTRS_o_ai vec_sqrt(vector double __a) { + return __builtin_vsx_xvsqrtdp(__a); +} +#endif + /* vec_rsqrte */ -static __vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_rsqrte(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvrsqrtesp(__a); +#else return __builtin_altivec_vrsqrtefp(__a); +#endif } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_rsqrte(vector double __a) { + return __builtin_vsx_xvrsqrtedp(__a); +} +#endif + /* vec_vrsqrtefp */ static __vector float __attribute__((__always_inline__)) @@ -5331,6 +5463,22 @@ return (vector float)__res; } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b, + vector bool long long __c) { + vector long long __res = ((vector long long)__a & ~(vector long long)__c) | + ((vector long long)__b & (vector long long)__c); + return (vector double)__res; +} + +static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b, + vector unsigned long long __c) { + vector long long __res = ((vector long long)__a & ~(vector long long)__c) | + ((vector long long)__b & (vector long long)__c); + return (vector double)__res; +} +#endif + /* vec_vsel */ static vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a, @@ -7910,6 +8058,13 @@ return __a - __b; } +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_sub(vector double __a, vector double __b) { + return __a - __b; +} +#endif + /* vec_vsububm */ #define __builtin_altivec_vsububm vec_vsububm @@ -8401,11 +8556,21 @@ /* vec_trunc */ -static vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_trunc(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvrspiz(__a); +#else return __builtin_altivec_vrfiz(__a); +#endif } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_trunc(vector double __a) { + return __builtin_vsx_xvrdpiz(__a); +} +#endif + /* vec_vrfiz */ static vector float __attribute__((__always_inline__)) @@ -8895,6 +9060,24 @@ vector bool long long __b) { return __a ^ __b; } + +static vector double __ATTRS_o_ai +vec_xor(vector double __a, vector double __b) { + return (vector double)((vector unsigned long long)__a ^ + (vector unsigned long long)__b); +} + +static vector double __ATTRS_o_ai +vec_xor(vector double __a, vector bool long long __b) { + return (vector double)((vector unsigned long long)__a ^ + (vector unsigned long long) __b); +} + +static vector double __ATTRS_o_ai +vec_xor(vector bool long long __a, vector double __b) { + return (vector double)((vector unsigned long long)__a ^ + (vector unsigned long long)__b); +} #endif /* vec_vxor */ Index: test/CodeGen/builtins-ppc-vsx.c =================================================================== --- test/CodeGen/builtins-ppc-vsx.c +++ test/CodeGen/builtins-ppc-vsx.c @@ -274,6 +274,56 @@ // CHECK: xor <2 x i64> // CHECK: and <2 x i64> + res_vf = vec_floor(vf); +// CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}}) + + res_vd = vec_floor(vd); +// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}}) + + res_vf = vec_madd(vf, vf, vf); +// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) + + res_vd = vec_madd(vd, vd, vd); +// CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) + + res_vf = vec_msub(vf, vf, vf); +// CHECK: fsub <4 x float> , %{{[0-9]+}} +// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> + + res_vd = vec_msub(vd, vd, vd); +// CHECK: fsub <2 x double> , %{{[0-9]+}} +// CHECK-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> + + res_vf = vec_mul(vf, vf); +// CHECK: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}} + + res_vd = vec_mul(vd, vd); +// CHECK: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + + res_vf = vec_nearbyint(vf); +// CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}}) + + res_vd = vec_nearbyint(vd); +// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}}) + + res_vf = vec_nmadd(vf, vf, vf); +// CHECK: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) +// CHECK-NEXT: fsub <4 x float> , %[[FM]] + + res_vd = vec_nmadd(vd, vd, vd); +// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) +// CHECK-NEXT: fsub <2 x double> , %[[FM]] + + res_vf = vec_nmsub(vf, vf, vf); +// CHECK: fsub <4 x float> , %{{[0-9]+}} +// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> +// CHECK: fsub <4 x float> , %{{[0-9]+}} + + res_vd = vec_nmsub(vd, vd, vd); +// CHECK: fsub <2 x double> , %{{[0-9]+}} +// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> +// CHECK-NEXT: fsub <2 x double> , %[[FM]] + /* vec_nor */ res_vsll = vec_nor(vsll, vsll); // CHECK: or <2 x i64> @@ -287,6 +337,11 @@ // CHECK: or <2 x i64> // CHECK: xor <2 x i64> + res_vd = vec_nor(vd, vd); +// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> +// CHECK: [[OR:[0-9]+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} +// CHECK-NEXT: xor <2 x i64> %or.i.[[OR]], + /* vec_or */ res_vsll = vec_or(vsll, vsll); // CHECK: or <2 x i64> @@ -309,6 +364,57 @@ res_vbll = vec_or(vbll, vbll); // CHECK: or <2 x i64> + res_vd = vec_or(vd, vd); +// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> +// CHECK: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} + + res_vf = vec_rint(vf); +// CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}}) + + res_vd = vec_rint(vd); +// CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}}) + + res_vf = vec_rsqrte(vf); +// CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}}) + + res_vd = vec_rsqrte(vd); +// CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}}) + + dummy(); +// CHECK: call void @dummy() + + res_vf = vec_sel(vd, vd, vbll); +// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: and <2 x i64> %{{[0-9]+}}, +// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} +// CHECK: or <2 x i64> +// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> + + dummy(); +// CHECK: call void @dummy() + + res_vd = vec_sel(vd, vd, vull); +// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: and <2 x i64> %{{[0-9]+}}, +// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} +// CHECK: or <2 x i64> +// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> + + res_vf = vec_sqrt(vf); +// CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}}) + + res_vd = vec_sqrt(vd); +// CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}}) + + res_vd = vec_sub(vd, vd); +// CHECK: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + + res_vf = vec_trunc(vf); +// CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}}) + + res_vd = vec_trunc(vd); +// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}}) + /* vec_vor */ res_vsll = vec_vor(vsll, vsll); // CHECK: or <2 x i64> @@ -353,6 +459,27 @@ res_vbll = vec_xor(vbll, vbll); // CHECK: xor <2 x i64> + dummy(); +// CHECK: call void @dummy() + + res_vd = vec_xor(vd, vd); +// CHECK: [[X1:[0-9]+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} +// CHECK: bitcast <2 x i64> %xor.i.[[X1]] to <2 x double> + + dummy(); +// CHECK: call void @dummy() + + res_vd = vec_xor(vd, vbll); +// CHECK: [[X1:[0-9]+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} +// CHECK: bitcast <2 x i64> %xor.i.[[X1]] to <2 x double> + + dummy(); +// CHECK: call void @dummy() + + res_vd = vec_xor(vbll, vd); +// CHECK: [[X1:[0-9]+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} +// CHECK: bitcast <2 x i64> %xor.i.[[X1]] to <2 x double> + /* vec_vxor */ res_vsll = vec_vxor(vsll, vsll); // CHECK: xor <2 x i64>