diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -600,6 +600,12 @@
 BUILTIN(__builtin_vsx_scalar_extract_expq, "ULLiLLd", "")
 BUILTIN(__builtin_vsx_scalar_insert_exp_qp, "LLdLLdULLi", "")
 
+// Fastmath by default builtins
+BUILTIN(__builtin_ppc_rsqrtf, "V4fV4f", "")
+BUILTIN(__builtin_ppc_rsqrtd, "V2dV2d", "")
+BUILTIN(__builtin_ppc_recipdivf, "V4fV4fV4f", "")
+BUILTIN(__builtin_ppc_recipdivd, "V2dV2dV2d", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15113,6 +15113,33 @@
     return Builder.CreateCall(F, X);
   }
 
+  // Fastmath by default
+  case PPC::BI__builtin_ppc_recipdivf:
+  case PPC::BI__builtin_ppc_recipdivd:
+  case PPC::BI__builtin_ppc_rsqrtf:
+  case PPC::BI__builtin_ppc_rsqrtd: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    // Emit the operands before enabling fast-math so that arithmetic inside
+    // the argument expressions keeps the flags the user asked for.
+    Value *X = EmitScalarExpr(E->getArg(0));
+    const bool IsRecipDiv = BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
+                            BuiltinID == PPC::BI__builtin_ppc_recipdivd;
+    Value *Y = IsRecipDiv ? EmitScalarExpr(E->getArg(1)) : nullptr;
+
+    // These builtins are fast-math by definition, but only the instructions
+    // emitted for them may carry the flags. The guard restores the builder's
+    // previous fast-math state when this case is left, so the flags do not
+    // leak into the rest of the function.
+    llvm::IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
+    Builder.getFastMathFlags().setFast();
+
+    if (IsRecipDiv)
+      return Builder.CreateFDiv(X, Y, "recipdiv");
+    auto *One = ConstantFP::get(ResultType, 1.0);
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
+    return Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
+  }
+
   // FMA variations
   case PPC::BI__builtin_vsx_xvmaddadp:
   case PPC::BI__builtin_vsx_xvmaddasp:
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -8359,6 +8359,18 @@
 }
 #endif
 
+/* vec_rsqrt */
+
+static vector float __ATTRS_o_ai vec_rsqrt(vector float __a) {
+  return __builtin_ppc_rsqrtf(__a);
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_rsqrt(vector double __a) {
+  return __builtin_ppc_rsqrtd(__a);
+}
+#endif
+
 /* vec_vrsqrtefp */
 
 static __inline__ __vector float __attribute__((__always_inline__))
@@ -17897,6 +17909,20 @@
   return __builtin_altivec_vminsb(__a, -__a);
 }
 
+/* vec_recipdiv */
+
+static vector float __ATTRS_o_ai vec_recipdiv(vector float __a,
+                                              vector float __b) {
+  return __builtin_ppc_recipdivf(__a, __b);
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_recipdiv(vector double __a,
+                                               vector double __b) {
+  return __builtin_ppc_recipdivd(__a, __b);
+}
+#endif
+
 #ifdef __POWER10_VECTOR__
 
 /* vec_extractm */
diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c
--- a/clang/test/CodeGen/builtins-ppc-altivec.c
+++ b/clang/test/CodeGen/builtins-ppc-altivec.c
@@ -9577,3 +9577,21 @@
 // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 1
 // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}})
 }
+
+vector float test_rsqrtf(vector float a, vector float b) {
+  // CHECK-LABEL: test_rsqrtf
+  // CHECK: call fast <4 x float> @llvm.sqrt.v4f32
+  // CHECK: fdiv fast <4 x float>
+  // CHECK-LE-LABEL: test_rsqrtf
+  // CHECK-LE: call fast <4 x float> @llvm.sqrt.v4f32
+  // CHECK-LE: fdiv fast <4 x float>
+  return vec_rsqrt(a);
+}
+
+vector float test_recipdivf(vector float a, vector float b) {
+  // CHECK-LABEL: test_recipdivf
+  // CHECK: fdiv fast <4 x float>
+  // CHECK-LE-LABEL: test_recipdivf
+  // CHECK-LE: fdiv fast <4 x float>
+  return vec_recipdiv(a, b);
+}
diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c
--- a/clang/test/CodeGen/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/builtins-ppc-vsx.c
@@ -2283,3 +2283,21 @@
 // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> [[RA]], <2 x double> [[RB]])
   __builtin_vsx_xvcpsgndp(a, b);
 }
+
+vector double test_recipdivd(vector double a, vector double b) {
+  // CHECK-LABEL: test_recipdivd
+  // CHECK: fdiv fast <2 x double>
+  // CHECK-LE-LABEL: test_recipdivd
+  // CHECK-LE: fdiv fast <2 x double>
+  return vec_recipdiv(a, b);
+}
+
+vector double test_rsqrtd(vector double a, vector double b) {
+  // CHECK-LABEL: test_rsqrtd
+  // CHECK: call fast <2 x double> @llvm.sqrt.v2f64
+  // CHECK: fdiv fast <2 x double>
+  // CHECK-LE-LABEL: test_rsqrtd
+  // CHECK-LE: call fast <2 x double> @llvm.sqrt.v2f64
+  // CHECK-LE: fdiv fast <2 x double>
+  return vec_rsqrt(a);
+}