Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1204,7 +1204,8 @@ return nullptr; } -/// Use sqrt() for pow(x, +/-0.5) and cbrt() for pow(x, +/-0.333...). +/// Use sqrt() for pow(x, +/-0.5) and pow(x, +/-0.25) and +/// cbrt() for pow(x, +/-0.333...). Value *LibCallSimplifier::replacePowWithRoot(CallInst *Pow, IRBuilder<> &B) { Value *Root, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); @@ -1217,21 +1218,25 @@ const double OneThird = (Ty->getTypeID() == Type::FloatTyID) ? (1.0f / 3.0f) : (1.0 / 3.0); - bool isHalf (ExpoF->isExactlyValue(0.5) || ExpoF->isExactlyValue(-0.5)), - isThird (ExpoF->isExactlyValue(OneThird) || - ExpoF->isExactlyValue(-OneThird)); - if (!isHalf && !isThird) + bool isHalf (ExpoF->isExactlyValue(0.5) || ExpoF->isExactlyValue(-0.5)), + isThird (ExpoF->isExactlyValue(OneThird) || + ExpoF->isExactlyValue(-OneThird)), + isQuarter (ExpoF->isExactlyValue(0.25) || ExpoF->isExactlyValue(-0.25)); + if (!isHalf && !isThird && !isQuarter) return nullptr; - // Expand pow(x, +/-0.5) to sqrt(). - if (isHalf) { - // If errno is never set, then use the intrinsic for sqrt(). + // Expand pow(x, +/-0.5) to sqrt() and pow(x, +/-0.25) to sqrt(sqrt()). + if (isHalf || isQuarter) { if (Pow->hasFnAttr(Attribute::ReadNone)) { Function *SqrtFn = Intrinsic::getDeclaration(Mod, Intrinsic::sqrt, Ty); Root = B.CreateCall(SqrtFn, Base, "sqrt"); + if (isQuarter) + Root = B.CreateCall(SqrtFn, Root, "sqrt"); } - // Otherwise, use the libcall for sqrt(). - else if (hasUnaryFloatFn(TLI, Ty, + // Otherwise, use the libcall for sqrt(), but only when the exponent is 0.5, + // since using the libcall back to back doesn't seem to be a good idea. 
+ else if (!isQuarter && + hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) // TODO: We also should check that the target can in fact lower the sqrt() // libcall. We currently have no way to ask this question, so we ask if Index: llvm/test/Transforms/InstCombine/pow-sqrt.ll =================================================================== --- llvm/test/Transforms/InstCombine/pow-sqrt.ll +++ llvm/test/Transforms/InstCombine/pow-sqrt.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define float @powf_intrinsic_half_fast(float %x) { @@ -10,6 +9,16 @@ ret float %pow } +define double @pow_intrinsic_quarter_fast(double %x) { +; CHECK-LABEL: @pow_intrinsic_quarter_fast( +; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double %x) +; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[SQRT]]) +; CHECK-NEXT: ret double [[SQRT1]] +; + %pow = call fast double @llvm.pow.f64(double %x, double 2.5e-01) + ret double %pow +} + define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_half_approx( ; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) @@ -22,6 +31,19 @@ ret <2 x double> %pow } +define <2 x float> @powf_intrinsic_quarter_approx(<2 x float> %x) { +; CHECK-LABEL: @powf_intrinsic_quarter_approx( +; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) +; CHECK-NEXT: [[SQRT1:%.*]] = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[SQRT1]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp afn oeq <2 x float> %x, <float 0xFFF0000000000000, float 0xFFF0000000000000> +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>, <2 x float> [[TMP1]] +; CHECK-NEXT: ret <2 x float> [[TMP3]] +; + %pow = call afn <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 2.5e-01, float 2.5e-01>) + ret <2 x float> %pow +} + define float 
@powf_libcall_half_ninf(float %x) { ; CHECK-LABEL: @powf_libcall_half_ninf( ; CHECK-NEXT: [[SQRTF:%.*]] = call ninf float @sqrtf(float %x) @@ -32,16 +54,36 @@ ret float %pow } +define double @pow_libcall_quarter_ninf(double %x) { +; CHECK-LABEL: @pow_libcall_quarter_ninf( +; CHECK-NEXT: [[POW:%.*]] = call ninf double @pow(double %x, double 2.500000e-01) +; CHECK-NEXT: ret double [[POW]] +; + %pow = call ninf double @pow(double %x, double 2.5e-01) + ret double %pow +} + define <2 x double> @pow_intrinsic_neghalf_fast(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_fast( -; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) -; CHECK-NEXT: [[RECP:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[SQRT]] -; CHECK-NEXT: ret <2 x double> [[RECP]] +; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[SQRT]] +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %pow = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>) ret <2 x double> %pow } +define <4 x float> @powf_intrinsic_negquarter_fast(<4 x float> %x) { +; CHECK-LABEL: @powf_intrinsic_negquarter_fast( +; CHECK-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) +; CHECK-NEXT: [[SQRT1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[SQRT1]] +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %pow = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float -2.5e-01, float -2.5e-01, float -2.5e-01, float -2.5e-01>) + ret <4 x float> %pow +} + define <2 x float> @powf_intrinsic_neghalf_ninf(<2 x float> %x) { ; CHECK-LABEL: @powf_intrinsic_neghalf_ninf( ; CHECK-NEXT: [[SQRT:%.*]] = call ninf <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) @@ -53,6 +95,18 @@ ret <2 x float> %pow } +define <2 x double> @pow_intrinsic_negquarter_ninf(<2 x double> %x) { +; CHECK-LABEL: @pow_intrinsic_negquarter_ninf( +; CHECK-NEXT: [[SQRT:%.*]] = call ninf <2 x double> 
@llvm.sqrt.v2f64(<2 x double> %x) +; CHECK-NEXT: [[SQRT1:%.*]] = call ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call ninf <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT1]]) +; CHECK-NEXT: [[TMP2:%.*]] = fdiv ninf <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP1]] +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %pow = call ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -2.5e-01, double -2.5e-01>) + ret <2 x double> %pow +} + define float @powf_libcall_neghalf_approx(float %x) { ; CHECK-LABEL: @powf_libcall_neghalf_approx( ; CHECK-NEXT: [[SQRTF:%.*]] = call afn float @sqrtf(float %x) @@ -66,6 +120,15 @@ ret float %pow } +define double @pow_libcall_negquarter_approx(double %x) { +; CHECK-LABEL: @pow_libcall_negquarter_approx( +; CHECK-NEXT: [[POW:%.*]] = call afn double @pow(double %x, double -2.500000e-01) +; CHECK-NEXT: ret double [[POW]] +; + %pow = call afn double @pow(double %x, double -2.5e-01) + ret double %pow +} + define double @pow_libcall_neghalf_fast(double %x) { ; CHECK-LABEL: @pow_libcall_neghalf_fast( ; CHECK-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double %x) @@ -76,6 +139,15 @@ ret double %pow } +define float @powf_libcall_negquarter_fast(float %x) { +; CHECK-LABEL: @powf_libcall_negquarter_fast( +; CHECK-NEXT: [[POW:%.*]] = call fast float @powf(float %x, float -2.500000e-01) +; CHECK-NEXT: ret float [[POW]] +; + %pow = call fast float @powf(float %x, float -2.5e-01) + ret float %pow +} + declare double @llvm.pow.f64(double, double) #0 declare float @llvm.pow.f32(float, float) #0 declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) #0 @@ -85,3 +157,4 @@ declare float @powf(float, float) attributes #0 = { nounwind readnone speculatable } +attributes #1 = { nounwind readnone }