Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1128,10 +1128,11 @@ Type *Ty = Pow->getType(); const double OneThird = (Ty->getTypeID() == Type::FloatTyID) ? (1.0f / 3.0f) : (1.0 / 3.0); - bool isHalf (ExpoF->isExactlyValue(0.5) || ExpoF->isExactlyValue(-0.5)), - isThird (ExpoF->isExactlyValue(OneThird) || - ExpoF->isExactlyValue(-OneThird)); - if (!isHalf && !isThird) + bool isHalf (ExpoF->isExactlyValue(0.5) || ExpoF->isExactlyValue(-0.5)), + isThird (ExpoF->isExactlyValue(OneThird) || + ExpoF->isExactlyValue(-OneThird)), + isQuarter (ExpoF->isExactlyValue(0.25) || ExpoF->isExactlyValue(-0.25)); + if (!isHalf && !isThird && !isQuarter) return nullptr; // Fast-math flags from the pow() are propagated to all replacement ops. @@ -1143,19 +1144,24 @@ Value *BaseV = Pow->getArgOperand(0); Value *Root; - if (isHalf) { - // Expand pow(x, +/-0.5) to sqrt(). + if (isHalf || isQuarter) { + // Expand pow(x, +/-0.5) to sqrt() and pow(x, +/-0.25) to sqrt(sqrt()). if (Pow->hasFnAttr(Attribute::ReadNone)) { // We know that errno is never set, so replace with an intrinsic. - Function *SqrtFn = Intrinsic::getDeclaration(Mod, Intrinsic::sqrt, Ty); - Root = B.CreateCall(SqrtFn, BaseV, "sqrt"); + Function *SqrtF = Intrinsic::getDeclaration(Mod, Intrinsic::sqrt, Ty); + Root = B.CreateCall(SqrtF, BaseV, "sqrt"); + if (isQuarter) + Root = B.CreateCall(SqrtF, Root, "sqrt"); } else if (hasUnaryFloatFn(TLI, Ty, - LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) + LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { // TODO: We also should check that the target can in fact lower the sqrt() // libcall. We currently have no way to ask this question, so we ask if // the target has a sqrt() libcall, which is not exactly the same. Root = emitUnaryFloatFnCall(BaseV, TLI->getName(LibFunc_sqrt), B, Attrs); + if (isQuarter) + Root = emitUnaryFloatFnCall(Root, TLI->getName(LibFunc_sqrt), B, Attrs); + } else return nullptr; Index: llvm/test/Transforms/InstCombine/pow-sqrt.ll =================================================================== --- llvm/test/Transforms/InstCombine/pow-sqrt.ll +++ llvm/test/Transforms/InstCombine/pow-sqrt.ll @@ -1,73 +1,171 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s -define double @pow_intrinsic_half_fast(double %x) { -; CHECK-LABEL: @pow_intrinsic_half_fast( +define float @powf_intrinsic_half_fast(float %x) { +; CHECK-LABEL: @powf_intrinsic_half_fast( +; CHECK-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: ret float [[SQRT]] +; + %pow = call fast float @llvm.pow.f32(float %x, float 5.0e-01) + ret float %pow +} + +define double @pow_intrinsic_quarter_fast(double %x) { +; CHECK-LABEL: @pow_intrinsic_quarter_fast( ; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) -; CHECK-NEXT: ret double [[SQRT]] +; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[SQRT]]) +; CHECK-NEXT: ret double [[SQRT1]] ; - %pow = call fast double @llvm.pow.f64(double %x, double 5.000000e-01) + %pow = call fast double @llvm.pow.f64(double %x, double 2.5e-01) ret double %pow } -define <2 x double> @pow_intrinsic_half(<2 x double> %x) { -; CHECK-LABEL: @pow_intrinsic_half( -; CHECK-NEXT: [[SQRT:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <2 x double> [[X]], +define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) { +; CHECK-LABEL: @pow_intrinsic_half_approx( +; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp afn oeq <2 x double> [[X]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> , <2 x double> [[TMP1]] ; CHECK-NEXT: ret <2 x double> [[TMP3]] ; - %pow = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) + %pow = call afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %pow } -define double @pow_libcall_half_ninf(double %x) { -; CHECK-LABEL: @pow_libcall_half_ninf( +define <2 x float> @powf_intrinsic_quarter_approx(<2 x float> %x) { +; CHECK-LABEL: @powf_intrinsic_quarter_approx( +; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[SQRT1]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp afn oeq <2 x float> [[X]], +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x float> , <2 x float> [[TMP1]] +; CHECK-NEXT: ret <2 x float> [[TMP3]] +; + %pow = call afn <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define float @powf_libcall_half_ninf(float %x) { +; CHECK-LABEL: @powf_libcall_half_ninf( +; CHECK-NEXT: [[SQRTF:%.*]] = call ninf float @sqrtf(float [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call ninf float @llvm.fabs.f32(float [[SQRTF]]) +; CHECK-NEXT: ret float [[TMP1]] +; + %pow = call ninf float @powf(float %x, float 5.0e-01) + ret float %pow +} + +define double @pow_libcall_quarter_ninf(double %x) { +; CHECK-LABEL: @pow_libcall_quarter_ninf( ; CHECK-NEXT: [[SQRT:%.*]] = call ninf double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call ninf double @llvm.fabs.f64(double [[SQRT]]) +; CHECK-NEXT: [[SQRT1:%.*]] = call ninf double @sqrt(double [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call ninf double @llvm.fabs.f64(double [[SQRT1]]) ; CHECK-NEXT: ret double [[TMP1]] ; - %pow = call ninf double @pow(double %x, double 5.0e-01) + %pow = call ninf double @pow(double %x, double 2.5e-01) ret double %pow } define <2 x double> @pow_intrinsic_neghalf_fast(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_fast( ; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) -; CHECK-NEXT: [[RECP:%.*]] = fdiv fast <2 x double> , [[SQRT]] -; CHECK-NEXT: ret <2 x double> [[RECP]] +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <2 x double> , [[SQRT]] +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %pow = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %pow } -define double @pow_intrinsic_neghalf(double %x) { -; CHECK-LABEL: @pow_intrinsic_neghalf( -; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) -; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[DOTOP:%.*]] = fdiv double 1.000000e+00, [[TMP1]] -; CHECK-NEXT: [[RECP:%.*]] = select i1 [[TMP2]], double 0.000000e+00, double [[DOTOP]] -; CHECK-NEXT: ret double [[RECP]] +define <4 x float> @powf_intrinsic_negquarter_fast(<4 x float> %x) { +; CHECK-LABEL: @powf_intrinsic_negquarter_fast( +; CHECK-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x float> , [[SQRT1]] +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %pow = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) + ret <4 x float> %pow +} + +define <2 x float> @powf_intrinsic_neghalf_ninf(<2 x float> %x) { +; CHECK-LABEL: @powf_intrinsic_neghalf_ninf( +; CHECK-NEXT: [[SQRT:%.*]] = call ninf <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call ninf <2 x float> @llvm.fabs.v2f32(<2 x float> [[SQRT]]) +; CHECK-NEXT: [[TMP2:%.*]] = fdiv ninf <2 x float> , [[TMP1]] +; CHECK-NEXT: ret <2 x float> [[TMP2]] ; - %pow = call double @llvm.pow.f64(double %x, double -5.0e-01) + %pow = call ninf <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x double> @pow_intrinsic_negquarter_ninf(<2 x double> %x) { +; CHECK-LABEL: @pow_intrinsic_negquarter_ninf( +; CHECK-NEXT: [[SQRT:%.*]] = call ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = call ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call ninf <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT1]]) +; CHECK-NEXT: [[TMP2:%.*]] = fdiv ninf <2 x double> , [[TMP1]] +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %pow = call ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %pow +} + +define float @powf_libcall_neghalf_approx(float %x) { +; CHECK-LABEL: @powf_libcall_neghalf_approx( +; CHECK-NEXT: [[SQRTF:%.*]] = call afn float @sqrtf(float [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.fabs.f32(float [[SQRTF]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp afn oeq float [[X]], 0xFFF0000000000000 +; CHECK-NEXT: [[DOTOP:%.*]] = fdiv afn float 1.000000e+00, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], float 0.000000e+00, float [[DOTOP]] +; CHECK-NEXT: ret float [[TMP3]] +; + %pow = call afn float @powf(float %x, float -5.0e-01) + ret float %pow +} + +define double @pow_libcall_negquarter_approx(double %x) { +; CHECK-LABEL: @pow_libcall_negquarter_approx( +; CHECK-NEXT: [[SQRT:%.*]] = call afn double @sqrt(double [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = call afn double @sqrt(double [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.fabs.f64(double [[SQRT1]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp afn oeq double [[X]], 0xFFF0000000000000 +; CHECK-NEXT: [[DOTOP:%.*]] = fdiv afn double 1.000000e+00, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], double 0.000000e+00, double [[DOTOP]] +; CHECK-NEXT: ret double [[TMP3]] +; + %pow = call afn double @pow(double %x, double -2.5e-01) ret double %pow } -define float @pow_libcall_neghalf_fast(float %x) { +define double @pow_libcall_neghalf_fast(double %x) { ; CHECK-LABEL: @pow_libcall_neghalf_fast( +; CHECK-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: ret double [[TMP1]] +; + %pow = call fast double @pow(double %x, double -5.0e-01) + ret double %pow +} + +define float @powf_libcall_negquarter_fast(float %x) { +; CHECK-LABEL: @powf_libcall_negquarter_fast( ; CHECK-NEXT: [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: [[RECP:%.*]] = fdiv fast float 1.000000e+00, [[SQRTF]] -; CHECK-NEXT: ret float [[RECP]] +; CHECK-NEXT: [[SQRTF1:%.*]] = call fast float @sqrtf(float [[SQRTF]]) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast float 1.000000e+00, [[SQRTF1]] +; CHECK-NEXT: ret float [[TMP1]] ; - %pow = call fast float @powf(float %x, float -5.0e-01) + %pow = call fast float @powf(float %x, float -2.5e-01) ret float %pow } declare double @llvm.pow.f64(double, double) #0 +declare float @llvm.pow.f32(float, float) #0 declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) #0 +declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) #0 +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) #0 declare double @pow(double, double) declare float @powf(float, float) attributes #0 = { nounwind readnone speculatable } +attributes #1 = { nounwind readnone } +