diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -189,7 +189,7 @@ Value *optimizeCAbs(CallInst *CI, IRBuilderBase &B); Value *optimizePow(CallInst *CI, IRBuilderBase &B); Value *replacePowWithExp(CallInst *Pow, IRBuilderBase &B); - Value *replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B); + std::pair<Value *, bool> replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B); Value *optimizeExp2(CallInst *CI, IRBuilderBase &B); Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B); Value *optimizeLog(CallInst *CI, IRBuilderBase &B); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1620,7 +1620,8 @@ } /// Use square root in place of pow(x, +/-0.5). -Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { +std::pair<Value *, bool> +LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); @@ -1629,16 +1630,23 @@ const APFloat *ExpoF; if (!match(Expo, m_APFloat(ExpoF)) || (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5))) - return nullptr; + return {nullptr, false}; // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step, // so that requires fast-math-flags (afn or reassoc). 
if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc())) - return nullptr; + return {nullptr, true}; - Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI); + const bool CallDoesNotAccessMemory = Pow->doesNotAccessMemory(); + const bool HasNoInfs = Pow->hasNoInfs(); + // Avoid library call to sqrt function family causing errno = EDOM when + // x == -infinity. + if (!(CallDoesNotAccessMemory || HasNoInfs)) + return {nullptr, true}; + + Sqrt = getSqrtCall(Base, Attrs, CallDoesNotAccessMemory, Mod, B, TLI); if (!Sqrt) - return nullptr; + return {nullptr, true}; // Handle signed zero base by expanding to fabs(sqrt(x)). if (!Pow->hasNoSignedZeros()) { @@ -1648,7 +1656,7 @@ // Handle non finite base by expanding to // (x == -infinity ? +infinity : sqrt(x)). - if (!Pow->hasNoInfs()) { + if (!HasNoInfs) { Value *PosInf = ConstantFP::getInfinity(Ty), *NegInf = ConstantFP::getInfinity(Ty, true); Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); @@ -1659,7 +1667,7 @@ if (ExpoF->isNegative()) Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal"); - return Sqrt; + return {Sqrt, true}; } static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M, @@ -1717,12 +1725,16 @@ if (match(Expo, m_SpecificFP(2.0))) return B.CreateFMul(Base, Base, "square"); - if (Value *Sqrt = replacePowWithSqrt(Pow, B)) - return Sqrt; + bool IsSqrtOrSqrtReciprocal; + Value *PowReplacedWithSqrt; + std::tie(PowReplacedWithSqrt, IsSqrtOrSqrtReciprocal) = + replacePowWithSqrt(Pow, B); + if (PowReplacedWithSqrt) + return PowReplacedWithSqrt; // pow(x, n) -> x * x * x * ... const APFloat *ExpoF; - if (AllowApprox && match(Expo, m_APFloat(ExpoF))) { + if (!IsSqrtOrSqrtReciprocal && AllowApprox && match(Expo, m_APFloat(ExpoF))) { // We limit to a max of 7 multiplications, thus the maximum exponent is 32. // If the exponent is an integer+0.5 we generate a call to sqrt and an // additional fmul. 
diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll --- a/llvm/test/Transforms/InstCombine/pow-1.ll +++ b/llvm/test/Transforms/InstCombine/pow-1.ll @@ -19,6 +19,7 @@ ; in the cases below where pow is transformed into another function call. declare float @powf(float, float) nounwind readonly +declare float @llvm.pow.f32(float, float) declare double @pow(double, double) nounwind readonly declare double @llvm.pow.f64(double, double) declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) nounwind readonly @@ -247,43 +248,34 @@ ; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity. -define float @powf_libcall_to_select_sqrt(float %x) { -; CHECK-LABEL: @powf_libcall_to_select_sqrt( -; ANY-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]]) -; ANY-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]]) -; ANY-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000 -; ANY-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], float 0x7FF0000000000000, float [[ABS]] -; ANY-NEXT: ret float [[TMP1]] -; VC32-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 5.000000e-01) +define float @powf_libcall_half_ninf(float %x) { +; CHECK-LABEL: @powf_libcall_half_ninf( +; ANY-NEXT: [[SQRTF:%.*]] = call ninf float @sqrtf(float [[X:%.*]]) +; ANY-NEXT: [[ABS:%.*]] = call ninf float @llvm.fabs.f32(float [[SQRTF]]) +; ANY-NEXT: ret float [[ABS]] +; VC32-NEXT: [[POW:%.*]] = call ninf float @powf(float [[X:%.*]], float 5.000000e-01) ; VC32-NEXT: ret float [[POW]] -; VC51-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 5.000000e-01) +; VC51-NEXT: [[POW:%.*]] = call ninf float @powf(float [[X:%.*]], float 5.000000e-01) ; VC51-NEXT: ret float [[POW]] -; VC64-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]]) -; VC64-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]]) -; VC64-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000 -; VC64-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], float 
0x7FF0000000000000, float [[ABS]] -; VC64-NEXT: ret float [[TMP1]] -; VC83-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]]) -; VC83-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]]) -; VC83-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000 -; VC83-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], float 0x7FF0000000000000, float [[ABS]] -; VC83-NEXT: ret float [[TMP1]] -; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 5.000000e-01) +; VC64-NEXT: [[SQRTF:%.*]] = call ninf float @sqrtf(float [[X:%.*]]) +; VC64-NEXT: [[ABS:%.*]] = call ninf float @llvm.fabs.f32(float [[SQRTF]]) +; VC64-NEXT: ret float [[ABS]] +; VC83-NEXT: [[SQRTF:%.*]] = call ninf float @sqrtf(float [[X:%.*]]) +; VC83-NEXT: [[ABS:%.*]] = call ninf float @llvm.fabs.f32(float [[SQRTF]]) +; VC83-NEXT: ret float [[ABS]] +; NOLIB-NEXT: [[POW:%.*]] = call ninf float @powf(float [[X:%.*]], float 5.000000e-01) ; NOLIB-NEXT: ret float [[POW]] ; - %retval = call float @powf(float %x, float 0.5) + %retval = call ninf float @powf(float %x, float 0.5) ret float %retval } -define double @pow_libcall_to_select_sqrt(double %x) { -; CHECK-LABEL: @pow_libcall_to_select_sqrt( -; LIB-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]]) -; LIB-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) -; LIB-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 -; LIB-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] -; LIB-NEXT: ret double [[TMP1]] -; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double [[X:%.*]], double 5.000000e-01) -; NOLIB-NEXT: ret double [[POW]] +; Check pow(x, 0.5) where x may be -infinity does not call a library sqrt function. 
+ +define double @pow_libcall_half_no_FMF(double %x) { +; CHECK-LABEL: @pow_libcall_half_no_FMF( +; CHECK-NEXT: [[POW:%.*]] = call double @pow(double [[X:%.*]], double 5.000000e-01) +; CHECK-NEXT: ret double [[POW]] ; %retval = call double @pow(double %x, double 0.5) ret double %retval @@ -293,27 +285,17 @@ define float @test_simplify9(float %x) { ; CHECK-LABEL: @test_simplify9( -; ANY-NEXT: ret float 0x7FF0000000000000 -; VC32-NEXT: [[POW:%.*]] = call float @powf(float 0xFFF0000000000000, float 5.000000e-01) -; VC32-NEXT: ret float [[POW]] -; VC51-NEXT: [[POW:%.*]] = call float @powf(float 0xFFF0000000000000, float 5.000000e-01) -; VC51-NEXT: ret float [[POW]] -; VC64-NEXT: ret float 0x7FF0000000000000 -; VC83-NEXT: ret float 0x7FF0000000000000 -; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float 0xFFF0000000000000, float 5.000000e-01) -; NOLIB-NEXT: ret float [[POW]] +; CHECK-NEXT: ret float 0x7FF0000000000000 ; - %retval = call float @powf(float 0xFFF0000000000000, float 0.5) + %retval = call float @llvm.pow.f32(float 0xFFF0000000000000, float 0.5) ret float %retval } define double @test_simplify10(double %x) { ; CHECK-LABEL: @test_simplify10( -; LIB-NEXT: ret double 0x7FF0000000000000 -; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double 0xFFF0000000000000, double 5.000000e-01) -; NOLIB-NEXT: ret double [[POW]] +; CHECK-NEXT: ret double 0x7FF0000000000000 ; - %retval = call double @pow(double 0xFFF0000000000000, double 0.5) + %retval = call double @llvm.pow.f64(double 0xFFF0000000000000, double 0.5) ret double %retval } @@ -482,8 +464,8 @@ ret <2 x double> %r } -define double @test_simplify17(double %x) { -; CHECK-LABEL: @test_simplify17( +define double @pow_intrinsic_half_no_FMF(double %x) { +; CHECK-LABEL: @pow_intrinsic_half_no_FMF( ; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) ; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 diff --git 
a/llvm/test/Transforms/InstCombine/pow-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-sqrt.ll --- a/llvm/test/Transforms/InstCombine/pow-sqrt.ll +++ b/llvm/test/Transforms/InstCombine/pow-sqrt.ll @@ -3,20 +3,19 @@ ; Check the libcall and the intrinsic for each case with differing FMF. -; The transform to sqrt is allowed as long as we deal with -0.0 and -INF. +; The transform to sqrt is not allowed if we risk setting errno due to -INF. define double @pow_libcall_half_no_FMF(double %x) { ; CHECK-LABEL: @pow_libcall_half_no_FMF( -; CHECK-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECK-NEXT: [[POW:%.*]] = call double @pow(double [[X:%.*]], double 5.000000e-01) +; CHECK-NEXT: ret double [[POW]] ; %pow = call double @pow(double %x, double 5.0e-01) ret double %pow } +; The transform to (non-errno setting) sqrt is allowed as long as we deal with -0.0 and -INF. + define double @pow_intrinsic_half_no_FMF(double %x) { ; CHECK-LABEL: @pow_intrinsic_half_no_FMF( ; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) @@ -29,15 +28,12 @@ ret double %pow } -; This makes no difference, but FMF are propagated. +; This makes no difference, but FMF are propagated/retained. 
define double @pow_libcall_half_approx(double %x) { ; CHECK-LABEL: @pow_libcall_half_approx( -; CHECK-NEXT: [[SQRT:%.*]] = call afn double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call afn double @llvm.fabs.f64(double [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select afn i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECK-NEXT: [[POW:%.*]] = call afn double @pow(double [[X:%.*]], double 5.000000e-01) +; CHECK-NEXT: ret double [[POW]] ; %pow = call afn double @pow(double %x, double 5.0e-01) ret double %pow @@ -86,14 +82,12 @@ ret <2 x double> %pow } -; If we can disregard -0.0, no need for fabs. +; If we can disregard -0.0, no need for fabs, but still (because of -INF) cannot use library sqrt. define double @pow_libcall_half_nsz(double %x) { ; CHECK-LABEL: @pow_libcall_half_nsz( -; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select nsz i1 [[ISINF]], double 0x7FF0000000000000, double [[SQRT]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECK-NEXT: [[POW:%.*]] = call nsz double @pow(double [[X:%.*]], double 5.000000e-01) +; CHECK-NEXT: ret double [[POW]] ; %pow = call nsz double @pow(double %x, double 5.0e-01) ret double %pow @@ -162,35 +156,27 @@ ret float %pow } +; If we can disregard INFs, a call to a library sqrt is okay. ; Transform to sqrt+fdiv because 'reassoc' allows an extra rounding step. ; Use 'fabs' to handle -0.0 correctly. -; Use 'select' to handle -INF correctly. 
-define float @pow_libcall_neghalf_reassoc(float %x) { -; CHECK-LABEL: @pow_libcall_neghalf_reassoc( -; CHECK-NEXT: [[SQRTF:%.*]] = call reassoc float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call reassoc float @llvm.fabs.f32(float [[SQRTF]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp reassoc oeq float [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[ABS_OP:%.*]] = fdiv reassoc float 1.000000e+00, [[ABS]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = select i1 [[ISINF]], float 0.000000e+00, float [[ABS_OP]] +define float @pow_libcall_neghalf_reassoc_ninf(float %x) { +; CHECK-LABEL: @pow_libcall_neghalf_reassoc_ninf( +; CHECK-NEXT: [[SQRTF:%.*]] = call reassoc ninf float @sqrtf(float [[X:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = call reassoc ninf float @llvm.fabs.f32(float [[SQRTF]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv reassoc ninf float 1.000000e+00, [[ABS]] ; CHECK-NEXT: ret float [[RECIPROCAL]] ; - %pow = call reassoc float @powf(float %x, float -5.0e-01) + %pow = call reassoc ninf float @powf(float %x, float -5.0e-01) ret float %pow } -; Transform to sqrt+fdiv because 'afn' allows an extra rounding step. -; Use 'fabs' to handle -0.0 correctly. -; Use 'select' to handle -INF correctly. +; If we cannot disregard INFs, a call to a library sqrt is not okay. 
define float @pow_libcall_neghalf_afn(float %x) { ; CHECK-LABEL: @pow_libcall_neghalf_afn( -; CHECK-NEXT: [[SQRTF:%.*]] = call afn float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call afn float @llvm.fabs.f32(float [[SQRTF]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq float [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[ABS_OP:%.*]] = fdiv afn float 1.000000e+00, [[ABS]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = select i1 [[ISINF]], float 0.000000e+00, float [[ABS_OP]] -; CHECK-NEXT: ret float [[RECIPROCAL]] +; CHECK-NEXT: [[POW:%.*]] = call afn float @powf(float [[X:%.*]], float -5.000000e-01) +; CHECK-NEXT: ret float [[POW]] ; %pow = call afn float @powf(float %x, float -5.0e-01) ret float %pow @@ -265,15 +251,12 @@ ret <2 x double> %pow } -; If we can disregard -0.0, no need for fabs. +; If we can disregard -0.0, no need for fabs, but still (because of -INF) cannot use library sqrt. define double @pow_libcall_neghalf_nsz(double %x) { ; CHECK-LABEL: @pow_libcall_neghalf_nsz( -; CHECK-NEXT: [[SQRT:%.*]] = call nsz afn double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz afn oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[SQRT_OP:%.*]] = fdiv nsz afn double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = select i1 [[ISINF]], double 0.000000e+00, double [[SQRT_OP]] -; CHECK-NEXT: ret double [[RECIPROCAL]] +; CHECK-NEXT: [[POW:%.*]] = call nsz afn double @pow(double [[X:%.*]], double -5.000000e-01) +; CHECK-NEXT: ret double [[POW]] ; %pow = call afn nsz double @pow(double %x, double -5.0e-01) ret double %pow diff --git a/llvm/test/Transforms/InstCombine/win-math.ll b/llvm/test/Transforms/InstCombine/win-math.ll --- a/llvm/test/Transforms/InstCombine/win-math.ll +++ b/llvm/test/Transforms/InstCombine/win-math.ll @@ -330,6 +330,6 @@ ; MINGW64-NOT: float @powf ; MINGW64: float @sqrtf ; MINGW64: float @llvm.fabs.f32( - %1 = call float @powf(float %x, float 0.5) + %1 = call ninf float @powf(float %x, float 0.5) ret float %1 }