Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1136,23 +1136,21 @@ // If errno is never set, then use the intrinsic for sqrt(). if (Pow->hasFnAttr(Attribute::ReadNone)) { - Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty); Sqrt = B.CreateCall(SqrtFn, Base); } // Otherwise, use the libcall for sqrt(). - else if (hasUnaryFloatFn(TLI, Ty, - LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { + else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) // TODO: We also should check that the target can in fact lower the sqrt() // libcall. We currently have no way to ask this question, so we ask if // the target has a sqrt() libcall, which is not exactly the same. Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B, Pow->getCalledFunction()->getAttributes()); - } else + else return nullptr; - // If this is pow(x, -0.5), then get the reciprocal. + // If the exponent is negative, then get the reciprocal. if (ExpoF->isNegative()) Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal"); @@ -1169,18 +1167,23 @@ Value *Shrunk = nullptr; bool Ignored; - if (UnsafeFPShrink && - Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) - Shrunk = optimizeUnaryDoubleFP(Pow, B, true); + // Bail out if simplifying libcalls to pow() is disabled. + if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) + return nullptr; - // Propagate math semantics flags from the call to any created instructions. + // Propagate the math semantics from the call to any created instructions. IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(Pow->getFastMathFlags()); + // Shrink pow() to powf() if the arguments are single precision. 
+ if (UnsafeFPShrink && + Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) + Shrunk = optimizeBinaryDoubleFP(Pow, B); + // Evaluate special cases related to the base. // pow(1.0, x) -> 1.0 - if (match(Base, m_SpecificFP(1.0))) + if (match(Base, m_FPOne())) return Base; // pow(2.0, x) -> exp2(x) @@ -1189,13 +1192,12 @@ return B.CreateCall(Exp2, Expo, "exp2"); } - // There's no exp10 intrinsic yet, but, maybe, some day there shall be one. - if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base)) { - // pow(10.0, x) -> exp10(x) + // pow(10.0, x) -> exp10(x) + if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base)) + // There's no exp10() intrinsic yet, but, maybe, some day there shall be one. if (BaseC->isExactlyValue(10.0) && hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs); - } // pow(exp(x), y) -> exp(x * y) // pow(exp2(x), y) -> exp2(x * y) @@ -1209,9 +1211,6 @@ Function *CalleeFn = BaseFn->getCalledFunction(); if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) { - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(Pow->getFastMathFlags()); - Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B, CalleeFn->getAttributes()); @@ -1220,78 +1219,73 @@ // Evaluate special cases related to the exponent.
- if (Value *Sqrt = replacePowWithSqrt(Pow, B)) - return Sqrt; - - ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo); - if (!ExpoC) - return Shrunk; - // pow(x, -1.0) -> 1.0 / x - if (ExpoC->isExactlyValue(-1.0)) + if (match(Expo, m_SpecificFP(-1.0))) return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); // pow(x, 0.0) -> 1.0 - if (ExpoC->getValueAPF().isZero()) - return ConstantFP::get(Ty, 1.0); + if (match(Expo, m_AnyZeroFP())) + return ConstantFP::get(Ty, 1.0); // pow(x, 1.0) -> x - if (ExpoC->isExactlyValue(1.0)) + if (match(Expo, m_FPOne())) return Base; // pow(x, 2.0) -> x * x - if (ExpoC->isExactlyValue(2.0)) + if (match(Expo, m_SpecificFP(2.0))) return B.CreateFMul(Base, Base, "square"); + if (Value *Sqrt = replacePowWithSqrt(Pow, B)) + return Sqrt; + // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). - if (ExpoC->isExactlyValue(0.5) && + ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo); + if (ExpoC && ExpoC->isExactlyValue(0.5) && hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). - // This is faster than calling pow, and still handles negative zero - // and negative infinity correctly. + // This is faster than calling pow(), and still handles -0.0 and + // negative infinity correctly. // TODO: In finite-only mode, this could be just fabs(sqrt(x)). Value *PosInf = ConstantFP::getInfinity(Ty); Value *NegInf = ConstantFP::getInfinity(Ty, true); - // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is - // an intrinsic, to match errno semantics. + // TODO: As above, we should lower to the sqrt intrinsic if the pow is an + // intrinsic, to match errno semantics.
Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B, Attrs); - Function *FabsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty); - Value *FAbs = B.CreateCall(FabsFn, Sqrt, "abs"); - - Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); - Value *Sel = B.CreateSelect(FCmp, PosInf, FAbs); - return Sel; - } - - // pow(x, n) -> x * x * x * .... - if (Pow->isFast()) { - APFloat ExpoA = abs(ExpoC->getValueAPF()); - // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32. - // This transformation applies to integer exponents only. - if (!ExpoA.isInteger() || - ExpoA.compare - (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan) - return nullptr; - - // We will memoize intermediate products of the Addition Chain. - Value *InnerChain[33] = {nullptr}; - InnerChain[1] = Base; - InnerChain[2] = B.CreateFMul(Base, Base, "square"); - - // We cannot readily convert a non-double type (like float) to a double. - // So we first convert ExpoA to something which could be converted to double. - ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); + Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty); + Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs"); + Value *FCmp = B.CreateFCmpOEQ(Base, NegInf); + Sqrt = B.CreateSelect(FCmp, PosInf, FAbs); + return Sqrt; + } - Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); - // For negative exponents simply compute the reciprocal. - if (ExpoC->isNegative()) - FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); - return FMul; + // pow(x, n) -> x * x * x * ... + const APFloat *ExpoF; + if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) { + // We limit to a max of 7 multiplications, thus the maximum exponent is 32. + APFloat LimF(ExpoF->getSemantics(), 33.0), + ExpoA(abs(*ExpoF)); + if (ExpoA.isInteger() && ExpoA.compare(LimF) == APFloat::cmpLessThan) { + // We will memoize intermediate products of the Addition Chain. 
+ Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Base; + InnerChain[2] = B.CreateFMul(Base, Base, "square"); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert it to something which could be converted to double. + ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); + Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); + + // If the exponent is negative, then get the reciprocal. + if (ExpoF->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); + + return FMul; + } } - return nullptr; + return Shrunk; } Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Index: llvm/test/Transforms/InstCombine/double-float-shrink-1.ll =================================================================== --- llvm/test/Transforms/InstCombine/double-float-shrink-1.ll +++ llvm/test/Transforms/InstCombine/double-float-shrink-1.ll @@ -266,6 +266,25 @@ ; CHECK: call fast double @logb(double %conv) } +define float @pow_test1(float %f, float %g) { + %df = fpext float %f to double + %dg = fpext float %g to double + %call = call fast double @pow(double %df, double %dg) + %fr = fptrunc double %call to float + ret float %fr +; CHECK-LABEL: pow_test1 +; CHECK: call fast float @powf(float %f, float %g) +} + +define double @pow_test2(float %f, float %g) { + %df = fpext float %f to double + %dg = fpext float %g to double + %call = call fast double @pow(double %df, double %dg) + ret double %call +; CHECK-LABEL: pow_test2 +; CHECK: call fast float @powf(float %f, float %g) +} + define float @sin_test1(float %f) { %conv = fpext float %f to double %call = call fast double @sin(double %conv) @@ -395,6 +414,7 @@ declare double @llvm.sqrt.f64(double) declare double @sin(double) +declare double @pow(double, double) declare double @log2(double) declare double @log1p(double) declare double @log10(double)