Index: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1211,6 +1211,10 @@ Value *Shrunk = nullptr; bool Ignored; + // Bail out if simplifying libcalls to pow() is disabled. + if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) + return nullptr; + // Propagate the math semantics from the call to any created instructions. IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(Pow->getFastMathFlags()); @@ -1252,9 +1256,6 @@ Function *CalleeFn = BaseFn->getCalledFunction(); if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) { - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(Pow->getFastMathFlags()); - Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B, CalleeFn->getAttributes()); @@ -1263,31 +1264,28 @@ // Evaluate special cases related to the exponent. - if (Value *Sqrt = replacePowWithSqrt(Pow, B)) - return Sqrt; - - ConstantFP *ExpoC = dyn_cast(Expo); - if (!ExpoC) - return Shrunk; - // pow(x, -1.0) -> 1.0 / x - if (ExpoC->isExactlyValue(-1.0)) + if (match(Expo, m_SpecificFP(-1.0))) return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); // pow(x, 0.0) -> 1.0 - if (ExpoC->getValueAPF().isZero()) - return ConstantFP::get(Ty, 1.0); + if (match(Expo, m_SpecificFP(0.0))) + return ConstantFP::get(Ty, 1.0); // pow(x, 1.0) -> x - if (ExpoC->isExactlyValue(1.0)) + if (match(Expo, m_FPOne())) return Base; // pow(x, 2.0) -> x * x - if (ExpoC->isExactlyValue(2.0)) + if (match(Expo, m_SpecificFP(2.0))) return B.CreateFMul(Base, Base, "square"); + if (Value *Sqrt = replacePowWithSqrt(Pow, B)) + return Sqrt; + // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). - if (ExpoC->isExactlyValue(0.5) && + ConstantFP *ExpoC = dyn_cast(Expo); + if (ExpoC && ExpoC->isExactlyValue(0.5) && hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow(), and still handles -0.0 and @@ -1307,30 +1305,29 @@ return Sqrt; } - // pow(x, n) -> x * x * x * .... - if (Pow->isFast()) { - APFloat ExpoA = abs(ExpoC->getValueAPF()); - // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32. - // This transformation applies to integer exponents only. - if (!ExpoA.isInteger() || - ExpoA.compare - (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan) - return Shrunk; - - // We will memoize intermediate products of the Addition Chain. - Value *InnerChain[33] = {nullptr}; - InnerChain[1] = Base; - InnerChain[2] = B.CreateFMul(Base, Base, "square"); - - // We cannot readily convert a non-double type (like float) to a double. - // So we first convert it to something which could be converted to double. - ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); - Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); - - // If the exponent is negative, then get the reciprocal. - if (ExpoC->isNegative()) - FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); - return FMul; + // pow(x, n) -> x * x * x * ... + const APFloat *ExpoF; + if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) { + // We limit to a max of 7 multiplications, thus the maximum exponent is 32. + APFloat LimF(ExpoF->getSemantics(), 33.0), + ExpoA(abs(*ExpoF)); + if (ExpoA.isInteger() && ExpoA.compare(LimF) == APFloat::cmpLessThan) { + // We will memoize intermediate products of the Addition Chain. + Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Base; + InnerChain[2] = B.CreateFMul(Base, Base, "square"); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert it to something which could be converted to double. + ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); + Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); + + // If the exponent is negative, then get the reciprocal. + if (ExpoF->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); + + return FMul; + } } return Shrunk; Index: llvm/trunk/test/Transforms/InstCombine/pow-1.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/pow-1.ll +++ llvm/trunk/test/Transforms/InstCombine/pow-1.ll @@ -95,7 +95,7 @@ ; CHECK-LABEL: @test_simplify5v( %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %retval -; CHECK-NEXT: %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> zeroinitializer) +; CHECK-NEXT: ret <2 x float> } define double @test_simplify6(double %x) { @@ -109,7 +109,7 @@ ; CHECK-LABEL: @test_simplify6v( %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %retval -; CHECK-NEXT: %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> zeroinitializer) +; CHECK-NEXT: ret <2 x double> } ; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity. @@ -165,7 +165,7 @@ ; CHECK-LABEL: @test_simplify11v( %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %retval -; CHECK-NEXT: %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) +; CHECK-NEXT: ret <2 x float> %x } define double @test_simplify12(double %x) { @@ -179,7 +179,7 @@ ; CHECK-LABEL: @test_simplify12v( %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %retval -; CHECK-NEXT: %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) +; CHECK-NEXT: ret <2 x double> %x } ; Check pow(x, 2.0) -> x*x. @@ -195,7 +195,7 @@ define <2 x float> @pow2_strictv(<2 x float> %x) { ; CHECK-LABEL: @pow2_strictv( -; CHECK-NEXT: [[POW2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) +; CHECK-NEXT: [[POW2:%.*]] = fmul <2 x float> %x, %x ; CHECK-NEXT: ret <2 x float> [[POW2]] ; %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) @@ -212,7 +212,7 @@ } define <2 x double> @pow2_double_strictv(<2 x double> %x) { ; CHECK-LABEL: @pow2_double_strictv( -; CHECK-NEXT: [[POW2:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) +; CHECK-NEXT: [[POW2:%.*]] = fmul <2 x double> %x, %x ; CHECK-NEXT: ret <2 x double> [[POW2]] ; %r = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) @@ -243,7 +243,7 @@ define <2 x float> @pow_neg1_strictv(<2 x float> %x) { ; CHECK-LABEL: @pow_neg1_strictv( -; CHECK-NEXT: [[POWRECIP:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) +; CHECK-NEXT: [[POWRECIP:%.*]] = fdiv <2 x float> , %x ; CHECK-NEXT: ret <2 x float> [[POWRECIP]] ; %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) @@ -261,7 +261,7 @@ define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) { ; CHECK-LABEL: @pow_neg1_double_fastv( -; CHECK-NEXT: [[POWRECIP:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) +; CHECK-NEXT: [[POWRECIP:%.*]] = fdiv fast <2 x double> , %x ; CHECK-NEXT: ret <2 x double> [[POWRECIP]] ; %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) Index: llvm/trunk/test/Transforms/InstCombine/pow-3.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/pow-3.ll +++ llvm/trunk/test/Transforms/InstCombine/pow-3.ll @@ -48,4 +48,3 @@ %fr = fptrunc double %call to float ret float %fr } - Index: llvm/trunk/test/Transforms/InstCombine/pow-4.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/pow-4.ll +++ llvm/trunk/test/Transforms/InstCombine/pow-4.ll @@ -3,17 +3,8 @@ declare double @llvm.pow.f64(double, double) declare float @llvm.pow.f32(float, float) - -; pow(x, 4.0f) -define float @test_simplify_4f(float %x) { -; CHECK-LABEL: @test_simplify_4f( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] -; CHECK-NEXT: ret float [[TMP2]] -; - %1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00) - ret float %1 -} +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) +declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) ; pow(x, 3.0) define double @test_simplify_3(double %x) { @@ -26,6 +17,17 @@ ret double %1 } +; powf(x, 4.0) +define float @test_simplify_4f(float %x) { +; CHECK-LABEL: @test_simplify_4f( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] +; CHECK-NEXT: ret float [[TMP2]] +; + %1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00) + ret float %1 +} + ; pow(x, 4.0) define double @test_simplify_4(double %x) { ; CHECK-LABEL: @test_simplify_4( @@ -37,48 +39,48 @@ ret double %1 } -; pow(x, 15.0) -define double @test_simplify_15(double %x) { +; powf(x, <15.0, 15.0>) +define <2 x float> @test_simplify_15(<2 x float> %x) { ; CHECK-LABEL: @test_simplify_15( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[X]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast double [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret double [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP4]] +; CHECK-NEXT: ret <2 x float> [[TMP5]] ; - %1 = call fast double @llvm.pow.f64(double %x, double 1.500000e+01) - ret double %1 + %1 = call fast <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) + ret <2 x float> %1 } ; pow(x, -7.0) -define double @test_simplify_neg_7(double %x) { +define <2 x double> @test_simplify_neg_7(<2 x double> %x) { ; CHECK-LABEL: @test_simplify_neg_7( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[X]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]] -; CHECK-NEXT: ret double [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <2 x double> , [[TMP4]] +; CHECK-NEXT: ret <2 x double> [[TMP5]] ; - %1 = call fast double @llvm.pow.f64(double %x, double -7.000000e+00) - ret double %1 + %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %1 } -; pow(x, -19.0) -define double @test_simplify_neg_19(double %x) { +; powf(x, -19.0) +define float @test_simplify_neg_19(float %x) { ; CHECK-LABEL: @test_simplify_neg_19( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast double [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul fast double [[TMP5]], [[X]] -; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast double 1.000000e+00, [[TMP6]] -; CHECK-NEXT: ret double [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP5]], [[X]] +; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] +; CHECK-NEXT: ret float [[TMP7]] ; - %1 = call fast double @llvm.pow.f64(double %x, double -1.900000e+01) - ret double %1 + %1 = call fast float @llvm.pow.f32(float %x, float -1.900000e+01) + ret float %1 } ; pow(x, 11.23) @@ -91,18 +93,18 @@ ret double %1 } -; pow(x, 32.0) -define double @test_simplify_32(double %x) { +; powf(x, 32.0) +define float @test_simplify_32(float %x) { ; CHECK-LABEL: @test_simplify_32( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast double [[TMP4]], [[TMP4]] -; CHECK-NEXT: ret double [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[TMP4]] +; CHECK-NEXT: ret float [[TMP5]] ; - %1 = call fast double @llvm.pow.f64(double %x, double 3.200000e+01) - ret double %1 + %1 = call fast float @llvm.pow.f32(float %x, float 3.200000e+01) + ret float %1 } ; pow(x, 33.0)