diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1322,12 +1322,12 @@ APFloat BaseR = APFloat(1.0); BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); BaseR = BaseR / *BaseF; - bool IsInteger = BaseF->isInteger(), - IsReciprocal = BaseR.isInteger(); + bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger(); const APFloat *NF = IsReciprocal ? &BaseR : BaseF; APSInt NI(64, false); if ((IsInteger || IsReciprocal) && - !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) && + NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK && NI > 1 && NI.isPowerOf2()) { double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0); Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul"); @@ -1410,12 +1410,22 @@ return Sqrt; } +static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M, + IRBuilder<> &B) { + Value *Args[] = {Base, Expo}; + Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType()); + return B.CreateCall(F, Args); +} + Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { - Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Value *Base = Pow->getArgOperand(0); + Value *Expo = Pow->getArgOperand(1); Function *Callee = Pow->getCalledFunction(); StringRef Name = Callee->getName(); Type *Ty = Pow->getType(); + Module *M = Pow->getModule(); Value *Shrunk = nullptr; + bool AllowApprox = Pow->hasApproxFunc(); bool Ignored; // Bail out if simplifying libcalls to pow() is disabled. @@ -1428,8 +1438,8 @@ // Shrink pow() to powf() if the arguments are single precision, // unless the result is expected to be double precision. - if (UnsafeFPShrink && - Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) + if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && + hasFloatVersion(Name)) Shrunk = optimizeBinaryDoubleFP(Pow, B, true); // Evaluate special cases related to the base. @@ -1438,6 +1448,21 @@ if (match(Base, m_FPOne())) return Base; + // powf(x, sitofp(e)) -> powi(x, e) + // powf(x, uitofp(e)) -> powi(x, e) + if (AllowApprox && (isa(Expo) || isa(Expo))) { + Value *IntExpo = cast(Expo)->getOperand(0); + Value *NewExpo = nullptr; + unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits(); + if (isa(Expo) && BitWidth == 32) + NewExpo = IntExpo; + else if (BitWidth < 32) + NewExpo = isa(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty()) + : B.CreateZExt(IntExpo, B.getInt32Ty()); + if (NewExpo) + return createPowWithIntegerExponent(Base, NewExpo, M, B); + } + if (Value *Exp = replacePowWithExp(Pow, B)) return Exp; @@ -1449,7 +1474,7 @@ // pow(x, 0.0) -> 1.0 if (match(Expo, m_SpecificFP(0.0))) - return ConstantFP::get(Ty, 1.0); + return ConstantFP::get(Ty, 1.0); // pow(x, 1.0) -> x if (match(Expo, m_FPOne())) @@ -1462,9 +1487,12 @@ if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; + if (!AllowApprox) + return Shrunk; + // pow(x, n) -> x * x * x * ... const APFloat *ExpoF; - if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) { + if (match(Expo, m_APFloat(ExpoF))) { // We limit to a max of 7 multiplications, thus the maximum exponent is 32. // If the exponent is an integer+0.5 we generate a call to sqrt and an // additional fmul. @@ -1488,9 +1516,8 @@ if (!Expo2.isInteger()) return nullptr; - Sqrt = - getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI); + Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), + Pow->doesNotAccessMemory(), M, B, TLI); } // We will memoize intermediate products of the Addition Chain. @@ -1513,6 +1540,14 @@ return FMul; } + + APSInt IntExpo(32, /*isUnsigned=*/false); + // powf(x, C) -> powi(x, C) iff C is a constant signed integer value + if (ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK) { + return createPowWithIntegerExponent( + Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B); + } } return Shrunk; @@ -3101,4 +3136,4 @@ FortifiedLibCallSimplifier::FortifiedLibCallSimplifier( const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize) - : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} + : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} \ No newline at end of file diff --git a/llvm/test/Transforms/InstCombine/pow-4.ll b/llvm/test/Transforms/InstCombine/pow-4.ll --- a/llvm/test/Transforms/InstCombine/pow-4.ll +++ b/llvm/test/Transforms/InstCombine/pow-4.ll @@ -11,9 +11,9 @@ ; pow(x, 3.0) define double @test_simplify_3(double %x) { ; CHECK-LABEL: @test_simplify_3( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[X]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[X]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00) ret double %1 @@ -22,9 +22,9 @@ ; powf(x, 4.0) define float @test_simplify_4f(float %x) { ; CHECK-LABEL: @test_simplify_4f( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]] +; CHECK-NEXT: ret float [[TMP1]] ; %1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00) ret float %1 @@ -33,9 +33,9 @@ ; pow(x, 4.0) define double @test_simplify_4(double %x) { ; CHECK-LABEL: @test_simplify_4( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00) ret double %1 @@ -44,12 +44,12 @@ ; powf(x, <15.0, 15.0>) define <2 x float> @test_simplify_15(<2 x float> %x) { ; CHECK-LABEL: @test_simplify_15( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[X]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[SQUARE]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret <2 x float> [[TMP5]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]] +; CHECK-NEXT: ret <2 x float> [[TMP4]] ; %1 = call fast <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %1 @@ -58,12 +58,12 @@ ; pow(x, -7.0) define <2 x double> @test_simplify_neg_7(<2 x double> %x) { ; CHECK-LABEL: @test_simplify_neg_7( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], [[X]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <2 x double> , [[TMP4]] -; CHECK-NEXT: ret <2 x double> [[TMP5]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> , [[TMP3]] +; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %1 @@ -72,14 +72,14 @@ ; powf(x, -19.0) define float @test_simplify_neg_19(float %x) { ; CHECK-LABEL: @test_simplify_neg_19( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP5]], [[X]] -; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[SQUARE]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[X]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP5]] +; CHECK-NEXT: ret float [[RECIPROCAL]] ; %1 = call fast float @llvm.pow.f32(float %x, float -1.900000e+01) ret float %1 @@ -98,12 +98,12 @@ ; powf(x, 32.0) define float @test_simplify_32(float %x) { ; CHECK-LABEL: @test_simplify_32( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[TMP4]] -; CHECK-NEXT: ret float [[TMP5]] +; CHECK-NEXT: ret float [[TMP4]] ; %1 = call fast float @llvm.pow.f32(float %x, float 3.200000e+01) ret float %1 @@ -112,7 +112,7 @@ ; pow(x, 33.0) define double @test_simplify_33(double %x) { ; CHECK-LABEL: @test_simplify_33( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 3.300000e+01) +; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[X:%.*]], i32 33) ; CHECK-NEXT: ret double [[TMP1]] ; %1 = call fast double @llvm.pow.f64(double %x, double 3.300000e+01) @@ -122,8 +122,8 @@ ; pow(x, 16.5) with double define double @test_simplify_16_5(double %x) { ; CHECK-LABEL: @test_simplify_16_5( -; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X]]) -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] +; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] @@ -137,8 +137,8 @@ ; pow(x, -16.5) with double define double @test_simplify_neg_16_5(double %x) { ; CHECK-LABEL: @test_simplify_neg_16_5( -; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X]]) -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] +; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] @@ -214,10 +214,10 @@ define <4 x float> @test_simplify_3_5(<4 x float> %x) { ; CHECK-LABEL: @test_simplify_3_5( ; CHECK-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[X]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[X]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[SQRT]] -; CHECK-NEXT: ret <4 x float> [[TMP3]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <4 x float> [[X]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[SQUARE]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[SQRT]] +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %1 diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll --- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll @@ -5,74 +5,294 @@ define double @pow_sitofp_const_base_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast( -; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) -; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32(float 7.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float - %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) - %res = fpext float %powi to double + %pow = tail call afn float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double ret double %res } -define double @pow_sitofp_const_base_power_of_2_fast(i32 %x) { -; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_fast( -; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) -; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double +define double @pow_uitofp_const_base_fast(i31 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_fast( +; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i31 %x to float + %pow = tail call afn float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_sitofp_double_const_base_fast(i32 %x) { +; CHECK-LABEL: @pow_sitofp_double_const_base_fast( +; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64(double 7.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: ret double [[TMP1]] +; + %subfp = sitofp i32 %x to double + %pow = tail call afn double @llvm.pow.f64(double 7.000000e+00, double %subfp) + ret double %pow +} + +define double @pow_uitofp_double_const_base_fast(i31 %x) { +; CHECK-LABEL: @pow_uitofp_double_const_base_fast( +; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64(double 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: ret double [[TMP2]] +; + %subfp = uitofp i31 %x to double + %pow = tail call afn double @llvm.pow.f64(double 7.000000e+00, double %subfp) + ret double %pow +} + +define double @pow_sitofp_double_const_base_power_of_2_fast(i32 %x) { +; CHECK-LABEL: @pow_sitofp_double_const_base_power_of_2_fast( +; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32(float 1.600000e+01, i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float - %powi = tail call fast float @llvm.pow.f32(float 16.000000e+00, float %subfp) - %res = fpext float %powi to double + %pow = tail call afn float @llvm.pow.f32(float 16.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_const_base_power_of_2_fast(i31 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast( +; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32(float 1.600000e+01, i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i31 %x to float + %pow = tail call afn float @llvm.pow.f32(float 16.000000e+00, float %subfp) + %res = fpext float %pow to double ret double %res } define double @pow_sitofp_float_base_fast(float %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_float_base_fast( -; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]]) -; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32(float [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float - %powi = tail call fast float @llvm.pow.f32(float %base, float %subfp) - %res = fpext float %powi to double + %pow = tail call afn float @llvm.pow.f32(float %base, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_float_base_fast(float %base, i31 %x) { +; CHECK-LABEL: @pow_uitofp_float_base_fast( +; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32(float [[BASE:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i31 %x to float + %pow = tail call afn float @llvm.pow.f32(float %base, float %subfp) + %res = fpext float %pow to double ret double %res } define double @pow_sitofp_double_base_fast(double %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_base_fast( -; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to double -; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]]) -; CHECK-NEXT: ret double [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64(double [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i32 %x to double - %res = tail call fast double @llvm.pow.f64(double %base, double %subfp) + %res = tail call afn double @llvm.pow.f64(double %base, double %subfp) + ret double %res +} + +define double @pow_uitofp_double_base_fast(double %base, i31 %x) { +; CHECK-LABEL: @pow_uitofp_double_base_fast( +; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64(double [[BASE:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: ret double [[TMP2]] +; + %subfp = uitofp i31 %x to double + %res = tail call afn double @llvm.pow.f64(double %base, double %subfp) + ret double %res +} + +define double @pow_sitofp_const_base_fast_i8(i8 %x) { +; CHECK-LABEL: @pow_sitofp_const_base_fast_i8( +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = sitofp i8 %x to float + %pow = tail call afn float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_sitofp_const_base_fast_i16(i16 %x) { +; CHECK-LABEL: @pow_sitofp_const_base_fast_i16( +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = sitofp i16 %x to float + %pow = tail call afn float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + + +define double @pow_uitofp_const_base_fast_i8(i8 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_fast_i8( +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i8 %x to float + %pow = tail call afn float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_const_base_fast_i16(i16 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_fast_i16( +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i16 %x to float + %pow = tail call afn float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double ret double %res } define double @powf_exp_const_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_fast( -; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01) -; CHECK-NEXT: ret double [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 40) +; CHECK-NEXT: ret double [[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01) ret double %res } +define double @powf_exp_const2_int_fast(double %base) { +; CHECK-LABEL: @powf_exp_const2_int_fast( +; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 -40) +; CHECK-NEXT: ret double [[TMP1]] +; + %res = tail call fast double @llvm.pow.f64(double %base, double -4.000000e+01) + ret double %res +} + +; Negative tests + +define double @pow_uitofp_const_base_fast_i32(i32 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_fast_i32( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POW]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %pow = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_const_base_power_of_2_fast_i32(i32 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast_i32( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00 +; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %pow = tail call fast float @llvm.pow.f32(float 16.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_float_base_fast_i32(float %base, i32 %x) { +; CHECK-LABEL: @pow_uitofp_float_base_fast_i32( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call fast float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POW]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %pow = tail call fast float @llvm.pow.f32(float %base, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_double_base_fast_i32(double %base, i32 %x) { +; CHECK-LABEL: @pow_uitofp_double_base_fast_i32( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to double +; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]]) +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to double + %res = tail call fast double @llvm.pow.f64(double %base, double %subfp) + ret double %res +} + +define double @pow_sitofp_const_base_fast_i64(i64 %x) { +; CHECK-LABEL: @pow_sitofp_const_base_fast_i64( +; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i64 [[X:%.*]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POW]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = sitofp i64 %x to float + %pow = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_const_base_fast_i64(i64 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_fast_i64( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i64 [[X:%.*]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POW]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i64 %x to float + %pow = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + define double @pow_sitofp_const_base_no_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[POWI:%.*]] = tail call float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) -; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: [[POW:%.*]] = tail call float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POW]] to double ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float - %powi = tail call float @llvm.pow.f32(float 7.000000e+00, float %subfp) - %res = fpext float %powi to double + %pow = tail call float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_const_base_no_fast(i32 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_no_fast( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POW]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %pow = tail call float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %pow to double ret double %res } @@ -85,33 +305,71 @@ ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float - %powi = tail call float @llvm.pow.f32(float 16.000000e+00, float %subfp) - %res = fpext float %powi to double + %pow = tail call float @llvm.pow.f32(float 16.000000e+00, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_const_base_power_of_2_no_fast(i32 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 +; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %pow = tail call float @llvm.pow.f32(float 16.000000e+00, float %subfp) + %res = fpext float %pow to double ret double %res } define double @pow_sitofp_float_base_no_fast(float %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_float_base_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[POWI:%.*]] = tail call float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]]) -; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: [[POW:%.*]] = tail call float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POW]] to double ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float - %powi = tail call float @llvm.pow.f32(float %base, float %subfp) - %res = fpext float %powi to double + %pow = tail call float @llvm.pow.f32(float %base, float %subfp) + %res = fpext float %pow to double + ret double %res +} + +define double @pow_uitofp_float_base_no_fast(float %base, i32 %x) { +; CHECK-LABEL: @pow_uitofp_float_base_no_fast( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POW]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %pow = tail call float @llvm.pow.f32(float %base, float %subfp) + %res = fpext float %pow to double ret double %res } define double @pow_sitofp_double_base_no_fast(double %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_base_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to double -; CHECK-NEXT: [[POWI:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]]) -; CHECK-NEXT: ret double [[POWI]] +; CHECK-NEXT: [[POW:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]]) +; CHECK-NEXT: ret double [[POW]] ; %subfp = sitofp i32 %x to double - %powi = tail call double @llvm.pow.f64(double %base, double %subfp) - ret double %powi + %pow = tail call double @llvm.pow.f64(double %base, double %subfp) + ret double %pow +} + +define double @pow_uitofp_double_base_no_fast(double %base, i32 %x) { +; CHECK-LABEL: @pow_uitofp_double_base_no_fast( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to double +; CHECK-NEXT: [[POW:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]]) +; CHECK-NEXT: ret double [[POW]] +; + %subfp = uitofp i32 %x to double + %pow = tail call double @llvm.pow.f64(double %base, double %subfp) + ret double %pow } define double @powf_exp_const_int_no_fast(double %base) { @@ -141,5 +399,14 @@ ret double %res } +define double @powf_exp_const2_int_no_fast(double %base) { +; CHECK-LABEL: @powf_exp_const2_int_no_fast( +; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double -4.000000e+01) +; CHECK-NEXT: ret double [[RES]] +; + %res = tail call double @llvm.pow.f64(double %base, double -4.000000e+01) + ret double %res +} + declare float @llvm.pow.f32(float, float) declare double @llvm.pow.f64(double, double)