diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1638,31 +1638,6 @@ return nullptr; } -static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilderBase &B) { - // Multiplications calculated using Addition Chains. - // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html - - assert(Exp != 0 && "Incorrect exponent 0 not handled"); - - if (InnerChain[Exp]) - return InnerChain[Exp]; - - static const unsigned AddChain[33][2] = { - {0, 0}, // Unused. - {0, 0}, // Unused (base case = pow1). - {1, 1}, // Unused (pre-computed). - {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, - {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, - {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, - {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, - {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, - }; - - InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), - getPow(InnerChain, AddChain[Exp][1], B)); - return InnerChain[Exp]; -} - // Return a properly extended integer (DstWidth bits wide) if the operation is // an itofp. static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) { @@ -1963,70 +1938,52 @@ if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; - // pow(x, n) -> x * x * x * ... + // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction const APFloat *ExpoF; - if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && - !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { - // We limit to a max of 7 multiplications, thus the maximum exponent is 32. - // If the exponent is an integer+0.5 we generate a call to sqrt and an - // additional fmul. - // TODO: This whole transformation should be backend specific (e.g. 
some - // backends might prefer libcalls or the limit for the exponent might - // be different) and it should also consider optimizing for size. - APFloat LimF(ExpoF->getSemantics(), 33), - ExpoA(abs(*ExpoF)); - if (ExpoA < LimF) { - // This transformation applies to integer or integer+0.5 exponents only. - // For integer+0.5, we create a sqrt(Base) call. - Value *Sqrt = nullptr; - if (!ExpoA.isInteger()) { - APFloat Expo2 = ExpoA; - // To check if ExpoA is an integer + 0.5, we add it to itself. If there - // is no floating point exception and the result is an integer, then - // ExpoA == integer + 0.5 - if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK) - return nullptr; - - if (!Expo2.isInteger()) - return nullptr; - - Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), M, B, TLI); - if (!Sqrt) - return nullptr; - } - - // We will memoize intermediate products of the Addition Chain. - Value *InnerChain[33] = {nullptr}; - InnerChain[1] = Base; - InnerChain[2] = B.CreateFMul(Base, Base, "square"); - - // We cannot readily convert a non-double type (like float) to a double. - // So we first convert it to something which could be converted to double. - ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); - Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); + if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) && + !ExpoF->isExactlyValue(-0.5)) { + APFloat ExpoA(abs(*ExpoF)); + APFloat ExpoI(*ExpoF); + Value *Sqrt = nullptr; + if (AllowApprox && !ExpoA.isInteger()) { + APFloat Expo2 = ExpoA; + // To check if ExpoA is an integer + 0.5, we add it to itself. If there + // is no floating point exception and the result is an integer, then + // ExpoA == integer + 0.5 + if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK) + return nullptr; - // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x). 
- if (Sqrt) - FMul = B.CreateFMul(FMul, Sqrt); + if (!Expo2.isInteger()) + return nullptr; - // If the exponent is negative, then get the reciprocal. - if (ExpoF->isNegative()) - FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); + if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) != + APFloat::opInexact) + return nullptr; + if (!ExpoI.isInteger()) + return nullptr; + ExpoF = &ExpoI; - return FMul; + Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), + Pow->doesNotAccessMemory(), M, B, TLI); + if (!Sqrt) + return nullptr; } + // pow(x, n) -> powi(x, n) if n is a constant signed integer value APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false); - // powf(x, n) -> powi(x, n) if n is a constant signed integer value if (ExpoF->isInteger() && ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == APFloat::opOK) { - return copyFlags( + Value *PowI = copyFlags( *Pow, createPowWithIntegerExponent( Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B)); + + if (PowI && Sqrt) + return B.CreateFMul(PowI, Sqrt); + + return PowI; } } diff --git a/llvm/test/CodeGen/AArch64/powi.ll b/llvm/test/CodeGen/AArch64/powi.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/powi.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; Check that llvm.powi with a small constant exponent is expanded into fmul instructions. + +declare double @llvm.powi.f64.i32(double, i32) +declare float @llvm.powi.f32.i32(float, i32) + +define float @powi_f32(float %x) nounwind { +; CHECK-LABEL: powi_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul s0, s0, s0 +; CHECK-NEXT: fmul s0, s0, s0 +; CHECK-NEXT: ret + %1 = tail call float @llvm.powi.f32.i32(float %x, i32 4) + ret float %1 +} + +define double @powi_f64(double %x) nounwind { +; CHECK-LABEL: powi_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul d1, d0, d0 +; CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: ret + %1 = tail call double @llvm.powi.f64.i32(double %x, i32 3) + ret double %1 +} diff --git 
a/llvm/test/Transforms/InstCombine/pow-4.ll b/llvm/test/Transforms/InstCombine/pow-4.ll --- a/llvm/test/Transforms/InstCombine/pow-4.ll +++ b/llvm/test/Transforms/InstCombine/pow-4.ll @@ -13,10 +13,13 @@ ; pow(x, 3.0) define double @test_simplify_3(double %x) { -; CHECK-LABEL: @test_simplify_3( -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[X]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECKI32-LABEL: @test_simplify_3( +; CHECKI32-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[X:%.*]], i32 3) +; CHECKI32-NEXT: ret double [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_3( +; CHECKI16-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[X:%.*]], i16 3) +; CHECKI16-NEXT: ret double [[TMP1]] ; %1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00) ret double %1 @@ -24,10 +27,13 @@ ; powf(x, 4.0) define float @test_simplify_4f(float %x) { -; CHECK-LABEL: @test_simplify_4f( -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: ret float [[TMP1]] +; CHECKI32-LABEL: @test_simplify_4f( +; CHECKI32-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i32(float [[X:%.*]], i32 4) +; CHECKI32-NEXT: ret float [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_4f( +; CHECKI16-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i16(float [[X:%.*]], i16 4) +; CHECKI16-NEXT: ret float [[TMP1]] ; %1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00) ret float %1 @@ -35,10 +41,13 @@ ; pow(x, 4.0) define double @test_simplify_4(double %x) { -; CHECK-LABEL: @test_simplify_4( -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECKI32-LABEL: @test_simplify_4( +; CHECKI32-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[X:%.*]], i32 4) 
+; CHECKI32-NEXT: ret double [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_4( +; CHECKI16-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[X:%.*]], i16 4) +; CHECKI16-NEXT: ret double [[TMP1]] ; %1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00) ret double %1 @@ -46,13 +55,13 @@ ; powf(x, <15.0, 15.0>) define <2 x float> @test_simplify_15(<2 x float> %x) { -; CHECK-LABEL: @test_simplify_15( -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[SQUARE]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]] -; CHECK-NEXT: ret <2 x float> [[TMP4]] +; CHECKI32-LABEL: @test_simplify_15( +; CHECKI32-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[X:%.*]], i32 15) +; CHECKI32-NEXT: ret <2 x float> [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_15( +; CHECKI16-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.powi.v2f32.i16(<2 x float> [[X:%.*]], i16 15) +; CHECKI16-NEXT: ret <2 x float> [[TMP1]] ; %1 = call fast <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %1 @@ -60,13 +69,13 @@ ; pow(x, -7.0) define <2 x double> @test_simplify_neg_7(<2 x double> %x) { -; CHECK-LABEL: @test_simplify_neg_7( -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> , [[TMP3]] -; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] +; CHECKI32-LABEL: @test_simplify_neg_7( +; CHECKI32-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[X:%.*]], i32 -7) +; 
CHECKI32-NEXT: ret <2 x double> [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_neg_7( +; CHECKI16-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i16(<2 x double> [[X:%.*]], i16 -7) +; CHECKI16-NEXT: ret <2 x double> [[TMP1]] ; %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %1 @@ -74,15 +83,13 @@ ; powf(x, -19.0) define float @test_simplify_neg_19(float %x) { -; CHECK-LABEL: @test_simplify_neg_19( -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[SQUARE]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[X]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP5]] -; CHECK-NEXT: ret float [[RECIPROCAL]] +; CHECKI32-LABEL: @test_simplify_neg_19( +; CHECKI32-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i32(float [[X:%.*]], i32 -19) +; CHECKI32-NEXT: ret float [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_neg_19( +; CHECKI16-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i16(float [[X:%.*]], i16 -19) +; CHECKI16-NEXT: ret float [[TMP1]] ; %1 = call fast float @llvm.pow.f32(float %x, float -1.900000e+01) ret float %1 @@ -100,13 +107,13 @@ ; powf(x, 32.0) define float @test_simplify_32(float %x) { -; CHECK-LABEL: @test_simplify_32( -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]] -; CHECK-NEXT: ret float [[TMP4]] +; CHECKI32-LABEL: @test_simplify_32( +; CHECKI32-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i32(float [[X:%.*]], 
i32 32) +; CHECKI32-NEXT: ret float [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_32( +; CHECKI16-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i16(float [[X:%.*]], i16 32) +; CHECKI16-NEXT: ret float [[TMP1]] ; %1 = call fast float @llvm.pow.f32(float %x, float 3.200000e+01) ret float %1 @@ -128,14 +135,17 @@ ; pow(x, 16.5) with double define double @test_simplify_16_5(double %x) { -; CHECK-LABEL: @test_simplify_16_5( -; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] -; CHECK-NEXT: ret double [[TMP4]] +; CHECKI32-LABEL: @test_simplify_16_5( +; CHECKI32-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECKI32-NEXT: [[POWI:%.*]] = call fast double @llvm.powi.f64.i32(double [[X]], i32 16) +; CHECKI32-NEXT: [[TMP1:%.*]] = fmul fast double [[POWI]], [[SQRT]] +; CHECKI32-NEXT: ret double [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_16_5( +; CHECKI16-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECKI16-NEXT: [[POWI:%.*]] = call fast double @llvm.powi.f64.i16(double [[X]], i16 16) +; CHECKI16-NEXT: [[TMP1:%.*]] = fmul fast double [[POWI]], [[SQRT]] +; CHECKI16-NEXT: ret double [[TMP1]] ; %1 = call fast double @llvm.pow.f64(double %x, double 1.650000e+01) ret double %1 @@ -143,112 +153,80 @@ ; pow(x, -16.5) with double define double @test_simplify_neg_16_5(double %x) { -; CHECK-LABEL: @test_simplify_neg_16_5( -; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double 
[[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]] -; CHECK-NEXT: ret double [[RECIPROCAL]] +; CHECKI32-LABEL: @test_simplify_neg_16_5( +; CHECKI32-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECKI32-NEXT: [[POWI:%.*]] = call fast double @llvm.powi.f64.i32(double [[X]], i32 -17) +; CHECKI32-NEXT: [[TMP1:%.*]] = fmul fast double [[POWI]], [[SQRT]] +; CHECKI32-NEXT: ret double [[TMP1]] +; +; CHECKI16-LABEL: @test_simplify_neg_16_5( +; CHECKI16-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECKI16-NEXT: [[POWI:%.*]] = call fast double @llvm.powi.f64.i16(double [[X]], i16 -17) +; CHECKI16-NEXT: [[TMP1:%.*]] = fmul fast double [[POWI]], [[SQRT]] +; CHECKI16-NEXT: ret double [[TMP1]] ; %1 = call fast double @llvm.pow.f64(double %x, double -1.650000e+01) ret double %1 } -; pow(x, 16.5) with double +; pow(x, 0.5) with double -define double @test_simplify_16_5_libcall(double %x) { -; SQRT-LABEL: @test_simplify_16_5_libcall( -; SQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]]) -; SQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] -; SQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; SQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; SQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; SQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] -; SQRT-NEXT: ret double [[TMP4]] -; -; NOSQRT-LABEL: @test_simplify_16_5_libcall( -; NOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double 1.650000e+01) -; NOSQRT-NEXT: ret double [[TMP1]] -; -; CHECKSQRT-LABEL: @test_simplify_16_5_libcall( +define double @test_simplify_0_5_libcall(double %x) { +; CHECKSQRT-LABEL: @test_simplify_0_5_libcall( ; CHECKSQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]]) -; 
CHECKSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] -; CHECKSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; CHECKSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; CHECKSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECKSQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] -; CHECKSQRT-NEXT: ret double [[TMP4]] -; -; CHECKNOSQRT-LABEL: @test_simplify_16_5_libcall( -; CHECKNOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double 1.650000e+01) +; CHECKSQRT-NEXT: ret double [[SQRT]] +; +; CHECKNOSQRT-LABEL: @test_simplify_0_5_libcall( +; CHECKNOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double 5.000000e-01) ; CHECKNOSQRT-NEXT: ret double [[TMP1]] ; - %1 = call fast double @pow(double %x, double 1.650000e+01) + %1 = call fast double @pow(double %x, double 5.000000e-01) ret double %1 } -; pow(x, -16.5) with double +; pow(x, -0.5) with double -define double @test_simplify_neg_16_5_libcall(double %x) { -; SQRT-LABEL: @test_simplify_neg_16_5_libcall( -; SQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]]) -; SQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] -; SQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; SQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; SQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; SQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] -; SQRT-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]] -; SQRT-NEXT: ret double [[RECIPROCAL]] -; -; NOSQRT-LABEL: @test_simplify_neg_16_5_libcall( -; NOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double -1.650000e+01) -; NOSQRT-NEXT: ret double [[TMP1]] -; -; CHECKSQRT-LABEL: @test_simplify_neg_16_5_libcall( +define double @test_simplify_neg_0_5_libcall(double %x) { +; CHECKSQRT-LABEL: @test_simplify_neg_0_5_libcall( ; CHECKSQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double 
[[X:%.*]]) -; CHECKSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] -; CHECKSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] -; CHECKSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] -; CHECKSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECKSQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] -; CHECKSQRT-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]] +; CHECKSQRT-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[SQRT]] ; CHECKSQRT-NEXT: ret double [[RECIPROCAL]] ; -; CHECKNOSQRT-LABEL: @test_simplify_neg_16_5_libcall( -; CHECKNOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double -1.650000e+01) +; CHECKNOSQRT-LABEL: @test_simplify_neg_0_5_libcall( +; CHECKNOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double -5.000000e-01) ; CHECKNOSQRT-NEXT: ret double [[TMP1]] ; - %1 = call fast double @pow(double %x, double -1.650000e+01) + %1 = call fast double @pow(double %x, double -5.000000e-01) ret double %1 } ; pow(x, -8.5) with float define float @test_simplify_neg_8_5(float %x) { -; CHECK-LABEL: @test_simplify_neg_8_5( -; CHECK-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]]) -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[SQRT]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP2]] -; CHECK-NEXT: ret float [[RECIPROCAL]] -; - %1 = call fast float @llvm.pow.f32(float %x, float -0.450000e+01) +; CHECKI32-LABEL: @test_simplify_neg_8_5( +; CHECKI32-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECKI32-NEXT: [[POWI:%.*]] = call fast float @llvm.powi.f32.i32(float [[X]], i32 -9) +; CHECKI32-NEXT: [[TMP1:%.*]] = fmul fast float [[POWI]], [[SQRT]] +; +; CHECKI16-LABEL: @test_simplify_neg_8_5( +; CHECKI16-NEXT: [[SQRT:%.*]] = call fast float 
@llvm.sqrt.f32(float [[X:%.*]]) +; CHECKI16-NEXT: [[POWI:%.*]] = call fast float @llvm.powi.f32.i16(float [[X]], i16 -9) +; CHECKI16-NEXT: [[TMP1:%.*]] = fmul fast float [[POWI]], [[SQRT]] +; + %1 = call fast float @llvm.pow.f32(float %x, float -0.850000e+01) ret float %1 } ; pow(x, 7.5) with <2 x double> define <2 x double> @test_simplify_7_5(<2 x double> %x) { -; CHECK-LABEL: @test_simplify_7_5( -; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x double> [[X]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP3]], [[SQRT]] -; CHECK-NEXT: ret <2 x double> [[TMP4]] +; CHECKI32-LABEL: @test_simplify_7_5( +; CHECKI32-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECKI32-NEXT: [[POWI:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[X]], i32 7) +; CHECKI32-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[POWI]], [[SQRT]] +; +; CHECKI16-LABEL: @test_simplify_7_5( +; CHECKI16-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECKI16-NEXT: [[POWI:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i16(<2 x double> [[X]], i16 7) +; CHECKI16-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[POWI]], [[SQRT]] ; %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %1 @@ -256,12 +234,15 @@ ; pow(x, 3.5) with <4 x float> define <4 x float> @test_simplify_3_5(<4 x float> %x) { -; CHECK-LABEL: @test_simplify_3_5( -; CHECK-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) -; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <4 x float> [[X]], [[X]] -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[SQUARE]], [[X]] -; 
CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[SQRT]] -; CHECK-NEXT: ret <4 x float> [[TMP2]] +; CHECKI32-LABEL: @test_simplify_3_5( +; CHECKI32-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; CHECKI32-NEXT: [[POWI:%.*]] = call fast <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[X]], i32 3) +; CHECKI32-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[POWI]], [[SQRT]] +; +; CHECKI16-LABEL: @test_simplify_3_5( +; CHECKI16-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; CHECKI16-NEXT: [[POWI:%.*]] = call fast <4 x float> @llvm.powi.v4f32.i16(<4 x float> [[X]], i16 3) +; CHECKI16-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[POWI]], [[SQRT]] ; %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %1 diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll --- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll @@ -444,7 +444,7 @@ define double @powf_exp_const_int_no_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_no_fast( -; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01) +; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 40) ; CHECK-NEXT: ret double [[RES]] ; %res = tail call double @llvm.pow.f64(double %base, double 4.000000e+01) @@ -453,7 +453,9 @@ define double @powf_exp_const_not_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_not_int_fast( -; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 3.750000e+01) +; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[BASE:%.*]]) +; CHECK-NEXT: [[POWI:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[BASE]], i32 37) +; CHECK-NEXT: [[RES:%.*]] = fmul fast double [[POWI]], [[SQRT]] ; CHECK-NEXT: ret double [[RES]] ; %res = tail call fast double 
@llvm.pow.f64(double %base, double 3.750000e+01) @@ -471,7 +473,7 @@ define double @powf_exp_const2_int_no_fast(double %base) { ; CHECK-LABEL: @powf_exp_const2_int_no_fast( -; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double -4.000000e+01) +; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 -40) ; CHECK-NEXT: ret double [[RES]] ; %res = tail call double @llvm.pow.f64(double %base, double -4.000000e+01) diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int16.ll b/llvm/test/Transforms/InstCombine/pow_fp_int16.ll --- a/llvm/test/Transforms/InstCombine/pow_fp_int16.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int16.ll @@ -414,7 +414,7 @@ define double @powf_exp_const_int_no_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_no_fast( -; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01) +; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 40) ; CHECK-NEXT: ret double [[RES]] ; %res = tail call double @llvm.pow.f64(double %base, double 4.000000e+01) @@ -423,7 +423,9 @@ define double @powf_exp_const_not_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_not_int_fast( -; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 3.750000e+01) +; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[BASE:%.*]]) +; CHECK-NEXT: [[POWI:%.*]] = tail call fast double @llvm.powi.f64.i16(double [[BASE]], i16 37) +; CHECK-NEXT: [[RES:%.*]] = fmul fast double [[POWI]], [[SQRT]] ; CHECK-NEXT: ret double [[RES]] ; %res = tail call fast double @llvm.pow.f64(double %base, double 3.750000e+01) @@ -441,7 +443,7 @@ define double @powf_exp_const2_int_no_fast(double %base) { ; CHECK-LABEL: @powf_exp_const2_int_no_fast( -; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double -4.000000e+01) +; CHECK-NEXT: [[RES:%.*]] = tail call double 
@llvm.powi.f64.i16(double [[BASE:%.*]], i16 -40) ; CHECK-NEXT: ret double [[RES]] ; %res = tail call double @llvm.pow.f64(double %base, double -4.000000e+01)