Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1322,12 +1322,12 @@
     APFloat BaseR = APFloat(1.0);
     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
     BaseR = BaseR / *BaseF;
-    bool IsInteger = BaseF->isInteger(),
-         IsReciprocal = BaseR.isInteger();
+    bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
     APSInt NI(64, false);
     if ((IsInteger || IsReciprocal) &&
-        !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+        NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+            APFloat::opOK &&
         NI > 1 && NI.isPowerOf2()) {
       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
@@ -1410,12 +1410,22 @@
   return Sqrt;
 }
 
+// Emit a call to the llvm.powi intrinsic overloaded on the type of Base.
+static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
+                                           IRBuilder<> &B) {
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+  return B.CreateCall(F, {Base, Expo});
+}
+
 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
-  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+  Value *Base = Pow->getArgOperand(0);
+  Value *Expo = Pow->getArgOperand(1);
   Function *Callee = Pow->getCalledFunction();
   StringRef Name = Callee->getName();
   Type *Ty = Pow->getType();
+  Module *M = Pow->getModule();
   Value *Shrunk = nullptr;
+  bool IsFast = Pow->isFast();
   bool Ignored;
 
   // Bail out if simplifying libcalls to pow() is disabled.
@@ -1438,6 +1448,23 @@
   if (match(Base, m_FPOne()))
     return Base;
 
+  // powf(x, sitofp(e)) -> powi(x, e)
+  // powf(x, uitofp(e)) -> powi(x, e), only if e is narrower than 32 bits
+  if (IsFast && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
+    Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
+    Value *NewExpo = nullptr;
+    unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
+    // powi takes a signed i32 exponent: a 32-bit unsigned value >= 2^31 would
+    // be reinterpreted as negative, so uitofp is only folded after a zext.
+    if (BitWidth == 32 && isa<SIToFPInst>(Expo))
+      NewExpo = IntExpo;
+    else if (BitWidth < 32)
+      NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
+                                      : B.CreateZExt(IntExpo, B.getInt32Ty());
+    if (NewExpo)
+      return createPowWithIntegerExponent(Base, NewExpo, M, B);
+  }
+
   if (Value *Exp = replacePowWithExp(Pow, B))
     return Exp;
 
@@ -1462,9 +1489,12 @@
   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
     return Sqrt;
 
+  if (!IsFast)
+    return Shrunk;
+
   // pow(x, n) -> x * x * x * ...
   const APFloat *ExpoF;
-  if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+  if (match(Expo, m_APFloat(ExpoF))) {
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
     // If the exponent is an integer+0.5 we generate a call to sqrt and an
     // additional fmul.
@@ -1488,9 +1518,8 @@
         if (!Expo2.isInteger())
           return nullptr;
 
-        Sqrt =
-            getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
-                        Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+        Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+                           Pow->doesNotAccessMemory(), M, B, TLI);
       }
 
       // We will memoize intermediate products of the Addition Chain.
@@ -1513,6 +1542,14 @@
 
       return FMul;
     }
+
+    APSInt IntExpo(32, /*isUnsigned=*/false);
+    // powf(x, C) -> powi(x, C) iff C is a constant signed integer value
+    if (ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
+        APFloat::opOK) {
+      return createPowWithIntegerExponent(
+          Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+    }
   }
 
   return Shrunk;
Index: test/Transforms/InstCombine/pow_fp_int.ll
===================================================================
--- test/Transforms/InstCombine/pow_fp_int.ll
+++ test/Transforms/InstCombine/pow_fp_int.ll
@@ -5,9 +5,8 @@
 
 define double @pow_sitofp_const_base_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_fast(
-; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
-; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
   %subfp = sitofp i32 %x to float
@@ -16,12 +15,23 @@
   ret double %res
 }
 
+define double @pow_uitofp_const_base_fast(i32 %x) {
+; CHECK-LABEL: @pow_uitofp_const_base_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i32 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
 define double @pow_sitofp_const_base_power_of_2_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_fast(
-; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
-; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 1.600000e+01, i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
   %subfp = sitofp i32 %x to float
@@ -30,11 +40,24 @@
   ret double %res
 }
 
+define double @pow_uitofp_const_base_power_of_2_fast(i32 %x) {
+; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00
+; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i32 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 16.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
 define double @pow_sitofp_float_base_fast(float %base, i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_float_base_fast(
-; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]])
-; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
   %subfp = sitofp i32 %x to float
@@ -43,26 +66,103 @@
   ret double %res
 }
 
+define double @pow_uitofp_float_base_fast(float %base, i32 %x) {
+; CHECK-LABEL: @pow_uitofp_float_base_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i32 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float %base, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
 define double @pow_sitofp_double_base_fast(double %base, i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_base_fast(
-; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to double
-; CHECK-NEXT:    [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]])
-; CHECK-NEXT:    ret double [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %subfp = sitofp i32 %x to double
   %res = tail call fast double @llvm.pow.f64(double %base, double %subfp)
   ret double %res
 }
 
+define double @pow_uitofp_double_base_fast(double %base, i32 %x) {
+; CHECK-LABEL: @pow_uitofp_double_base_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to double
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]])
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i32 %x to double
+  %res = tail call fast double @llvm.pow.f64(double %base, double %subfp)
+  ret double %res
+}
+
+define double @pow_sitofp_const_base_fast_i8(i8 %x) {
+; CHECK-LABEL: @pow_sitofp_const_base_fast_i8(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i8 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_uitofp_const_base_fast_i8(i8 %x) {
+; CHECK-LABEL: @pow_uitofp_const_base_fast_i8(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i8 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
 define double @powf_exp_const_int_fast(double %base) {
 ; CHECK-LABEL: @powf_exp_const_int_fast(
-; CHECK-NEXT:    [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01)
-; CHECK-NEXT:    ret double [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 40)
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01)
   ret double %res
 }
 
+; Negative tests
+
+define double @pow_sitofp_const_base_fast_i64(i64 %x) {
+; CHECK-LABEL: @pow_sitofp_const_base_fast_i64(
+; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i64 [[X:%.*]] to float
+; CHECK-NEXT:    [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i64 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_uitofp_const_base_fast_i64(i64 %x) {
+; CHECK-LABEL: @pow_uitofp_const_base_fast_i64(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i64 [[X:%.*]] to float
+; CHECK-NEXT:    [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i64 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
 define double @pow_sitofp_const_base_no_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
@@ -76,6 +176,19 @@
   ret double %res
 }
 
+define double @pow_uitofp_const_base_no_fast(i32 %x) {
+; CHECK-LABEL: @pow_uitofp_const_base_no_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[POWI:%.*]] = tail call float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i32 %x to float
+  %powi = tail call float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
 define double @pow_sitofp_const_base_power_of_2_no_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
@@ -90,6 +203,20 @@
   ret double %res
 }
 
+define double @pow_uitofp_const_base_power_of_2_no_fast(i32 %x) {
+; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00
+; CHECK-NEXT:    [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i32 %x to float
+  %powi = tail call float @llvm.pow.f32(float 16.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
 define double @pow_sitofp_float_base_no_fast(float %base, i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_float_base_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
@@ -103,6 +230,19 @@
   ret double %res
 }
 
+define double @pow_uitofp_float_base_no_fast(float %base, i32 %x) {
+; CHECK-LABEL: @pow_uitofp_float_base_no_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[POWI:%.*]] = tail call float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = uitofp i32 %x to float
+  %powi = tail call float @llvm.pow.f32(float %base, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
 define double @pow_sitofp_double_base_no_fast(double %base, i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_base_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to double
@@ -114,6 +254,17 @@
   ret double %powi
 }
 
+define double @pow_uitofp_double_base_no_fast(double %base, i32 %x) {
+; CHECK-LABEL: @pow_uitofp_double_base_no_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to double
+; CHECK-NEXT:    [[POWI:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]])
+; CHECK-NEXT:    ret double [[POWI]]
+;
+  %subfp = uitofp i32 %x to double
+  %powi = tail call double @llvm.pow.f64(double %base, double %subfp)
+  ret double %powi
+}
+
 define double @powf_exp_const_int_no_fast(double %base) {
 ; CHECK-LABEL: @powf_exp_const_int_no_fast(
 ; CHECK-NEXT:    [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01)