Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1322,12 +1322,12 @@
     APFloat BaseR = APFloat(1.0);
     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
     BaseR = BaseR / *BaseF;
-    bool IsInteger    = BaseF->isInteger(),
-         IsReciprocal = BaseR.isInteger();
+    bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
     APSInt NI(64, false);
     if ((IsInteger || IsReciprocal) &&
-        !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+        NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+            APFloat::opOK &&
         NI > 1 && NI.isPowerOf2()) {
       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
@@ -1410,11 +1410,22 @@
   return Sqrt;
 }
 
+/// Emit a call to the llvm.powi intrinsic, overloaded on the type of \p Base,
+/// with \p Base and \p Expo as its two arguments.
+static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
+                                           IRBuilder<> &B) {
+  Value *Args[] = {Base, Expo};
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+  return B.CreateCall(F, Args);
+}
+
 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
-  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+  Value *Base = Pow->getArgOperand(0);
+  Value *Expo = Pow->getArgOperand(1);
   Function *Callee = Pow->getCalledFunction();
   StringRef Name = Callee->getName();
   Type *Ty = Pow->getType();
+  Module *M = Pow->getModule();
   Value *Shrunk = nullptr;
   bool Ignored;
 
@@ -1438,6 +1449,15 @@
   if (match(Base, m_FPOne()))
     return Base;
 
+  // powf(x, sitofp(n)) -> powi(x, n)
+  // Only for fast calls (powi does not define its multiplication order) and
+  // only when n is already the i32 that the powi intrinsic expects.
+  if (auto *IntToFP = dyn_cast<SIToFPInst>(Expo)) {
+    Value *IntExpo = IntToFP->getOperand(0);
+    if (Pow->isFast() && IntExpo->getType()->isIntegerTy(32))
+      return createPowWithIntegerExponent(Base, IntExpo, M, B);
+  }
+
   if (Value *Exp = replacePowWithExp(Pow, B))
     return Exp;
 
@@ -1462,9 +1482,12 @@
   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
     return Sqrt;
 
+  if (!Pow->isFast())
+    return Shrunk;
+
   // pow(x, n) -> x * x * x * ...
   const APFloat *ExpoF;
-  if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+  if (match(Expo, m_APFloat(ExpoF))) {
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
     // If the exponent is an integer+0.5 we generate a call to sqrt and an
     // additional fmul.
@@ -1488,9 +1511,8 @@
       if (!Expo2.isInteger())
         return nullptr;
 
-      Sqrt =
-          getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
-                      Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+      Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+                         Pow->doesNotAccessMemory(), M, B, TLI);
     }
 
     // We will memoize intermediate products of the Addition Chain.
@@ -1513,6 +1535,14 @@
 
       return FMul;
     }
+
+    APSInt IntExpo(32, false);
+    // powf(x, C) -> powi(x, C) iff C is a constant integer value
+    if (ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
+        APFloat::opOK) {
+      return createPowWithIntegerExponent(
+          Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+    }
   }
 
   return Shrunk;
Index: test/Transforms/InstCombine/pow_fp_int.ll
===================================================================
--- test/Transforms/InstCombine/pow_fp_int.ll
+++ test/Transforms/InstCombine/pow_fp_int.ll
@@ -5,9 +5,8 @@
 
 define double @pow_sitofp_const_base_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_fast(
-; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
-; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
   %subfp = sitofp i32 %x to float
@@ -18,10 +17,8 @@
 
 define double @pow_sitofp_const_base_power_of_2_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_fast(
-; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
-; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 1.600000e+01, i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
   %subfp = sitofp i32 %x to float
@@ -32,9 +29,8 @@
 
 define double @pow_sitofp_float_base_fast(float %base, i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_float_base_fast(
-; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]])
-; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
   %subfp = sitofp i32 %x to float
@@ -45,9 +41,8 @@
 
 define double @pow_sitofp_double_base_fast(double %base, i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_base_fast(
-; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to double
-; CHECK-NEXT:    [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]])
-; CHECK-NEXT:    ret double [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %subfp = sitofp i32 %x to double
   %res = tail call fast double @llvm.pow.f64(double %base, double %subfp)
@@ -56,8 +51,8 @@
 
 define double @powf_exp_const_int_fast(double %base) {
 ; CHECK-LABEL: @powf_exp_const_int_fast(
-; CHECK-NEXT:    [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01)
-; CHECK-NEXT:    ret double [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 40)
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01)
   ret double %res