Fold pow() calls whose exponent is integer-valued into llvm.powi (PR42190):
  - pow(x, sitofp(n)) -> powi(x, n) when n is a scalar i32
  - fast pow(x, C)    -> powi(x, C) when the constant C converts exactly to i32

NOTE(review): the line structure and indentation of this patch were
reconstructed from a whitespace-collapsed copy. Apply it with
whitespace-insensitive matching (e.g. `patch -l`) and clang-format the result.

Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1322,12 +1322,12 @@
     APFloat BaseR = APFloat(1.0);
     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
     BaseR = BaseR / *BaseF;
-    bool IsInteger = BaseF->isInteger(),
-         IsReciprocal = BaseR.isInteger();
+    bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
     APSInt NI(64, false);
     if ((IsInteger || IsReciprocal) &&
-        !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+        NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+            APFloat::opOK &&
         NI > 1 && NI.isPowerOf2()) {
       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
@@ -1410,11 +1410,22 @@
   return Sqrt;
 }
 
+/// Emit a call to llvm.powi overloaded on the type of \p Base; \p Expo is
+/// passed through unchanged as the integer exponent operand.
+static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
+                                           IRBuilder<> &B) {
+  Value *Args[] = {Base, Expo};
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+  return B.CreateCall(F, Args);
+}
+
 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
-  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+  Value *Base = Pow->getArgOperand(0);
+  Value *Expo = Pow->getArgOperand(1);
   Function *Callee = Pow->getCalledFunction();
   StringRef Name = Callee->getName();
   Type *Ty = Pow->getType();
+  Module *M = Pow->getModule();
   Value *Shrunk = nullptr;
   bool Ignored;
 
@@ -1438,6 +1449,11 @@
   if (match(Base, m_FPOne()))
     return Base;
 
+  // powf(x, sitofp(n)) -> powi(x, n); llvm.powi takes a scalar i32 exponent.
+  if (auto *IntToFP = dyn_cast<SIToFPInst>(Expo))
+    if (IntToFP->getOperand(0)->getType()->isIntegerTy(32))
+      return createPowWithIntegerExponent(Base, IntToFP->getOperand(0), M, B);
+
   if (Value *Exp = replacePowWithExp(Pow, B))
     return Exp;
 
@@ -1462,9 +1478,12 @@
   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
     return Sqrt;
 
+  if (!Pow->isFast())
+    return Shrunk;
+
   // pow(x, n) -> x * x * x * ...
   const APFloat *ExpoF;
-  if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+  if (match(Expo, m_APFloat(ExpoF))) {
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
     // If the exponent is an integer+0.5 we generate a call to sqrt and an
     // additional fmul.
@@ -1488,9 +1507,8 @@
       if (!Expo2.isInteger())
         return nullptr;
 
-      Sqrt =
-          getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
-                      Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+      Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+                         Pow->doesNotAccessMemory(), M, B, TLI);
     }
 
     // We will memoize intermediate products of the Addition Chain.
@@ -1513,6 +1531,14 @@
 
       return FMul;
     }
+
+    APSInt IntExpo(32, /*isUnsigned=*/false);
+    // powf(x, C) -> powi(x, C) iff C is a constant integer value
+    if (ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
+        APFloat::opOK) {
+      return createPowWithIntegerExponent(
+          Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+    }
   }
 
   return Shrunk;
Index: test/Transforms/InstCombine/pow_fp_int.ll
===================================================================
--- test/Transforms/InstCombine/pow_fp_int.ll
+++ test/Transforms/InstCombine/pow_fp_int.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; PR42190
+
+define double @pow_sitofp_const_base_fast(i32 %x) {
+; CHECK-LABEL: @pow_sitofp_const_base_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_const_base_power_of_2_fast(i32 %x) {
+; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 1.600000e+01, i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 16.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_float_base_fast(float %base, i32 %x) {
+; CHECK-LABEL: @pow_sitofp_float_base_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float %base, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_double_base_fast(double %base, i32 %x) {
+; CHECK-LABEL: @pow_sitofp_double_base_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %subfp = sitofp i32 %x to double
+  %res = tail call fast double @llvm.pow.f64(double %base, double %subfp)
+  ret double %res
+}
+
+define double @powf_exp_const_int_fast(double %base) {
+; CHECK-LABEL: @powf_exp_const_int_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 40)
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01)
+  ret double %res
+}
+
+define double @pow_sitofp_const_base_no_fast(i32 %x) {
+; CHECK-LABEL: @pow_sitofp_const_base_no_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.powi.f32(float 7.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_float_base_no_fast(float %base, i32 %x) {
+; CHECK-LABEL: @pow_sitofp_float_base_no_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.powi.f32(float [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call float @llvm.pow.f32(float %base, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_double_base_no_fast(double %base, i32 %x) {
+; CHECK-LABEL: @pow_sitofp_double_base_no_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.powi.f64(double [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %subfp = sitofp i32 %x to double
+  %powi = tail call double @llvm.pow.f64(double %base, double %subfp)
+  ret double %powi
+}
+
+; Negative tests
+define double @powf_exp_const_int_no_fast(double %base) {
+; CHECK-LABEL: @powf_exp_const_int_no_fast(
+; CHECK-NEXT:    [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = tail call double @llvm.pow.f64(double %base, double 4.000000e+01)
+  ret double %res
+}
+
+define double @powf_exp_const_not_int_fast(double %base) {
+; CHECK-LABEL: @powf_exp_const_not_int_fast(
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 3.750000e+01)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = tail call fast double @llvm.pow.f64(double %base, double 3.750000e+01)
+  ret double %res
+}
+
+define double @powf_exp_const_not_int_no_fast(double %base) {
+; CHECK-LABEL: @powf_exp_const_not_int_no_fast(
+; CHECK-NEXT:    [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double 3.750000e+01)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = tail call double @llvm.pow.f64(double %base, double 3.750000e+01)
+  ret double %res
+}
+
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)