Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1462,9 +1462,28 @@
   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
     return Sqrt;
 
+  // The remaining folds are only applied to calls carrying the 'fast' flag.
+  if (!Pow->isFast())
+    return Shrunk;
+
+  // powf(x, sitofp(n)) -> powi(x, n)
+  // The powi declaration requested below is overloaded on the base type only
+  // and takes a scalar i32 exponent, so the fold is restricted to i32 sources;
+  // any other integer type (e.g. i64 or a vector) keeps the pow call.
+  if (auto *IntToFP = dyn_cast<SIToFPInst>(Expo)) {
+    // TODO: better transformations if x is also (constant) integer?
+    Value *IntVal = IntToFP->getOperand(0);
+    if (IntVal->getType()->isIntegerTy(32)) {
+      Value *Args[] = {Base, IntVal};
+      Module *M = Pow->getModule();
+      Function *F =
+          Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+      return B.CreateCall(F, Args);
+    }
+  }
+
   // pow(x, n) -> x * x * x * ...
   const APFloat *ExpoF;
-  if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+  if (match(Expo, m_APFloat(ExpoF))) {
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
     // If the exponent is an integer+0.5 we generate a call to sqrt and an
     // additional fmul.
Index: test/Transforms/InstCombine/pow_fp_int.ll
===================================================================
--- test/Transforms/InstCombine/pow_fp_int.ll
+++ test/Transforms/InstCombine/pow_fp_int.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; pow(x, sitofp(i32 n)) carrying the 'fast' flag is expected to become
+; llvm.powi(x, n). Without 'fast', or with a uitofp exponent, the pow call is
+; expected to be left untouched.
+
+define double @pow_sitofp_const_base_fast(i32 %x) {
+; CHECK-LABEL: @pow_sitofp_const_base_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_float_base_fast(float %base, i32 %x) {
+; CHECK-LABEL: @pow_sitofp_float_base_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.powi.f32(float [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call fast float @llvm.pow.f32(float %base, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_double_base_fast(double %base, i32 %x) {
+; CHECK-LABEL: @pow_sitofp_double_base_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %subfp = sitofp i32 %x to double
+  %res = tail call fast double @llvm.pow.f64(double %base, double %subfp)
+  ret double %res
+}
+
+define float @pow_uitofp_const_base_exp_i64_fast(i64 %x) {
+; CHECK-LABEL: @pow_uitofp_const_base_exp_i64_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i64 [[X:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %subfp = uitofp i64 %x to float
+  %res = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  ret float %res
+}
+
+define double @pow_sitofp_const_base_no_fast(i32 %x) {
+; CHECK-LABEL: @pow_sitofp_const_base_no_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[POWI:%.*]] = tail call float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_float_base_no_fast(float %base, i32 %x) {
+; CHECK-LABEL: @pow_sitofp_float_base_no_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[POWI:%.*]] = tail call float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]])
+; CHECK-NEXT:    [[RES:%.*]] = fpext float [[POWI]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %subfp = sitofp i32 %x to float
+  %powi = tail call float @llvm.pow.f32(float %base, float %subfp)
+  %res = fpext float %powi to double
+  ret double %res
+}
+
+define double @pow_sitofp_double_base_no_fast(double %base, i32 %x) {
+; CHECK-LABEL: @pow_sitofp_double_base_no_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to double
+; CHECK-NEXT:    [[POWI:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]])
+; CHECK-NEXT:    ret double [[POWI]]
+;
+  %subfp = sitofp i32 %x to double
+  %powi = tail call double @llvm.pow.f64(double %base, double %subfp)
+  ret double %powi
+}
+
+define float @pow_uitofp_const_base_exp_i64_no_fast(i64 %x) {
+; CHECK-LABEL: @pow_uitofp_const_base_exp_i64_no_fast(
+; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i64 [[X:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %subfp = uitofp i64 %x to float
+  %res = tail call float @llvm.pow.f32(float 7.000000e+00, float %subfp)
+  ret float %res
+}
+
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)