Index: llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -132,6 +132,7 @@
   // Math Library Optimizations
   Value *optimizeCAbs(CallInst *CI, IRBuilder<> &B);
   Value *optimizePow(CallInst *CI, IRBuilder<> &B);
+  Value *replacePowWithExp(CallInst *Pow, IRBuilder<> &B);
   Value *replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B);
   Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
   Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B);
Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Triple.h"
@@ -1179,6 +1180,97 @@
   return InnerChain[Exp];
 }
 
+/// Use exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
+/// exp{,2}(x * y) for pow(exp{,2}(x), y).
+/// Returns the replacement value, or nullptr if none of these forms applies.
+/// TODO: Handle pow(exp10(x), y) when more targets have exp10() available.
+Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
+  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+  AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+  Module *Mod = Pow->getModule();
+  Type *Ty = Pow->getType();
+  bool Ignored;
+
+  // Evaluate special cases related to a nested function as the base.
+
+  // pow(exp(x), y) -> exp(x * y)
+  // pow(exp2(x), y) -> exp2(x * y)
+  // We enable these only with fast-math. Besides rounding differences, the
+  // transformation changes overflow and underflow behavior quite dramatically.
+  // For example:
+  //   pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
+  // Whereas:
+  //   exp(1000 * 0.001) = exp(1)
+  CallInst *BaseFn = dyn_cast<CallInst>(Base);
+  if (BaseFn && BaseFn->isFast() && Pow->isFast()) {
+    LibFunc Fn;
+    Function *CalledFn = BaseFn->getCalledFunction();
+    if (CalledFn && TLI->getLibFunc(CalledFn->getName(), Fn) &&
+        TLI->has(Fn)) {
+      Intrinsic::ID ID;
+      StringRef ExpName;
+
+      switch (Fn) {
+      default:
+        // Not an exp()-like callee: bail out before emitting anything, so
+        // that no dead fmul is left behind in the IR.
+        return nullptr;
+      case LibFunc_exp:
+      case LibFunc_expf:
+      case LibFunc_expl:
+        ID = Intrinsic::exp;
+        ExpName = "exp";
+        break;
+      case LibFunc_exp2:
+      case LibFunc_exp2f:
+      case LibFunc_exp2l:
+        ID = Intrinsic::exp2;
+        ExpName = "exp2";
+        break;
+      }
+
+      // Emit the product only once the transformation is known to apply.
+      Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
+      Value *ExpFn = Intrinsic::getDeclaration(Mod, ID, Ty);
+      return B.CreateCall(ExpFn, FMul, ExpName);
+    }
+  }
+
+  // Evaluate special cases related to a constant base.
+
+  const APFloat *BaseF;
+  if (!match(Base, m_APFloat(BaseF)))
+    return nullptr;
+
+  // pow(2.0 ** n, x) -> exp2(n * x)
+  // This applies when the base or its reciprocal is an integral power of two
+  // greater than one; n is negative in the reciprocal case.
+  APFloat BaseR(1.0);
+  BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
+  BaseR = BaseR / *BaseF;
+  bool IsInteger = BaseF->isInteger();
+  bool IsReciprocal = BaseR.isInteger();
+  const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
+  APSInt NI(64, false);
+  if ((IsInteger || IsReciprocal) &&
+      NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+          APFloat::opOK &&
+      NI > 1 && NI.isPowerOf2()) {
+    double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
+    Value *ExpoN = B.CreateFMul(Expo, ConstantFP::get(Ty, N));
+    Value *Exp2Fn = Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty);
+    return B.CreateCall(Exp2Fn, ExpoN, "exp2");
+  }
+
+  // pow(10.0, x) -> exp10(x)
+  // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
+  if (BaseF->isExactlyValue(10.0) &&
+      hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+    return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs);
+
+  return nullptr;
+}
+
 /// Use square root in place of pow(x, +/-0.5).
 Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
   Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
@@ -1231,9 +1323,7 @@
 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
   Function *Callee = Pow->getCalledFunction();
-  AttributeList Attrs = Callee->getAttributes();
   StringRef Name = Callee->getName();
-  Module *Module = Pow->getModule();
   Type *Ty = Pow->getType();
   Value *Shrunk = nullptr;
   bool Ignored;
@@ -1258,36 +1348,8 @@
   if (match(Base, m_FPOne()))
     return Base;
 
-  // pow(2.0, x) -> exp2(x)
-  if (match(Base, m_SpecificFP(2.0))) {
-    Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty);
-    return B.CreateCall(Exp2, Expo, "exp2");
-  }
-
-  // pow(10.0, x) -> exp10(x)
-  if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base))
-    // There's no exp10 intrinsic yet, but, maybe, some day there shall be one.
-    if (BaseC->isExactlyValue(10.0) &&
-        hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
-      return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs);
-
-  // pow(exp(x), y) -> exp(x * y)
-  // pow(exp2(x), y) -> exp2(x * y)
-  // We enable these only with fast-math. Besides rounding differences, the
-  // transformation changes overflow and underflow behavior quite dramatically.
-  // Example: x = 1000, y = 0.001.
-  // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x * y) = exp(1).
-  auto *BaseFn = dyn_cast<CallInst>(Base);
-  if (BaseFn && BaseFn->isFast() && Pow->isFast()) {
-    LibFunc LibFn;
-    Function *CalleeFn = BaseFn->getCalledFunction();
-    if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
-        (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {
-      Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
-      return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B,
-                                  CalleeFn->getAttributes());
-    }
-  }
+  if (Value *Exp = replacePowWithExp(Pow, B))
+    return Exp;
 
   // Evaluate special cases related to the exponent.
 
Index: llvm/test/Transforms/InstCombine/pow-1.ll
===================================================================
--- llvm/test/Transforms/InstCombine/pow-1.ll
+++ llvm/test/Transforms/InstCombine/pow-1.ll
@@ -53,7 +53,7 @@
   ret <2 x double> %retval
 }
 
-; Check pow(2.0, x) -> exp2(x).
+; Check pow(2.0 ** n, x) -> exp2(n * x).
 
 define float @test_simplify3(float %x) {
 ; ANY-LABEL: @test_simplify3(
@@ -64,6 +64,16 @@
   ret float %retval
 }
 
+define double @test_simplify3a(double %x) {
+; ANY-LABEL: @test_simplify3a(
+; ANY-NEXT:    [[TMP1:%.*]] = fmul double [[X:%.*]], -2.000000e+00
+; ANY-NEXT:    [[EXP2:%.*]] = call double @llvm.exp2.f64(double [[TMP1]])
+; ANY-NEXT:    ret double [[EXP2]]
+;
+  %retval = call double @pow(double 0.25, double %x)
+  ret double %retval
+}
+
 define <2 x float> @test_simplify3v(<2 x float> %x) {
 ; ANY-LABEL: @test_simplify3v(
 ; ANY-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[X:%.*]])
@@ -73,6 +83,16 @@
   ret <2 x float> %retval
 }
 
+define <2 x double> @test_simplify3w(<2 x double> %x) {
+; ANY-LABEL: @test_simplify3w(
+; ANY-NEXT:    [[TMP1:%.*]] = fmul <2 x double> [[X:%.*]], <double 4.000000e+00, double 4.000000e+00>
+; ANY-NEXT:    [[EXP2:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP1]])
+; ANY-NEXT:    ret <2 x double> [[EXP2]]
+;
+  %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 16.0, double 16.0>, <2 x double> %x)
+  ret <2 x double> %retval
+}
+
 define double @test_simplify4(double %x) {
 ; ANY-LABEL: @test_simplify4(
 ; ANY-NEXT:    [[EXP2:%.*]] = call double @llvm.exp2.f64(double [[X:%.*]])
@@ -82,6 +102,16 @@
   ret double %retval
 }
 
+define float @test_simplify4a(float %x) {
+; ANY-LABEL: @test_simplify4a(
+; ANY-NEXT:    [[TMP1:%.*]] = fmul float [[X:%.*]], 3.000000e+00
+; ANY-NEXT:    [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[TMP1]])
+; ANY-NEXT:    ret float [[EXP2]]
+;
+  %retval = call float @powf(float 8.0, float %x)
+  ret float %retval
+}
+
 define <2 x double> @test_simplify4v(<2 x double> %x) {
 ; ANY-LABEL: @test_simplify4v(
 ; ANY-NEXT:    [[EXP2:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[X:%.*]])
@@ -91,6 +121,16 @@
   ret <2 x double> %retval
 }
 
+define <2 x float> @test_simplify4w(<2 x float> %x) {
+; ANY-LABEL: @test_simplify4w(
+; ANY-NEXT:    [[TMP1:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
+; ANY-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP1]])
+; ANY-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 0.5, float 0.5>, <2 x float> %x)
+  ret <2 x float> %retval
+}
+
 ; Check pow(x, 0.0) -> 1.0.
 
 define float @test_simplify5(float %x) {
@@ -307,7 +347,7 @@
 
 define float @test_simplify18(float %x) {
 ; CHECK-EXP10-LABEL: @test_simplify18(
-; CHECK-EXP10-NEXT:    [[__EXP10F:%.*]] = call float @__exp10f(float [[X:%.*]]) [[NUW_RO:#[0-9]+]]
+; CHECK-EXP10-NEXT:    [[__EXP10F:%.*]] = call float @__exp10f(float [[X:%.*]]) #0
 ; CHECK-EXP10-NEXT:    ret float [[__EXP10F]]
 ;
 ; CHECK-NO-EXP10-LABEL: @test_simplify18(
@@ -320,7 +360,7 @@
 
 define double @test_simplify19(double %x) {
 ; CHECK-EXP10-LABEL: @test_simplify19(
-; CHECK-EXP10-NEXT:    [[__EXP10:%.*]] = call double @__exp10(double [[X:%.*]]) [[NUW_RO]]
+; CHECK-EXP10-NEXT:    [[__EXP10:%.*]] = call double @__exp10(double [[X:%.*]]) #0
 ; CHECK-EXP10-NEXT:    ret double [[__EXP10]]
 ;
 ; CHECK-NO-EXP10-LABEL: @test_simplify19(
@@ -330,6 +370,3 @@
   %retval = call double @pow(double 10.0, double %x)
   ret double %retval
 }
-
-; CHECK-EXP10: attributes [[NUW_RO]] = { nounwind readonly }
-
Index: llvm/test/Transforms/InstCombine/pow-exp.ll
===================================================================
--- llvm/test/Transforms/InstCombine/pow-exp.ll
+++ llvm/test/Transforms/InstCombine/pow-exp.ll
@@ -1,21 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+define float @powf_expf(float %x, float %y) {
+; CHECK-LABEL: @powf_expf(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[EXP:%.*]] = call fast float @llvm.exp.f32(float [[MUL]])
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %call = call fast float @expf(float %x) nounwind readnone
+  %pow = call fast float @llvm.pow.f32(float %call, float %y)
+  ret float %pow
+}
+
 define double @pow_exp(double %x, double %y) {
 ; CHECK-LABEL: @pow_exp(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[EXP:%.*]] = call fast double @exp(double [[MUL]])
+; CHECK-NEXT:    [[EXP:%.*]] = call fast double @llvm.exp.f64(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP]]
 ;
   %call = call fast double @exp(double %x) nounwind readnone
   %pow = call fast double @llvm.pow.f64(double %call, double %y)
   ret double %pow
 }
 
+define float @powf_exp2f(float %x, float %y) {
+; CHECK-LABEL: @powf_exp2f(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %call = call fast float @exp2f(float %x) nounwind readnone
+  %pow = call fast float @llvm.pow.f32(float %call, float %y)
+  ret float %pow
+}
+
 define double @pow_exp2(double %x, double %y) {
 ; CHECK-LABEL: @pow_exp2(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = call fast double @llvm.exp2.f64(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
   %call = call fast double @exp2(double %x) nounwind readnone
@@ -23,6 +45,30 @@
   ret double %pow
 }
 
+; TODO: exp10() is not available on many targets, so these are not simplified.
+
+define float @powf_exp10f(float %x, float %y) {
+; CHECK-LABEL: @powf_exp10f(
+; CHECK-NEXT:    [[CALL:%.*]] = call fast float @exp10f(float [[X:%.*]])
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @llvm.pow.f32(float [[CALL]], float [[Y:%.*]])
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %call = call fast float @exp10f(float %x) nounwind readnone
+  %pow = call fast float @llvm.pow.f32(float %call, float %y)
+  ret float %pow
+}
+
+define double @pow_exp10(double %x, double %y) {
+; CHECK-LABEL: @pow_exp10(
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @exp10(double [[X:%.*]])
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @llvm.pow.f64(double [[CALL]], double [[Y:%.*]])
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %call = call fast double @exp10(double %x) nounwind readnone
+  %pow = call fast double @llvm.pow.f64(double %call, double %y)
+  ret double %pow
+}
+
 define double @pow_exp_not_fast(double %x, double %y) {
 ; CHECK-LABEL: @pow_exp_not_fast(
 ; CHECK-NEXT:    [[CALL:%.*]] = call double @exp(double [[X:%.*]])
@@ -45,7 +91,11 @@
   ret double %pow
 }
 
+declare float @expf(float)
 declare double @exp(double)
+declare float @exp2f(float)
 declare double @exp2(double)
+declare float @exp10f(float)
+declare double @exp10(double)
+declare float @llvm.pow.f32(float, float)
 declare double @llvm.pow.f64(double, double)
-
Index: llvm/test/Transforms/InstCombine/pow-sqrt.ll
===================================================================
--- llvm/test/Transforms/InstCombine/pow-sqrt.ll
+++ llvm/test/Transforms/InstCombine/pow-sqrt.ll
@@ -295,4 +295,3 @@
 
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind readnone }
-