Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -17,14 +17,13 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -36,6 +35,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" using namespace llvm; @@ -47,18 +47,17 @@ cl::desc("Enable unsafe double to float " "shrinking for math lib calls")); - //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// static bool ignoreCallingConv(LibFunc Func) { - return Func == LibFunc_abs || Func == LibFunc_labs || - Func == LibFunc_llabs || Func == LibFunc_strlen; + return Func == LibFunc_abs || Func == LibFunc_labs || Func == LibFunc_llabs || + Func == LibFunc_strlen; } static bool isCallingConvCCompatible(CallInst *CI) { - switch(CI->getCallingConv()) { + switch (CI->getCallingConv()) { default: return false; case llvm::CallingConv::C: @@ -108,9 +107,8 @@ } static bool callHasFP128Argument(const CallInst *CI) { - return any_of(CI->operands(), [](const Use &OI) { - return OI->getType()->isFP128Ty(); - }); + return any_of(CI->operands(), + [](const Use &OI) { return OI->getType()->isFP128Ty(); }); } static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) { @@ -223,8 +221,9 @@ // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(CpyDst, 1, Src, 1, - ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1)); + B.CreateMemCpy( + CpyDst, 1, Src, 1, + ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1)); return Dst; } @@ -559,7 +558,7 @@ KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr); Known.Zero.flipAllBits(); uint64_t ArrSize = - cast(GEP->getSourceElementType())->getNumElements(); + cast(GEP->getSourceElementType())->getNumElements(); // KnownZero's bits are flipped, so zeros in KnownZero now represent // bits known to be zeros in Offset, and ones in KnowZero represent @@ -1003,9 +1002,9 @@ B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); const DataLayout &DL = Malloc->getModule()->getDataLayout(); IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); - Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), - Malloc->getArgOperand(0), Malloc->getAttributes(), - B, *TLI); + Value *Calloc = + emitCalloc(ConstantInt::get(SizeType, 1), Malloc->getArgOperand(0), + Malloc->getAttributes(), B, *TLI); if (!Calloc) return nullptr; @@ -1037,7 +1036,8 @@ //===----------------------------------------------------------------------===// // Replace a libcall \p CI with a call to intrinsic \p IID -static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) { +static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, + Intrinsic::ID IID) { // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); @@ -1073,8 +1073,8 @@ } /// Shrink double -> float functions. -static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, - bool isBinary, bool isPrecise = false) { +static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, bool isBinary, + bool isPrecise = false) { Function *CalleeFn = CI->getCalledFunction(); if (!CI->getType()->isDoubleTy() || !CalleeFn) return nullptr; @@ -1108,8 +1108,7 @@ if (!CalleeIn) { const Function *Fn = CI->getFunction(); StringRef FnName = Fn->getName(); - if (FnName.back() == 'f' && - FnName.size() == (CalleeNm.size() + 1) && + if (FnName.back() == 'f' && FnName.size() == (CalleeNm.size() + 1) && FnName.startswith(CalleeNm)) return nullptr; } @@ -1125,8 +1124,7 @@ Intrinsic::ID IID = CalleeFn->getIntrinsicID(); Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]); - } - else + } else R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt) : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt); @@ -1263,8 +1261,8 @@ LibFunc LibFn; Function *CalleeFn = BaseFn->getCalledFunction(); - if (CalleeFn && - TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) { + if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && + TLI->has(LibFn)) { StringRef ExpName; Intrinsic::ID ID; Value *ExpFn; @@ -1275,14 +1273,18 @@ switch (LibFn) { default: return nullptr; - case LibFunc_expf: case LibFunc_exp: case LibFunc_expl: + case LibFunc_expf: + case LibFunc_exp: + case LibFunc_expl: ExpName = TLI->getName(LibFunc_exp); ID = Intrinsic::exp; LibFnFloat = LibFunc_expf; LibFnDouble = LibFunc_exp; LibFnLongDouble = LibFunc_expl; break; - case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l: + case LibFunc_exp2f: + case LibFunc_exp2: + case LibFunc_exp2l: ExpName = TLI->getName(LibFunc_exp2); ID = Intrinsic::exp2; LibFnFloat = LibFunc_exp2f; @@ -1294,11 +1296,11 @@ // Create new exp{,2}() with the product as its argument. Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); ExpFn = BaseFn->doesNotAccessMemory() - ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty), - FMul, ExpName) - : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat, - LibFnLongDouble, B, - BaseFn->getAttributes()); + ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty), FMul, + ExpName) + : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat, + LibFnLongDouble, B, + BaseFn->getAttributes()); // Since the new exp{,2}() is different from the original one, dead code // elimination cannot be trusted to remove it, since it may have side @@ -1322,12 +1324,12 @@ APFloat BaseR = APFloat(1.0); BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); BaseR = BaseR / *BaseF; - bool IsInteger = BaseF->isInteger(), - IsReciprocal = BaseR.isInteger(); + bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger(); const APFloat *NF = IsReciprocal ? &BaseR : BaseF; APSInt NI(64, false); if ((IsInteger || IsReciprocal) && - !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) && + NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK && NI > 1 && NI.isPowerOf2()) { double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0); Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul"); @@ -1410,12 +1412,22 @@ return Sqrt; } +static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M, + IRBuilder<> &B) { + Value *Args[] = {Base, Expo}; + Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType()); + return B.CreateCall(F, Args); +} + Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { - Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Value *Base = Pow->getArgOperand(0); + Value *Expo = Pow->getArgOperand(1); Function *Callee = Pow->getCalledFunction(); StringRef Name = Callee->getName(); Type *Ty = Pow->getType(); + Module *M = Pow->getModule(); Value *Shrunk = nullptr; + bool IsFast = Pow->isFast(); bool Ignored; // Bail out if simplifying libcalls to pow() is disabled. @@ -1428,8 +1440,8 @@ // Shrink pow() to powf() if the arguments are single precision, // unless the result is expected to be double precision. - if (UnsafeFPShrink && - Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) + if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && + hasFloatVersion(Name)) Shrunk = optimizeBinaryDoubleFP(Pow, B, true); // Evaluate special cases related to the base. @@ -1438,6 +1450,21 @@ if (match(Base, m_FPOne())) return Base; + // powf(x, sitofp(e)) -> powi(x, e) + // powf(x, uitofp(e)) -> powi(x, e) + if (IsFast && (isa(Expo) || isa(Expo))) { + Value *IntExpo = cast(Expo)->getOperand(0); + Value *NewExpo = nullptr; + unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits(); + if (BitWidth == 32) + NewExpo = IntExpo; + else if (BitWidth < 32) + NewExpo = isa(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty()) + : B.CreateZExt(IntExpo, B.getInt32Ty()); + if (NewExpo) + return createPowWithIntegerExponent(Base, NewExpo, M, B); + } + if (Value *Exp = replacePowWithExp(Pow, B)) return Exp; @@ -1449,7 +1476,7 @@ // pow(x, 0.0) -> 1.0 if (match(Expo, m_SpecificFP(0.0))) - return ConstantFP::get(Ty, 1.0); + return ConstantFP::get(Ty, 1.0); // pow(x, 1.0) -> x if (match(Expo, m_FPOne())) @@ -1462,17 +1489,19 @@ if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; + if (!IsFast) + return Shrunk; + // pow(x, n) -> x * x * x * ... const APFloat *ExpoF; - if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) { + if (match(Expo, m_APFloat(ExpoF))) { // We limit to a max of 7 multiplications, thus the maximum exponent is 32. // If the exponent is an integer+0.5 we generate a call to sqrt and an // additional fmul. // TODO: This whole transformation should be backend specific (e.g. some // backends might prefer libcalls or the limit for the exponent might // be different) and it should also consider optimizing for size. - APFloat LimF(ExpoF->getSemantics(), 33.0), - ExpoA(abs(*ExpoF)); + APFloat LimF(ExpoF->getSemantics(), 33.0), ExpoA(abs(*ExpoF)); if (ExpoA.compare(LimF) == APFloat::cmpLessThan) { // This transformation applies to integer or integer+0.5 exponents only. // For integer+0.5, we create a sqrt(Base) call. @@ -1488,9 +1517,8 @@ if (!Expo2.isInteger()) return nullptr; - Sqrt = - getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI); + Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), + Pow->doesNotAccessMemory(), M, B, TLI); } // We will memoize intermediate products of the Addition Chain. @@ -1513,6 +1541,14 @@ return FMul; } + + APSInt IntExpo(32, /*isUnsigned=*/false); + // powf(x, C) -> powi(x, C) iff C is a constant signed integer value + if (ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK) { + return createPowWithIntegerExponent( + Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B); + } } return Shrunk; @@ -1594,8 +1630,8 @@ // exceptions, because fmin/fmax do not have those. Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); - Value *Cmp = Callee->getName().startswith("fmin") ? - B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1); + Value *Cmp = Callee->getName().startswith("fmin") ? B.CreateFCmpOLT(Op0, Op1) + : B.CreateFCmpOGT(Op0, Op1); return B.CreateSelect(Cmp, Op0, Op1); } @@ -1628,10 +1664,13 @@ LibFunc Func; Function *F = OpC->getCalledFunction(); if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && - Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow)) + Func == LibFunc_pow) || + F->getIntrinsicID() == Intrinsic::pow)) return B.CreateFMul(OpC->getArgOperand(1), - emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, - Callee->getAttributes()), "mul"); + emitUnaryFloatFnCall(OpC->getOperand(0), + Callee->getName(), B, + Callee->getAttributes()), + "mul"); // log(exp2(y)) -> y*log(2) if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) && @@ -2079,13 +2118,12 @@ return New; } - // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point - // arguments. + // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating + // point arguments. if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - auto SmallPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_printf), - FT, Callee->getAttributes()); + auto SmallPrintFFn = M->getOrInsertFunction( + TLI->getName(LibFunc_small_printf), FT, Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(SmallPrintFFn); B.Insert(New); @@ -2109,9 +2147,10 @@ return nullptr; // we found a format specifier, bail out. // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1) - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - FormatStr.size() + 1)); // Copy the null byte. + B.CreateMemCpy( + CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1)); // Copy the null byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -2173,13 +2212,12 @@ return New; } - // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit - // floating point arguments. + // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no + // 128-bit floating point arguments. if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - auto SmallSPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf), - FT, Callee->getAttributes()); + auto SmallSPrintFFn = M->getOrInsertFunction( + TLI->getName(LibFunc_small_sprintf), FT, Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(SmallSPrintFFn); B.Insert(New); @@ -2348,9 +2386,8 @@ // 128-bit floating point arguments. if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - auto SmallFPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf), - FT, Callee->getAttributes()); + auto SmallFPrintFFn = M->getOrInsertFunction( + TLI->getName(LibFunc_small_fprintf), FT, Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(SmallFPrintFFn); B.Insert(New); @@ -2486,9 +2523,9 @@ // Check for string/memory library functions. if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { // Make sure we never change the calling convention. - assert((ignoreCallingConv(Func) || - isCallingConvCCompatible(CI)) && - "Optimizing string/memory libcall would change the calling convention"); + assert( + (ignoreCallingConv(Func) || isCallingConvCCompatible(CI)) && + "Optimizing string/memory libcall would change the calling convention"); switch (Func) { case LibFunc_strcat: return optimizeStrCat(CI, Builder); @@ -2774,8 +2811,8 @@ LibCallSimplifier::LibCallSimplifier( const DataLayout &DL, const TargetLibraryInfo *TLI, - OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, function_ref Replacer, function_ref Eraser) : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI), @@ -2786,9 +2823,7 @@ Replacer(I, With); } -void LibCallSimplifier::eraseFromParent(Instruction *I) { - Eraser(I); -} +void LibCallSimplifier::eraseFromParent(Instruction *I) { Eraser(I); } // TODO: // Additional cases that we need to add to this file: @@ -2825,12 +2860,9 @@ // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// -bool -FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, - unsigned ObjSizeOp, - Optional SizeOp, - Optional StrOp, - Optional FlagOp) { +bool FortifiedLibCallSimplifier::isFortifiedCallFoldable( + CallInst *CI, unsigned ObjSizeOp, Optional SizeOp, + Optional StrOp, Optional FlagOp) { // If this function takes a flag argument, the implementation may use it to // perform extra checks. Don't fold into the non-checking variant. if (FlagOp) { @@ -2948,7 +2980,7 @@ if (isFortifiedCallFoldable(CI, 3, 2)) { if (Func == LibFunc_strncpy_chk) return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + CI->getArgOperand(2), B, TLI); else return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, TLI); @@ -3111,4 +3143,4 @@ FortifiedLibCallSimplifier::FortifiedLibCallSimplifier( const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize) - : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} + : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} \ No newline at end of file Index: test/Transforms/InstCombine/pow_fp_int.ll =================================================================== --- test/Transforms/InstCombine/pow_fp_int.ll +++ test/Transforms/InstCombine/pow_fp_int.ll @@ -5,9 +5,8 @@ define double @pow_sitofp_const_base_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast( -; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) -; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float @@ -16,12 +15,22 @@ ret double %res } +define double @pow_uitofp_const_base_fast(i32 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_fast( +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %powi to double + ret double %res +} + define double @pow_sitofp_const_base_power_of_2_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_fast( -; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) -; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 1.600000e+01, i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float @@ -30,11 +39,22 @@ ret double %res } +define double @pow_uitofp_const_base_power_of_2_fast(i32 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast( +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32(float 1.600000e+01, i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %powi = tail call fast float @llvm.pow.f32(float 16.000000e+00, float %subfp) + %res = fpext float %powi to double + ret double %res +} + define double @pow_sitofp_float_base_fast(float %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_float_base_fast( -; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]]) -; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32(float [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; %subfp = sitofp i32 %x to float @@ -43,26 +63,101 @@ ret double %res } +define double @pow_uitofp_float_base_fast(float %base, i32 %x) { +; CHECK-LABEL: @pow_uitofp_float_base_fast( +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32(float [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %powi = tail call fast float @llvm.pow.f32(float %base, float %subfp) + %res = fpext float %powi to double + ret double %res +} + define double @pow_sitofp_double_base_fast(double %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_base_fast( -; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to double -; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]]) -; CHECK-NEXT: ret double [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i32 %x to double %res = tail call fast double @llvm.pow.f64(double %base, double %subfp) ret double %res } +define double @pow_uitofp_double_base_fast(double %base, i32 %x) { +; CHECK-LABEL: @pow_uitofp_double_base_fast( +; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret double [[TMP1]] +; + %subfp = uitofp i32 %x to double + %res = tail call fast double @llvm.pow.f64(double %base, double %subfp) + ret double %res +} + +define double @pow_sitofp_const_base_fast_i8(i8 %x) { +; CHECK-LABEL: @pow_sitofp_const_base_fast_i8( +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = sitofp i8 %x to float + %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %powi to double + ret double %res +} + +define double @pow_uitofp_const_base_fast_i8(i8 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_fast_i8( +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.powi.f32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i8 %x to float + %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %powi to double + ret double %res +} + define double @powf_exp_const_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_fast( -; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01) -; CHECK-NEXT: ret double [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[BASE:%.*]], i32 40) +; CHECK-NEXT: ret double [[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01) ret double %res } +; Negative tests + +define double @pow_sitofp_const_base_fast_i64(i64 %x) { +; CHECK-LABEL: @pow_sitofp_const_base_fast_i64( +; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i64 [[X:%.*]] to float +; CHECK-NEXT: [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = sitofp i64 %x to float + %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %powi to double + ret double %res +} + +define double @pow_uitofp_const_base_fast_i64(i64 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_fast_i64( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i64 [[X:%.*]] to float +; CHECK-NEXT: [[POWI:%.*]] = tail call fast float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i64 %x to float + %powi = tail call fast float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %powi to double + ret double %res +} + define double @pow_sitofp_const_base_no_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float @@ -76,6 +171,19 @@ ret double %res } +define double @pow_uitofp_const_base_no_fast(i32 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_no_fast( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[POWI:%.*]] = tail call float @llvm.pow.f32(float 7.000000e+00, float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %powi = tail call float @llvm.pow.f32(float 7.000000e+00, float %subfp) + %res = fpext float %powi to double + ret double %res +} + define double @pow_sitofp_const_base_power_of_2_no_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float @@ -90,6 +198,20 @@ ret double %res } +define double @pow_uitofp_const_base_power_of_2_no_fast(i32 %x) { +; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 +; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %powi = tail call float @llvm.pow.f32(float 16.000000e+00, float %subfp) + %res = fpext float %powi to double + ret double %res +} + define double @pow_sitofp_float_base_no_fast(float %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_float_base_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float @@ -103,6 +225,19 @@ ret double %res } +define double @pow_uitofp_float_base_no_fast(float %base, i32 %x) { +; CHECK-LABEL: @pow_uitofp_float_base_no_fast( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float +; CHECK-NEXT: [[POWI:%.*]] = tail call float @llvm.pow.f32(float [[BASE:%.*]], float [[SUBFP]]) +; CHECK-NEXT: [[RES:%.*]] = fpext float [[POWI]] to double +; CHECK-NEXT: ret double [[RES]] +; + %subfp = uitofp i32 %x to float + %powi = tail call float @llvm.pow.f32(float %base, float %subfp) + %res = fpext float %powi to double + ret double %res +} + define double @pow_sitofp_double_base_no_fast(double %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_base_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to double @@ -114,6 +249,17 @@ ret double %powi } +define double @pow_uitofp_double_base_no_fast(double %base, i32 %x) { +; CHECK-LABEL: @pow_uitofp_double_base_no_fast( +; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to double +; CHECK-NEXT: [[POWI:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double [[SUBFP]]) +; CHECK-NEXT: ret double [[POWI]] +; + %subfp = uitofp i32 %x to double + %powi = tail call double @llvm.pow.f64(double %base, double %subfp) + ret double %powi +} + define double @powf_exp_const_int_no_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_no_fast( ; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01)