diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2472,6 +2472,12 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { if (!CI->getCalledFunction()) return nullptr; + // Skip optimizing notail and musttail calls so + // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants. + // LibCallSimplifier::optimizeCall should try to preserve tail calls though. + if (CI->isMustTailCall() || CI->isNoTailCall()) + return nullptr; + auto InstCombineRAUW = [this](Instruction *From, Value *With) { replaceInstUsesWith(*From, With); }; diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -193,6 +193,19 @@ } } +// Copy CallInst "flags" like musttail, notail, and tail. Return New param for +// easier chaining. Calls to emit* and B.CreateCall should probably be wrapped +// in this function when New is created to replace Old. Callers should take +// care to check Old.isMustTailCall() if they aren't replacing Old directly +// with New. +static Value *copyFlags(const CallInst &Old, Value *New) { + assert(!Old.isMustTailCall() && "do not copy musttail call flags"); + assert(!Old.isNoTailCall() && "do not copy notail call flags"); + if (auto *NewCI = dyn_cast_or_null<CallInst>(New)) + NewCI->setTailCallKind(Old.getTailCallKind()); + return New; +} + //===----------------------------------------------------------------------===// // String and Memory Library Call Optimizations //===----------------------------------------------------------------------===// @@ -215,7 +228,7 @@ if (Len == 0) return Dst; - return emitStrLenMemCpy(Src, Dst, Len, B); + return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B)); } Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, @@ -279,7 +292,7 @@ // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further. - return emitStrLenMemCpy(Src, Dst, SrcLen, B); + return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B)); } Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) { @@ -300,9 +313,11 @@ if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. return nullptr; - return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul. - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), - B, DL, TLI); + return copyFlags( + *CI, + emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), B, + DL, TLI)); } // Otherwise, the character is a constant, see if the first argument is @@ -340,7 +355,7 @@ if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (CharC->isZero()) - return emitStrChr(SrcStr, '\0', B, TLI); + return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI)); return nullptr; } @@ -385,25 +400,28 @@ annotateDereferenceableBytes(CI, 1, Len2); if (Len1 && Len2) { - return emitMemCmp(Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - std::min(Len1, Len2)), - B, DL, TLI); + return copyFlags( + *CI, emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + std::min(Len1, Len2)), + B, DL, TLI)); } // strcmp to memcmp if (!HasStr1 && HasStr2) { if (canTransformToMemCmp(CI, Str1P, Len2, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL, - TLI); + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), + B, DL, TLI)); } else if (HasStr1 && !HasStr2) { if (canTransformToMemCmp(CI, Str2P, Len1, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL, - TLI); + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), + B, DL, TLI)); } annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); @@ -430,7 +448,7 @@ return ConstantInt::get(CI->getType(), 0); if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI); + return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI)); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -462,17 +480,19 @@ if (!HasStr1 && HasStr2) { Len2 = std::min(Len2, Length); if (canTransformToMemCmp(CI, Str1P, Len2, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL, - TLI); + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), + B, DL, TLI)); } else if (HasStr1 && !HasStr2) { Len1 = std::min(Len1, Length); if (canTransformToMemCmp(CI, Str2P, Len1, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL, - TLI); + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), + B, DL, TLI)); } return nullptr; @@ -485,7 +505,7 @@ if (SrcLen && Size) { annotateDereferenceableBytes(CI, 0, SrcLen); if (SrcLen <= Size->getZExtValue() + 1) - return emitStrDup(Src, B, TLI); + return copyFlags(*CI, emitStrDup(Src, B, TLI)); } return nullptr; @@ -495,7 +515,7 @@ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; - + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); @@ -511,6 +531,7 @@ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return Dst; } @@ -520,7 +541,7 @@ // stpcpy(d,s) -> strcpy(d,s) if the result is not used. 
if (CI->use_empty()) - return emitStrCpy(Dst, Src, B, TLI); + return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI)); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) Value *StrLen = emitStrLen(Src, B, DL, TLI); @@ -544,6 +565,7 @@ CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return DstEnd; } @@ -583,6 +605,7 @@ AttrBuilder ArgAttrs(CI->getAttributes().getParamAttrs(0)); NewCI->setAttributes(NewCI->getAttributes().addParamAttributes( CI->getContext(), 0, ArgAttrs)); + copyFlags(*CI, NewCI); return Dst; } @@ -606,6 +629,7 @@ ConstantInt::get(DL.getIntPtrType(PT), Len)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return Dst; } @@ -737,7 +761,7 @@ // strpbrk(s, "a") -> strchr(s, 'a') if (HasS2 && S2.size() == 1) - return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI); + return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI)); return nullptr; } @@ -793,7 +817,7 @@ // strcspn(s, "") -> strlen(s) if (HasS2 && S2.empty()) - return emitStrLen(CI->getArgOperand(0), B, DL, TLI); + return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI)); return nullptr; } @@ -1062,7 +1086,7 @@ Value *LHS = CI->getArgOperand(0); Value *RHS = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); - return emitBCmp(LHS, RHS, Size, B, DL, TLI); + return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI)); } return nullptr; @@ -1083,6 +1107,7 @@ CI->getArgOperand(1), Align(1), Size); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } @@ -1110,7 +1135,8 @@ size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF); if (Pos == StringRef::npos) { if (N->getZExtValue() <= SrcStr.size()) { - B.CreateMemCpy(Dst, Align(1), Src, Align(1), CI->getArgOperand(3)); + copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), + CI->getArgOperand(3))); return Constant::getNullValue(CI->getType()); } return nullptr; @@ -1119,7 +1145,7 @@ Value *NewN = ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue())); // memccpy -> llvm.memcpy - B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN); + copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN)); return Pos + 1 <= N->getZExtValue() ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN) : Constant::getNullValue(CI->getType()); @@ -1136,6 +1162,7 @@ // TODO: Attach return value attributes to the 1st operand to preserve them? 
NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N); } @@ -1150,6 +1177,7 @@ CI->getArgOperand(1), Align(1), Size); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } @@ -1164,12 +1192,13 @@ CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) { if (isa<ConstantPointerNull>(CI->getArgOperand(0))) - return emitMalloc(CI->getArgOperand(1), B, DL, TLI); + return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI)); return nullptr; } @@ -1190,7 +1219,7 @@ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType()); CallInst *NewCall = B.CreateCall(F, V); NewCall->takeName(CI); - return NewCall; + return copyFlags(*CI, NewCall); } /// Return a variant of Val with float type. @@ -1311,7 +1340,8 @@ Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt, CI->getType()); - return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"); + return copyFlags( + *CI, B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs")); } static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func, @@ -1334,14 +1364,16 @@ // sin(-X) --> -sin(X) // tan(-X) --> -tan(X) if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) - return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X)); + return B.CreateFNeg( + copyFlags(*Call, B.CreateCall(Call->getCalledFunction(), X))); break; case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: // cos(-X) --> cos(X) if (match(Call->getArgOperand(0), m_FNeg(m_Value(X)))) - return B.CreateCall(Call->getCalledFunction(), X, "cos"); + return copyFlags(*Call, + B.CreateCall(Call->getCalledFunction(), X, "cos")); break; default: break; @@ -1476,9 +1508,10 @@ (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) && hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) - return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI, - LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, - B, Attrs); + return copyFlags(*Pow, + emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, + TLI, LibFunc_ldexp, LibFunc_ldexpf, + LibFunc_ldexpl, B, Attrs)); } // pow(2.0 ** n, x) -> exp2(n * x) @@ -1496,11 +1529,13 @@ double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0); Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul"); if (Pow->doesNotAccessMemory()) - return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), - FMul, "exp2"); + return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration( + Mod, Intrinsic::exp2, Ty), + FMul, "exp2")); else - return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs); + return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, + LibFunc_exp2f, + LibFunc_exp2l, B, Attrs)); } } @@ -1508,8 +1543,9 @@ // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
if (match(Base, m_SpecificFP(10.0)) && hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) - return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, - LibFunc_exp10l, B, Attrs); + return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, + LibFunc_exp10f, LibFunc_exp10l, + B, Attrs)); // pow(x, y) -> exp2(log2(x) * y) if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() && @@ -1528,11 +1564,13 @@ if (Log) { Value *FMul = B.CreateFMul(Log, Expo, "mul"); if (Pow->doesNotAccessMemory()) - return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), - FMul, "exp2"); + return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration( + Mod, Intrinsic::exp2, Ty), + FMul, "exp2")); else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) - return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs); + return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, + LibFunc_exp2f, + LibFunc_exp2l, B, Attrs)); } } @@ -1595,6 +1633,8 @@ Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs"); } + Sqrt = copyFlags(*Pow, Sqrt); + // Handle non finite base by expanding to // (x == -infinity ? +infinity : sqrt(x)). if (!Pow->hasNoInfs()) { @@ -1721,15 +1761,18 @@ if (ExpoF->isInteger() && ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == APFloat::opOK) { - return createPowWithIntegerExponent( - Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B); + return copyFlags( + *Pow, + createPowWithIntegerExponent( + Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), + M, B)); } } // powf(x, itofp(y)) -> powi(x, y) if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) { if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) - return createPowWithIntegerExponent(Base, ExpoI, M, B); + return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B)); } // Shrink pow() to powf() if the arguments are single precision, @@ -1792,7 +1835,8 @@ Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum : Intrinsic::maxnum; Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType()); - return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) }); + return copyFlags( + *CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)})); } Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { @@ -2010,9 +2054,9 @@ // of the square root calculation. Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); - return B.CreateFMul(FabsCall, SqrtCall); + return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall)); } - return FabsCall; + return copyFlags(*CI, FabsCall); } // TODO: Generalize to handle any trig function and its inverse. @@ -2327,7 +2371,7 @@ // printf("x") -> putchar('x'), even for "%" and "%%". if (FormatStr.size() == 1 || FormatStr == "%%") - return emitPutChar(B.getInt32(FormatStr[0]), B, TLI); + return copyFlags(*CI, emitPutChar(B.getInt32(FormatStr[0]), B, TLI)); // Try to remove call or emit putchar/puts.
if (FormatStr == "%s" && CI->arg_size() > 1) { @@ -2339,12 +2383,12 @@ return (Value *)CI; // printf("%s", "a") --> putchar('a') if (OperandStr.size() == 1) - return emitPutChar(B.getInt32(OperandStr[0]), B, TLI); + return copyFlags(*CI, emitPutChar(B.getInt32(OperandStr[0]), B, TLI)); // printf("%s", str"\n") --> puts(str) if (OperandStr.back() == '\n') { OperandStr = OperandStr.drop_back(); Value *GV = B.CreateGlobalString(OperandStr, "str"); - return emitPutS(GV, B, TLI); + return copyFlags(*CI, emitPutS(GV, B, TLI)); } return nullptr; } @@ -2356,19 +2400,19 @@ // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - return emitPutS(GV, B, TLI); + return copyFlags(*CI, emitPutS(GV, B, TLI)); } // Optimize specific format strings. // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->arg_size() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) - return emitPutChar(CI->getArgOperand(1), B, TLI); + return copyFlags(*CI, emitPutChar(CI->getArgOperand(1), B, TLI)); // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->arg_size() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) - return emitPutS(CI->getArgOperand(1), B, TLI); + return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI)); return nullptr; } @@ -2459,7 +2503,7 @@ if (CI->use_empty()) // sprintf(dest, "%s", str) -> strcpy(dest, str) - return emitStrCpy(Dest, CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI)); uint64_t SrcLen = GetStringLength(CI->getArgOperand(2)); if (SrcLen) { @@ -2558,10 +2602,12 @@ // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt, // strlen(fmt)+1) - B.CreateMemCpy( - CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - FormatStr.size() + 1)); // Copy the null byte. + copyFlags( + *CI, + B.CreateMemCpy( + CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1))); // Copy the null byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -2599,8 +2645,10 @@ else if (N < Str.size() + 1) return nullptr; - B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(3), - Align(1), ConstantInt::get(CI->getType(), Str.size() + 1)); + copyFlags( + *CI, B.CreateMemCpy(CI->getArgOperand(0), Align(1), + CI->getArgOperand(3), Align(1), + ConstantInt::get(CI->getType(), Str.size() + 1))); // The snprintf result is the unincremented number of bytes in the string. return ConstantInt::get(CI->getType(), Str.size()); @@ -2640,10 +2688,11 @@ if (FormatStr.contains('%')) return nullptr; // We found a format specifier. 
- return emitFWrite( - CI->getArgOperand(1), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()), - CI->getArgOperand(0), B, DL, TLI); + return copyFlags( + *CI, emitFWrite(CI->getArgOperand(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size()), + CI->getArgOperand(0), B, DL, TLI)); } // The remaining optimizations require the format string to be "%s" or "%c" @@ -2656,14 +2705,16 @@ // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; - return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); + return copyFlags( + *CI, emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI)); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); + return copyFlags( + *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI)); } return nullptr; } @@ -2750,10 +2801,11 @@ return nullptr; // Known to have no uses (see above). - return emitFWrite( - CI->getArgOperand(0), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), - CI->getArgOperand(1), B, DL, TLI); + return copyFlags( + *CI, + emitFWrite(CI->getArgOperand(0), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), + CI->getArgOperand(1), B, DL, TLI)); } Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) { @@ -2765,15 +2817,16 @@ // puts("") -> putchar('\n') StringRef Str; if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) - return emitPutChar(B.getInt32('\n'), B, TLI); + return copyFlags(*CI, emitPutChar(B.getInt32('\n'), B, TLI)); return nullptr; } Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) { // bcopy(src, dst, n) -> llvm.memmove(dst, src, n) - return B.CreateMemMove(CI->getArgOperand(1), Align(1), CI->getArgOperand(0), - Align(1), CI->getArgOperand(2)); + return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1), + CI->getArgOperand(0), Align(1), + CI->getArgOperand(2))); } bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { @@ -2971,6 +3024,8 @@ } Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { + assert(!CI->isMustTailCall() && "These transforms aren't musttail safe."); + // TODO: Split out the code below that operates on FP calls so that // we can allow all non-FP calls with the StrictFP attribute to be // optimized. @@ -3212,6 +3267,7 @@ Align(1), CI->getArgOperand(2)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } return nullptr; @@ -3225,6 +3281,7 @@ Align(1), CI->getArgOperand(2)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } return nullptr; @@ -3238,6 +3295,7 @@ CI->getArgOperand(2), Align(1)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } return nullptr; @@ -3252,7 +3310,7 @@ CallInst *NewCI = cast<CallInst>(Call); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); - return NewCI; + return copyFlags(*CI, NewCI); } return nullptr; } @@ -3277,9 +3335,9 @@ // string lengths for varying.
if (isFortifiedCallFoldable(CI, 2, None, 1)) { if (Func == LibFunc_strcpy_chk) - return emitStrCpy(Dst, Src, B, TLI); + return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI)); else - return emitStpCpy(Dst, Src, B, TLI); + return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI)); } if (OnlyLowerUnknownSize) @@ -3303,14 +3361,14 @@ // a __memcpy_chk, we still need to return the correct end pointer. if (Ret && Func == LibFunc_stpcpy_chk) return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1)); - return Ret; + return copyFlags(*CI, cast<CallInst>(Ret)); } Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 1, None, 0)) - return emitStrLen(CI->getArgOperand(0), B, CI->getModule()->getDataLayout(), - TLI); + return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, + CI->getModule()->getDataLayout(), TLI)); return nullptr; } @@ -3319,11 +3377,13 @@ LibFunc Func) { if (isFortifiedCallFoldable(CI, 3, 2)) { if (Func == LibFunc_strncpy_chk) - return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); else - return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); } return nullptr; @@ -3332,8 +3392,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 4, 3)) - return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), CI->getArgOperand(3), B, TLI); + return copyFlags( + *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(3), B, TLI)); return nullptr; } @@ -3342,8 +3403,9 @@ IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) { SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5)); - return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(4), VariadicArgs, B, TLI); + return copyFlags(*CI, + emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), VariadicArgs, B, TLI)); } return nullptr; @@ -3353,8 +3415,9 @@ IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 2, None, None, 1)) { SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4)); - return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs, - B, TLI); + return copyFlags(*CI, + emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), + VariadicArgs, B, TLI)); } return nullptr; @@ -3363,7 +3426,8 @@ Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 2)) - return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI); + return copyFlags( + *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI)); return nullptr; } @@ -3371,8 +3435,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3)) - return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); return nullptr; } @@ -3380,8 +3445,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3)) - return emitStrNCat(CI->getArgOperand(0),
CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); return nullptr; } @@ -3389,8 +3455,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3)) - return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); return nullptr; } @@ -3398,8 +3465,9 @@ Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) - return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(4), CI->getArgOperand(5), B, TLI); + return copyFlags( + *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), CI->getArgOperand(5), B, TLI)); return nullptr; } @@ -3407,8 +3475,9 @@ Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 2, None, None, 1)) - return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), - CI->getArgOperand(4), B, TLI); + return copyFlags(*CI, + emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), + CI->getArgOperand(4), B, TLI)); return nullptr; } diff --git a/llvm/test/CodeGen/X86/memset-nonzero.ll b/llvm/test/CodeGen/X86/memset-nonzero.ll --- a/llvm/test/CodeGen/X86/memset-nonzero.ll +++ b/llvm/test/CodeGen/X86/memset-nonzero.ll @@ -196,14 +196,9 @@ define void @memset_256_nonzero_bytes(i8* %x) { ; SSE-LABEL: memset_256_nonzero_bytes: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rax -; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movl $256, %edx # imm = 0x100 ; SSE-NEXT: movl $42, %esi -; SSE-NEXT: callq memset@PLT -; SSE-NEXT: popq %rax -; SSE-NEXT: .cfi_def_cfa_offset 8 -; SSE-NEXT: retq +; SSE-NEXT: jmp memset@PLT # TAILCALL ; ; SSE2FAST-LABEL: memset_256_nonzero_bytes: ; SSE2FAST: # %bb.0: diff --git a/llvm/test/Transforms/InstCombine/cabs-array.ll b/llvm/test/Transforms/InstCombine/cabs-array.ll --- a/llvm/test/Transforms/InstCombine/cabs-array.ll +++ b/llvm/test/Transforms/InstCombine/cabs-array.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[REAL]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[IMAG]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast double @llvm.sqrt.f64(double [[TMP3]]) ; CHECK-NEXT: ret double [[CABS]] ; %call = tail call fast double @cabs([2 x double] %z) @@ -49,7 +49,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[REAL]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[IMAG]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast float @llvm.sqrt.f32(float [[TMP3]]) ; CHECK-NEXT: ret float [[CABS]] ; %call = tail call fast float @cabsf([2 x float] %z) @@ -63,7 +63,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast fp128 [[REAL]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast fp128 [[IMAG]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast fp128 @llvm.sqrt.f128(fp128 
[[TMP3]]) ; CHECK-NEXT: ret fp128 [[CABS]] ; %call = tail call fast fp128 @cabsl([2 x fp128] %z) diff --git a/llvm/test/Transforms/InstCombine/cabs-discrete.ll b/llvm/test/Transforms/InstCombine/cabs-discrete.ll --- a/llvm/test/Transforms/InstCombine/cabs-discrete.ll +++ b/llvm/test/Transforms/InstCombine/cabs-discrete.ll @@ -33,7 +33,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[REAL:%.*]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[IMAG:%.*]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast double @llvm.sqrt.f64(double [[TMP3]]) ; CHECK-NEXT: ret double [[CABS]] ; %call = tail call fast double @cabs(double %real, double %imag) @@ -45,7 +45,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[REAL:%.*]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[IMAG:%.*]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast float @llvm.sqrt.f32(float [[TMP3]]) ; CHECK-NEXT: ret float [[CABS]] ; %call = tail call fast float @cabsf(float %real, float %imag) @@ -57,7 +57,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast fp128 [[REAL:%.*]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast fp128 [[IMAG:%.*]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) ; CHECK-NEXT: ret fp128 [[CABS]] ; %call = tail call fast fp128 @cabsl(fp128 %real, fp128 %imag) diff --git a/llvm/test/Transforms/InstCombine/cos-1.ll b/llvm/test/Transforms/InstCombine/cos-1.ll --- a/llvm/test/Transforms/InstCombine/cos-1.ll +++ b/llvm/test/Transforms/InstCombine/cos-1.ll @@ -29,6 +29,27 @@ ret double %r } +define double @cos_negated_arg_tail(double %x) { +; ANY-LABEL: @cos_negated_arg_tail( +; ANY-NEXT: [[COS:%.*]] = tail call double @cos(double [[X:%.*]]) +; ANY-NEXT: ret double [[COS]] +; + %neg = fsub double -0.0, %x + %r = tail call double @cos(double %neg) + ret double %r +} + +define double @cos_negated_arg_musttail(double %x) { +; ANY-LABEL: @cos_negated_arg_musttail( +; ANY-NEXT: [[NEG:%.*]] = fneg double [[X:%.*]] +; ANY-NEXT: [[R:%.*]] = musttail call double @cos(double [[NEG]]) +; ANY-NEXT: ret double [[R]] +; + %neg = fsub double -0.0, %x + %r = musttail call double @cos(double %neg) + ret double %r +} + define double @cos_unary_negated_arg(double %x) { ; ANY-LABEL: @cos_unary_negated_arg( ; ANY-NEXT: [[COS:%.*]] = call double @cos(double [[X:%.*]]) @@ -103,6 +124,17 @@ ret double %r } +define double @sin_unary_negated_arg_musttail(double %x) { +; ANY-LABEL: @sin_unary_negated_arg_musttail( +; ANY-NEXT: [[NEG:%.*]] = fneg double [[X:%.*]] +; ANY-NEXT: [[R:%.*]] = musttail call double @sin(double [[NEG]]) +; ANY-NEXT: ret double [[R]] +; + %neg = fneg double %x + %r = musttail call double @sin(double %neg) + ret double %r +} + define float @sinf_negated_arg(float %x) { ; ANY-LABEL: @sinf_negated_arg( ; ANY-NEXT: [[TMP1:%.*]] = call float @sinf(float [[X:%.*]]) @@ -235,6 +267,27 @@ ret double %r } +define double @tan_negated_arg_tail(double %x) { +; ANY-LABEL: @tan_negated_arg_tail( +; ANY-NEXT: [[TMP1:%.*]] = tail call double @tan(double [[X:%.*]]) +; ANY-NEXT: [[TMP2:%.*]] = fneg double [[TMP1]] +; ANY-NEXT: ret double [[TMP2]] 
+; + %neg = fsub double -0.0, %x + %r = tail call double @tan(double %neg) + ret double %r +} +define double @tan_negated_arg_musttail(double %x) { +; ANY-LABEL: @tan_negated_arg_musttail( +; ANY-NEXT: [[NEG:%.*]] = fneg double [[X:%.*]] +; ANY-NEXT: [[R:%.*]] = musttail call double @tan(double [[NEG]]) +; ANY-NEXT: ret double [[R]] +; + %neg = fsub double -0.0, %x + %r = musttail call double @tan(double %neg) + ret double %r +} + define double @tan_unary_negated_arg(double %x) { ; ANY-LABEL: @tan_unary_negated_arg( ; ANY-NEXT: [[TMP1:%.*]] = call double @tan(double [[X:%.*]]) diff --git a/llvm/test/Transforms/InstCombine/fabs-libcall.ll b/llvm/test/Transforms/InstCombine/fabs-libcall.ll --- a/llvm/test/Transforms/InstCombine/fabs-libcall.ll +++ b/llvm/test/Transforms/InstCombine/fabs-libcall.ll @@ -5,7 +5,7 @@ define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) { ; CHECK-LABEL: @replace_fabs_call_f80( -; CHECK-NEXT: [[FABSL:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) +; CHECK-NEXT: [[FABSL:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) ; CHECK-NEXT: ret x86_fp80 [[FABSL]] ; %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x) @@ -14,7 +14,7 @@ define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) { ; CHECK-LABEL: @fmf_replace_fabs_call_f80( -; CHECK-NEXT: [[FABSL:%.*]] = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) +; CHECK-NEXT: [[FABSL:%.*]] = tail call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) ; CHECK-NEXT: ret x86_fp80 [[FABSL]] ; %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x) diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll --- a/llvm/test/Transforms/InstCombine/fabs.ll +++ b/llvm/test/Transforms/InstCombine/fabs.ll @@ -18,7 +18,7 @@ define float @replace_fabs_call_f32(float %x) { ; CHECK-LABEL: @replace_fabs_call_f32( -; CHECK-NEXT: [[FABSF:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[FABSF:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]]) ; CHECK-NEXT: ret float [[FABSF]] ; %fabsf = tail call float @fabsf(float %x) @@ -27,7 +27,7 @@ define double @replace_fabs_call_f64(double %x) { ; CHECK-LABEL: @replace_fabs_call_f64( -; CHECK-NEXT: [[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT: ret double [[FABS]] ; %fabs = tail call double @fabs(double %x) @@ -36,7 +36,7 @@ define fp128 @replace_fabs_call_f128(fp128 %x) { ; CHECK-LABEL: @replace_fabs_call_f128( -; CHECK-NEXT: [[FABSL:%.*]] = call fp128 @llvm.fabs.f128(fp128 [[X:%.*]]) +; CHECK-NEXT: [[FABSL:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[X:%.*]]) ; CHECK-NEXT: ret fp128 [[FABSL]] ; %fabsl = tail call fp128 @fabsl(fp128 %x) @@ -46,7 +46,7 @@ ; Make sure fast math flags are preserved when replacing the libcall. 
define float @fmf_replace_fabs_call_f32(float %x) { ; CHECK-LABEL: @fmf_replace_fabs_call_f32( -; CHECK-NEXT: [[FABSF:%.*]] = call nnan float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[FABSF:%.*]] = tail call nnan float @llvm.fabs.f32(float [[X:%.*]]) ; CHECK-NEXT: ret float [[FABSF]] ; %fabsf = tail call nnan float @fabsf(float %x) diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -31,6 +31,17 @@ ret i8* %ret } +define i8* @test_memccpy_tail() { +; CHECK-LABEL: @test_memccpy_tail( +; CHECK-NEXT: [[MEMCCPY:%.*]] = tail call i8* @memccpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i32 0, i64 60) +; CHECK-NEXT: ret i8* [[MEMCCPY]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1) + ret i8* %ret +} + define i8* @test_mempcpy() { ; CHECK-LABEL: @test_mempcpy( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) @@ -53,6 +64,17 @@ ret i8* %ret } +define i8* @test_mempcpy_tail() { +; CHECK-LABEL: @test_mempcpy_tail( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 15) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 15, i64 -1) + ret i8* %ret +} + define i32 @test_snprintf() { ; CHECK-LABEL: @test_snprintf( ; CHECK-NEXT: [[SNPRINTF:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -77,6 +99,17 @@ ret i32 %ret } +define i32 @test_snprintf_tail() { +; CHECK-LABEL: @test_snprintf_tail( +; CHECK-NEXT: [[SNPRINTF:%.*]] = tail call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: ret i32 [[SNPRINTF]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 0, i64 -1, i8* %fmt) + ret i32 %ret +} + define i32 @test_sprintf() { ; CHECK-LABEL: @test_sprintf( ; CHECK-NEXT: [[SPRINTF:%.*]] = call i32 (i8*, i8*, ...) 
@sprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -101,6 +134,17 @@ ret i32 %ret } +define i32 @test_sprintf_tail() { +; CHECK-LABEL: @test_sprintf_tail( +; CHECK-NEXT: [[SPRINTF:%.*]] = tail call i32 (i8*, i8*, ...) @sprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: ret i32 [[SPRINTF]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 0, i64 -1, i8* %fmt) + ret i32 %ret +} + define i8* @test_strcat() { ; CHECK-LABEL: @test_strcat( ; CHECK-NEXT: [[STRCAT:%.*]] = call i8* @strcat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -123,6 +167,17 @@ ret i8* %ret } +define i8* @test_strcat_tail() { +; CHECK-LABEL: @test_strcat_tail( +; CHECK-NEXT: [[STRCAT:%.*]] = tail call i8* @strcat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i8* @__strcat_chk(i8* %dst, i8* %src, i64 -1) + ret i8* %ret +} + define i64 @test_strlcat() { ; CHECK-LABEL: @test_strlcat( ; CHECK-NEXT: [[STRLCAT:%.*]] = call i64 @strlcat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) @@ -145,6 +200,17 @@ ret i64 %ret } +define i64 @test_strlcat_tail() { +; CHECK-LABEL: @test_strlcat_tail( +; CHECK-NEXT: [[STRLCAT:%.*]] = tail call i64 @strlcat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: ret i64 [[STRLCAT]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i64 @__strlcat_chk(i8* %dst, i8* %src, i64 22, i64 -1) + ret i64 %ret +} + define i8* @test_strncat() { ; CHECK-LABEL: @test_strncat( ; CHECK-NEXT: [[STRNCAT:%.*]] = call i8* @strncat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) @@ -167,6 +233,17 @@ ret i8* %ret } +define i8* @test_strncat_tail() { +; CHECK-LABEL: @test_strncat_tail( +; CHECK-NEXT: [[STRNCAT:%.*]] = tail call i8* @strncat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* 
@a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i8* @__strncat_chk(i8* %dst, i8* %src, i64 22, i64 -1) + ret i8* %ret +} + define i64 @test_strlcpy() { ; CHECK-LABEL: @test_strlcpy( ; CHECK-NEXT: [[STRLCPY:%.*]] = call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) @@ -189,6 +266,17 @@ ret i64 %ret } +define i64 @test_strlcpy_tail() { +; CHECK-LABEL: @test_strlcpy_tail( +; CHECK-NEXT: [[STRLCPY:%.*]] = tail call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: ret i64 [[STRLCPY]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i64 @__strlcpy_chk(i8* %dst, i8* %src, i64 22, i64 -1) + ret i64 %ret +} + define i32 @test_vsnprintf() { ; CHECK-LABEL: @test_vsnprintf( ; CHECK-NEXT: [[VSNPRINTF:%.*]] = call i32 @vsnprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) @@ -215,6 +303,18 @@ ret i32 %ret } +define i32 @test_vsnprintf_tail() { +; CHECK-LABEL: @test_vsnprintf_tail( +; CHECK-NEXT: [[VSNPRINTF:%.*]] = tail call i32 @vsnprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: ret i32 [[VSNPRINTF]] +; + ; ret i32 + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null) + ret i32 %ret +} + define i32 @test_vsprintf() { ; CHECK-LABEL: @test_vsprintf( ; CHECK-NEXT: [[VSPRINTF:%.*]] = call i32 @vsprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) @@ -241,6 +341,18 @@ ret i32 %ret } +define i32 @test_vsprintf_tail() { +; CHECK-LABEL: @test_vsprintf_tail( +; CHECK-NEXT: [[VSPRINTF:%.*]] = tail call i32 @vsprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: ret i32 [[VSPRINTF]] +; + ; ret i32 + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i32 @__vsprintf_chk(i8* %dst, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null) + ret i32 %ret +} + declare i8* @__mempcpy_chk(i8*, i8*, i64, i64) declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64) declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...) 
diff --git a/llvm/test/Transforms/InstCombine/memccpy.ll b/llvm/test/Transforms/InstCombine/memccpy.ll --- a/llvm/test/Transforms/InstCombine/memccpy.ll +++ b/llvm/test/Transforms/InstCombine/memccpy.ll @@ -39,6 +39,25 @@ ret void } +define void @memccpy_to_memcpy3_tail(i8* %dst) { +; CHECK-LABEL: @memccpy_to_memcpy3_tail( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 5, i1 false) +; CHECK-NEXT: ret void +; + %call = tail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) ; 111 is 'o' + ret void +} + +define i8* @memccpy_to_memcpy3_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +; CHECK-LABEL: @memccpy_to_memcpy3_musttail( +; CHECK-NEXT: %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) +; CHECK-NEXT: ret i8* %call +; + %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) ; 111 is 'o' + ret i8* %call +} + + define void @memccpy_to_memcpy4(i8* %dst) { ; CHECK-LABEL: @memccpy_to_memcpy4( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(11) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 11, i1 false) @@ -57,6 +76,24 @@ ret i8* %call } +define i8* @memccpy_to_memcpy5_tail(i8* %dst) { +; CHECK-LABEL: @memccpy_to_memcpy5_tail( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(7) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(7) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 7, i1 false) +; CHECK-NEXT: ret i8* null +; + %call = tail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) + ret i8* %call +} + +define i8* @memccpy_to_memcpy5_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +; CHECK-LABEL: @memccpy_to_memcpy5_musttail( +; CHECK-NEXT: %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) +; CHECK-NEXT: ret i8* %call +; + %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) + ret i8* %call +} + define i8* @memccpy_to_memcpy6(i8* %dst) { ; CHECK-LABEL: @memccpy_to_memcpy6( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(6) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 6, i1 false) @@ -212,3 +249,22 @@ %call = call i8* @memccpy(i8* %dst, i8* %dst, i32 %c, i64 %n) ret i8* %call } + +define i8* @memccpy_to_memcpy_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +; CHECK-LABEL: @memccpy_to_memcpy_musttail( +; CHECK-NEXT: %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) +; CHECK-NEXT: ret i8* %call +; + %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) ; 114 is 'r' + ret i8* %call +} + +define i8* @memccpy_to_memcpy2_musttail(i8* %dst, i8* %x, i32 %y, i64 
%z) { +; CHECK-LABEL: @memccpy_to_memcpy2_musttail( +; CHECK-NEXT: %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 8) +; CHECK-NEXT: ret i8* %call +; + %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 8) ; 114 is 'r' + ret i8* %call +} + diff --git a/llvm/test/Transforms/InstCombine/memcpy-1.ll b/llvm/test/Transforms/InstCombine/memcpy-1.ll --- a/llvm/test/Transforms/InstCombine/memcpy-1.ll +++ b/llvm/test/Transforms/InstCombine/memcpy-1.ll @@ -22,18 +22,44 @@ define i8* @test_simplify2(i8* %mem1, i8* %mem2, i32 %size) strictfp { ; CHECK-LABEL: @test_simplify2( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[MEM1:%.*]], i8* align 1 [[MEM2:%.*]], i32 [[SIZE:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[MEM1:%.*]], i8* align 1 [[MEM2:%.*]], i32 [[SIZE:%.*]], i1 false) #[[ATTR0:[0-9]+]] ; CHECK-NEXT: ret i8* [[MEM1]] ; %ret = call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) strictfp ret i8* %ret } +; Verify that the first parameter to memcpy could itself be a call that's not +; tail, while the call to @memcpy could be tail. +declare i8* @get_dest() + +define i8* @test_simplify3(i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_simplify3( +; CHECK-NEXT: [[DEST:%.*]] = call i8* @get_dest() +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[MEM2:%.*]], i32 [[SIZE:%.*]], i1 false) +; CHECK-NEXT: ret i8* [[DEST]] +; + + %dest = call i8* @get_dest() + %ret = tail call i8* @memcpy(i8* %dest, i8* %mem2, i32 %size) + ret i8* %ret +} + define i8* @test_no_incompatible_attr(i8* %mem1, i8* %mem2, i32 %size) { ; CHECK-LABEL: @test_no_incompatible_attr( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[MEM1:%.*]], i8* align 1 [[MEM2:%.*]], i32 [[SIZE:%.*]], i1 false) ; CHECK-NEXT: ret i8* [[MEM1]] +; %ret = call dereferenceable(1) i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) ret i8* %ret } + +define i8* @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_no_simplify1( +; CHECK-NEXT: [[RET:%.*]] = musttail call i8* @memcpy(i8* [[MEM1:%.*]], i8* [[MEM2:%.*]], i32 [[SIZE:%.*]]) +; CHECK-NEXT: ret i8* [[RET]] +; + %ret = musttail call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) + ret i8* %ret +} diff --git a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll --- a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll @@ -38,6 +38,18 @@ ret i8* %ret } +; Same as test_simplify1 but with a tail call rather than vanilla call. +define i8* @test_simplify3() { +; CHECK-LABEL: @test_simplify3( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) +; + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + %ret = tail call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret +} + ; Check cases where dstlen < len. 
define i8* @test_no_simplify1() { @@ -62,6 +74,15 @@ ret i8* %ret } +define i8* @test_no_simplify3(i8* %dst, i8* %src, i64 %a, i64 %b) { +; CHECK-LABEL: @test_no_simplify3( +; CHECK-NEXT: %ret = musttail call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) +; CHECK-NEXT: ret i8* %ret +; + %ret = musttail call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret +} + define i8* @test_simplify_return_indcall(i8* ()* %alloc) { ; CHECK-LABEL: @test_simplify_return_indcall( ; CHECK-NEXT: [[DST:%.*]] = call i8* [[ALLOC:%.*]]() diff --git a/llvm/test/Transforms/InstCombine/memmove-1.ll b/llvm/test/Transforms/InstCombine/memmove-1.ll --- a/llvm/test/Transforms/InstCombine/memmove-1.ll +++ b/llvm/test/Transforms/InstCombine/memmove-1.ll @@ -16,6 +16,22 @@ ; CHECK: ret i8* %mem1 } +define i8* @test_simplify2(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_simplify2( +; CHECK-NEXT: tail call void @llvm.memmove +; CHECK-NEXT: ret i8* %mem1 + %ret = tail call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) + ret i8* %ret +} + +define i8* @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_no_simplify1( +; CHECK-NEXT: %ret = musttail call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) +; CHECK-NEXT: ret i8* %ret + %ret = musttail call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) + ret i8* %ret +} + define i8* @test_no_incompatible_attr(i8* %mem1, i8* %mem2, i32 %size) { ; CHECK-LABEL: @test_no_incompatible_attr( %ret = call dereferenceable(1) i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) diff --git a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll --- a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll @@ -40,6 +40,18 @@ ret i8* %ret } +define i8* @test_simplify3() { +; CHECK-LABEL: @test_simplify3( +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) +; + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + + %ret = tail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret +} + ; Check cases where dstlen < len. 
define i8* @test_no_simplify1() { @@ -66,6 +78,15 @@ ret i8* %ret } +define i8* @test_no_simplify3(i8* %dst, i8* %src, i64 %a, i64 %b) { +; CHECK-LABEL: @test_no_simplify3( +; CHECK-NEXT: %ret = musttail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) +; CHECK-NEXT: ret i8* %ret +; + %ret = musttail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret +} + define i8* @test_no_incompatible_attr(i8* %mem, i32 %val, i32 %size) { ; CHECK-LABEL: @test_no_incompatible_attr( ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) diff --git a/llvm/test/Transforms/InstCombine/mempcpy.ll b/llvm/test/Transforms/InstCombine/mempcpy.ll --- a/llvm/test/Transforms/InstCombine/mempcpy.ll +++ b/llvm/test/Transforms/InstCombine/mempcpy.ll @@ -64,4 +64,13 @@ ret i32 undef } +define i8* @memcpy_no_simplify1(i8* %d, i8* nocapture readonly %s, i64 %n) { +; CHECK-LABEL: @memcpy_no_simplify1( +; CHECK-NEXT: %r = musttail call i8* @mempcpy(i8* %d, i8* %s, i64 %n) +; CHECK-NEXT: ret i8* %r +; + %r = musttail call i8* @mempcpy(i8* %d, i8* %s, i64 %n) + ret i8* %r +} + declare i8* @mempcpy(i8*, i8* nocapture readonly, i64) diff --git a/llvm/test/Transforms/InstCombine/memset-1.ll b/llvm/test/Transforms/InstCombine/memset-1.ll --- a/llvm/test/Transforms/InstCombine/memset-1.ll +++ b/llvm/test/Transforms/InstCombine/memset-1.ll @@ -21,6 +21,25 @@ ret i8* %ret } +define i8* @test_simplify1_tail(i8* %mem, i32 %val, i32 %size) { +; CHECK-LABEL: @test_simplify1_tail( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[VAL:%.*]] to i8 +; CHECK-NEXT: tail call void @llvm.memset.p0i8.i32(i8* align 1 [[MEM:%.*]], i8 [[TMP1]], i32 [[SIZE:%.*]], i1 false) +; CHECK-NEXT: ret i8* [[MEM]] +; + %ret = tail call i8* @memset(i8* %mem, i32 %val, i32 %size) + ret i8* %ret +} + +define i8* @test_simplify1_musttail(i8* %mem, i32 %val, i32 %size) { +; CHECK-LABEL: @test_simplify1_musttail( +; CHECK-NEXT: %ret = musttail call i8* @memset(i8* %mem, i32 %val, i32 %size) +; CHECK-NEXT: ret i8* %ret +; + %ret = musttail call i8* @memset(i8* %mem, i32 %val, i32 %size) + ret i8* %ret +} + ; Malloc + memset pattern is now handled by DSE in a more general way. define i8* @pr25892_lite(i32 %size) #0 { diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll --- a/llvm/test/Transforms/InstCombine/memset_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memset_chk-1.ll @@ -45,6 +45,18 @@ ret i8* %ret } +; Same as @test_simplify1 with tail call. +define i8* @test_simplify4() { +; CHECK-LABEL: @test_simplify4( +; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*) +; + %dst = bitcast %struct.T* @t to i8* + + %ret = tail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824) + ret i8* %ret +} + ; Check cases where dstlen < len. 
 define i8* @test_no_simplify1() {
@@ -69,6 +81,16 @@
   ret i8* %ret
 }
 
+define i8* @test_no_simplify3(i8* %dst, i32 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @test_no_simplify3(
+; CHECK-NEXT: %ret = musttail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+; CHECK-NEXT: ret i8* %ret
+;
+  %ret = musttail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+  ret i8* %ret
+}
+
+
 ; Test that RAUW in SimplifyLibCalls for __memset_chk generates valid IR
 define i32 @test_rauw(i8* %a, i8* %b, i8** %c) {
 ; CHECK-LABEL: @test_rauw(
diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll
--- a/llvm/test/Transforms/InstCombine/objsize.ll
+++ b/llvm/test/Transforms/InstCombine/objsize.ll
@@ -251,7 +251,7 @@
 define i32 @test11(i8** %esc) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT: [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT: ret i32 8
 ;
@@ -263,7 +263,7 @@
 define i32 @test12(i8** %esc) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT: [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT: ret i32 8
 ;
@@ -275,7 +275,7 @@
 define i32 @test13(i8** %esc) {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT: [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT: ret i32 8
 ;
diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll
--- a/llvm/test/Transforms/InstCombine/pow-1.ll
+++ b/llvm/test/Transforms/InstCombine/pow-1.ll
@@ -270,6 +270,23 @@
   ret float %retval
 }
 
+define float @powf_libcall_half_ninf_tail(float %x) {
+; CHECK-LABEL: @powf_libcall_half_ninf_tail(
+; ANY-NEXT: %sqrtf = call ninf float @sqrtf(float %x)
+; ANY-NEXT: %abs = tail call ninf float @llvm.fabs.f32(float %sqrtf)
+; ANY-NEXT: ret float %abs
+  %retval = tail call ninf float @powf(float %x, float 0.5)
+  ret float %retval
+}
+
+define float @powf_libcall_half_ninf_musttail(float %x, float %y) {
+; CHECK-LABEL: @powf_libcall_half_ninf_musttail(
+; ANY-NEXT: %retval = musttail call ninf float @powf(float %x, float 5.000000e-01)
+; ANY-NEXT: ret float %retval
+  %retval = musttail call ninf float @powf(float %x, float 0.5)
+  ret float %retval
+}
+
 ; Check pow(x, 0.5) where x may be -infinity does not call a library sqrt function.
define double @pow_libcall_half_no_FMF(double %x) { diff --git a/llvm/test/Transforms/InstCombine/pow-exp.ll b/llvm/test/Transforms/InstCombine/pow-exp.ll --- a/llvm/test/Transforms/InstCombine/pow-exp.ll +++ b/llvm/test/Transforms/InstCombine/pow-exp.ll @@ -214,7 +214,7 @@ ; CHECK-LABEL: @pow_ok_base( ; Do not change 0xBFE0776{{.*}} to the exact constant, see PR42740 ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0xBFE0776{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 0x3FE6666666666666, double %e) @@ -224,7 +224,7 @@ define double @pow_ok_base_fast(double %e) { ; CHECK-LABEL: @pow_ok_base_fast( ; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[E:%.*]], 0xBFE0776{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call fast double @pow(double 0x3FE6666666666666, double %e) @@ -234,7 +234,7 @@ define double @pow_ok_base2(double %e) { ; CHECK-LABEL: @pow_ok_base2( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x4010952{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 1.770000e+01, double %e) @@ -244,7 +244,7 @@ define double @pow_ok_base3(double %e) { ; CHECK-LABEL: @pow_ok_base3( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x400AB0B5{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 1.010000e+01, double %e) @@ -254,7 +254,7 @@ define double @pow_ok_ten_base(double %e) { ; CHECK-LABEL: @pow_ok_ten_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x400A934F{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 1.000000e+01, double %e) @@ -264,7 +264,7 @@ define double @pow_ok_denorm_base(double %e) { ; CHECK-LABEL: @pow_ok_denorm_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0xC0904800000005C5 -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 0x00000000FFFFFFFF, double %e) @@ -274,7 +274,7 @@ define float @powf_ok_base(float %e) { ; CHECK-LABEL: @powf_ok_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0xBFE07762{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 0x3FE6666660000000, float %e) @@ -284,7 +284,7 @@ define float @powf_ok_base2(float %e) { ; CHECK-LABEL: @powf_ok_base2( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float 
[[E:%.*]], 0x4010952{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 0x4031B33340000000, float %e) @@ -294,7 +294,7 @@ define float @powf_ok_base3(float %e) { ; CHECK-LABEL: @powf_ok_base3( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x400AB0B5{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 0x4024333340000000, float %e) @@ -304,7 +304,7 @@ define float @powf_ok_ten_base(float %e) { ; CHECK-LABEL: @powf_ok_ten_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x400A934{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 1.000000e+01, float %e) @@ -314,7 +314,7 @@ define float @powf_ok_denorm_base(float %e) { ; CHECK-LABEL: @powf_ok_denorm_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], -1.350000e+02 -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 0x3780000000000000, float %e) @@ -371,7 +371,7 @@ define double @pow_multiuse(double %e) { ; CHECK-LABEL: @pow_multiuse( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x4002934{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: tail call void @use_d(double [[EXP2]]) ; CHECK-NEXT: ret double [[EXP2]] ; @@ -401,7 +401,7 @@ define double @pow_ok_base_no_ninf(double %e) { ; CHECK-LABEL: @pow_ok_base_no_ninf( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan afn double [[E:%.*]], 0xBFE0776{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan double @pow(double 0x3FE6666666666666, double %e) @@ -456,7 +456,7 @@ define float @powf_multiuse(float %e) { ; CHECK-LABEL: @powf_multiuse( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x4002934{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: tail call void @use_f(float [[EXP2F]]) ; CHECK-NEXT: ret float [[EXP2F]] ; diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll --- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll @@ -5,7 +5,7 @@ define double @pow_sitofp_const_base_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret 
double [[RES]] ; @@ -18,7 +18,7 @@ define double @pow_uitofp_const_base_fast(i31 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -30,7 +30,7 @@ define double @pow_sitofp_double_const_base_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i32 %x to double @@ -41,7 +41,7 @@ define double @pow_uitofp_double_const_base_fast(i31 %x) { ; CHECK-LABEL: @pow_uitofp_double_const_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: ret double [[TMP2]] ; %subfp = uitofp i31 %x to double @@ -51,7 +51,7 @@ define double @pow_sitofp_double_const_base_2_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_2_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -65,7 +65,7 @@ ; CHECK-LABEL: @pow_sitofp_double_const_base_power_of_2_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -78,7 +78,7 @@ define double @pow_uitofp_const_base_2_fast(i31 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -92,7 +92,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i31 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -104,7 +104,7 @@ define double @pow_sitofp_float_base_fast(float %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_float_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -117,7 +117,7 
@@ define double @pow_uitofp_float_base_fast(float %base, i31 %x) { ; CHECK-LABEL: @pow_uitofp_float_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -129,7 +129,7 @@ define double @pow_sitofp_double_base_fast(double %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i32 %x to double @@ -140,7 +140,7 @@ define double @pow_uitofp_double_base_fast(double %base, i31 %x) { ; CHECK-LABEL: @pow_uitofp_double_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: ret double [[TMP2]] ; %subfp = uitofp i31 %x to double @@ -151,7 +151,7 @@ define double @pow_sitofp_const_base_fast_i8(i8 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast_i8( ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -164,7 +164,7 @@ define double @pow_sitofp_const_base_fast_i16(i16 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast_i16( ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -178,7 +178,7 @@ define double @pow_uitofp_const_base_fast_i8(i8 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast_i8( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -191,7 +191,7 @@ define double @pow_uitofp_const_base_fast_i16(i16 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast_i16( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -203,7 +203,7 @@ define double @powf_exp_const_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 40) +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 40) ; CHECK-NEXT: ret double 
[[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01) @@ -212,7 +212,7 @@ define double @powf_exp_const2_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const2_int_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 -40) +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 -40) ; CHECK-NEXT: ret double [[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double -4.000000e+01) @@ -225,7 +225,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_fast_i32( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x4006757{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -238,7 +238,7 @@ define double @pow_uitofp_const_base_2_fast_i32(i32 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast_i32( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[SUBFP]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[SUBFP]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -252,7 +252,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast_i32( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -291,7 +291,7 @@ ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i64 [[X:%.*]] to float ; Do not change 0x400675{{.*}} to the exact constant, see PR42740 ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x400675{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -305,7 +305,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_fast_i64( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i64 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x400675{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -343,7 +343,7 @@ define double @pow_sitofp_const_base_2_no_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_2_no_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -357,7 +357,7 @@ ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: 
[[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -370,7 +370,7 @@ define double @pow_uitofp_const_base_2_no_fast(i32 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[SUBFP]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[SUBFP]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -384,7 +384,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int16.ll b/llvm/test/Transforms/InstCombine/pow_fp_int16.ll --- a/llvm/test/Transforms/InstCombine/pow_fp_int16.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int16.ll @@ -5,7 +5,7 @@ define double @pow_sitofp_const_base_fast(i16 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -18,7 +18,7 @@ define double @pow_uitofp_const_base_fast(i15 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -30,7 +30,7 @@ define double @pow_sitofp_double_const_base_fast(i16 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i16 %x to double @@ -41,7 +41,7 @@ define double @pow_uitofp_double_const_base_fast(i15 %x) { ; CHECK-LABEL: @pow_uitofp_double_const_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: ret double [[TMP2]] ; %subfp = uitofp i15 %x to double @@ -51,7 +51,7 @@ define double @pow_sitofp_double_const_base_2_fast(i16 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_2_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -65,7 +65,7 @@ ; CHECK-LABEL: @pow_sitofp_double_const_base_power_of_2_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: 
[[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -78,7 +78,7 @@ define double @pow_uitofp_const_base_2_fast(i15 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -92,7 +92,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i15 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -104,7 +104,7 @@ define double @pow_sitofp_float_base_fast(float %base, i16 %x) { ; CHECK-LABEL: @pow_sitofp_float_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -117,7 +117,7 @@ define double @pow_uitofp_float_base_fast(float %base, i15 %x) { ; CHECK-LABEL: @pow_uitofp_float_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -129,7 +129,7 @@ define double @pow_sitofp_double_base_fast(double %base, i16 %x) { ; CHECK-LABEL: @pow_sitofp_double_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i16 %x to double @@ -140,7 +140,7 @@ define double @pow_uitofp_double_base_fast(double %base, i15 %x) { ; CHECK-LABEL: @pow_uitofp_double_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[TMP1]]) ; CHECK-NEXT: ret double [[TMP2]] ; %subfp = uitofp i15 %x to double @@ -151,7 +151,7 @@ define double @pow_sitofp_const_base_fast_i8(i8 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast_i8( ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -163,7 +163,7 @@ define double @pow_sitofp_const_base_fast_i16(i16 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast_i16( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float 
@llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -177,7 +177,7 @@ define double @pow_uitofp_const_base_fast_i8(i8 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast_i8( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -202,7 +202,7 @@ define double @powf_exp_const_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 40) +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 40) ; CHECK-NEXT: ret double [[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01) @@ -211,7 +211,7 @@ define double @powf_exp_const2_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const2_int_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 -40) +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 -40) ; CHECK-NEXT: ret double [[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double -4.000000e+01) @@ -224,7 +224,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_fast_i16( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x4006757{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -237,7 +237,7 @@ define double @pow_uitofp_const_base_2_fast_i16(i16 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast_i16( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[SUBFP]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[SUBFP]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -251,7 +251,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast_i16( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -313,7 +313,7 @@ define double @pow_sitofp_const_base_2_no_fast(i16 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_2_no_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call float @ldexpf(float 1.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call float @ldexpf(float 1.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -327,7 +327,7 @@ ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: 
[[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -340,7 +340,7 @@ define double @pow_uitofp_const_base_2_no_fast(i16 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[SUBFP]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[SUBFP]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -354,7 +354,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/snprintf.ll b/llvm/test/Transforms/InstCombine/snprintf.ll --- a/llvm/test/Transforms/InstCombine/snprintf.ll +++ b/llvm/test/Transforms/InstCombine/snprintf.ll @@ -5,6 +5,8 @@ @.str.1 = private unnamed_addr constant [3 x i8] c"%%\00", align 1 @.str.2 = private unnamed_addr constant [3 x i8] c"%c\00", align 1 @.str.3 = private unnamed_addr constant [3 x i8] c"%s\00", align 1 +@.str.4 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 + declare i32 @snprintf(i8*, i64, i8*, ...) #1 @@ -136,3 +138,42 @@ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 32, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2 ret i32 %call } + +; snprintf(buf, 32, "") -> memcpy -> store +define i32 @test_str_ok_size_tail(i8* %buf) { +; CHECK-LABEL: @test_str_ok_size_tail( +; CHECK-NEXT: store i8 0, i8* %buf, align 1 +; CHECK-NEXT: ret i32 0 +; + %1 = tail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.4, i64 0, i64 0)) + ret i32 %1 +} + +define i32 @test_str_ok_size_musttail(i8* %buf, i64 %x, i8* %y, ...) { +; CHECK-LABEL: @test_str_ok_size_musttail( +; CHECK-NEXT: %1 = musttail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.4, i64 0, i64 0), ...) +; CHECK-NEXT: ret i32 %1 +; + %1 = musttail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.4, i64 0, i64 0), ...) + ret i32 %1 +} + +; snprintf(buf, 32, "%s", "str") -> memcpy -> store +define i32 @test_str_ok_size_tail2(i8* %buf) { +; CHECK-LABEL: @test_str_ok_size_tail2( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUF:%.*]] to i32* +; CHECK-NEXT: store i32 7500915, i32* [[TMP1]], align 1 +; CHECK-NEXT: ret i32 3 +; + %1 = tail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) + ret i32 %1 +} + +define i32 @test_str_ok_size_musttail2(i8* %buf, i64 %x, i8* %y, ...) { +; CHECK-LABEL: @test_str_ok_size_musttail2( +; CHECK-NEXT: %1 = musttail call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), ...) +; CHECK-NEXT: ret i32 %1 +; + %1 = musttail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), ...) + ret i32 %1 +} diff --git a/llvm/test/Transforms/InstCombine/stpcpy-1.ll b/llvm/test/Transforms/InstCombine/stpcpy-1.ll --- a/llvm/test/Transforms/InstCombine/stpcpy-1.ll +++ b/llvm/test/Transforms/InstCombine/stpcpy-1.ll @@ -57,6 +57,15 @@ ret i8* %ret } +define i8* @test_no_simplify2(i8* %dst, i8* %src) { +; CHECK-LABEL: @test_no_simplify2( +; CHECK-NEXT: %ret = musttail call i8* @stpcpy(i8* %dst, i8* %src) +; CHECK-NEXT: ret i8* %ret +; + %ret = musttail call i8* @stpcpy(i8* %dst, i8* %src) + ret i8* %ret +} + define i8* @test_no_incompatible_attr() { ; CHECK-LABEL: @test_no_incompatible_attr( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i32 6, i1 false) diff --git a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll --- a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll @@ -48,6 +48,18 @@ ret i8* %ret } +define i8* @test_simplify1_tail() { +; CHECK-LABEL: @test_simplify1_tail( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0 + + %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60) + ret i8* %ret +} + ; Check cases where there are no string constants. define i8* @test_simplify4() { @@ -62,6 +74,18 @@ ret i8* %ret } +define i8* @test_simplify4_tail() { +; CHECK-LABEL: @test_simplify4_tail( +; CHECK-NEXT: [[STPCPY:%.*]] = tail call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0)) +; CHECK-NEXT: ret i8* [[STPCPY]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + + %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1) + ret i8* %ret +} + ; Check case where the string length is not constant. define i8* @test_simplify5() { @@ -93,6 +117,20 @@ ret i8* %ret } +; Check cases where there are no string constants, and is a tail call. 
+
+define i8* @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: [[STPCPY:%.*]] = tail call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* [[STPCPY]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where slen < strlen (src).
 
 define i8* @test_no_simplify1() {
diff --git a/llvm/test/Transforms/InstCombine/strcpy-1.ll b/llvm/test/Transforms/InstCombine/strcpy-1.ll
--- a/llvm/test/Transforms/InstCombine/strcpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy-1.ll
@@ -63,6 +63,17 @@
   ret i8* %ret
 }
 
+define i8* @test_no_simplify2(i8* %dst, i8* %src) {
+; CHECK-LABEL: @test_no_simplify2(
+; CHECK-NEXT: %ret = musttail call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK-NEXT: ret i8* %ret
+;
+
+  %ret = musttail call i8* @strcpy(i8* %dst, i8* %src)
+  ret i8* %ret
+}
+
+
 define void @test_no_incompatible_attr() {
 ; CHECK-LABEL: @test_no_incompatible_attr(
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i32 6, i1 false)
diff --git a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
--- a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -24,6 +24,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify1_tail() {
+; CHECK-LABEL: @test_simplify1_tail(
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret i8* %ret
+}
+
 define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
@@ -62,6 +74,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify4_tail() {
+; CHECK-LABEL: @test_simplify4_tail(
+; CHECK-NEXT: [[STRCPY:%.*]] = tail call i8* @strcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where the string length is not constant.
 define i8* @test_simplify5() {
@@ -93,6 +117,20 @@
   ret i8* %ret
 }
 
+; Check cases where there are no string constants and the call is a tail call.
+
+define i8* @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: [[STRCPY:%.*]] = tail call i8* @strcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where slen < strlen (src).
 
 define i8* @test_no_simplify1() {
@@ -107,5 +145,14 @@
   ret i8* %ret
 }
 
+define i8* @test_no_simplify2(i8* %dst, i8* %src, i32 %a) {
+; CHECK-LABEL: @test_no_simplify2(
+; CHECK-NEXT: %ret = musttail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+; CHECK-NEXT: ret i8* %ret
+;
+  %ret = musttail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret i8* %ret
+}
+
 declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
 declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) nounwind readonly
diff --git a/llvm/test/Transforms/InstCombine/strncpy-1.ll b/llvm/test/Transforms/InstCombine/strncpy-1.ll
--- a/llvm/test/Transforms/InstCombine/strncpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/strncpy-1.ll
@@ -180,6 +180,25 @@
   ret void
 }
 
+define i8* @test_no_simplify3(i8* %dst, i8* %src, i32 %count) {
+; CHECK-LABEL: @test_no_simplify3(
+; CHECK-NEXT: %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+; CHECK-NEXT: ret i8* %ret
+;
+  %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+  ret i8* %ret
+}
+
+define i8* @test_no_simplify4(i8* %dst, i8* %src, i32 %count) {
+; CHECK-LABEL: @test_no_simplify4(
+; CHECK-NEXT: %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6)
+; CHECK-NEXT: ret i8* %ret
+;
+  %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6)
+  ret i8* %ret
+}
+
+
 define void @test_no_incompatible_attr() {
 ; CHECK-LABEL: @test_no_incompatible_attr(
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i32 6, i1 false)