diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -193,6 +193,15 @@
   }
 }
 
+// Copy the tail-call kind (tail, musttail, notail) from Old onto New when New
+// is a CallInst; null or non-call values are left untouched. Returns New for
+// chaining. Wrap emit* and B.CreateCall results that replace Old with this.
+static Value *copyFlags(const CallInst &Old, Value *New) {
+  if (auto *NewCI = dyn_cast_or_null<CallInst>(New))
+    NewCI->setTailCallKind(Old.getTailCallKind());
+  return New;
+}
+
 //===----------------------------------------------------------------------===//
 // String and Memory Library Call Optimizations
 //===----------------------------------------------------------------------===//
@@ -215,7 +224,7 @@
   if (Len == 0)
     return Dst;
 
-  return emitStrLenMemCpy(Src, Dst, Len, B);
+  return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B));
 }
 
 Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
@@ -279,7 +288,7 @@
 
   // strncat(x, s, c) -> strcat(x, s)
   // s is constant so the strcat can be optimized further.
-  return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+  return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
 }
 
 Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
@@ -300,9 +309,11 @@
     if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
       return nullptr;
 
-    return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
-                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
-                      B, DL, TLI);
+    return copyFlags(
+        *CI,
+        emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+                   ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), B,
+                   DL, TLI));
   }
 
   // Otherwise, the character is a constant, see if the first argument is
@@ -340,7 +351,7 @@
   if (!getConstantStringInfo(SrcStr, Str)) {
     // strrchr(s, 0) -> strchr(s, 0)
     if (CharC->isZero())
-      return emitStrChr(SrcStr, '\0', B, TLI);
+      return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
     return nullptr;
   }
 
@@ -385,25 +396,28 @@
     annotateDereferenceableBytes(CI, 1, Len2);
 
   if (Len1 && Len2) {
-    return emitMemCmp(Str1P, Str2P,
-                      ConstantInt::get(DL.getIntPtrType(CI->getContext()),
-                                       std::min(Len1, Len2)),
-                      B, DL, TLI);
+    return copyFlags(
+        *CI, emitMemCmp(Str1P, Str2P,
+                        ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+                                         std::min(Len1, Len2)),
+                        B, DL, TLI));
   }
 
   // strcmp to memcmp
   if (!HasStr1 && HasStr2) {
     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
-      return emitMemCmp(
-          Str1P, Str2P,
-          ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
-          TLI);
+      return copyFlags(
+          *CI,
+          emitMemCmp(Str1P, Str2P,
+                     ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
+                     B, DL, TLI));
   } else if (HasStr1 && !HasStr2) {
     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
-      return emitMemCmp(
-          Str1P, Str2P,
-          ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
-          TLI);
+      return copyFlags(
+          *CI,
+          emitMemCmp(Str1P, Str2P,
+                     ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
+                     B, DL, TLI));
   }
 
   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
@@ -430,7 +444,7 @@
     return ConstantInt::get(CI->getType(), 0);
 
   if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
-    return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI);
+    return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI));
 
   StringRef Str1, Str2;
   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -462,17 +476,19 @@
   if (!HasStr1 && HasStr2) {
     Len2 = std::min(Len2, Length);
     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
-      return emitMemCmp(
-          Str1P, Str2P,
-          ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
-          TLI);
+      return copyFlags(
+          *CI,
+          emitMemCmp(Str1P, Str2P,
+                     ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
+                     B, DL, TLI));
   } else if (HasStr1 && !HasStr2) {
     Len1 = std::min(Len1, Length);
     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
-      return emitMemCmp(
-          Str1P, Str2P,
-          ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
-          TLI);
+      return copyFlags(
+          *CI,
+          emitMemCmp(Str1P, Str2P,
+                     ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
+                     B, DL, TLI));
   }
 
   return nullptr;
@@ -485,7 +501,7 @@
   if (SrcLen && Size) {
     annotateDereferenceableBytes(CI, 0, SrcLen);
     if (SrcLen <= Size->getZExtValue() + 1)
-      return emitStrDup(Src, B, TLI);
+      return copyFlags(*CI, emitStrDup(Src, B, TLI));
   }
 
   return nullptr;
@@ -495,7 +511,7 @@
   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
   if (Dst == Src) // strcpy(x,x)  -> x
     return Src;
-  
+
   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
   // See if we can get the length of the input string.
   uint64_t Len = GetStringLength(Src);
@@ -511,6 +527,7 @@
                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
   NewCI->setAttributes(CI->getAttributes());
   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+  copyFlags(*CI, NewCI);
   return Dst;
 }
 
@@ -520,7 +537,7 @@
 
   // stpcpy(d,s) -> strcpy(d,s) if the result is not used.
   if (CI->use_empty())
-    return emitStrCpy(Dst, Src, B, TLI);
+    return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
 
   if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
     Value *StrLen = emitStrLen(Src, B, DL, TLI);
@@ -544,6 +561,7 @@
   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
   NewCI->setAttributes(CI->getAttributes());
   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+  copyFlags(*CI, NewCI);
   return DstEnd;
 }
 
@@ -583,6 +601,7 @@
     AttrBuilder ArgAttrs(CI->getAttributes().getParamAttrs(0));
     NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
         CI->getContext(), 0, ArgAttrs));
+    copyFlags(*CI, NewCI);
     return Dst;
   }
 
@@ -606,6 +625,7 @@
                                    ConstantInt::get(DL.getIntPtrType(PT), Len));
   NewCI->setAttributes(CI->getAttributes());
   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+  copyFlags(*CI, NewCI);
   return Dst;
 }
 
@@ -737,7 +757,7 @@
 
   // strpbrk(s, "a") -> strchr(s, 'a')
   if (HasS2 && S2.size() == 1)
-    return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
+    return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI));
 
   return nullptr;
 }
@@ -793,7 +813,7 @@
 
   // strcspn(s, "") -> strlen(s)
   if (HasS2 && S2.empty())
-    return emitStrLen(CI->getArgOperand(0), B, DL, TLI);
+    return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI));
 
   return nullptr;
 }
@@ -1062,7 +1082,7 @@
     Value *LHS = CI->getArgOperand(0);
     Value *RHS = CI->getArgOperand(1);
     Value *Size = CI->getArgOperand(2);
-    return emitBCmp(LHS, RHS, Size, B, DL, TLI);
+    return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI));
   }
 
   return nullptr;
@@ -1083,6 +1103,7 @@
                                    CI->getArgOperand(1), Align(1), Size);
   NewCI->setAttributes(CI->getAttributes());
   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+  copyFlags(*CI, NewCI);
   return CI->getArgOperand(0);
 }
 
@@ -1110,7 +1131,8 @@
   size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
   if (Pos == StringRef::npos) {
     if (N->getZExtValue() <= SrcStr.size()) {
-      B.CreateMemCpy(Dst, Align(1), Src, Align(1), CI->getArgOperand(3));
+      copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1),
+                                    CI->getArgOperand(3)));
       return Constant::getNullValue(CI->getType());
     }
     return nullptr;
@@ -1119,7 +1141,7 @@
   Value *NewN =
       ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
   // memccpy -> llvm.memcpy
-  B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN);
+  copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN));
   return Pos + 1 <= N->getZExtValue()
              ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
              : Constant::getNullValue(CI->getType());
@@ -1136,6 +1158,7 @@
   // TODO: Attach return value attributes to the 1st operand to preserve them?
   NewCI->setAttributes(CI->getAttributes());
   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+  copyFlags(*CI, NewCI);
   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
 }
 
@@ -1150,6 +1173,7 @@
                                     CI->getArgOperand(1), Align(1), Size);
   NewCI->setAttributes(CI->getAttributes());
   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+  copyFlags(*CI, NewCI);
   return CI->getArgOperand(0);
 }
 
@@ -1164,12 +1188,13 @@
   CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
   NewCI->setAttributes(CI->getAttributes());
   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+  copyFlags(*CI, NewCI);
   return CI->getArgOperand(0);
 }
 
 Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
   if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
-    return emitMalloc(CI->getArgOperand(1), B, DL, TLI);
+    return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI));
 
   return nullptr;
 }
@@ -1311,7 +1336,8 @@
 
   Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt,
                                               CI->getType());
-  return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs");
+  return copyFlags(
+      *CI, B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"));
 }
 
 static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
@@ -1334,14 +1360,16 @@
     // sin(-X) --> -sin(X)
     // tan(-X) --> -tan(X)
     if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X)))))
-      return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X));
+      return B.CreateFNeg(
+          copyFlags(*Call, B.CreateCall(Call->getCalledFunction(), X)));
     break;
   case LibFunc_cos:
   case LibFunc_cosf:
   case LibFunc_cosl:
     // cos(-X) --> cos(X)
     if (match(Call->getArgOperand(0), m_FNeg(m_Value(X))))
-      return B.CreateCall(Call->getCalledFunction(), X, "cos");
+      return copyFlags(*Call,
+                       B.CreateCall(Call->getCalledFunction(), X, "cos"));
     break;
   default:
     break;
@@ -1476,9 +1504,10 @@
       (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
       hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
-      return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI,
-                                   LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
-                                   B, Attrs);
+      return copyFlags(*Pow,
+                       emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI,
+                                             TLI, LibFunc_ldexp, LibFunc_ldexpf,
+                                             LibFunc_ldexpl, B, Attrs));
   }
 
   // pow(2.0 ** n, x) -> exp2(n * x)
@@ -1496,11 +1525,13 @@
       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
       if (Pow->doesNotAccessMemory())
-        return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
-                            FMul, "exp2");
+        return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration(
+                                                Mod, Intrinsic::exp2, Ty),
+                                            FMul, "exp2"));
       else
-        return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
-                                    LibFunc_exp2l, B, Attrs);
+        return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
+                                                    LibFunc_exp2f,
+                                                    LibFunc_exp2l, B, Attrs));
     }
   }
 
@@ -1508,8 +1539,9 @@
   // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
   if (match(Base, m_SpecificFP(10.0)) &&
       hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
-    return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
-                                LibFunc_exp10l, B, Attrs);
+    return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10,
+                                                LibFunc_exp10f, LibFunc_exp10l,
+                                                B, Attrs));
 
   // pow(x, y) -> exp2(log2(x) * y)
   if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() &&
@@ -1528,11 +1560,13 @@
     if (Log) {
       Value *FMul = B.CreateFMul(Log, Expo, "mul");
       if (Pow->doesNotAccessMemory())
-        return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
-                            FMul, "exp2");
+        return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration(
+                                                Mod, Intrinsic::exp2, Ty),
+                                            FMul, "exp2"));
       else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l))
-        return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
-                                    LibFunc_exp2l, B, Attrs);
+        return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
+                                                    LibFunc_exp2f,
+                                                    LibFunc_exp2l, B, Attrs));
     }
   }
 
@@ -1592,7 +1626,7 @@
   // Handle signed zero base by expanding to fabs(sqrt(x)).
   if (!Pow->hasNoSignedZeros()) {
     Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty);
-    Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs");
+    Sqrt = copyFlags(*Pow, B.CreateCall(FAbsFn, Sqrt, "abs"));
   }
 
   // Handle non finite base by expanding to
@@ -1721,15 +1755,18 @@
     if (ExpoF->isInteger() &&
         ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
             APFloat::opOK) {
-      return createPowWithIntegerExponent(
-          Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B);
+      return copyFlags(
+          *Pow,
+          createPowWithIntegerExponent(
+              Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
+              M, B));
     }
   }
 
   // powf(x, itofp(y)) -> powi(x, y)
   if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
-      return createPowWithIntegerExponent(Base, ExpoI, M, B);
+      return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B));
   }
 
   // Shrink pow() to powf() if the arguments are single precision,
@@ -1792,7 +1829,8 @@
   Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
                                                            : Intrinsic::maxnum;
   Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
-  return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
+  return copyFlags(
+      *CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)}));
 }
 
 Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
@@ -2010,9 +2048,9 @@
     // of the square root calculation.
     Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
     Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
-    return B.CreateFMul(FabsCall, SqrtCall);
+    return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall));
   }
-  return FabsCall;
+  return copyFlags(*CI, FabsCall);
 }
 
 // TODO: Generalize to handle any trig function and its inverse.
@@ -2327,7 +2365,7 @@
 
   // printf("x") -> putchar('x'), even for "%" and "%%".
   if (FormatStr.size() == 1 || FormatStr == "%%")
-    return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
+    return copyFlags(*CI, emitPutChar(B.getInt32(FormatStr[0]), B, TLI));
 
   // Try to remove call or emit putchar/puts.
   if (FormatStr == "%s" && CI->arg_size() > 1) {
@@ -2339,12 +2377,12 @@
       return (Value *)CI;
     // printf("%s", "a") --> putchar('a')
     if (OperandStr.size() == 1)
-      return emitPutChar(B.getInt32(OperandStr[0]), B, TLI);
+      return copyFlags(*CI, emitPutChar(B.getInt32(OperandStr[0]), B, TLI));
     // printf("%s", str"\n") --> puts(str)
     if (OperandStr.back() == '\n') {
       OperandStr = OperandStr.drop_back();
       Value *GV = B.CreateGlobalString(OperandStr, "str");
-      return emitPutS(GV, B, TLI);
+      return copyFlags(*CI, emitPutS(GV, B, TLI));
     }
     return nullptr;
   }
@@ -2356,19 +2394,19 @@
     // pass to be run after this pass, to merge duplicate strings.
     FormatStr = FormatStr.drop_back();
     Value *GV = B.CreateGlobalString(FormatStr, "str");
-    return emitPutS(GV, B, TLI);
+    return copyFlags(*CI, emitPutS(GV, B, TLI));
   }
 
   // Optimize specific format strings.
   // printf("%c", chr) --> putchar(chr)
   if (FormatStr == "%c" && CI->arg_size() > 1 &&
       CI->getArgOperand(1)->getType()->isIntegerTy())
-    return emitPutChar(CI->getArgOperand(1), B, TLI);
+    return copyFlags(*CI, emitPutChar(CI->getArgOperand(1), B, TLI));
 
   // printf("%s\n", str) --> puts(str)
   if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
       CI->getArgOperand(1)->getType()->isPointerTy())
-    return emitPutS(CI->getArgOperand(1), B, TLI);
+    return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI));
   return nullptr;
 }
 
@@ -2459,7 +2497,7 @@
 
     if (CI->use_empty())
       // sprintf(dest, "%s", str) -> strcpy(dest, str)
-      return emitStrCpy(Dest, CI->getArgOperand(2), B, TLI);
+      return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI));
 
     uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
     if (SrcLen) {
@@ -2558,10 +2596,12 @@
 
     // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
     // strlen(fmt)+1)
-    B.CreateMemCpy(
-        CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1),
-        ConstantInt::get(DL.getIntPtrType(CI->getContext()),
-                         FormatStr.size() + 1)); // Copy the null byte.
+    copyFlags(
+        *CI,
+        B.CreateMemCpy(
+            CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1),
+            ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+                             FormatStr.size() + 1))); // Copy the null byte.
     return ConstantInt::get(CI->getType(), FormatStr.size());
   }
 
@@ -2599,8 +2639,10 @@
       else if (N < Str.size() + 1)
         return nullptr;
 
-      B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(3),
-                     Align(1), ConstantInt::get(CI->getType(), Str.size() + 1));
+      copyFlags(
+          *CI, B.CreateMemCpy(CI->getArgOperand(0), Align(1),
+                              CI->getArgOperand(3), Align(1),
+                              ConstantInt::get(CI->getType(), Str.size() + 1)));
 
       // The snprintf result is the unincremented number of bytes in the string.
       return ConstantInt::get(CI->getType(), Str.size());
@@ -2640,10 +2682,11 @@
     if (FormatStr.find('%') != StringRef::npos)
       return nullptr; // We found a format specifier.
 
-    return emitFWrite(
-        CI->getArgOperand(1),
-        ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
-        CI->getArgOperand(0), B, DL, TLI);
+    return copyFlags(
+        *CI, emitFWrite(CI->getArgOperand(1),
+                        ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+                                         FormatStr.size()),
+                        CI->getArgOperand(0), B, DL, TLI));
   }
 
   // The remaining optimizations require the format string to be "%s" or "%c"
@@ -2656,14 +2699,16 @@
     // fprintf(F, "%c", chr) --> fputc(chr, F)
     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
       return nullptr;
-    return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
+    return copyFlags(
+        *CI, emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI));
   }
 
   if (FormatStr[1] == 's') {
     // fprintf(F, "%s", str) --> fputs(str, F)
     if (!CI->getArgOperand(2)->getType()->isPointerTy())
       return nullptr;
-    return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
+    return copyFlags(
+        *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI));
   }
   return nullptr;
 }
@@ -2750,10 +2795,11 @@
     return nullptr;
 
   // Known to have no uses (see above).
-  return emitFWrite(
-      CI->getArgOperand(0),
-      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
-      CI->getArgOperand(1), B, DL, TLI);
+  return copyFlags(
+      *CI,
+      emitFWrite(CI->getArgOperand(0),
+                 ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
+                 CI->getArgOperand(1), B, DL, TLI));
 }
 
 Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
@@ -2765,15 +2811,16 @@
   // puts("") -> putchar('\n')
   StringRef Str;
   if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty())
-    return emitPutChar(B.getInt32('\n'), B, TLI);
+    return copyFlags(*CI, emitPutChar(B.getInt32('\n'), B, TLI));
 
   return nullptr;
 }
 
 Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
   // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
-  return B.CreateMemMove(CI->getArgOperand(1), Align(1), CI->getArgOperand(0),
-                         Align(1), CI->getArgOperand(2));
+  return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1),
+                                        CI->getArgOperand(0), Align(1),
+                                        CI->getArgOperand(2)));
 }
 
 bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
@@ -3212,6 +3259,7 @@
                        Align(1), CI->getArgOperand(2));
     NewCI->setAttributes(CI->getAttributes());
     NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+    copyFlags(*CI, NewCI);
     return CI->getArgOperand(0);
   }
   return nullptr;
@@ -3225,6 +3273,7 @@
                         Align(1), CI->getArgOperand(2));
     NewCI->setAttributes(CI->getAttributes());
     NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+    copyFlags(*CI, NewCI);
     return CI->getArgOperand(0);
   }
   return nullptr;
@@ -3238,6 +3287,7 @@
                                      CI->getArgOperand(2), Align(1));
     NewCI->setAttributes(CI->getAttributes());
     NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+    copyFlags(*CI, NewCI);
     return CI->getArgOperand(0);
   }
   return nullptr;
@@ -3252,6 +3302,7 @@
       CallInst *NewCI = cast<CallInst>(Call);
       NewCI->setAttributes(CI->getAttributes());
       NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+      copyFlags(*CI, NewCI);
       return NewCI;
     }
   return nullptr;
@@ -3277,9 +3328,9 @@
   // string lengths for varying.
   if (isFortifiedCallFoldable(CI, 2, None, 1)) {
     if (Func == LibFunc_strcpy_chk)
-      return emitStrCpy(Dst, Src, B, TLI);
+      return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
     else
-      return emitStpCpy(Dst, Src, B, TLI);
+      return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI));
   }
 
   if (OnlyLowerUnknownSize)
@@ -3299,6 +3350,7 @@
   Type *SizeTTy = DL.getIntPtrType(CI->getContext(), /*AddressSpace=*/0);
   Value *LenV = ConstantInt::get(SizeTTy, Len);
   Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+  copyFlags(*CI, Ret); // Ret may be null; copyFlags tolerates that.
   // If the function was an __stpcpy_chk, and we were able to fold it into
   // a __memcpy_chk, we still need to return the correct end pointer.
   if (Ret && Func == LibFunc_stpcpy_chk)
@@ -3309,8 +3361,8 @@
 Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI,
                                                      IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 1, None, 0))
-    return emitStrLen(CI->getArgOperand(0), B, CI->getModule()->getDataLayout(),
-                      TLI);
+    return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B,
+                                     CI->getModule()->getDataLayout(), TLI));
   return nullptr;
 }
 
@@ -3319,11 +3371,13 @@
                                                        LibFunc Func) {
   if (isFortifiedCallFoldable(CI, 3, 2)) {
     if (Func == LibFunc_strncpy_chk)
-      return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                               CI->getArgOperand(2), B, TLI);
+      return copyFlags(*CI,
+                       emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                                   CI->getArgOperand(2), B, TLI));
     else
-      return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                         CI->getArgOperand(2), B, TLI);
+      return copyFlags(*CI,
+                       emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                                   CI->getArgOperand(2), B, TLI));
   }
 
   return nullptr;
@@ -3332,8 +3386,9 @@
 Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
                                                       IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 4, 3))
-    return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                       CI->getArgOperand(2), CI->getArgOperand(3), B, TLI);
+    return copyFlags(
+        *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                         CI->getArgOperand(2), CI->getArgOperand(3), B, TLI));
 
   return nullptr;
 }
@@ -3342,8 +3397,9 @@
                                                        IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
     SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5));
-    return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
-                        CI->getArgOperand(4), VariadicArgs, B, TLI);
+    return copyFlags(*CI,
+                     emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+                                  CI->getArgOperand(4), VariadicArgs, B, TLI));
   }
 
   return nullptr;
@@ -3353,8 +3409,9 @@
                                                       IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
     SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4));
-    return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
-                       B, TLI);
+    return copyFlags(*CI,
+                     emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+                                 VariadicArgs, B, TLI));
   }
 
   return nullptr;
@@ -3363,7 +3420,8 @@
 Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
                                                      IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 2))
-    return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI);
+    return copyFlags(
+        *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI));
 
   return nullptr;
 }
@@ -3371,8 +3429,9 @@
 Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
                                                    IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 3))
-    return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
-                       CI->getArgOperand(2), B, TLI);
+    return copyFlags(*CI,
+                     emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
+                                 CI->getArgOperand(2), B, TLI));
 
   return nullptr;
 }
@@ -3380,8 +3439,9 @@
 Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
                                                       IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 3))
-    return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
-                       CI->getArgOperand(2), B, TLI);
+    return copyFlags(*CI,
+                     emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
+                                 CI->getArgOperand(2), B, TLI));
 
   return nullptr;
 }
@@ -3389,8 +3449,9 @@
 Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
                                                       IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 3))
-    return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                       CI->getArgOperand(2), B, TLI);
+    return copyFlags(*CI,
+                     emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                                 CI->getArgOperand(2), B, TLI));
 
   return nullptr;
 }
@@ -3398,8 +3459,9 @@
 Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
                                                         IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 3, 1, None, 2))
-    return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
-                         CI->getArgOperand(4), CI->getArgOperand(5), B, TLI);
+    return copyFlags(
+        *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+                           CI->getArgOperand(4), CI->getArgOperand(5), B, TLI));
 
   return nullptr;
 }
@@ -3407,8 +3469,9 @@
 Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
                                                        IRBuilderBase &B) {
   if (isFortifiedCallFoldable(CI, 2, None, None, 1))
-    return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
-                        CI->getArgOperand(4), B, TLI);
+    return copyFlags(*CI,
+                     emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+                                  CI->getArgOperand(4), B, TLI));
 
   return nullptr;
 }
diff --git a/llvm/test/CodeGen/X86/memset-nonzero.ll b/llvm/test/CodeGen/X86/memset-nonzero.ll
--- a/llvm/test/CodeGen/X86/memset-nonzero.ll
+++ b/llvm/test/CodeGen/X86/memset-nonzero.ll
@@ -196,14 +196,9 @@
 define void @memset_256_nonzero_bytes(i8* %x) {
 ; SSE-LABEL: memset_256_nonzero_bytes:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pushq %rax
-; SSE-NEXT:    .cfi_def_cfa_offset 16
 ; SSE-NEXT:    movl $256, %edx # imm = 0x100
 ; SSE-NEXT:    movl $42, %esi
-; SSE-NEXT:    callq memset@PLT
-; SSE-NEXT:    popq %rax
-; SSE-NEXT:    .cfi_def_cfa_offset 8
-; SSE-NEXT:    retq
+; SSE-NEXT:    jmp memset@PLT # TAILCALL
 ;
 ; SSE2FAST-LABEL: memset_256_nonzero_bytes:
 ; SSE2FAST:       # %bb.0:
diff --git a/llvm/test/Transforms/InstCombine/cabs-array.ll b/llvm/test/Transforms/InstCombine/cabs-array.ll
--- a/llvm/test/Transforms/InstCombine/cabs-array.ll
+++ b/llvm/test/Transforms/InstCombine/cabs-array.ll
@@ -35,7 +35,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[REAL]], [[REAL]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[IMAG]], [[IMAG]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]])
+; CHECK-NEXT:    [[CABS:%.*]] = tail call fast double @llvm.sqrt.f64(double [[TMP3]])
 ; CHECK-NEXT:    ret double [[CABS]]
 ;
   %call = tail call fast double @cabs([2 x double] %z)
@@ -49,7 +49,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[REAL]], [[REAL]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[IMAG]], [[IMAG]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]])
+; CHECK-NEXT:    [[CABS:%.*]] = tail call fast float @llvm.sqrt.f32(float [[TMP3]])
 ; CHECK-NEXT:    ret float [[CABS]]
 ;
   %call = tail call fast float @cabsf([2 x float] %z)
@@ -63,7 +63,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast fp128 [[REAL]], [[REAL]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast fp128 [[IMAG]], [[IMAG]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]])
+; CHECK-NEXT:    [[CABS:%.*]] = tail call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]])
 ; CHECK-NEXT:    ret fp128 [[CABS]]
 ;
   %call = tail call fast fp128 @cabsl([2 x fp128] %z)
diff --git a/llvm/test/Transforms/InstCombine/cabs-discrete.ll b/llvm/test/Transforms/InstCombine/cabs-discrete.ll
--- a/llvm/test/Transforms/InstCombine/cabs-discrete.ll
+++ b/llvm/test/Transforms/InstCombine/cabs-discrete.ll
@@ -33,7 +33,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[REAL:%.*]], [[REAL]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[IMAG:%.*]], [[IMAG]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]])
+; CHECK-NEXT:    [[CABS:%.*]] = tail call fast double @llvm.sqrt.f64(double [[TMP3]])
 ; CHECK-NEXT:    ret double [[CABS]]
 ;
   %call = tail call fast double @cabs(double %real, double %imag)
@@ -45,7 +45,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[REAL:%.*]], [[REAL]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[IMAG:%.*]], [[IMAG]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]])
+; CHECK-NEXT:    [[CABS:%.*]] = tail call fast float @llvm.sqrt.f32(float [[TMP3]])
 ; CHECK-NEXT:    ret float [[CABS]]
 ;
   %call = tail call fast float @cabsf(float %real, float %imag)
@@ -57,7 +57,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast fp128 [[REAL:%.*]], [[REAL]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast fp128 [[IMAG:%.*]], [[IMAG]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]])
+; CHECK-NEXT:    [[CABS:%.*]] = tail call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]])
 ; CHECK-NEXT:    ret fp128 [[CABS]]
 ;
   %call = tail call fast fp128 @cabsl(fp128 %real, fp128 %imag)
diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll
--- a/llvm/test/Transforms/InstCombine/fortify-folding.ll
+++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll
@@ -31,6 +31,17 @@
   ret i8* %ret
 }
 
+define i8* @test_memccpy_tail() {
+; CHECK-LABEL: @test_memccpy_tail(
+; CHECK-NEXT:    [[MEMCCPY:%.*]] = tail call i8* @memccpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i32 0, i64 60)
+; CHECK-NEXT:    ret i8* [[MEMCCPY]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1)
+  ret i8* %ret
+}
+
 define i8* @test_mempcpy() {
 ; CHECK-LABEL: @test_mempcpy(
 ; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false)
@@ -53,6 +64,17 @@
   ret i8* %ret
 }
 
+define i8* @test_mempcpy_tail() {
+; CHECK-LABEL: @test_mempcpy_tail(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false)
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 15)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 15, i64 -1)
+  ret i8* %ret
+}
+
 define i32 @test_snprintf() {
 ; CHECK-LABEL: @test_snprintf(
 ; CHECK-NEXT:    [[SNPRINTF:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
@@ -77,6 +99,17 @@
   ret i32 %ret
 }
 
+define i32 @test_snprintf_tail() {
+; CHECK-LABEL: @test_snprintf_tail(
+; CHECK-NEXT:    [[SNPRINTF:%.*]] = tail call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
+; CHECK-NEXT:    ret i32 [[SNPRINTF]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 0, i64 -1, i8* %fmt)
+  ret i32 %ret
+}
+
 define i32 @test_sprintf() {
 ; CHECK-LABEL: @test_sprintf(
 ; CHECK-NEXT:    [[SPRINTF:%.*]] = call i32 (i8*, i8*, ...) @sprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
@@ -101,6 +134,17 @@
   ret i32 %ret
 }
 
+define i32 @test_sprintf_tail() {
+; CHECK-LABEL: @test_sprintf_tail(
+; CHECK-NEXT:    [[SPRINTF:%.*]] = tail call i32 (i8*, i8*, ...) @sprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
+; CHECK-NEXT:    ret i32 [[SPRINTF]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 0, i64 -1, i8* %fmt)
+  ret i32 %ret
+}
+
 define i8* @test_strcat() {
 ; CHECK-LABEL: @test_strcat(
 ; CHECK-NEXT:    [[STRCAT:%.*]] = call i8* @strcat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
@@ -123,6 +167,17 @@
   ret i8* %ret
 }
 
+define i8* @test_strcat_tail() {
+; CHECK-LABEL: @test_strcat_tail(
+; CHECK-NEXT:    [[STRCAT:%.*]] = tail call i8* @strcat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i8* @__strcat_chk(i8* %dst, i8* %src, i64 -1)
+  ret i8* %ret
+}
+
 define i64 @test_strlcat() {
 ; CHECK-LABEL: @test_strlcat(
 ; CHECK-NEXT:    [[STRLCAT:%.*]] = call i64 @strlcat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
@@ -145,6 +200,17 @@
   ret i64 %ret
 }
 
+define i64 @test_strlcat_tail() {
+; CHECK-LABEL: @test_strlcat_tail(
+; CHECK-NEXT:    [[STRLCAT:%.*]] = tail call i64 @strlcat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
+; CHECK-NEXT:    ret i64 [[STRLCAT]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i64 @__strlcat_chk(i8* %dst, i8* %src, i64 22, i64 -1)
+  ret i64 %ret
+}
+
 define i8* @test_strncat() {
 ; CHECK-LABEL: @test_strncat(
 ; CHECK-NEXT:    [[STRNCAT:%.*]] = call i8* @strncat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
@@ -167,6 +233,17 @@
   ret i8* %ret
 }
 
+define i8* @test_strncat_tail() {
+; CHECK-LABEL: @test_strncat_tail(
+; CHECK-NEXT:    [[STRNCAT:%.*]] = tail call i8* @strncat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i8* @__strncat_chk(i8* %dst, i8* %src, i64 22, i64 -1)
+  ret i8* %ret
+}
+
 define i64 @test_strlcpy() {
 ; CHECK-LABEL: @test_strlcpy(
 ; CHECK-NEXT:    [[STRLCPY:%.*]] = call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
@@ -189,6 +266,17 @@
   ret i64 %ret
 }
 
+define i64 @test_strlcpy_tail() {
+; CHECK-LABEL: @test_strlcpy_tail(
+; CHECK-NEXT:    [[STRLCPY:%.*]] = tail call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
+; CHECK-NEXT:    ret i64 [[STRLCPY]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i64 @__strlcpy_chk(i8* %dst, i8* %src, i64 22, i64 -1)
+  ret i64 %ret
+}
+
 define i32 @test_vsnprintf() {
 ; CHECK-LABEL: @test_vsnprintf(
 ; CHECK-NEXT:    [[VSNPRINTF:%.*]] = call i32 @vsnprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
@@ -215,6 +303,18 @@
   ret i32 %ret
 }
 
+define i32 @test_vsnprintf_tail() {
+; CHECK-LABEL: @test_vsnprintf_tail(
+; CHECK-NEXT:    [[VSNPRINTF:%.*]] = tail call i32 @vsnprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
+; CHECK-NEXT:    ret i32 [[VSNPRINTF]]
+;
+  ; The fortified call is marked 'tail'; the folded vsnprintf call must keep it.
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null)
+  ret i32 %ret
+}
+
 define i32 @test_vsprintf() {
 ; CHECK-LABEL: @test_vsprintf(
 ; CHECK-NEXT:    [[VSPRINTF:%.*]] = call i32 @vsprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
@@ -241,6 +341,18 @@
   ret i32 %ret
 }
 
+define i32 @test_vsprintf_tail() {
+; CHECK-LABEL: @test_vsprintf_tail(
+; CHECK-NEXT:    [[VSPRINTF:%.*]] = tail call i32 @vsprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
+; CHECK-NEXT:    ret i32 [[VSPRINTF]]
+;
+  ; The fortified call is marked 'tail'; the folded vsprintf call must keep it.
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = tail call i32 @__vsprintf_chk(i8* %dst, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null)
+  ret i32 %ret
+}
+
 declare i8* @__mempcpy_chk(i8*, i8*, i64, i64)
 declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64)
 declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...)
diff --git a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
--- a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -38,6 +38,18 @@
   ret i8* %ret
 }
 
+; Same as test_simplify1 but with a tail call rather than vanilla call.
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false)
+; CHECK-NEXT:    ret i8* bitcast (%struct.T1* @t1 to i8*)
+;
+  %dst = bitcast %struct.T1* @t1 to i8*
+  %src = bitcast %struct.T2* @t2 to i8*
+  %ret = tail call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+  ret i8* %ret
+}
+
 ; Check cases where dstlen < len.
 
 define i8* @test_no_simplify1() {
diff --git a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
--- a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
@@ -40,6 +40,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false)
+; CHECK-NEXT:    ret i8* bitcast (%struct.T1* @t1 to i8*)
+;
+  %dst = bitcast %struct.T1* @t1 to i8*
+  %src = bitcast %struct.T2* @t2 to i8*
+
+  %ret = tail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+  ret i8* %ret
+}
+
 ; Check cases where dstlen < len.
 
 define i8* @test_no_simplify1() {
diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
--- a/llvm/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
@@ -45,6 +45,18 @@
   ret i8* %ret
 }
 
+; Same as @test_simplify1 with tail call.
+define i8* @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
+; CHECK-NEXT:    ret i8* bitcast (%struct.T* @t to i8*)
+;
+  %dst = bitcast %struct.T* @t to i8*
+
+  %ret = tail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+  ret i8* %ret
+}
+
 ; Check cases where dstlen < len.
 
 define i8* @test_no_simplify1() {
diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll
--- a/llvm/test/Transforms/InstCombine/objsize.ll
+++ b/llvm/test/Transforms/InstCombine/objsize.ll
@@ -251,7 +251,7 @@
 
 define i32 @test11(i8** %esc) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT:    [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT:    store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT:    ret i32 8
 ;
@@ -263,7 +263,7 @@
 
 define i32 @test12(i8** %esc) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT:    [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT:    store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT:    ret i32 8
 ;
@@ -275,7 +275,7 @@
 
 define i32 @test13(i8** %esc) {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT:    [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT:    [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT:    store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT:    ret i32 8
 ;
diff --git a/llvm/test/Transforms/InstCombine/pow-exp.ll b/llvm/test/Transforms/InstCombine/pow-exp.ll
--- a/llvm/test/Transforms/InstCombine/pow-exp.ll
+++ b/llvm/test/Transforms/InstCombine/pow-exp.ll
@@ -214,7 +214,7 @@
 ; CHECK-LABEL: @pow_ok_base(
 ; Do not change 0xBFE0776{{.*}} to the exact constant, see PR42740
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0xBFE0776{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
   %call = tail call afn nnan ninf double @pow(double 0x3FE6666666666666, double %e)
@@ -224,7 +224,7 @@
 define double @pow_ok_base_fast(double %e) {
 ; CHECK-LABEL: @pow_ok_base_fast(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[E:%.*]], 0xBFE0776{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast double @exp2(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
   %call = tail call fast double @pow(double 0x3FE6666666666666, double %e)
@@ -234,7 +234,7 @@
 define double @pow_ok_base2(double %e) {
 ; CHECK-LABEL: @pow_ok_base2(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x4010952{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
   %call = tail call afn nnan ninf double @pow(double 1.770000e+01, double %e)
@@ -244,7 +244,7 @@
 define double @pow_ok_base3(double %e) {
 ; CHECK-LABEL: @pow_ok_base3(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x400AB0B5{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
   %call = tail call afn nnan ninf double @pow(double 1.010000e+01, double %e)
@@ -254,7 +254,7 @@
 define double @pow_ok_ten_base(double %e) {
 ; CHECK-LABEL: @pow_ok_ten_base(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x400A934F{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
   %call = tail call afn nnan ninf double @pow(double 1.000000e+01, double %e)
@@ -264,7 +264,7 @@
 define double @pow_ok_denorm_base(double %e) {
 ; CHECK-LABEL: @pow_ok_denorm_base(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0xC0904800000005C5
-; CHECK-NEXT:    [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
   %call = tail call afn nnan ninf double @pow(double 0x00000000FFFFFFFF, double %e)
@@ -274,7 +274,7 @@
 define float @powf_ok_base(float %e) {
 ; CHECK-LABEL: @powf_ok_base(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0xBFE07762{{.*}}
-; CHECK-NEXT:    [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]])
+; CHECK-NEXT:    [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]])
 ; CHECK-NEXT:    ret float [[EXP2F]]
 ;
   %call = tail call afn nnan ninf float @powf(float 0x3FE6666660000000, float %e)
@@ -284,7 +284,7 @@
 define float @powf_ok_base2(float %e) {
 ; CHECK-LABEL: @powf_ok_base2(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x4010952{{.*}}
-; CHECK-NEXT:    [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]])
+; CHECK-NEXT:    [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]])
 ; CHECK-NEXT:    ret float [[EXP2F]]
 ;
   %call = tail call afn nnan ninf float @powf(float 0x4031B33340000000, float %e)
@@ -294,7 +294,7 @@
 define float @powf_ok_base3(float %e) {
 ; CHECK-LABEL: @powf_ok_base3(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x400AB0B5{{.*}}
-; CHECK-NEXT:    [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]])
+; CHECK-NEXT:    [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]])
 ; CHECK-NEXT:    ret float [[EXP2F]]
 ;
   %call = tail call afn nnan ninf float @powf(float 0x4024333340000000, float %e)
@@ -304,7 +304,7 @@
 define float @powf_ok_ten_base(float %e) {
 ; CHECK-LABEL: @powf_ok_ten_base(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x400A934{{.*}}
-; CHECK-NEXT:    [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]])
+; CHECK-NEXT:    [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]])
 ; CHECK-NEXT:    ret float [[EXP2F]]
 ;
   %call = tail call afn nnan ninf float @powf(float 1.000000e+01, float %e)
@@ -314,7 +314,7 @@
 define float @powf_ok_denorm_base(float %e) {
 ; CHECK-LABEL: @powf_ok_denorm_base(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], -1.350000e+02
-; CHECK-NEXT:    [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]])
+; CHECK-NEXT:    [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]])
 ; CHECK-NEXT:    ret float [[EXP2F]]
 ;
   %call = tail call afn nnan ninf float @powf(float 0x3780000000000000, float %e)
@@ -371,7 +371,7 @@
 define double @pow_multiuse(double %e) {
 ; CHECK-LABEL: @pow_multiuse(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x4002934{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]])
 ; CHECK-NEXT:    tail call void @use_d(double [[EXP2]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
@@ -401,7 +401,7 @@
 define double @pow_ok_base_no_ninf(double %e) {
 ; CHECK-LABEL: @pow_ok_base_no_ninf(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan afn double [[E:%.*]], 0xBFE0776{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call nnan afn double @exp2(double [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan afn double @exp2(double [[MUL]])
 ; CHECK-NEXT:    ret double [[EXP2]]
 ;
   %call = tail call afn nnan double @pow(double 0x3FE6666666666666, double %e)
@@ -456,7 +456,7 @@
 define float @powf_multiuse(float %e) {
 ; CHECK-LABEL: @powf_multiuse(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x4002934{{.*}}
-; CHECK-NEXT:    [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]])
+; CHECK-NEXT:    [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]])
 ; CHECK-NEXT:    tail call void @use_f(float [[EXP2F]])
 ; CHECK-NEXT:    ret float [[EXP2F]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
--- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll
+++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
@@ -5,7 +5,7 @@
 
 define double @pow_sitofp_const_base_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -18,7 +18,7 @@
 define double @pow_uitofp_const_base_fast(i31 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -30,7 +30,7 @@
 
 define double @pow_sitofp_double_const_base_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_const_base_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[X:%.*]])
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %subfp = sitofp i32 %x to double
@@ -41,7 +41,7 @@
 define double @pow_uitofp_double_const_base_fast(i31 %x) {
 ; CHECK-LABEL: @pow_uitofp_double_const_base_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[TMP1]])
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
   %subfp = uitofp i31 %x to double
@@ -51,7 +51,7 @@
 
 define double @pow_sitofp_double_const_base_2_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_const_base_2_fast(
-; CHECK-NEXT:    [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i32 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[LDEXPF]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -65,7 +65,7 @@
 ; CHECK-LABEL: @pow_sitofp_double_const_base_power_of_2_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -78,7 +78,7 @@
 define double @pow_uitofp_const_base_2_fast(i31 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_2_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32
-; CHECK-NEXT:    [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i32 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[LDEXPF]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -92,7 +92,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i31 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -104,7 +104,7 @@
 
 define double @pow_sitofp_float_base_fast(float %base, i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_float_base_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -117,7 +117,7 @@
 define double @pow_uitofp_float_base_fast(float %base, i31 %x) {
 ; CHECK-LABEL: @pow_uitofp_float_base_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -129,7 +129,7 @@
 
 define double @pow_sitofp_double_base_fast(double %base, i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_base_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[X:%.*]])
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %subfp = sitofp i32 %x to double
@@ -140,7 +140,7 @@
 define double @pow_uitofp_double_base_fast(double %base, i31 %x) {
 ; CHECK-LABEL: @pow_uitofp_double_base_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
   %subfp = uitofp i31 %x to double
@@ -151,7 +151,7 @@
 define double @pow_sitofp_const_base_fast_i8(i8 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_fast_i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -164,7 +164,7 @@
 define double @pow_sitofp_const_base_fast_i16(i16 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_fast_i16(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -178,7 +178,7 @@
 define double @pow_uitofp_const_base_fast_i8(i8 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_fast_i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -191,7 +191,7 @@
 define double @pow_uitofp_const_base_fast_i16(i16 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_fast_i16(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -203,7 +203,7 @@
 
 define double @powf_exp_const_int_fast(double %base) {
 ; CHECK-LABEL: @powf_exp_const_int_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 40)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 40)
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01)
@@ -212,7 +212,7 @@
 
 define double @powf_exp_const2_int_fast(double %base) {
 ; CHECK-LABEL: @powf_exp_const2_int_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 -40)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 -40)
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %res = tail call fast double @llvm.pow.f64(double %base, double -4.000000e+01)
@@ -225,7 +225,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_fast_i32(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x4006757{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -238,7 +238,7 @@
 define double @pow_uitofp_const_base_2_fast_i32(i32 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_2_fast_i32(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[SUBFP]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[SUBFP]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -252,7 +252,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast_i32(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -291,7 +291,7 @@
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i64 [[X:%.*]] to float
 ; Do not change 0x400675{{.*}} to the exact constant, see PR42740
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x400675{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -305,7 +305,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_fast_i64(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i64 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x400675{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -343,7 +343,7 @@
 
 define double @pow_sitofp_const_base_2_no_fast(i32 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_2_no_fast(
-; CHECK-NEXT:    [[LDEXPF:%.*]] = call float @ldexpf(float 1.000000e+00, i32 [[X:%.*]])
+; CHECK-NEXT:    [[LDEXPF:%.*]] = tail call float @ldexpf(float 1.000000e+00, i32 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[LDEXPF]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -357,7 +357,7 @@
 ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -370,7 +370,7 @@
 define double @pow_uitofp_const_base_2_no_fast(i32 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_2_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[SUBFP]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[SUBFP]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -384,7 +384,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int16.ll b/llvm/test/Transforms/InstCombine/pow_fp_int16.ll
--- a/llvm/test/Transforms/InstCombine/pow_fp_int16.ll
+++ b/llvm/test/Transforms/InstCombine/pow_fp_int16.ll
@@ -5,7 +5,7 @@
 
 define double @pow_sitofp_const_base_fast(i16 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -18,7 +18,7 @@
 define double @pow_uitofp_const_base_fast(i15 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -30,7 +30,7 @@
 
 define double @pow_sitofp_double_const_base_fast(i16 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_const_base_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[X:%.*]])
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %subfp = sitofp i16 %x to double
@@ -41,7 +41,7 @@
 define double @pow_uitofp_double_const_base_fast(i15 %x) {
 ; CHECK-LABEL: @pow_uitofp_double_const_base_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[TMP1]])
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
   %subfp = uitofp i15 %x to double
@@ -51,7 +51,7 @@
 
 define double @pow_sitofp_double_const_base_2_fast(i16 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_const_base_2_fast(
-; CHECK-NEXT:    [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i16 [[X:%.*]])
+; CHECK-NEXT:    [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i16 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[LDEXPF]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -65,7 +65,7 @@
 ; CHECK-LABEL: @pow_sitofp_double_const_base_power_of_2_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i16 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -78,7 +78,7 @@
 define double @pow_uitofp_const_base_2_fast(i15 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_2_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16
-; CHECK-NEXT:    [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i16 [[TMP1]])
+; CHECK-NEXT:    [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i16 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[LDEXPF]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -92,7 +92,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i15 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -104,7 +104,7 @@
 
 define double @pow_sitofp_float_base_fast(float %base, i16 %x) {
 ; CHECK-LABEL: @pow_sitofp_float_base_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -117,7 +117,7 @@
 define double @pow_uitofp_float_base_fast(float %base, i15 %x) {
 ; CHECK-LABEL: @pow_uitofp_float_base_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -129,7 +129,7 @@
 
 define double @pow_sitofp_double_base_fast(double %base, i16 %x) {
 ; CHECK-LABEL: @pow_sitofp_double_base_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[X:%.*]])
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %subfp = sitofp i16 %x to double
@@ -140,7 +140,7 @@
 define double @pow_uitofp_double_base_fast(double %base, i15 %x) {
 ; CHECK-LABEL: @pow_uitofp_double_base_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[TMP1]])
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
   %subfp = uitofp i15 %x to double
@@ -151,7 +151,7 @@
 define double @pow_sitofp_const_base_fast_i8(i8 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_fast_i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -163,7 +163,7 @@
 
 define double @pow_sitofp_const_base_fast_i16(i16 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_fast_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -177,7 +177,7 @@
 define double @pow_uitofp_const_base_fast_i8(i8 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_fast_i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[TMP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -202,7 +202,7 @@
 
 define double @powf_exp_const_int_fast(double %base) {
 ; CHECK-LABEL: @powf_exp_const_int_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 40)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 40)
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01)
@@ -211,7 +211,7 @@
 
 define double @powf_exp_const2_int_fast(double %base) {
 ; CHECK-LABEL: @powf_exp_const2_int_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 -40)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 -40)
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %res = tail call fast double @llvm.pow.f64(double %base, double -4.000000e+01)
@@ -224,7 +224,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_fast_i16(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x4006757{{.*}}
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -237,7 +237,7 @@
 define double @pow_uitofp_const_base_2_fast_i16(i16 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_2_fast_i16(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[SUBFP]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[SUBFP]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -251,7 +251,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast_i16(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -313,7 +313,7 @@
 
 define double @pow_sitofp_const_base_2_no_fast(i16 %x) {
 ; CHECK-LABEL: @pow_sitofp_const_base_2_no_fast(
-; CHECK-NEXT:    [[LDEXPF:%.*]] = call float @ldexpf(float 1.000000e+00, i16 [[X:%.*]])
+; CHECK-NEXT:    [[LDEXPF:%.*]] = tail call float @ldexpf(float 1.000000e+00, i16 [[X:%.*]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[LDEXPF]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -327,7 +327,7 @@
 ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = sitofp i16 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -340,7 +340,7 @@
 define double @pow_uitofp_const_base_2_no_fast(i16 %x) {
 ; CHECK-LABEL: @pow_uitofp_const_base_2_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float
-; CHECK-NEXT:    [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[SUBFP]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[SUBFP]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
@@ -354,7 +354,7 @@
 ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast(
 ; CHECK-NEXT:    [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00
-; CHECK-NEXT:    [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]])
 ; CHECK-NEXT:    [[RES:%.*]] = fpext float [[EXP2]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
--- a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -48,6 +48,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify1_tail() {
+; CHECK-LABEL: @test_simplify1_tail(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+  %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret i8* %ret
+}
+
 ; Check cases where there are no string constants.
 
 define i8* @test_simplify4() {
@@ -62,6 +74,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify4_tail() {
+; CHECK-LABEL: @test_simplify4_tail(
+; CHECK-NEXT:    [[STPCPY:%.*]] = tail call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT:    ret i8* [[STPCPY]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where the string length is not constant.
 
 define i8* @test_simplify5() {
@@ -93,6 +117,20 @@
   ret i8* %ret
 }
 
+; Check cases where there are no string constants and the call is a tail call.
+
+define i8* @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT:    [[STPCPY:%.*]] = tail call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT:    ret i8* [[STPCPY]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where slen < strlen (src).
 
 define i8* @test_no_simplify1() {
diff --git a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
--- a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -24,6 +24,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify1_tail() {
+; CHECK-LABEL: @test_simplify1_tail(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret i8* %ret
+}
+
 define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
 ; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
@@ -62,6 +74,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify4_tail() {
+; CHECK-LABEL: @test_simplify4_tail(
+; CHECK-NEXT:    [[STRCPY:%.*]] = tail call i8* @strcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where the string length is not constant.
 
 define i8* @test_simplify5() {
@@ -93,6 +117,20 @@
   ret i8* %ret
 }
 
+; Check cases where there are no string constants and the call is a tail call.
+
+define i8* @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT:    [[STRCPY:%.*]] = tail call i8* @strcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where slen < strlen (src).
 
 define i8* @test_no_simplify1() {