diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2472,6 +2472,12 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { if (!CI->getCalledFunction()) return nullptr; + // Skip optimizing notail and musttail calls so + // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants. + // LibCallSimplifier::optimizeCall should try to preserve tail calls though. + if (CI->isMustTailCall() || CI->isNoTailCall()) + return nullptr; + auto InstCombineRAUW = [this](Instruction *From, Value *With) { replaceInstUsesWith(*From, With); }; diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -193,6 +193,19 @@ } } +// Copy CallInst "flags" like musttail, notail, and tail. Return New param for +// easier chaining. Calls to emit* and B.CreateCall should probably be wrapped +// in this function when New is created to replace Old. Callers should take +// care to check Old.isMustTailCall() if they aren't replacing Old directly +// with New. +static Value *copyFlags(const CallInst &Old, Value *New) { + assert(!Old.isMustTailCall() && "do not copy musttail call flags"); + assert(!Old.isNoTailCall() && "do not copy notail call flags"); + if (auto *NewCI = dyn_cast_or_null<CallInst>(New)) + NewCI->setTailCallKind(Old.getTailCallKind()); + return New; +} + //===----------------------------------------------------------------------===// // String and Memory Library Call Optimizations //===----------------------------------------------------------------------===// @@ -215,7 +228,7 @@ if (Len == 0) return Dst; - return emitStrLenMemCpy(Src, Dst, Len, B); + return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B)); } Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, @@ -279,7 +292,7 @@ // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further. - return emitStrLenMemCpy(Src, Dst, SrcLen, B); + return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B)); } Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) { @@ -300,9 +313,11 @@ if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. return nullptr; - return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul. - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), - B, DL, TLI); + return copyFlags( + *CI, + emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), B, + DL, TLI)); } // Otherwise, the character is a constant, see if the first argument is @@ -340,7 +355,7 @@ if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (CharC->isZero()) - return emitStrChr(SrcStr, '\0', B, TLI); + return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI)); return nullptr; } @@ -385,25 +400,28 @@ annotateDereferenceableBytes(CI, 1, Len2); if (Len1 && Len2) { - return emitMemCmp(Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - std::min(Len1, Len2)), - B, DL, TLI); + return copyFlags( + *CI, emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + std::min(Len1, Len2)), + B, DL, TLI)); } // strcmp to memcmp if (!HasStr1 && HasStr2) { if (canTransformToMemCmp(CI, Str1P, Len2, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL, - TLI); + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), + B, DL, TLI)); } else if (HasStr1 && !HasStr2) { if (canTransformToMemCmp(CI, Str2P, Len1, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL, - TLI); + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), + B, DL, TLI)); } annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); @@ -430,7 +448,7 @@ return ConstantInt::get(CI->getType(), 0); if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI); + return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI)); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -462,17 +480,19 @@ if (!HasStr1 && HasStr2) { Len2 = std::min(Len2, Length); if (canTransformToMemCmp(CI, Str1P, Len2, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL, - TLI); + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), + B, DL, TLI)); } else if (HasStr1 && !HasStr2) { Len1 = std::min(Len1, Length); if (canTransformToMemCmp(CI, Str2P, Len1, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL, - TLI); + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), + B, DL, TLI)); } return nullptr; @@ -485,7 +505,7 @@ if (SrcLen && Size) { annotateDereferenceableBytes(CI, 0, SrcLen); if (SrcLen <= Size->getZExtValue() + 1) - return emitStrDup(Src, B, TLI); + return copyFlags(*CI, emitStrDup(Src, B, TLI)); } return nullptr; @@ -495,7 +515,7 @@ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; - + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); @@ -511,6 +531,7 @@ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return Dst; } @@ -520,7 +541,7 @@ // stpcpy(d,s) -> strcpy(d,s) if the result is not used. 
if (CI->use_empty()) - return emitStrCpy(Dst, Src, B, TLI); + return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI)); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) Value *StrLen = emitStrLen(Src, B, DL, TLI); @@ -544,6 +565,7 @@ CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return DstEnd; } @@ -583,6 +605,7 @@ AttrBuilder ArgAttrs(CI->getAttributes().getParamAttrs(0)); NewCI->setAttributes(NewCI->getAttributes().addParamAttributes( CI->getContext(), 0, ArgAttrs)); + copyFlags(*CI, NewCI); return Dst; } @@ -606,6 +629,7 @@ ConstantInt::get(DL.getIntPtrType(PT), Len)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return Dst; } @@ -737,7 +761,7 @@ // strpbrk(s, "a") -> strchr(s, 'a') if (HasS2 && S2.size() == 1) - return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI); + return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI)); return nullptr; } @@ -793,7 +817,7 @@ // strcspn(s, "") -> strlen(s) if (HasS2 && S2.empty()) - return emitStrLen(CI->getArgOperand(0), B, DL, TLI); + return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI)); return nullptr; } @@ -1062,7 +1086,7 @@ Value *LHS = CI->getArgOperand(0); Value *RHS = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); - return emitBCmp(LHS, RHS, Size, B, DL, TLI); + return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI)); } return nullptr; @@ -1083,6 +1107,7 @@ CI->getArgOperand(1), Align(1), Size); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } @@ -1110,7 +1135,8 @@ size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF); if (Pos == StringRef::npos) { if (N->getZExtValue() <= SrcStr.size()) { - B.CreateMemCpy(Dst, Align(1), Src, Align(1), CI->getArgOperand(3)); + copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), + CI->getArgOperand(3))); return Constant::getNullValue(CI->getType()); } return nullptr; @@ -1119,7 +1145,7 @@ Value *NewN = ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue())); // memccpy -> llvm.memcpy - B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN); + copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN)); return Pos + 1 <= N->getZExtValue() ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN) : Constant::getNullValue(CI->getType()); @@ -1136,6 +1162,7 @@ // TODO: Attach return value attributes to the 1st operand to preserve them? 
NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N); } @@ -1150,6 +1177,7 @@ CI->getArgOperand(1), Align(1), Size); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } @@ -1164,12 +1192,13 @@ CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) { if (isa<ConstantPointerNull>(CI->getArgOperand(0))) - return emitMalloc(CI->getArgOperand(1), B, DL, TLI); + return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI)); return nullptr; } @@ -1190,7 +1219,7 @@ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType()); CallInst *NewCall = B.CreateCall(F, V); NewCall->takeName(CI); - return NewCall; + return copyFlags(*CI, NewCall); } /// Return a variant of Val with float type. @@ -1311,7 +1340,8 @@ Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt, CI->getType()); - return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"); + return copyFlags( + *CI, B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs")); } static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func, @@ -1334,14 +1364,16 @@ // sin(-X) --> -sin(X) // tan(-X) --> -tan(X) if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) - return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X)); + return B.CreateFNeg( + copyFlags(*Call, B.CreateCall(Call->getCalledFunction(), X))); break; case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: // cos(-X) --> cos(X) if (match(Call->getArgOperand(0), m_FNeg(m_Value(X)))) - return B.CreateCall(Call->getCalledFunction(), X, "cos"); + return copyFlags(*Call, + B.CreateCall(Call->getCalledFunction(), X, "cos")); break; default: break; @@ -1476,9 +1508,10 @@ (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) && hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) - return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI, - LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, - B, Attrs); + return copyFlags(*Pow, + emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, + TLI, LibFunc_ldexp, LibFunc_ldexpf, + LibFunc_ldexpl, B, Attrs)); } // pow(2.0 ** n, x) -> exp2(n * x) @@ -1496,11 +1529,13 @@ double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0); Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul"); if (Pow->doesNotAccessMemory()) - return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), - FMul, "exp2"); + return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration( + Mod, Intrinsic::exp2, Ty), + FMul, "exp2")); else - return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs); + return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, + LibFunc_exp2f, + LibFunc_exp2l, B, Attrs)); } } @@ -1508,8 +1543,9 @@ // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
if (match(Base, m_SpecificFP(10.0)) && hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) - return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, - LibFunc_exp10l, B, Attrs); + return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, + LibFunc_exp10f, LibFunc_exp10l, + B, Attrs)); // pow(x, y) -> exp2(log2(x) * y) if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() && @@ -1528,11 +1564,13 @@ if (Log) { Value *FMul = B.CreateFMul(Log, Expo, "mul"); if (Pow->doesNotAccessMemory()) - return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), - FMul, "exp2"); + return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration( + Mod, Intrinsic::exp2, Ty), + FMul, "exp2")); else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) - return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs); + return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, + LibFunc_exp2f, + LibFunc_exp2l, B, Attrs)); } } @@ -1595,6 +1633,8 @@ Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs"); } + Sqrt = copyFlags(*Pow, Sqrt); + // Handle non finite base by expanding to // (x == -infinity ? +infinity : sqrt(x)). if (!Pow->hasNoInfs()) { @@ -1721,15 +1761,18 @@ if (ExpoF->isInteger() && ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == APFloat::opOK) { - return createPowWithIntegerExponent( - Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B); + return copyFlags( + *Pow, + createPowWithIntegerExponent( + Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), + M, B)); } } // powf(x, itofp(y)) -> powi(x, y) if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) { if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) - return createPowWithIntegerExponent(Base, ExpoI, M, B); + return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B)); } // Shrink pow() to powf() if the arguments are single precision, @@ -1792,7 +1835,8 @@ Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum : Intrinsic::maxnum; Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType()); - return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) }); + return copyFlags( + *CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)})); } Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { @@ -2010,9 +2054,9 @@ // of the square root calculation. Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); - return B.CreateFMul(FabsCall, SqrtCall); + return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall)); } - return FabsCall; + return copyFlags(*CI, FabsCall); } // TODO: Generalize to handle any trig function and its inverse. @@ -2327,7 +2371,7 @@ // printf("x") -> putchar('x'), even for "%" and "%%". if (FormatStr.size() == 1 || FormatStr == "%%") - return emitPutChar(B.getInt32(FormatStr[0]), B, TLI); + return copyFlags(*CI, emitPutChar(B.getInt32(FormatStr[0]), B, TLI)); // Try to remove call or emit putchar/puts.
if (FormatStr == "%s" && CI->arg_size() > 1) { @@ -2339,12 +2383,12 @@ return (Value *)CI; // printf("%s", "a") --> putchar('a') if (OperandStr.size() == 1) - return emitPutChar(B.getInt32(OperandStr[0]), B, TLI); + return copyFlags(*CI, emitPutChar(B.getInt32(OperandStr[0]), B, TLI)); // printf("%s", str"\n") --> puts(str) if (OperandStr.back() == '\n') { OperandStr = OperandStr.drop_back(); Value *GV = B.CreateGlobalString(OperandStr, "str"); - return emitPutS(GV, B, TLI); + return copyFlags(*CI, emitPutS(GV, B, TLI)); } return nullptr; } @@ -2356,19 +2400,19 @@ // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - return emitPutS(GV, B, TLI); + return copyFlags(*CI, emitPutS(GV, B, TLI)); } // Optimize specific format strings. // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->arg_size() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) - return emitPutChar(CI->getArgOperand(1), B, TLI); + return copyFlags(*CI, emitPutChar(CI->getArgOperand(1), B, TLI)); // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->arg_size() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) - return emitPutS(CI->getArgOperand(1), B, TLI); + return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI)); return nullptr; } @@ -2459,7 +2503,7 @@ if (CI->use_empty()) // sprintf(dest, "%s", str) -> strcpy(dest, str) - return emitStrCpy(Dest, CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI)); uint64_t SrcLen = GetStringLength(CI->getArgOperand(2)); if (SrcLen) { @@ -2558,10 +2602,12 @@ // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt, // strlen(fmt)+1) - B.CreateMemCpy( - CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - FormatStr.size() + 1)); // Copy the null byte. + copyFlags( + *CI, + B.CreateMemCpy( + CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1))); // Copy the null byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -2599,8 +2645,10 @@ else if (N < Str.size() + 1) return nullptr; - B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(3), - Align(1), ConstantInt::get(CI->getType(), Str.size() + 1)); + copyFlags( + *CI, B.CreateMemCpy(CI->getArgOperand(0), Align(1), + CI->getArgOperand(3), Align(1), + ConstantInt::get(CI->getType(), Str.size() + 1))); // The snprintf result is the unincremented number of bytes in the string. return ConstantInt::get(CI->getType(), Str.size()); @@ -2640,10 +2688,11 @@ if (FormatStr.contains('%')) return nullptr; // We found a format specifier. 
- return emitFWrite( - CI->getArgOperand(1), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()), - CI->getArgOperand(0), B, DL, TLI); + return copyFlags( + *CI, emitFWrite(CI->getArgOperand(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size()), + CI->getArgOperand(0), B, DL, TLI)); } // The remaining optimizations require the format string to be "%s" or "%c" @@ -2656,14 +2705,16 @@ // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; - return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); + return copyFlags( + *CI, emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI)); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); + return copyFlags( + *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI)); } return nullptr; } @@ -2750,10 +2801,11 @@ return nullptr; // Known to have no uses (see above). - return emitFWrite( - CI->getArgOperand(0), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), - CI->getArgOperand(1), B, DL, TLI); + return copyFlags( + *CI, + emitFWrite(CI->getArgOperand(0), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), + CI->getArgOperand(1), B, DL, TLI)); } Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) { @@ -2765,15 +2817,16 @@ // puts("") -> putchar('\n') StringRef Str; if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) - return emitPutChar(B.getInt32('\n'), B, TLI); + return copyFlags(*CI, emitPutChar(B.getInt32('\n'), B, TLI)); return nullptr; } Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) { // bcopy(src, dst, n) -> llvm.memmove(dst, src, n) - return B.CreateMemMove(CI->getArgOperand(1), Align(1), CI->getArgOperand(0), - Align(1), CI->getArgOperand(2)); + return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1), + CI->getArgOperand(0), Align(1), + CI->getArgOperand(2))); } bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { @@ -2971,6 +3024,8 @@ } Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { + assert(!CI->isMustTailCall() && "These transforms aren't musttail safe."); + // TODO: Split out the code below that operates on FP calls so that // we can allow all non-FP calls with the StrictFP attribute to be // optimized. @@ -3212,6 +3267,7 @@ Align(1), CI->getArgOperand(2)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } return nullptr; @@ -3225,6 +3281,7 @@ Align(1), CI->getArgOperand(2)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } return nullptr; @@ -3238,6 +3295,7 @@ CI->getArgOperand(2), Align(1)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); return CI->getArgOperand(0); } return nullptr; @@ -3252,7 +3310,7 @@ CallInst *NewCI = cast<CallInst>(Call); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); - return NewCI; + return copyFlags(*CI, NewCI); } return nullptr; } @@ -3277,9 +3335,9 @@ // string lengths for varying.
if (isFortifiedCallFoldable(CI, 2, None, 1)) { if (Func == LibFunc_strcpy_chk) - return emitStrCpy(Dst, Src, B, TLI); + return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI)); else - return emitStpCpy(Dst, Src, B, TLI); + return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI)); } if (OnlyLowerUnknownSize) @@ -3303,14 +3361,14 @@ // a __memcpy_chk, we still need to return the correct end pointer. if (Ret && Func == LibFunc_stpcpy_chk) return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1)); - return Ret; + return copyFlags(*CI, cast<CallInst>(Ret)); } Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 1, None, 0)) - return emitStrLen(CI->getArgOperand(0), B, CI->getModule()->getDataLayout(), - TLI); + return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, + CI->getModule()->getDataLayout(), TLI)); return nullptr; } @@ -3319,11 +3377,13 @@ LibFunc Func) { if (isFortifiedCallFoldable(CI, 3, 2)) { if (Func == LibFunc_strncpy_chk) - return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); else - return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); } return nullptr; @@ -3332,8 +3392,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 4, 3)) - return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), CI->getArgOperand(3), B, TLI); + return copyFlags( + *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(3), B, TLI)); return nullptr; } @@ -3342,8 +3403,9 @@ IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) { SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5)); - return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(4), VariadicArgs, B, TLI); + return copyFlags(*CI, + emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), VariadicArgs, B, TLI)); } return nullptr; @@ -3353,8 +3415,9 @@ IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 2, None, None, 1)) { SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4)); - return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs, - B, TLI); + return copyFlags(*CI, + emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), + VariadicArgs, B, TLI)); } return nullptr; @@ -3363,7 +3426,8 @@ Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 2)) - return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI); + return copyFlags( + *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI)); return nullptr; } @@ -3371,8 +3435,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3)) - return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); return nullptr; } @@ -3380,8 +3445,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3)) - return emitStrNCat(CI->getArgOperand(0),
CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); return nullptr; } @@ -3389,8 +3455,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3)) - return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); + return copyFlags(*CI, + emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); return nullptr; } @@ -3398,8 +3465,9 @@ Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) - return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(4), CI->getArgOperand(5), B, TLI); + return copyFlags( + *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), CI->getArgOperand(5), B, TLI)); return nullptr; } @@ -3407,8 +3475,9 @@ Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 2, None, None, 1)) - return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), - CI->getArgOperand(4), B, TLI); + return copyFlags(*CI, + emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), + CI->getArgOperand(4), B, TLI)); return nullptr; } diff --git a/llvm/test/CodeGen/X86/memset-nonzero.ll b/llvm/test/CodeGen/X86/memset-nonzero.ll --- a/llvm/test/CodeGen/X86/memset-nonzero.ll +++ b/llvm/test/CodeGen/X86/memset-nonzero.ll @@ -196,14 +196,9 @@ define void @memset_256_nonzero_bytes(i8* %x) { ; SSE-LABEL: memset_256_nonzero_bytes: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rax -; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movl $256, %edx # imm = 0x100 ; SSE-NEXT: movl $42, %esi -; SSE-NEXT: callq memset@PLT -; SSE-NEXT: popq %rax -; SSE-NEXT: .cfi_def_cfa_offset 8 -; SSE-NEXT: retq +; SSE-NEXT: jmp memset@PLT # TAILCALL ; ; SSE2FAST-LABEL: memset_256_nonzero_bytes: ; SSE2FAST: # %bb.0: diff --git a/llvm/test/Transforms/InstCombine/cabs-array.ll b/llvm/test/Transforms/InstCombine/cabs-array.ll --- a/llvm/test/Transforms/InstCombine/cabs-array.ll +++ b/llvm/test/Transforms/InstCombine/cabs-array.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[REAL]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[IMAG]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast double @llvm.sqrt.f64(double [[TMP3]]) ; CHECK-NEXT: ret double [[CABS]] ; %call = tail call fast double @cabs([2 x double] %z) @@ -49,7 +49,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[REAL]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[IMAG]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast float @llvm.sqrt.f32(float [[TMP3]]) ; CHECK-NEXT: ret float [[CABS]] ; %call = tail call fast float @cabsf([2 x float] %z) @@ -63,7 +63,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast fp128 [[REAL]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast fp128 [[IMAG]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast fp128 @llvm.sqrt.f128(fp128 
[[TMP3]]) ; CHECK-NEXT: ret fp128 [[CABS]] ; %call = tail call fast fp128 @cabsl([2 x fp128] %z) diff --git a/llvm/test/Transforms/InstCombine/cabs-discrete.ll b/llvm/test/Transforms/InstCombine/cabs-discrete.ll --- a/llvm/test/Transforms/InstCombine/cabs-discrete.ll +++ b/llvm/test/Transforms/InstCombine/cabs-discrete.ll @@ -33,7 +33,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[REAL:%.*]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[IMAG:%.*]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast double @llvm.sqrt.f64(double [[TMP3]]) ; CHECK-NEXT: ret double [[CABS]] ; %call = tail call fast double @cabs(double %real, double %imag) @@ -45,7 +45,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[REAL:%.*]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[IMAG:%.*]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast float @llvm.sqrt.f32(float [[TMP3]]) ; CHECK-NEXT: ret float [[CABS]] ; %call = tail call fast float @cabsf(float %real, float %imag) @@ -57,7 +57,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast fp128 [[REAL:%.*]], [[REAL]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast fp128 [[IMAG:%.*]], [[IMAG]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) +; CHECK-NEXT: [[CABS:%.*]] = tail call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) ; CHECK-NEXT: ret fp128 [[CABS]] ; %call = tail call fast fp128 @cabsl(fp128 %real, fp128 %imag) diff --git a/llvm/test/Transforms/InstCombine/cos-1.ll b/llvm/test/Transforms/InstCombine/cos-1.ll --- a/llvm/test/Transforms/InstCombine/cos-1.ll +++ b/llvm/test/Transforms/InstCombine/cos-1.ll @@ -29,6 +29,27 @@ ret double %r } +define double @cos_negated_arg_tail(double %x) { +; ANY-LABEL: @cos_negated_arg_tail( +; ANY-NEXT: [[COS:%.*]] = tail call double @cos(double [[X:%.*]]) +; ANY-NEXT: ret double [[COS]] +; + %neg = fsub double -0.0, %x + %r = tail call double @cos(double %neg) + ret double %r +} + +define double @cos_negated_arg_musttail(double %x) { +; ANY-LABEL: @cos_negated_arg_musttail( +; ANY-NEXT: [[NEG:%.*]] = fneg double [[X:%.*]] +; ANY-NEXT: [[R:%.*]] = musttail call double @cos(double [[NEG]]) +; ANY-NEXT: ret double [[R]] +; + %neg = fsub double -0.0, %x + %r = musttail call double @cos(double %neg) + ret double %r +} + define double @cos_unary_negated_arg(double %x) { ; ANY-LABEL: @cos_unary_negated_arg( ; ANY-NEXT: [[COS:%.*]] = call double @cos(double [[X:%.*]]) @@ -103,6 +124,17 @@ ret double %r } +define double @sin_unary_negated_arg_musttail(double %x) { +; ANY-LABEL: @sin_unary_negated_arg_musttail( +; ANY-NEXT: [[NEG:%.*]] = fneg double [[X:%.*]] +; ANY-NEXT: [[R:%.*]] = musttail call double @sin(double [[NEG]]) +; ANY-NEXT: ret double [[R]] +; + %neg = fneg double %x + %r = musttail call double @sin(double %neg) + ret double %r +} + define float @sinf_negated_arg(float %x) { ; ANY-LABEL: @sinf_negated_arg( ; ANY-NEXT: [[TMP1:%.*]] = call float @sinf(float [[X:%.*]]) @@ -235,6 +267,27 @@ ret double %r } +define double @tan_negated_arg_tail(double %x) { +; ANY-LABEL: @tan_negated_arg_tail( +; ANY-NEXT: [[TMP1:%.*]] = tail call double @tan(double [[X:%.*]]) +; ANY-NEXT: [[TMP2:%.*]] = fneg double [[TMP1]] +; ANY-NEXT: ret double [[TMP2]] 
+; + %neg = fsub double -0.0, %x + %r = tail call double @tan(double %neg) + ret double %r +} +define double @tan_negated_arg_musttail(double %x) { +; ANY-LABEL: @tan_negated_arg_musttail( +; ANY-NEXT: [[NEG:%.*]] = fneg double [[X:%.*]] +; ANY-NEXT: [[R:%.*]] = musttail call double @tan(double [[NEG]]) +; ANY-NEXT: ret double [[R]] +; + %neg = fsub double -0.0, %x + %r = musttail call double @tan(double %neg) + ret double %r +} + define double @tan_unary_negated_arg(double %x) { ; ANY-LABEL: @tan_unary_negated_arg( ; ANY-NEXT: [[TMP1:%.*]] = call double @tan(double [[X:%.*]]) diff --git a/llvm/test/Transforms/InstCombine/fabs-libcall.ll b/llvm/test/Transforms/InstCombine/fabs-libcall.ll --- a/llvm/test/Transforms/InstCombine/fabs-libcall.ll +++ b/llvm/test/Transforms/InstCombine/fabs-libcall.ll @@ -5,7 +5,7 @@ define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) { ; CHECK-LABEL: @replace_fabs_call_f80( -; CHECK-NEXT: [[FABSL:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) +; CHECK-NEXT: [[FABSL:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) ; CHECK-NEXT: ret x86_fp80 [[FABSL]] ; %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x) @@ -14,7 +14,7 @@ define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) { ; CHECK-LABEL: @fmf_replace_fabs_call_f80( -; CHECK-NEXT: [[FABSL:%.*]] = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) +; CHECK-NEXT: [[FABSL:%.*]] = tail call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) ; CHECK-NEXT: ret x86_fp80 [[FABSL]] ; %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x) diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll --- a/llvm/test/Transforms/InstCombine/fabs.ll +++ b/llvm/test/Transforms/InstCombine/fabs.ll @@ -18,7 +18,7 @@ define float @replace_fabs_call_f32(float %x) { ; CHECK-LABEL: @replace_fabs_call_f32( -; CHECK-NEXT: [[FABSF:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[FABSF:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]]) ; CHECK-NEXT: ret float [[FABSF]] ; %fabsf = tail call float @fabsf(float %x) @@ -27,7 +27,7 @@ define double @replace_fabs_call_f64(double %x) { ; CHECK-LABEL: @replace_fabs_call_f64( -; CHECK-NEXT: [[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT: ret double [[FABS]] ; %fabs = tail call double @fabs(double %x) @@ -36,7 +36,7 @@ define fp128 @replace_fabs_call_f128(fp128 %x) { ; CHECK-LABEL: @replace_fabs_call_f128( -; CHECK-NEXT: [[FABSL:%.*]] = call fp128 @llvm.fabs.f128(fp128 [[X:%.*]]) +; CHECK-NEXT: [[FABSL:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[X:%.*]]) ; CHECK-NEXT: ret fp128 [[FABSL]] ; %fabsl = tail call fp128 @fabsl(fp128 %x) @@ -46,7 +46,7 @@ ; Make sure fast math flags are preserved when replacing the libcall. 
define float @fmf_replace_fabs_call_f32(float %x) { ; CHECK-LABEL: @fmf_replace_fabs_call_f32( -; CHECK-NEXT: [[FABSF:%.*]] = call nnan float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[FABSF:%.*]] = tail call nnan float @llvm.fabs.f32(float [[X:%.*]]) ; CHECK-NEXT: ret float [[FABSF]] ; %fabsf = tail call nnan float @fabsf(float %x) diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -31,6 +31,17 @@ ret i8* %ret } +define i8* @test_memccpy_tail() { +; CHECK-LABEL: @test_memccpy_tail( +; CHECK-NEXT: [[MEMCCPY:%.*]] = tail call i8* @memccpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i32 0, i64 60) +; CHECK-NEXT: ret i8* [[MEMCCPY]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1) + ret i8* %ret +} + define i8* @test_mempcpy() { ; CHECK-LABEL: @test_mempcpy( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) @@ -53,6 +64,17 @@ ret i8* %ret } +define i8* @test_mempcpy_tail() { +; CHECK-LABEL: @test_mempcpy_tail( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 15) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 15, i64 -1) + ret i8* %ret +} + define i32 @test_snprintf() { ; CHECK-LABEL: @test_snprintf( ; CHECK-NEXT: [[SNPRINTF:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -77,6 +99,17 @@ ret i32 %ret } +define i32 @test_snprintf_tail() { +; CHECK-LABEL: @test_snprintf_tail( +; CHECK-NEXT: [[SNPRINTF:%.*]] = tail call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: ret i32 [[SNPRINTF]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 0, i64 -1, i8* %fmt) + ret i32 %ret +} + define i32 @test_sprintf() { ; CHECK-LABEL: @test_sprintf( ; CHECK-NEXT: [[SPRINTF:%.*]] = call i32 (i8*, i8*, ...) 
@sprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -101,6 +134,17 @@ ret i32 %ret } +define i32 @test_sprintf_tail() { +; CHECK-LABEL: @test_sprintf_tail( +; CHECK-NEXT: [[SPRINTF:%.*]] = tail call i32 (i8*, i8*, ...) @sprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: ret i32 [[SPRINTF]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 0, i64 -1, i8* %fmt) + ret i32 %ret +} + define i8* @test_strcat() { ; CHECK-LABEL: @test_strcat( ; CHECK-NEXT: [[STRCAT:%.*]] = call i8* @strcat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) @@ -123,6 +167,17 @@ ret i8* %ret } +define i8* @test_strcat_tail() { +; CHECK-LABEL: @test_strcat_tail( +; CHECK-NEXT: [[STRCAT:%.*]] = tail call i8* @strcat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i8* @__strcat_chk(i8* %dst, i8* %src, i64 -1) + ret i8* %ret +} + define i64 @test_strlcat() { ; CHECK-LABEL: @test_strlcat( ; CHECK-NEXT: [[STRLCAT:%.*]] = call i64 @strlcat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) @@ -145,6 +200,17 @@ ret i64 %ret } +define i64 @test_strlcat_tail() { +; CHECK-LABEL: @test_strlcat_tail( +; CHECK-NEXT: [[STRLCAT:%.*]] = tail call i64 @strlcat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: ret i64 [[STRLCAT]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i64 @__strlcat_chk(i8* %dst, i8* %src, i64 22, i64 -1) + ret i64 %ret +} + define i8* @test_strncat() { ; CHECK-LABEL: @test_strncat( ; CHECK-NEXT: [[STRNCAT:%.*]] = call i8* @strncat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) @@ -167,6 +233,17 @@ ret i8* %ret } +define i8* @test_strncat_tail() { +; CHECK-LABEL: @test_strncat_tail( +; CHECK-NEXT: [[STRNCAT:%.*]] = tail call i8* @strncat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* 
@a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i8* @__strncat_chk(i8* %dst, i8* %src, i64 22, i64 -1) + ret i8* %ret +} + define i64 @test_strlcpy() { ; CHECK-LABEL: @test_strlcpy( ; CHECK-NEXT: [[STRLCPY:%.*]] = call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) @@ -189,6 +266,17 @@ ret i64 %ret } +define i64 @test_strlcpy_tail() { +; CHECK-LABEL: @test_strlcpy_tail( +; CHECK-NEXT: [[STRLCPY:%.*]] = tail call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: ret i64 [[STRLCPY]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i64 @__strlcpy_chk(i8* %dst, i8* %src, i64 22, i64 -1) + ret i64 %ret +} + define i32 @test_vsnprintf() { ; CHECK-LABEL: @test_vsnprintf( ; CHECK-NEXT: [[VSNPRINTF:%.*]] = call i32 @vsnprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) @@ -215,6 +303,18 @@ ret i32 %ret } +define i32 @test_vsnprintf_tail() { +; CHECK-LABEL: @test_vsnprintf_tail( +; CHECK-NEXT: [[VSNPRINTF:%.*]] = tail call i32 @vsnprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: ret i32 [[VSNPRINTF]] +; + ; ret i32 + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null) + ret i32 %ret +} + define i32 @test_vsprintf() { ; CHECK-LABEL: @test_vsprintf( ; CHECK-NEXT: [[VSPRINTF:%.*]] = call i32 @vsprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) @@ -241,6 +341,18 @@ ret i32 %ret } +define i32 @test_vsprintf_tail() { +; CHECK-LABEL: @test_vsprintf_tail( +; CHECK-NEXT: [[VSPRINTF:%.*]] = tail call i32 @vsprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: ret i32 [[VSPRINTF]] +; + ; ret i32 + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = tail call i32 @__vsprintf_chk(i8* %dst, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null) + ret i32 %ret +} + declare i8* @__mempcpy_chk(i8*, i8*, i64, i64) declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64) declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...) 
diff --git a/llvm/test/Transforms/InstCombine/memccpy.ll b/llvm/test/Transforms/InstCombine/memccpy.ll --- a/llvm/test/Transforms/InstCombine/memccpy.ll +++ b/llvm/test/Transforms/InstCombine/memccpy.ll @@ -39,6 +39,25 @@ ret void } +define void @memccpy_to_memcpy3_tail(i8* %dst) { +; CHECK-LABEL: @memccpy_to_memcpy3_tail( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 5, i1 false) +; CHECK-NEXT: ret void +; + %call = tail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) ; 111 is 'o' + ret void +} + +define i8* @memccpy_to_memcpy3_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +; CHECK-LABEL: @memccpy_to_memcpy3_musttail( +; CHECK-NEXT: %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) +; CHECK-NEXT: ret i8* %call +; + %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) ; 111 is 'o' + ret i8* %call +} + + define void @memccpy_to_memcpy4(i8* %dst) { ; CHECK-LABEL: @memccpy_to_memcpy4( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(11) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 11, i1 false) @@ -57,6 +76,24 @@ ret i8* %call } +define i8* @memccpy_to_memcpy5_tail(i8* %dst) { +; CHECK-LABEL: @memccpy_to_memcpy5_tail( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(7) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(7) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 7, i1 false) +; CHECK-NEXT: ret i8* null +; + %call = tail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) + ret i8* %call +} + +define i8* @memccpy_to_memcpy5_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +; CHECK-LABEL: @memccpy_to_memcpy5_musttail( +; CHECK-NEXT: %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) +; CHECK-NEXT: ret i8* %call +; + %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) + ret i8* %call +} + define i8* @memccpy_to_memcpy6(i8* %dst) { ; CHECK-LABEL: @memccpy_to_memcpy6( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(6) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 6, i1 false) @@ -212,3 +249,22 @@ %call = call i8* @memccpy(i8* %dst, i8* %dst, i32 %c, i64 %n) ret i8* %call } + +define i8* @memccpy_to_memcpy_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +; CHECK-LABEL: @memccpy_to_memcpy_musttail( +; CHECK-NEXT: %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) +; CHECK-NEXT: ret i8* %call +; + %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) ; 114 is 'r' + ret i8* %call +} + +define i8* @memccpy_to_memcpy2_musttail(i8* %dst, i8* %x, i32 %y, i64 
%z) { +; CHECK-LABEL: @memccpy_to_memcpy2_musttail( +; CHECK-NEXT: %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 8) +; CHECK-NEXT: ret i8* %call +; + %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 8) ; 114 is 'r' + ret i8* %call +} + diff --git a/llvm/test/Transforms/InstCombine/memcpy-1.ll b/llvm/test/Transforms/InstCombine/memcpy-1.ll --- a/llvm/test/Transforms/InstCombine/memcpy-1.ll +++ b/llvm/test/Transforms/InstCombine/memcpy-1.ll @@ -22,18 +22,44 @@ define i8* @test_simplify2(i8* %mem1, i8* %mem2, i32 %size) strictfp { ; CHECK-LABEL: @test_simplify2( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[MEM1:%.*]], i8* align 1 [[MEM2:%.*]], i32 [[SIZE:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[MEM1:%.*]], i8* align 1 [[MEM2:%.*]], i32 [[SIZE:%.*]], i1 false) #[[ATTR0:[0-9]+]] ; CHECK-NEXT: ret i8* [[MEM1]] ; %ret = call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) strictfp ret i8* %ret } +; Verify that the first parameter to memcpy could itself be a call that's not +; tail, while the call to @memcpy could be tail. +declare i8* @get_dest() + +define i8* @test_simplify3(i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_simplify3( +; CHECK-NEXT: [[DEST:%.*]] = call i8* @get_dest() +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[MEM2:%.*]], i32 [[SIZE:%.*]], i1 false) +; CHECK-NEXT: ret i8* [[DEST]] +; + + %dest = call i8* @get_dest() + %ret = tail call i8* @memcpy(i8* %dest, i8* %mem2, i32 %size) + ret i8* %ret +} + define i8* @test_no_incompatible_attr(i8* %mem1, i8* %mem2, i32 %size) { ; CHECK-LABEL: @test_no_incompatible_attr( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[MEM1:%.*]], i8* align 1 [[MEM2:%.*]], i32 [[SIZE:%.*]], i1 false) ; CHECK-NEXT: ret i8* [[MEM1]] +; %ret = call dereferenceable(1) i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) ret i8* %ret } + +define i8* @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_no_simplify1( +; CHECK-NEXT: [[RET:%.*]] = musttail call i8* @memcpy(i8* [[MEM1:%.*]], i8* [[MEM2:%.*]], i32 [[SIZE:%.*]]) +; CHECK-NEXT: ret i8* [[RET]] +; + %ret = musttail call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) + ret i8* %ret +} diff --git a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll --- a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll @@ -38,6 +38,18 @@ ret i8* %ret } +; Same as test_simplify1 but with a tail call rather than vanilla call. +define i8* @test_simplify3() { +; CHECK-LABEL: @test_simplify3( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) +; + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + %ret = tail call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret +} + ; Check cases where dstlen < len. 
define i8* @test_no_simplify1() { @@ -62,6 +74,15 @@ ret i8* %ret } +define i8* @test_no_simplify3(i8* %dst, i8* %src, i64 %a, i64 %b) { +; CHECK-LABEL: @test_no_simplify3( +; CHECK-NEXT: %ret = musttail call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) +; CHECK-NEXT: ret i8* %ret +; + %ret = musttail call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret +} + define i8* @test_simplify_return_indcall(i8* ()* %alloc) { ; CHECK-LABEL: @test_simplify_return_indcall( ; CHECK-NEXT: [[DST:%.*]] = call i8* [[ALLOC:%.*]]() diff --git a/llvm/test/Transforms/InstCombine/memmove-1.ll b/llvm/test/Transforms/InstCombine/memmove-1.ll --- a/llvm/test/Transforms/InstCombine/memmove-1.ll +++ b/llvm/test/Transforms/InstCombine/memmove-1.ll @@ -16,6 +16,22 @@ ; CHECK: ret i8* %mem1 } +define i8* @test_simplify2(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_simplify2( +; CHECK-NEXT: tail call void @llvm.memmove +; CHECK-NEXT: ret i8* %mem1 + %ret = tail call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) + ret i8* %ret +} + +define i8* @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_no_simplify1( +; CHECK-NEXT: %ret = musttail call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) +; CHECK-NEXT: ret i8* %ret + %ret = musttail call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) + ret i8* %ret +} + define i8* @test_no_incompatible_attr(i8* %mem1, i8* %mem2, i32 %size) { ; CHECK-LABEL: @test_no_incompatible_attr( %ret = call dereferenceable(1) i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) diff --git a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll --- a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll @@ -40,6 +40,18 @@ ret i8* %ret } +define i8* @test_simplify3() { +; CHECK-LABEL: @test_simplify3( +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) +; + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + + %ret = tail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret +} + ; Check cases where dstlen < len. 
define i8* @test_no_simplify1() { @@ -66,6 +78,15 @@ ret i8* %ret } +define i8* @test_no_simplify3(i8* %dst, i8* %src, i64 %a, i64 %b) { +; CHECK-LABEL: @test_no_simplify3( +; CHECK-NEXT: %ret = musttail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) +; CHECK-NEXT: ret i8* %ret +; + %ret = musttail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret +} + define i8* @test_no_incompatible_attr(i8* %mem, i32 %val, i32 %size) { ; CHECK-LABEL: @test_no_incompatible_attr( ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) diff --git a/llvm/test/Transforms/InstCombine/mempcpy.ll b/llvm/test/Transforms/InstCombine/mempcpy.ll --- a/llvm/test/Transforms/InstCombine/mempcpy.ll +++ b/llvm/test/Transforms/InstCombine/mempcpy.ll @@ -64,4 +64,13 @@ ret i32 undef } +define i8* @memcpy_no_simplify1(i8* %d, i8* nocapture readonly %s, i64 %n) { +; CHECK-LABEL: @memcpy_no_simplify1( +; CHECK-NEXT: %r = musttail call i8* @mempcpy(i8* %d, i8* %s, i64 %n) +; CHECK-NEXT: ret i8* %r +; + %r = musttail call i8* @mempcpy(i8* %d, i8* %s, i64 %n) + ret i8* %r +} + declare i8* @mempcpy(i8*, i8* nocapture readonly, i64) diff --git a/llvm/test/Transforms/InstCombine/memset-1.ll b/llvm/test/Transforms/InstCombine/memset-1.ll --- a/llvm/test/Transforms/InstCombine/memset-1.ll +++ b/llvm/test/Transforms/InstCombine/memset-1.ll @@ -21,6 +21,25 @@ ret i8* %ret } +define i8* @test_simplify1_tail(i8* %mem, i32 %val, i32 %size) { +; CHECK-LABEL: @test_simplify1_tail( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[VAL:%.*]] to i8 +; CHECK-NEXT: tail call void @llvm.memset.p0i8.i32(i8* align 1 [[MEM:%.*]], i8 [[TMP1]], i32 [[SIZE:%.*]], i1 false) +; CHECK-NEXT: ret i8* [[MEM]] +; + %ret = tail call i8* @memset(i8* %mem, i32 %val, i32 %size) + ret i8* %ret +} + +define i8* @test_simplify1_musttail(i8* %mem, i32 %val, i32 %size) { +; CHECK-LABEL: @test_simplify1_musttail( +; CHECK-NEXT: %ret = musttail call i8* @memset(i8* %mem, i32 %val, i32 %size) +; CHECK-NEXT: ret i8* %ret +; + %ret = musttail call i8* @memset(i8* %mem, i32 %val, i32 %size) + ret i8* %ret +} + ; Malloc + memset pattern is now handled by DSE in a more general way. define i8* @pr25892_lite(i32 %size) #0 { diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll --- a/llvm/test/Transforms/InstCombine/memset_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memset_chk-1.ll @@ -45,6 +45,18 @@ ret i8* %ret } +; Same as @test_simplify1 with tail call. +define i8* @test_simplify4() { +; CHECK-LABEL: @test_simplify4( +; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*) +; + %dst = bitcast %struct.T* @t to i8* + + %ret = tail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824) + ret i8* %ret +} + ; Check cases where dstlen < len. 
 define i8* @test_no_simplify1() {
@@ -69,6 +81,16 @@
   ret i8* %ret
 }
 
+define i8* @test_no_simplify3(i8* %dst, i32 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @test_no_simplify3(
+; CHECK-NEXT: %ret = musttail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+; CHECK-NEXT: ret i8* %ret
+;
+  %ret = musttail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+  ret i8* %ret
+}
+
+
 ; Test that RAUW in SimplifyLibCalls for __memset_chk generates valid IR
 define i32 @test_rauw(i8* %a, i8* %b, i8** %c) {
 ; CHECK-LABEL: @test_rauw(
diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll
--- a/llvm/test/Transforms/InstCombine/objsize.ll
+++ b/llvm/test/Transforms/InstCombine/objsize.ll
@@ -251,7 +251,7 @@
 define i32 @test11(i8** %esc) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT: [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT: ret i32 8
 ;
@@ -263,7 +263,7 @@
 define i32 @test12(i8** %esc) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT: [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT: ret i32 8
 ;
@@ -275,7 +275,7 @@
 define i32 @test13(i8** %esc) {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT: [[STRDUP:%.*]] = call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
 ; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8
 ; CHECK-NEXT: ret i32 8
 ;
diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll
--- a/llvm/test/Transforms/InstCombine/pow-1.ll
+++ b/llvm/test/Transforms/InstCombine/pow-1.ll
@@ -270,6 +270,23 @@
   ret float %retval
 }
 
+define float @powf_libcall_half_ninf_tail(float %x) {
+; CHECK-LABEL: @powf_libcall_half_ninf_tail(
+; ANY-NEXT: %sqrtf = call ninf float @sqrtf(float %x)
+; ANY-NEXT: %abs = tail call ninf float @llvm.fabs.f32(float %sqrtf)
+; ANY-NEXT: ret float %abs
+  %retval = tail call ninf float @powf(float %x, float 0.5)
+  ret float %retval
+}
+
+define float @powf_libcall_half_ninf_musttail(float %x, float %y) {
+; CHECK-LABEL: @powf_libcall_half_ninf_musttail(
+; ANY-NEXT: %retval = musttail call ninf float @powf(float %x, float 5.000000e-01)
+; ANY-NEXT: ret float %retval
+  %retval = musttail call ninf float @powf(float %x, float 0.5)
+  ret float %retval
+}
+
 ; Check pow(x, 0.5) where x may be -infinity does not call a library sqrt function.
define double @pow_libcall_half_no_FMF(double %x) { diff --git a/llvm/test/Transforms/InstCombine/pow-exp.ll b/llvm/test/Transforms/InstCombine/pow-exp.ll --- a/llvm/test/Transforms/InstCombine/pow-exp.ll +++ b/llvm/test/Transforms/InstCombine/pow-exp.ll @@ -214,7 +214,7 @@ ; CHECK-LABEL: @pow_ok_base( ; Do not change 0xBFE0776{{.*}} to the exact constant, see PR42740 ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0xBFE0776{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 0x3FE6666666666666, double %e) @@ -224,7 +224,7 @@ define double @pow_ok_base_fast(double %e) { ; CHECK-LABEL: @pow_ok_base_fast( ; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[E:%.*]], 0xBFE0776{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call fast double @pow(double 0x3FE6666666666666, double %e) @@ -234,7 +234,7 @@ define double @pow_ok_base2(double %e) { ; CHECK-LABEL: @pow_ok_base2( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x4010952{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 1.770000e+01, double %e) @@ -244,7 +244,7 @@ define double @pow_ok_base3(double %e) { ; CHECK-LABEL: @pow_ok_base3( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x400AB0B5{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 1.010000e+01, double %e) @@ -254,7 +254,7 @@ define double @pow_ok_ten_base(double %e) { ; CHECK-LABEL: @pow_ok_ten_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x400A934F{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 1.000000e+01, double %e) @@ -264,7 +264,7 @@ define double @pow_ok_denorm_base(double %e) { ; CHECK-LABEL: @pow_ok_denorm_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0xC0904800000005C5 -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 0x00000000FFFFFFFF, double %e) @@ -274,7 +274,7 @@ define float @powf_ok_base(float %e) { ; CHECK-LABEL: @powf_ok_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0xBFE07762{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 0x3FE6666660000000, float %e) @@ -284,7 +284,7 @@ define float @powf_ok_base2(float %e) { ; CHECK-LABEL: @powf_ok_base2( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float 
[[E:%.*]], 0x4010952{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 0x4031B33340000000, float %e) @@ -294,7 +294,7 @@ define float @powf_ok_base3(float %e) { ; CHECK-LABEL: @powf_ok_base3( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x400AB0B5{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 0x4024333340000000, float %e) @@ -304,7 +304,7 @@ define float @powf_ok_ten_base(float %e) { ; CHECK-LABEL: @powf_ok_ten_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x400A934{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 1.000000e+01, float %e) @@ -314,7 +314,7 @@ define float @powf_ok_denorm_base(float %e) { ; CHECK-LABEL: @powf_ok_denorm_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], -1.350000e+02 -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 0x3780000000000000, float %e) @@ -371,7 +371,7 @@ define double @pow_multiuse(double %e) { ; CHECK-LABEL: @pow_multiuse( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x4002934{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf afn double @exp2(double [[MUL]]) ; CHECK-NEXT: tail call void @use_d(double [[EXP2]]) ; CHECK-NEXT: ret double [[EXP2]] ; @@ -401,7 +401,7 @@ define double @pow_ok_base_no_ninf(double %e) { ; CHECK-LABEL: @pow_ok_base_no_ninf( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan afn double [[E:%.*]], 0xBFE0776{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call nnan afn double @exp2(double [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan afn double @exp2(double [[MUL]]) ; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan double @pow(double 0x3FE6666666666666, double %e) @@ -456,7 +456,7 @@ define float @powf_multiuse(float %e) { ; CHECK-LABEL: @powf_multiuse( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x4002934{{.*}} -; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: [[EXP2F:%.*]] = tail call nnan ninf afn float @exp2f(float [[MUL]]) ; CHECK-NEXT: tail call void @use_f(float [[EXP2F]]) ; CHECK-NEXT: ret float [[EXP2F]] ; diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll --- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll @@ -5,7 +5,7 @@ define double @pow_sitofp_const_base_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret 
double [[RES]] ; @@ -18,7 +18,7 @@ define double @pow_uitofp_const_base_fast(i31 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -30,7 +30,7 @@ define double @pow_sitofp_double_const_base_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i32 %x to double @@ -41,7 +41,7 @@ define double @pow_uitofp_double_const_base_fast(i31 %x) { ; CHECK-LABEL: @pow_uitofp_double_const_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i32(double 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: ret double [[TMP2]] ; %subfp = uitofp i31 %x to double @@ -51,7 +51,7 @@ define double @pow_sitofp_double_const_base_2_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_2_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -65,7 +65,7 @@ ; CHECK-LABEL: @pow_sitofp_double_const_base_power_of_2_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -78,7 +78,7 @@ define double @pow_uitofp_const_base_2_fast(i31 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -92,7 +92,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i31 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -104,7 +104,7 @@ define double @pow_sitofp_float_base_fast(float %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_float_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -117,7 +117,7 
@@ define double @pow_uitofp_float_base_fast(float %base, i31 %x) { ; CHECK-LABEL: @pow_uitofp_float_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float [[BASE:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -129,7 +129,7 @@ define double @pow_sitofp_double_base_fast(double %base, i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i32 %x to double @@ -140,7 +140,7 @@ define double @pow_uitofp_double_base_fast(double %base, i31 %x) { ; CHECK-LABEL: @pow_uitofp_double_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: ret double [[TMP2]] ; %subfp = uitofp i31 %x to double @@ -151,7 +151,7 @@ define double @pow_sitofp_const_base_fast_i8(i8 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast_i8( ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -164,7 +164,7 @@ define double @pow_sitofp_const_base_fast_i16(i16 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast_i16( ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -178,7 +178,7 @@ define double @pow_uitofp_const_base_fast_i8(i8 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast_i8( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -191,7 +191,7 @@ define double @pow_uitofp_const_base_fast_i16(i16 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast_i16( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i32(float 7.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -203,7 +203,7 @@ define double @powf_exp_const_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 40) +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 40) ; CHECK-NEXT: ret double 
[[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01) @@ -212,7 +212,7 @@ define double @powf_exp_const2_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const2_int_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 -40) +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 -40) ; CHECK-NEXT: ret double [[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double -4.000000e+01) @@ -225,7 +225,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_fast_i32( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x4006757{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -238,7 +238,7 @@ define double @pow_uitofp_const_base_2_fast_i32(i32 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast_i32( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[SUBFP]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[SUBFP]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -252,7 +252,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast_i32( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -291,7 +291,7 @@ ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i64 [[X:%.*]] to float ; Do not change 0x400675{{.*}} to the exact constant, see PR42740 ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x400675{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -305,7 +305,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_fast_i64( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i64 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x400675{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -343,7 +343,7 @@ define double @pow_sitofp_const_base_2_no_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_2_no_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -357,7 +357,7 @@ ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: 
[[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -370,7 +370,7 @@ define double @pow_uitofp_const_base_2_no_fast(i32 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[SUBFP]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[SUBFP]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -384,7 +384,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i32 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int16.ll b/llvm/test/Transforms/InstCombine/pow_fp_int16.ll --- a/llvm/test/Transforms/InstCombine/pow_fp_int16.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int16.ll @@ -5,7 +5,7 @@ define double @pow_sitofp_const_base_fast(i16 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -18,7 +18,7 @@ define double @pow_uitofp_const_base_fast(i15 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -30,7 +30,7 @@ define double @pow_sitofp_double_const_base_fast(i16 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i16 %x to double @@ -41,7 +41,7 @@ define double @pow_uitofp_double_const_base_fast(i15 %x) { ; CHECK-LABEL: @pow_uitofp_double_const_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i16(double 7.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: ret double [[TMP2]] ; %subfp = uitofp i15 %x to double @@ -51,7 +51,7 @@ define double @pow_sitofp_double_const_base_2_fast(i16 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_2_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -65,7 +65,7 @@ ; CHECK-LABEL: @pow_sitofp_double_const_base_power_of_2_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: 
[[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -78,7 +78,7 @@ define double @pow_uitofp_const_base_2_fast(i15 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call afn float @ldexpf(float 1.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -92,7 +92,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i15 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call afn float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call afn float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -104,7 +104,7 @@ define double @pow_sitofp_float_base_fast(float %base, i16 %x) { ; CHECK-LABEL: @pow_sitofp_float_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -117,7 +117,7 @@ define double @pow_uitofp_float_base_fast(float %base, i15 %x) { ; CHECK-LABEL: @pow_uitofp_float_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float [[BASE:%.*]], i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -129,7 +129,7 @@ define double @pow_sitofp_double_base_fast(double %base, i16 %x) { ; CHECK-LABEL: @pow_sitofp_double_base_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %subfp = sitofp i16 %x to double @@ -140,7 +140,7 @@ define double @pow_uitofp_double_base_fast(double %base, i15 %x) { ; CHECK-LABEL: @pow_uitofp_double_base_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i15 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 [[TMP1]]) ; CHECK-NEXT: ret double [[TMP2]] ; %subfp = uitofp i15 %x to double @@ -151,7 +151,7 @@ define double @pow_sitofp_const_base_fast_i8(i8 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast_i8( ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -163,7 +163,7 @@ define double @pow_sitofp_const_base_fast_i16(i16 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_fast_i16( -; CHECK-NEXT: [[TMP1:%.*]] = call afn float 
@llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -177,7 +177,7 @@ define double @pow_uitofp_const_base_fast_i8(i8 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_fast_i8( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call afn float @llvm.powi.f32.i16(float 7.000000e+00, i16 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[TMP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -202,7 +202,7 @@ define double @powf_exp_const_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const_int_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 40) +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 40) ; CHECK-NEXT: ret double [[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double 4.000000e+01) @@ -211,7 +211,7 @@ define double @powf_exp_const2_int_fast(double %base) { ; CHECK-LABEL: @powf_exp_const2_int_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 -40) +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 -40) ; CHECK-NEXT: ret double [[TMP1]] ; %res = tail call fast double @llvm.pow.f64(double %base, double -4.000000e+01) @@ -224,7 +224,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_fast_i16( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 0x4006757{{.*}} -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -237,7 +237,7 @@ define double @pow_uitofp_const_base_2_fast_i16(i16 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast_i16( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[SUBFP]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[SUBFP]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -251,7 +251,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_fast_i16( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -313,7 +313,7 @@ define double @pow_sitofp_const_base_2_no_fast(i16 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_2_no_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call float @ldexpf(float 1.000000e+00, i16 [[X:%.*]]) +; CHECK-NEXT: [[LDEXPF:%.*]] = tail call float @ldexpf(float 1.000000e+00, i16 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -327,7 +327,7 @@ ; CHECK-LABEL: @pow_sitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = sitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: 
[[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -340,7 +340,7 @@ define double @pow_uitofp_const_base_2_no_fast(i16 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[SUBFP]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[SUBFP]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -354,7 +354,7 @@ ; CHECK-LABEL: @pow_uitofp_const_base_power_of_2_no_fast( ; CHECK-NEXT: [[SUBFP:%.*]] = uitofp i16 [[X:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUBFP]], 4.000000e+00 -; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[MUL]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.exp2.f32(float [[MUL]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[EXP2]] to double ; CHECK-NEXT: ret double [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/snprintf.ll b/llvm/test/Transforms/InstCombine/snprintf.ll --- a/llvm/test/Transforms/InstCombine/snprintf.ll +++ b/llvm/test/Transforms/InstCombine/snprintf.ll @@ -5,6 +5,8 @@ @.str.1 = private unnamed_addr constant [3 x i8] c"%%\00", align 1 @.str.2 = private unnamed_addr constant [3 x i8] c"%c\00", align 1 @.str.3 = private unnamed_addr constant [3 x i8] c"%s\00", align 1 +@.str.4 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 + declare i32 @snprintf(i8*, i64, i8*, ...) #1 @@ -136,3 +138,42 @@ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 32, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2 ret i32 %call } + +; snprintf(buf, 32, "") -> memcpy -> store +define i32 @test_str_ok_size_tail(i8* %buf) { +; CHECK-LABEL: @test_str_ok_size_tail( +; CHECK-NEXT: store i8 0, i8* %buf, align 1 +; CHECK-NEXT: ret i32 0 +; + %1 = tail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.4, i64 0, i64 0)) + ret i32 %1 +} + +define i32 @test_str_ok_size_musttail(i8* %buf, i64 %x, i8* %y, ...) { +; CHECK-LABEL: @test_str_ok_size_musttail( +; CHECK-NEXT: %1 = musttail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.4, i64 0, i64 0), ...) +; CHECK-NEXT: ret i32 %1 +; + %1 = musttail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.4, i64 0, i64 0), ...) + ret i32 %1 +} + +; snprintf(buf, 32, "%s", "str") -> memcpy -> store +define i32 @test_str_ok_size_tail2(i8* %buf) { +; CHECK-LABEL: @test_str_ok_size_tail2( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUF:%.*]] to i32* +; CHECK-NEXT: store i32 7500915, i32* [[TMP1]], align 1 +; CHECK-NEXT: ret i32 3 +; + %1 = tail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) + ret i32 %1 +} + +define i32 @test_str_ok_size_musttail2(i8* %buf, i64 %x, i8* %y, ...) { +; CHECK-LABEL: @test_str_ok_size_musttail2( +; CHECK-NEXT: %1 = musttail call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), ...) +; CHECK-NEXT: ret i32 %1 +; + %1 = musttail call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 8, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), ...) + ret i32 %1 +} diff --git a/llvm/test/Transforms/InstCombine/stpcpy-1.ll b/llvm/test/Transforms/InstCombine/stpcpy-1.ll --- a/llvm/test/Transforms/InstCombine/stpcpy-1.ll +++ b/llvm/test/Transforms/InstCombine/stpcpy-1.ll @@ -57,6 +57,15 @@ ret i8* %ret } +define i8* @test_no_simplify2(i8* %dst, i8* %src) { +; CHECK-LABEL: @test_no_simplify2( +; CHECK-NEXT: %ret = musttail call i8* @stpcpy(i8* %dst, i8* %src) +; CHECK-NEXT: ret i8* %ret +; + %ret = musttail call i8* @stpcpy(i8* %dst, i8* %src) + ret i8* %ret +} + define i8* @test_no_incompatible_attr() { ; CHECK-LABEL: @test_no_incompatible_attr( ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i32 6, i1 false) diff --git a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll --- a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll @@ -48,6 +48,18 @@ ret i8* %ret } +define i8* @test_simplify1_tail() { +; CHECK-LABEL: @test_simplify1_tail( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11) +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0 + + %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60) + ret i8* %ret +} + ; Check cases where there are no string constants. define i8* @test_simplify4() { @@ -62,6 +74,18 @@ ret i8* %ret } +define i8* @test_simplify4_tail() { +; CHECK-LABEL: @test_simplify4_tail( +; CHECK-NEXT: [[STPCPY:%.*]] = tail call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0)) +; CHECK-NEXT: ret i8* [[STPCPY]] +; + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + + %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1) + ret i8* %ret +} + ; Check case where the string length is not constant. define i8* @test_simplify5() { @@ -93,6 +117,20 @@ ret i8* %ret } +; Check cases where there are no string constants, and is a tail call. 
+
+define i8* @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: [[STPCPY:%.*]] = tail call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* [[STPCPY]]
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where slen < strlen (src).
 
 define i8* @test_no_simplify1() {
diff --git a/llvm/test/Transforms/InstCombine/strcpy-1.ll b/llvm/test/Transforms/InstCombine/strcpy-1.ll
--- a/llvm/test/Transforms/InstCombine/strcpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy-1.ll
@@ -63,6 +63,17 @@
   ret i8* %ret
 }
 
+define i8* @test_no_simplify2(i8* %dst, i8* %src) {
+; CHECK-LABEL: @test_no_simplify2(
+; CHECK-NEXT: %ret = musttail call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK-NEXT: ret i8* %ret
+;
+
+  %ret = musttail call i8* @strcpy(i8* %dst, i8* %src)
+  ret i8* %ret
+}
+
+
 define void @test_no_incompatible_attr() {
 ; CHECK-LABEL: @test_no_incompatible_attr(
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i32 6, i1 false)
diff --git a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
--- a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -24,6 +24,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify1_tail() {
+; CHECK-LABEL: @test_simplify1_tail(
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret i8* %ret
+}
+
 define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
@@ -62,6 +74,18 @@
   ret i8* %ret
 }
 
+define i8* @test_simplify4_tail() {
+; CHECK-LABEL: @test_simplify4_tail(
+; CHECK-NEXT: [[STRCPY:%.*]] = tail call i8* @strcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where the string length is not constant.
 define i8* @test_simplify5() {
@@ -93,6 +117,20 @@
   ret i8* %ret
 }
 
+; Check cases where there are no string constants and the call is a tail call.
+
+define i8* @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: [[STRCPY:%.*]] = tail call i8* @strcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+;
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+  %ret = tail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
+}
+
 ; Check case where slen < strlen (src).
 
 define i8* @test_no_simplify1() {
@@ -107,5 +145,14 @@
   ret i8* %ret
 }
 
+define i8* @test_no_simplify2(i8* %dst, i8* %src, i32 %a) {
+; CHECK-LABEL: @test_no_simplify2(
+; CHECK-NEXT: %ret = musttail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+; CHECK-NEXT: ret i8* %ret
+;
+  %ret = musttail call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret i8* %ret
+}
+
 declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
 declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) nounwind readonly
diff --git a/llvm/test/Transforms/InstCombine/strncpy-1.ll b/llvm/test/Transforms/InstCombine/strncpy-1.ll
--- a/llvm/test/Transforms/InstCombine/strncpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/strncpy-1.ll
@@ -180,6 +180,25 @@
   ret void
 }
 
+define i8* @test_no_simplify3(i8* %dst, i8* %src, i32 %count) {
+; CHECK-LABEL: @test_no_simplify3(
+; CHECK-NEXT: %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+; CHECK-NEXT: ret i8* %ret
+;
+  %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+  ret i8* %ret
+}
+
+define i8* @test_no_simplify4(i8* %dst, i8* %src, i32 %count) {
+; CHECK-LABEL: @test_no_simplify4(
+; CHECK-NEXT: %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6)
+; CHECK-NEXT: ret i8* %ret
+;
+  %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6)
+  ret i8* %ret
+}
+
+
 define void @test_no_incompatible_attr() {
 ; CHECK-LABEL: @test_no_incompatible_attr(
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0), i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i32 6, i1 false)