Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -389,6 +389,7 @@
 void initializeStripNonLineTableDebugInfoPass(PassRegistry&);
 void initializeStripSymbolsPass(PassRegistry&);
 void initializeStructurizeCFGPass(PassRegistry&);
+void initializeTailCallMarkPass(PassRegistry &);
 void initializeTailCallElimPass(PassRegistry&);
 void initializeTailDuplicatePass(PassRegistry&);
 void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -170,6 +170,7 @@
       (void) llvm::createStripNonDebugSymbolsPass();
       (void) llvm::createStripDeadDebugInfoPass();
       (void) llvm::createStripDeadPrototypesPass();
+      (void) llvm::createTailCallMarkingPass();
       (void) llvm::createTailCallEliminationPass();
       (void) llvm::createJumpThreadingPass();
       (void) llvm::createUnifyFunctionExitNodesPass();
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -271,6 +271,12 @@
 /// regions that only contain uniform branches.
 Pass *createStructurizeCFGPass(bool SkipUniformRegions = false);
 
+//===----------------------------------------------------------------------===//
+//
+// TailCallMarking - This pass marks call instructions as tail if possible.
+//
+FunctionPass *createTailCallMarkingPass();
+
 //===----------------------------------------------------------------------===//
 //
 // TailCallElimination - This pass eliminates call instructions to the current
Index: include/llvm/Transforms/Scalar/TailCallMarking.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/Scalar/TailCallMarking.h
@@ -0,0 +1,26 @@
+//===-- TailCallMarking.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass marks call instructions as "tail" if possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H
+#define LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+/// Function pass (PassInfoMixin interface) that marks calls as "tail".
+struct TailCallMarkPass : PassInfoMixin<TailCallMarkPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -152,6 +152,7 @@
 #include "llvm/Transforms/Scalar/Sink.h"
 #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
+#include "llvm/Transforms/Scalar/TailCallMarking.h"
 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
 #include "llvm/Transforms/Utils/AddDiscriminators.h"
@@ -410,6 +411,7 @@
       !isOptimizingForSize(Level))
     FPM.addPass(PGOMemOPSizeOpt());
 
+  FPM.addPass(TailCallMarkPass());
   FPM.addPass(TailCallElimPass());
   FPM.addPass(SimplifyCFGPass());
 
@@ -1187,6 +1189,7 @@
 
   // LTO provides additional opportunities for tailcall elimination due to
   // link-time inlining, and visibility of nocapture attribute.
+  FPM.addPass(TailCallMarkPass());
   FPM.addPass(TailCallElimPass());
 
   // Run a few AA driver optimizations here and now to cleanup the code.
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -224,6 +224,7 @@
 FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
 FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
 FUNCTION_PASS("sroa", SROA())
+FUNCTION_PASS("tailcallmark", TailCallMarkPass())
 FUNCTION_PASS("tailcallelim", TailCallElimPass())
 FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
 FUNCTION_PASS("verify", VerifierPass())
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -353,6 +353,7 @@
   if (SizeLevel == 0)
     MPM.add(createPGOMemOPSizeOptLegacyPass());
 
+  MPM.add(createTailCallMarkingPass());     // Mark tail calls
   MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createReassociatePass());           // Reassociate expressions
@@ -898,6 +899,7 @@
 
   // LTO provides additional opportunities for tailcall elimination due to
   // link-time inlining, and visibility of nocapture attribute.
+  PM.add(createTailCallMarkingPass());
   PM.add(createTailCallEliminationPass());
 
   // Run a few AA driven optimizations here and now, to cleanup the code.
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -69,6 +69,7 @@
   SpeculateAroundPHIs.cpp
   StraightLineStrengthReduce.cpp
   StructurizeCFG.cpp
+  TailCallMarking.cpp
   TailRecursionElimination.cpp
   WarnMissedTransforms.cpp
 
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -95,6 +95,7 @@
   initializeStructurizeCFGPass(Registry);
   initializeSimpleLoopUnswitchLegacyPassPass(Registry);
   initializeSinkingLegacyPassPass(Registry);
+  initializeTailCallMarkPass(Registry);
   initializeTailCallElimPass(Registry);
   initializeSeparateConstOffsetFromGEPPass(Registry);
   initializeSpeculativeExecutionLegacyPassPass(Registry);
@@ -235,6 +236,10 @@
   // NOTE: The simplify-libcalls pass has been removed.
 }
 
+void LLVMAddTailCallMarkPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createTailCallMarkingPass());
+} // NOTE(review): no llvm-c/Transforms/Scalar.h declaration in this patch?
+
 void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createTailCallEliminationPass());
 }
Index: lib/Transforms/Scalar/TailCallMarking.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/TailCallMarking.cpp
@@ -0,0 +1,287 @@
+//===- TailCallMarking.cpp - Mark Tail Calls ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file marks call instructions as "tail" where possible: a call is
+// marked when it is guaranteed that the callee does not access the caller's
+// stack frame.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/TailCallMarking.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "tailcallmark"
+
+STATISTIC(NumMarked, "Number of calls marked as tail");
+
+namespace {
+struct AllocaDerivedValueTracker {
+  // Start at a root value and follow its uses transitively: calls that use the
+  // value or a derived value are recorded in AllocaUsers, and places where it
+  // may escape are recorded in EscapePoints.
+  void walk(Value *Root) {
+    SmallVector<Use *, 32> Worklist;
+    SmallPtrSet<Use *, 32> Visited;
+
+    auto AddUsesToWorklist = [&](Value *V) {
+      for (auto &U : V->uses()) {
+        if (!Visited.insert(&U).second)
+          continue;
+        Worklist.push_back(&U);
+      }
+    };
+
+    AddUsesToWorklist(Root);
+
+    while (!Worklist.empty()) {
+      Use *U = Worklist.pop_back_val();
+      Instruction *I = cast<Instruction>(U->getUser());
+
+      switch (I->getOpcode()) {
+      case Instruction::Call:
+      case Instruction::Invoke: {
+        CallSite CS(I);
+        // If the alloca-derived argument is passed byval it is not an escape
+        // point, or a use of an alloca. Calling with byval copies the contents
+        // of the alloca into argument registers or stack slots, which exist
+        // beyond the lifetime of the current frame.
+        if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U)))
+          continue;
+        bool IsNocapture =
+            CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
+        callUsesLocalStack(CS, IsNocapture);
+        if (IsNocapture) {
+          // If the alloca-derived argument is passed in as nocapture, then it
+          // can't propagate to the call's return. That would be capturing.
+          continue;
+        }
+        break;
+      }
+      case Instruction::Load: {
+        // The result of a load is not alloca-derived (unless an alloca has
+        // otherwise escaped, but this is a local analysis).
+        continue;
+      }
+      case Instruction::Store: {
+        if (U->getOperandNo() == 0) // Operand 0 is the stored value.
+          EscapePoints.insert(I);
+        continue; // Stores have no users to analyze.
+      }
+      case Instruction::BitCast:
+      case Instruction::GetElementPtr:
+      case Instruction::PHI:
+      case Instruction::Select:
+      case Instruction::AddrSpaceCast:
+        break; // Result is still alloca-derived; keep following its uses.
+      default:
+        EscapePoints.insert(I); // Unrecognized user: treat it as an escape.
+        break;
+      }
+
+      AddUsesToWorklist(I);
+    }
+  }
+
+  void callUsesLocalStack(CallSite CS, bool IsNocapture) {
+    // Add it to the list of alloca users.
+    AllocaUsers.insert(CS.getInstruction());
+
+    // If it's nocapture then it can't capture this alloca.
+    if (IsNocapture)
+      return;
+
+    // If it can write to memory, it can leak the alloca value.
+    if (!CS.onlyReadsMemory())
+      EscapePoints.insert(CS.getInstruction());
+  }
+
+  SmallPtrSet<Instruction *, 32> AllocaUsers;  // Calls using a derived value.
+  SmallPtrSet<Instruction *, 32> EscapePoints; // Where it may escape.
+};
+} // namespace
+/// Mark calls in F that cannot reach F's stack as "tail"; true if F changed.
+static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
+  if (F.callsFunctionThatReturnsTwice())
+    return false;
+
+  // The local stack holds all alloca instructions and all byval arguments.
+  AllocaDerivedValueTracker Tracker;
+  for (Argument &Arg : F.args()) {
+    if (Arg.hasByValAttr())
+      Tracker.walk(&Arg);
+  }
+  for (auto &BB : F) {
+    for (auto &I : BB)
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+        Tracker.walk(AI);
+  }
+
+  bool Modified = false;
+
+  // Track whether a block is reachable after an alloca has escaped. Blocks that
+  // contain the escaping instruction will be marked as being visited without an
+  // escaped alloca, since that is how the block began.
+  enum VisitType { UNVISITED, UNESCAPED, ESCAPED };
+  DenseMap<BasicBlock *, VisitType> Visited;
+
+  // We propagate the fact that an alloca has escaped from block to successor.
+  // Visit the blocks that are propagating the escapedness first. To do this, we
+  // maintain two worklists.
+  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
+
+  // We may enter a block and visit it thinking that no alloca has escaped yet,
+  // then see an escape point and go back around a loop edge and come back to
+  // the same block twice. Because of this, we defer setting tail on calls when
+  // we first encounter them in a block. Every entry in this list does not
+  // statically use an alloca via use-def chain analysis, but may find an alloca
+  // through other means if the block turns out to be reachable after an escape
+  // point.
+  SmallVector<CallInst *, 32> DeferredTails;
+
+  BasicBlock *BB = &F.getEntryBlock();
+  VisitType Escaped = UNESCAPED;
+  do {
+    for (auto &I : *BB) {
+      if (Tracker.EscapePoints.count(&I))
+        Escaped = ESCAPED;
+
+      CallInst *CI = dyn_cast<CallInst>(&I);
+      if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I))
+        continue;
+
+      bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();
+
+      if (!IsNoTail && CI->doesNotAccessMemory()) {
+        // A call to a readnone function whose arguments are all things computed
+        // outside this function can be marked tail. Even if you stored the
+        // alloca address into a global, a readnone function can't load the
+        // global anyhow. This runs whether or not an alloca has escaped; if
+        // one has, Tracker.AllocaUsers can't be trusted to be accurate.
+        // NOTE(review): Arg.getUser() below is CI itself, so this only fires
+        // for calls without arguments; Arg.get() looks intended -- confirm.
+        bool SafeToTail = true;
+        for (auto &Arg : CI->arg_operands()) {
+          if (isa<Constant>(Arg.getUser()))
+            continue;
+          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
+            if (!A->hasByValAttr())
+              continue;
+          SafeToTail = false;
+          break;
+        }
+        if (SafeToTail) {
+          using namespace ore;
+          ORE->emit([&]() {
+            return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI)
+                   << "marked as tail call candidate (readnone)";
+          });
+          CI->setTailCall();
+          NumMarked++;
+          Modified = true;
+          continue;
+        }
+      }
+
+      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+        DeferredTails.push_back(CI);
+      }
+    }
+
+    for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
+      auto &State = Visited[SuccBB];
+      if (State < Escaped) {
+        State = Escaped;
+        if (State == ESCAPED)
+          WorklistEscaped.push_back(SuccBB);
+        else
+          WorklistUnescaped.push_back(SuccBB);
+      }
+    }
+
+    if (!WorklistEscaped.empty()) {
+      BB = WorklistEscaped.pop_back_val();
+      Escaped = ESCAPED;
+    } else {
+      BB = nullptr;
+      while (!WorklistUnescaped.empty()) {
+        auto *NextBB = WorklistUnescaped.pop_back_val();
+        if (Visited[NextBB] == UNESCAPED) {
+          BB = NextBB;
+          Escaped = UNESCAPED;
+          break;
+        }
+      }
+    }
+  } while (BB);
+
+  for (CallInst *CI : DeferredTails) {
+    if (Visited[CI->getParent()] != ESCAPED) {
+      // If the escape point was part way through the block, calls after the
+      // escape point wouldn't have been put into DeferredTails.
+      LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
+      CI->setTailCall();
+      NumMarked++;
+      Modified = true;
+    }
+  }
+
+  return Modified;
+}
+
+namespace {
+struct TailCallMark : public FunctionPass { // Wraps markTails().
+  static char ID; // Pass identification, replacement for typeid
+  TailCallMark() : FunctionPass(ID) {
+    initializeTailCallMarkPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); // Remark sink.
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addPreserved<PostDominatorTreeWrapperPass>();
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+    return markTails( // Returns true when at least one call was marked.
+        F, &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
+  }
+};
+} // namespace
+
+char TailCallMark::ID = 0;
+INITIALIZE_PASS_BEGIN(TailCallMark, "tailcallmark", "Tail Call Marking", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(TailCallMark, "tailcallmark", "Tail Call Marking", false,
+                    false)
+
+FunctionPass *llvm::createTailCallMarkingPass() { return new TailCallMark(); }
+PreservedAnalyses TailCallMarkPass::run(Function &F,
+                                        FunctionAnalysisManager &AM) {
+  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  PreservedAnalyses PA = PreservedAnalyses::allInSet<CFGAnalyses>();
+  PA.preserve<GlobalsAA>(); // Only `tail` flags change; the CFG is untouched.
+  return markTails(F, &ORE) ? PA : PreservedAnalyses::all();
+}
Index: lib/Transforms/Scalar/TailRecursionElimination.cpp
===================================================================
--- lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -50,6 +50,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -99,228 +100,6 @@
   });
 }
 
-namespace {
-struct AllocaDerivedValueTracker {
-  // Start at a root value and walk its use-def chain to mark calls that use the
-  // value or a derived value in AllocaUsers, and places where it may escape in
-  // EscapePoints.
-  void walk(Value *Root) {
-    SmallVector<Use *, 32> Worklist;
-    SmallPtrSet<Use *, 32> Visited;
-
-    auto AddUsesToWorklist = [&](Value *V) {
-      for (auto &U : V->uses()) {
-        if (!Visited.insert(&U).second)
-          continue;
-        Worklist.push_back(&U);
-      }
-    };
-
-    AddUsesToWorklist(Root);
-
-    while (!Worklist.empty()) {
-      Use *U = Worklist.pop_back_val();
-      Instruction *I = cast<Instruction>(U->getUser());
-
-      switch (I->getOpcode()) {
-      case Instruction::Call:
-      case Instruction::Invoke: {
-        CallSite CS(I);
-        // If the alloca-derived argument is passed byval it is not an escape
-        // point, or a use of an alloca. Calling with byval copies the contents
-        // of the alloca into argument registers or stack slots, which exist
-        // beyond the lifetime of the current frame.
-        if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U)))
-          continue;
-        bool IsNocapture =
-            CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
-        callUsesLocalStack(CS, IsNocapture);
-        if (IsNocapture) {
-          // If the alloca-derived argument is passed in as nocapture, then it
-          // can't propagate to the call's return. That would be capturing.
-          continue;
-        }
-        break;
-      }
-      case Instruction::Load: {
-        // The result of a load is not alloca-derived (unless an alloca has
-        // otherwise escaped, but this is a local analysis).
-        continue;
-      }
-      case Instruction::Store: {
-        if (U->getOperandNo() == 0)
-          EscapePoints.insert(I);
-        continue;  // Stores have no users to analyze.
-      }
-      case Instruction::BitCast:
-      case Instruction::GetElementPtr:
-      case Instruction::PHI:
-      case Instruction::Select:
-      case Instruction::AddrSpaceCast:
-        break;
-      default:
-        EscapePoints.insert(I);
-        break;
-      }
-
-      AddUsesToWorklist(I);
-    }
-  }
-
-  void callUsesLocalStack(CallSite CS, bool IsNocapture) {
-    // Add it to the list of alloca users.
-    AllocaUsers.insert(CS.getInstruction());
-
-    // If it's nocapture then it can't capture this alloca.
-    if (IsNocapture)
-      return;
-
-    // If it can write to memory, it can leak the alloca value.
-    if (!CS.onlyReadsMemory())
-      EscapePoints.insert(CS.getInstruction());
-  }
-
-  SmallPtrSet<Instruction *, 32> AllocaUsers;
-  SmallPtrSet<Instruction *, 32> EscapePoints;
-};
-}
-
-static bool markTails(Function &F, bool &AllCallsAreTailCalls,
-                      OptimizationRemarkEmitter *ORE) {
-  if (F.callsFunctionThatReturnsTwice())
-    return false;
-  AllCallsAreTailCalls = true;
-
-  // The local stack holds all alloca instructions and all byval arguments.
-  AllocaDerivedValueTracker Tracker;
-  for (Argument &Arg : F.args()) {
-    if (Arg.hasByValAttr())
-      Tracker.walk(&Arg);
-  }
-  for (auto &BB : F) {
-    for (auto &I : BB)
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
-        Tracker.walk(AI);
-  }
-
-  bool Modified = false;
-
-  // Track whether a block is reachable after an alloca has escaped. Blocks that
-  // contain the escaping instruction will be marked as being visited without an
-  // escaped alloca, since that is how the block began.
-  enum VisitType {
-    UNVISITED,
-    UNESCAPED,
-    ESCAPED
-  };
-  DenseMap<BasicBlock *, VisitType> Visited;
-
-  // We propagate the fact that an alloca has escaped from block to successor.
-  // Visit the blocks that are propagating the escapedness first. To do this, we
-  // maintain two worklists.
-  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
-
-  // We may enter a block and visit it thinking that no alloca has escaped yet,
-  // then see an escape point and go back around a loop edge and come back to
-  // the same block twice. Because of this, we defer setting tail on calls when
-  // we first encounter them in a block. Every entry in this list does not
-  // statically use an alloca via use-def chain analysis, but may find an alloca
-  // through other means if the block turns out to be reachable after an escape
-  // point.
-  SmallVector<CallInst *, 32> DeferredTails;
-
-  BasicBlock *BB = &F.getEntryBlock();
-  VisitType Escaped = UNESCAPED;
-  do {
-    for (auto &I : *BB) {
-      if (Tracker.EscapePoints.count(&I))
-        Escaped = ESCAPED;
-
-      CallInst *CI = dyn_cast<CallInst>(&I);
-      if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I))
-        continue;
-
-      bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();
-
-      if (!IsNoTail && CI->doesNotAccessMemory()) {
-        // A call to a readnone function whose arguments are all things computed
-        // outside this function can be marked tail. Even if you stored the
-        // alloca address into a global, a readnone function can't load the
-        // global anyhow.
-        //
-        // Note that this runs whether we know an alloca has escaped or not. If
-        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
-        bool SafeToTail = true;
-        for (auto &Arg : CI->arg_operands()) {
-          if (isa<Constant>(Arg.getUser()))
-            continue;
-          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
-            if (!A->hasByValAttr())
-              continue;
-          SafeToTail = false;
-          break;
-        }
-        if (SafeToTail) {
-          using namespace ore;
-          ORE->emit([&]() {
-            return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI)
-                   << "marked as tail call candidate (readnone)";
-          });
-          CI->setTailCall();
-          Modified = true;
-          continue;
-        }
-      }
-
-      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
-        DeferredTails.push_back(CI);
-      } else {
-        AllCallsAreTailCalls = false;
-      }
-    }
-
-    for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
-      auto &State = Visited[SuccBB];
-      if (State < Escaped) {
-        State = Escaped;
-        if (State == ESCAPED)
-          WorklistEscaped.push_back(SuccBB);
-        else
-          WorklistUnescaped.push_back(SuccBB);
-      }
-    }
-
-    if (!WorklistEscaped.empty()) {
-      BB = WorklistEscaped.pop_back_val();
-      Escaped = ESCAPED;
-    } else {
-      BB = nullptr;
-      while (!WorklistUnescaped.empty()) {
-        auto *NextBB = WorklistUnescaped.pop_back_val();
-        if (Visited[NextBB] == UNESCAPED) {
-          BB = NextBB;
-          Escaped = UNESCAPED;
-          break;
-        }
-      }
-    }
-  } while (BB);
-
-  for (CallInst *CI : DeferredTails) {
-    if (Visited[CI->getParent()] != ESCAPED) {
-      // If the escape point was part way through the block, calls after the
-      // escape point wouldn't have been put into DeferredTails.
-      LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
-      CI->setTailCall();
-      Modified = true;
-    } else {
-      AllCallsAreTailCalls = false;
-    }
-  }
-
-  return Modified;
-}
-
 /// Return true if it is safe to move the specified
 /// instruction from after the call to before the call, assuming that all
 /// instructions between the call and this instruction are movable.
@@ -744,7 +523,20 @@
   return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
                                     ArgumentPHIs, AA, ORE, DTU);
 }
+/// Return true if every call instruction reachable from the entry block is
+/// marked "tail". Debug intrinsics are never marked by the tail-call marking
+/// pass, so they are ignored here.
+static bool areAllCallsTailCalls(Function &F) {
+  if (F.callsFunctionThatReturnsTwice())
+    return false; // tailcallmark skips these; keep the old markTails bail-out.
 
+  for (BasicBlock *BB : depth_first(&F))
+    for (Instruction &I : *BB)
+      if (auto *CI = dyn_cast<CallInst>(&I))
+        if (!CI->isTailCall() && !isa<DbgInfoIntrinsic>(CI))
+          return false;
+  return true;
+}
 static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
                                    AliasAnalysis *AA,
                                    OptimizationRemarkEmitter *ORE,
@@ -753,8 +545,7 @@
     return false;
 
   bool MadeChange = false;
-  bool AllCallsAreTailCalls = false;
-  MadeChange |= markTails(F, AllCallsAreTailCalls, ORE);
+  bool AllCallsAreTailCalls = areAllCallsTailCalls(F);
   if (!AllCallsAreTailCalls)
     return MadeChange;
 
Index: test/Other/new-pm-defaults.ll
===================================================================
--- test/Other/new-pm-defaults.ll
+++ test/Other/new-pm-defaults.ll
@@ -140,6 +140,7 @@
 ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
+; CHECK-O-NEXT: Running pass: TailCallMarkPass
 ; CHECK-O-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: test/Other/new-pm-lto-defaults.ll
===================================================================
--- test/Other/new-pm-lto-defaults.ll
+++ test/Other/new-pm-lto-defaults.ll
@@ -81,6 +81,7 @@
 ; CHECK-O2-NEXT: Running pass: JumpThreadingPass
 ; CHECK-O2-NEXT: Running analysis: LazyValueAnalysis
 ; CHECK-O2-NEXT: Running pass: SROA on foo
+; CHECK-O2-NEXT: Running pass: TailCallMarkPass on foo
 ; CHECK-O2-NEXT: Running pass: TailCallElimPass on foo
 ; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
 ; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass>
Index: test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- test/Other/new-pm-thinlto-defaults.ll
+++ test/Other/new-pm-thinlto-defaults.ll
@@ -120,6 +120,7 @@
 ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
+; CHECK-O-NEXT: Running pass: TailCallMarkPass
 ; CHECK-O-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: test/Other/opt-O2-pipeline.ll
===================================================================
--- test/Other/opt-O2-pipeline.ll
+++ test/Other/opt-O2-pipeline.ll
@@ -82,6 +82,11 @@
 ; CHECK-NEXT:         Lazy Block Frequency Analysis
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         PGOMemOPSize
+; CHECK-NEXT:         Natural Loop Information
+; CHECK-NEXT:         Lazy Branch Probability Analysis
+; CHECK-NEXT:         Lazy Block Frequency Analysis
+; CHECK-NEXT:         Optimization Remark Emitter
+; CHECK-NEXT:         Tail Call Marking
 ; CHECK-NEXT:         Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:         Function Alias Analysis Results
 ; CHECK-NEXT:         Natural Loop Information
Index: test/Other/opt-O3-pipeline.ll
===================================================================
--- test/Other/opt-O3-pipeline.ll
+++ test/Other/opt-O3-pipeline.ll
@@ -87,6 +87,11 @@
 ; CHECK-NEXT:         Lazy Block Frequency Analysis
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         PGOMemOPSize
+; CHECK-NEXT:         Natural Loop Information
+; CHECK-NEXT:         Lazy Branch Probability Analysis
+; CHECK-NEXT:         Lazy Block Frequency Analysis
+; CHECK-NEXT:         Optimization Remark Emitter
+; CHECK-NEXT:         Tail Call Marking
 ; CHECK-NEXT:         Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:         Function Alias Analysis Results
 ; CHECK-NEXT:         Natural Loop Information
Index: test/Other/opt-Os-pipeline.ll
===================================================================
--- test/Other/opt-Os-pipeline.ll
+++ test/Other/opt-Os-pipeline.ll
@@ -75,6 +75,13 @@
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         Combine redundant instructions
 ; CHECK-NEXT:         Optimization Remark Emitter
+; CHECK-NEXT:         Tail Call Marking
+; CHECK-NEXT:         Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT:         Function Alias Analysis Results
+; CHECK-NEXT:         Natural Loop Information
+; CHECK-NEXT:         Lazy Branch Probability Analysis
+; CHECK-NEXT:         Lazy Block Frequency Analysis
+; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         Tail Call Elimination
 ; CHECK-NEXT:         Simplify the CFG
 ; CHECK-NEXT:         Reassociate expressions
Index: test/Transforms/Inline/byval-tail-call.ll
===================================================================
--- test/Transforms/Inline/byval-tail-call.ll
+++ test/Transforms/Inline/byval-tail-call.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -tailcallelim -inline -instcombine -dse -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s
+; RUN: opt < %s -basicaa -tailcallmark -tailcallelim -inline -instcombine -dse -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallmark,tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s
 ; PR7272
 
 ; Calls that capture byval parameters cannot be marked as tail calls. Other
Index: test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
===================================================================
--- test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
+++ test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s
 ; PR7328
 ; PR7506
 define i32 @foo(i32 %x) {
Index: test/Transforms/TailCallElim/accum_recursion.ll
===================================================================
--- test/Transforms/TailCallElim/accum_recursion.ll
+++ test/Transforms/TailCallElim/accum_recursion.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 define i32 @test1_factorial(i32 %x) {
 entry:
Index: test/Transforms/TailCallElim/ackermann.ll
===================================================================
--- test/Transforms/TailCallElim/ackermann.ll
+++ test/Transforms/TailCallElim/ackermann.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; This function contains two tail calls, which should be eliminated
-; RUN: opt < %s -tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim"
+; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim"
 
 define i32 @Ack(i32 %M.1, i32 %N.1) {
 entry:
Index: test/Transforms/TailCallElim/basic.ll
===================================================================
--- test/Transforms/TailCallElim/basic.ll
+++ test/Transforms/TailCallElim/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s
 
 declare void @noarg()
 declare void @use(i32*)
Index: test/Transforms/TailCallElim/debugloc.ll
===================================================================
--- test/Transforms/TailCallElim/debugloc.ll
+++ test/Transforms/TailCallElim/debugloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -debugify -tailcallelim -S | FileCheck %s
+; RUN: opt < %s -debugify -tailcallmark -tailcallelim -S | FileCheck %s
 
 define void @foo() {
 entry:
Index: test/Transforms/TailCallElim/deopt-bundle.ll
===================================================================
--- test/Transforms/TailCallElim/deopt-bundle.ll
+++ test/Transforms/TailCallElim/deopt-bundle.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s
 
 define i32 @f_1(i32 %x) {
 ; CHECK-LABEL: @f_1(
Index: test/Transforms/TailCallElim/dup_tail.ll
===================================================================
--- test/Transforms/TailCallElim/dup_tail.ll
+++ test/Transforms/TailCallElim/dup_tail.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; Duplicate the return into if.end to enable TCE.
-; RUN: opt -tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s
+; RUN: opt -tailcallmark -tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s
 
 ; CHECK: Number of return duplicated
 
Index: test/Transforms/TailCallElim/inf-recursion.ll
===================================================================
--- test/Transforms/TailCallElim/inf-recursion.ll
+++ test/Transforms/TailCallElim/inf-recursion.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; Don't turn this into an infinite loop, this is probably the implementation
 ; of fabs and we expect the codegen to lower fabs.
Index: test/Transforms/TailCallElim/notail.ll
===================================================================
--- test/Transforms/TailCallElim/notail.ll
+++ test/Transforms/TailCallElim/notail.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; CHECK: tail call void @callee0()
 ; CHECK: notail call void @callee1()
Index: test/Transforms/TailCallElim/opt-remarks-recursion.ll
===================================================================
--- test/Transforms/TailCallElim/opt-remarks-recursion.ll
+++ test/Transforms/TailCallElim/opt-remarks-recursion.ll
@@ -1,5 +1,5 @@
-; RUN: opt %s -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s
-; RUN: opt %s -o /dev/null -passes='require<opt-remark-emit>,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s
+; RUN: opt %s -tailcallmark -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt %s -o /dev/null -passes='require<opt-remark-emit>,tailcallmark,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s
 
 ; CHECK: /home/davide/pat.c:2:20: transforming tail recursion into loop
 define i32 @fib(i32 %n) nounwind ssp {
Index: test/Transforms/TailCallElim/reorder_load.ll
===================================================================
--- test/Transforms/TailCallElim/reorder_load.ll
+++ test/Transforms/TailCallElim/reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s
 ; PR4323
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"