Index: llvm/bindings/go/llvm/transforms_scalar.go
===================================================================
--- llvm/bindings/go/llvm/transforms_scalar.go
+++ llvm/bindings/go/llvm/transforms_scalar.go
@@ -39,6 +39,7 @@
 	C.LLVMAddScalarReplAggregatesPassWithThreshold(pm.C, C.int(threshold))
 }
 func (pm PassManager) AddSimplifyLibCallsPass()       { C.LLVMAddSimplifyLibCallsPass(pm.C) }
+func (pm PassManager) AddTailCallMarkingPass()        { C.LLVMAddTailCallMarkingPass(pm.C) }
 func (pm PassManager) AddTailCallEliminationPass()    { C.LLVMAddTailCallEliminationPass(pm.C) }
 func (pm PassManager) AddDemoteMemoryToRegisterPass() { C.LLVMAddDemoteMemoryToRegisterPass(pm.C) }
 func (pm PassManager) AddVerifierPass()               { C.LLVMAddVerifierPass(pm.C) }
Index: llvm/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli
===================================================================
--- llvm/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli
+++ llvm/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli
@@ -156,6 +156,11 @@
   : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
   = "llvm_add_simplify_lib_calls"
 
+(** See the [llvm::createTailCallMarkingPass] function. *)
+external add_tail_call_marking
+  : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
+  = "llvm_add_tail_call_marking"
+
 (** See the [llvm::createTailCallEliminationPass] function. *)
 external add_tail_call_elimination
   : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
Index: llvm/bindings/ocaml/transforms/scalar_opts/scalar_opts_ocaml.c
===================================================================
--- llvm/bindings/ocaml/transforms/scalar_opts/scalar_opts_ocaml.c
+++ llvm/bindings/ocaml/transforms/scalar_opts/scalar_opts_ocaml.c
@@ -200,6 +200,12 @@
   return Val_unit;
 }
 
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+value llvm_add_tail_call_marking(LLVMPassManagerRef PM) {
+  LLVMAddTailCallMarkingPass(PM);
+  return Val_unit;
+}
+
 /* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
 value llvm_add_demote_memory_to_register(LLVMPassManagerRef PM) {
   LLVMAddDemoteMemoryToRegisterPass(PM);
Index: llvm/examples/OrcV2Examples/LLJITWithOptimizingIRTransform/LLJITWithOptimizingIRTransform.cpp
===================================================================
--- llvm/examples/OrcV2Examples/LLJITWithOptimizingIRTransform/LLJITWithOptimizingIRTransform.cpp
+++ llvm/examples/OrcV2Examples/LLJITWithOptimizingIRTransform/LLJITWithOptimizingIRTransform.cpp
@@ -72,6 +72,7 @@
 class MyOptimizationTransform {
 public:
   MyOptimizationTransform() : PM(std::make_unique<legacy::PassManager>()) {
+    PM->add(createTailCallMarkingPass());
     PM->add(createTailCallEliminationPass());
     PM->add(createFunctionInliningPass());
     PM->add(createIndVarSimplifyPass());
Index: llvm/include/llvm-c/Transforms/Scalar.h
===================================================================
--- llvm/include/llvm-c/Transforms/Scalar.h
+++ llvm/include/llvm-c/Transforms/Scalar.h
@@ -125,6 +125,9 @@
 /** See llvm::createSimplifyLibCallsPass function. */
 void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM);
 
+/** See llvm::createTailCallMarkingPass function. */
+void LLVMAddTailCallMarkingPass(LLVMPassManagerRef PM);
+
 /** See llvm::createTailCallEliminationPass function. */
 void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM);
 
Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -439,6 +439,7 @@
 void initializeStripSymbolsPass(PassRegistry&);
 void initializeStructurizeCFGLegacyPassPass(PassRegistry &);
 void initializeTailCallElimPass(PassRegistry&);
+void initializeTailCallMarkPass(PassRegistry&);
 void initializeTailDuplicatePass(PassRegistry&);
 void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);
 void initializeTargetPassConfigPass(PassRegistry&);
Index: llvm/include/llvm/LinkAllPasses.h
===================================================================
--- llvm/include/llvm/LinkAllPasses.h
+++ llvm/include/llvm/LinkAllPasses.h
@@ -175,6 +175,7 @@
       (void) llvm::createStripNonDebugSymbolsPass();
       (void) llvm::createStripDeadDebugInfoPass();
       (void) llvm::createStripDeadPrototypesPass();
+      (void) llvm::createTailCallMarkingPass();
       (void) llvm::createTailCallEliminationPass();
       (void) llvm::createJumpThreadingPass();
       (void) llvm::createDFAJumpThreadingPass();
Index: llvm/include/llvm/Transforms/Scalar.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar.h
+++ llvm/include/llvm/Transforms/Scalar.h
@@ -286,6 +286,12 @@
 /// regions that only contain uniform branches.
 Pass *createStructurizeCFGPass(bool SkipUniformRegions = false);
 
+//===----------------------------------------------------------------------===//
+//
+// TailCallMarking - This pass mark call instructions as tail if possible
+//
+FunctionPass *createTailCallMarkingPass();
+
 //===----------------------------------------------------------------------===//
 //
 // TailCallElimination - This pass eliminates call instructions to the current
Index: llvm/include/llvm/Transforms/Scalar/TailCallMarking.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Scalar/TailCallMarking.h
@@ -0,0 +1,28 @@
+//===-- TailCallMarking.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass marks call instruction as "tail". More specifically, if it can
+// prove that callees do not access their caller stack frame, they are marked as
+// eligible for tail call elimination (by the code generator).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H
+#define LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct TailCallMarkPass : PassInfoMixin<TailCallMarkPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -212,6 +212,7 @@
 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
 #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
+#include "llvm/Transforms/Scalar/TailCallMarking.h"
 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
 #include "llvm/Transforms/Utils/AddDiscriminators.h"
Index: llvm/lib/Passes/PassBuilderPipelines.cpp
===================================================================
--- llvm/lib/Passes/PassBuilderPipelines.cpp
+++ llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -112,6 +112,7 @@
 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
+#include "llvm/Transforms/Scalar/TailCallMarking.h"
 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
 #include "llvm/Transforms/Utils/AddDiscriminators.h"
@@ -437,6 +438,7 @@
       !Level.isOptimizingForSize())
     FPM.addPass(PGOMemOPSizeOpt());
 
+  FPM.addPass(TailCallMarkPass());
   FPM.addPass(TailCallElimPass());
   FPM.addPass(SimplifyCFGPass());
 
@@ -1590,6 +1592,7 @@
 
   // LTO provides additional opportunities for tailcall elimination due to
   // link-time inlining, and visibility of nocapture attribute.
+  FPM.addPass(TailCallMarkPass());
   FPM.addPass(TailCallElimPass());
 
   // Run a few AA driver optimizations here and now to cleanup the code.
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -350,6 +350,7 @@
 FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
 FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
 FUNCTION_PASS("tailcallelim", TailCallElimPass())
+FUNCTION_PASS("tailcallmark", TailCallMarkPass())
 FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass())
 FUNCTION_PASS("vector-combine", VectorCombinePass())
 FUNCTION_PASS("verify", VerifierPass())
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -432,10 +432,12 @@
     MPM.add(createPGOMemOPSizeOptLegacyPass());
 
   // TODO: Investigate the cost/benefit of tail call elimination on debugging.
-  if (OptLevel > 1)
+  if (OptLevel > 1) {
+    MPM.add(createTailCallMarkingPass());     // Eliminate tail calls
     MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
-  MPM.add(createCFGSimplificationPass());      // Merge & remove BBs
-  MPM.add(createReassociatePass());           // Reassociate expressions
+  }
+  MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+  MPM.add(createReassociatePass());       // Reassociate expressions
 
   // The matrix extension can introduce large vector operations early, which can
   // benefit from running vector-combine early on.
@@ -1112,8 +1114,10 @@
 
   // LTO provides additional opportunities for tailcall elimination due to
   // link-time inlining, and visibility of nocapture attribute.
-  if (OptLevel > 1)
+  if (OptLevel > 1) {
+    PM.add(createTailCallMarkingPass());
     PM.add(createTailCallEliminationPass());
+  }
 
   // Infer attributes on declarations, call sites, arguments, etc.
   PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
Index: llvm/lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -76,6 +76,7 @@
   SpeculativeExecution.cpp
   StraightLineStrengthReduce.cpp
   StructurizeCFG.cpp
+  TailCallMarking.cpp
   TailRecursionElimination.cpp
   WarnMissedTransforms.cpp
 
Index: llvm/lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/Scalar.cpp
+++ llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -103,6 +103,7 @@
   initializeStructurizeCFGLegacyPassPass(Registry);
   initializeSimpleLoopUnswitchLegacyPassPass(Registry);
   initializeSinkingLegacyPassPass(Registry);
+  initializeTailCallMarkPass(Registry);
   initializeTailCallElimPass(Registry);
   initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
   initializeSpeculativeExecutionLegacyPassPass(Registry);
@@ -255,6 +256,10 @@
   // NOTE: The simplify-libcalls pass has been removed.
 }
 
+void LLVMAddTailCallMarkPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createTailCallMarkingPass());
+}
+
 void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createTailCallEliminationPass());
 }
Index: llvm/lib/Transforms/Scalar/TailCallMarking.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Scalar/TailCallMarking.cpp
@@ -0,0 +1,321 @@
+//===- TailCallMarking.cpp - Mark Tail Calls ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file mark call instruction as "tail" if possible.
+// If it is guaranteed that callees do not access their caller stack frame,
+// calls would be marked as "tail".
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/TailCallMarking.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#define DEBUG_TYPE "tailcallmark"
+
+using namespace llvm;
+
+STATISTIC(NumMarked, "Number of calls marked as tail");
+
+namespace {
+struct AllocaDerivedValueTracker {
+  // Start at a root value and walk its use-def chain to mark calls that use the
+  // value or a derived value in AllocaUsers, and places where it may escape in
+  // EscapePoints.
+  void walk(Value *Root) {
+    SmallVector<Use *, 32> Worklist;
+    SmallPtrSet<Use *, 32> Visited;
+
+    auto AddUsesToWorklist = [&](Value *V) {
+      for (auto &U : V->uses()) {
+        if (!Visited.insert(&U).second)
+          continue;
+        Worklist.push_back(&U);
+      }
+    };
+
+    AddUsesToWorklist(Root);
+
+    while (!Worklist.empty()) {
+      Use *U = Worklist.pop_back_val();
+      Instruction *I = cast<Instruction>(U->getUser());
+
+      switch (I->getOpcode()) {
+      case Instruction::Call:
+      case Instruction::Invoke: {
+        auto &CB = cast<CallBase>(*I);
+        // If the alloca-derived argument is passed byval it is not an escape
+        // point, or a use of an alloca. Calling with byval copies the contents
+        // of the alloca into argument registers or stack slots, which exist
+        // beyond the lifetime of the current frame.
+        if (CB.isArgOperand(U) && CB.isByValArgument(CB.getArgOperandNo(U)))
+          continue;
+        bool IsNocapture =
+            CB.isDataOperand(U) && CB.doesNotCapture(CB.getDataOperandNo(U));
+        callUsesLocalStack(CB, IsNocapture);
+        if (IsNocapture) {
+          // If the alloca-derived argument is passed in as nocapture, then it
+          // can't propagate to the call's return. That would be capturing.
+          continue;
+        }
+        break;
+      }
+      case Instruction::Load: {
+        // The result of a load is not alloca-derived (unless an alloca has
+        // otherwise escaped, but this is a local analysis).
+        continue;
+      }
+      case Instruction::Store: {
+        if (U->getOperandNo() == 0)
+          EscapePoints.insert(I);
+        continue; // Stores have no users to analyze.
+      }
+      case Instruction::BitCast:
+      case Instruction::GetElementPtr:
+      case Instruction::PHI:
+      case Instruction::Select:
+      case Instruction::AddrSpaceCast:
+        break;
+      default:
+        EscapePoints.insert(I);
+        break;
+      }
+
+      AddUsesToWorklist(I);
+    }
+  }
+
+  void callUsesLocalStack(CallBase &CB, bool IsNocapture) {
+    // Add it to the list of alloca users.
+    AllocaUsers.insert(&CB);
+
+    // If it's nocapture then it can't capture this alloca.
+    if (IsNocapture)
+      return;
+
+    // If it can write to memory, it can leak the alloca value.
+    if (!CB.onlyReadsMemory())
+      EscapePoints.insert(&CB);
+  }
+
+  SmallPtrSet<Instruction *, 32> AllocaUsers;
+  SmallPtrSet<Instruction *, 32> EscapePoints;
+};
+} // namespace
+
+static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
+  if (F.callsFunctionThatReturnsTwice())
+    return false;
+
+  // The local stack holds all alloca instructions and all byval arguments.
+  AllocaDerivedValueTracker Tracker;
+  for (Argument &Arg : F.args()) {
+    if (Arg.hasByValAttr())
+      Tracker.walk(&Arg);
+  }
+  for (auto &BB : F) {
+    for (auto &I : BB)
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+        Tracker.walk(AI);
+  }
+
+  bool Modified = false;
+
+  // Track whether a block is reachable after an alloca has escaped. Blocks that
+  // contain the escaping instruction will be marked as being visited without an
+  // escaped alloca, since that is how the block began.
+  enum VisitType { UNVISITED, UNESCAPED, ESCAPED };
+  DenseMap<BasicBlock *, VisitType> Visited;
+
+  // We propagate the fact that an alloca has escaped from block to successor.
+  // Visit the blocks that are propagating the escapedness first. To do this, we
+  // maintain two worklists.
+  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
+
+  // We may enter a block and visit it thinking that no alloca has escaped yet,
+  // then see an escape point and go back around a loop edge and come back to
+  // the same block twice. Because of this, we defer setting tail on calls when
+  // we first encounter them in a block. Every entry in this list does not
+  // statically use an alloca via use-def chain analysis, but may find an alloca
+  // through other means if the block turns out to be reachable after an escape
+  // point.
+  SmallVector<CallInst *, 32> DeferredTails;
+
+  BasicBlock *BB = &F.getEntryBlock();
+  VisitType Escaped = UNESCAPED;
+  do {
+    for (auto &I : *BB) {
+      if (Tracker.EscapePoints.count(&I))
+        Escaped = ESCAPED;
+
+      CallInst *CI = dyn_cast<CallInst>(&I);
+      // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is
+      // considered accessing memory and will be marked as a tail call if we
+      // don't bail out here.
+      if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) ||
+          isa<PseudoProbeInst>(&I))
+        continue;
+
+      // Special-case operand bundle "clang.arc.attachedcall".
+      bool IsNoTail =
+          CI->isNoTailCall() || CI->hasOperandBundlesOtherThan(
+                                    LLVMContext::OB_clang_arc_attachedcall);
+
+      if (!IsNoTail && CI->doesNotAccessMemory()) {
+        // A call to a readnone function whose arguments are all things computed
+        // outside this function can be marked tail. Even if you stored the
+        // alloca address into a global, a readnone function can't load the
+        // global anyhow.
+        //
+        // Note that this runs whether we know an alloca has escaped or not. If
+        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
+        bool SafeToTail = true;
+        for (auto &Arg : CI->args()) {
+          if (isa<Constant>(Arg.getUser()))
+            continue;
+          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
+            if (!A->hasByValAttr())
+              continue;
+          SafeToTail = false;
+          break;
+        }
+        if (SafeToTail) {
+          using namespace ore;
+          ORE->emit([&]() {
+            return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI)
+                   << "marked as tail call candidate (readnone)";
+          });
+          CI->setTailCall();
+          NumMarked++;
+          Modified = true;
+          continue;
+        }
+      }
+
+      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
+        DeferredTails.push_back(CI);
+    }
+
+    for (auto *SuccBB : successors(BB)) {
+      auto &State = Visited[SuccBB];
+      if (State < Escaped) {
+        State = Escaped;
+        if (State == ESCAPED)
+          WorklistEscaped.push_back(SuccBB);
+        else
+          WorklistUnescaped.push_back(SuccBB);
+      }
+    }
+
+    if (!WorklistEscaped.empty()) {
+      BB = WorklistEscaped.pop_back_val();
+      Escaped = ESCAPED;
+    } else {
+      BB = nullptr;
+      while (!WorklistUnescaped.empty()) {
+        auto *NextBB = WorklistUnescaped.pop_back_val();
+        if (Visited[NextBB] == UNESCAPED) {
+          BB = NextBB;
+          Escaped = UNESCAPED;
+          break;
+        }
+      }
+    }
+  } while (BB);
+
+  for (CallInst *CI : DeferredTails) {
+    if (Visited[CI->getParent()] != ESCAPED) {
+      // If the escape point was part way through the block, calls after the
+      // escape point wouldn't have been put into DeferredTails.
+      LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
+      CI->setTailCall();
+      NumMarked++;
+      Modified = true;
+    }
+  }
+
+  return Modified;
+}
+
+namespace {
+struct TailCallMark : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  TailCallMark() : FunctionPass(ID) {
+    initializeTailCallMarkPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+    AU.setPreservesCFG();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+    AU.addPreserved<AAResultsWrapperPass>();
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+    return markTails(
+        F, &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
+  }
+};
+} // namespace
+
+char TailCallMark::ID = 0;
+INITIALIZE_PASS_BEGIN(TailCallMark, "tailcallmark", "Tail Call Marking", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(TailCallMark, "tailcallmark", "Tail Call Marking", false,
+                    false)
+
+FunctionPass *llvm::createTailCallMarkingPass() { return new TailCallMark(); }
+PreservedAnalyses TailCallMarkPass::run(Function &F,
+                                        FunctionAnalysisManager &AM) {
+
+  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  bool Changed = markTails(F, &ORE);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<GlobalsAA>();
+  return PA;
+}
Index: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -191,141 +191,7 @@
 };
 }
 
-static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
-  if (F.callsFunctionThatReturnsTwice())
-    return false;
-
-  // The local stack holds all alloca instructions and all byval arguments.
-  AllocaDerivedValueTracker Tracker;
-  for (Argument &Arg : F.args()) {
-    if (Arg.hasByValAttr())
-      Tracker.walk(&Arg);
-  }
-  for (auto &BB : F) {
-    for (auto &I : BB)
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
-        Tracker.walk(AI);
-  }
 
-  bool Modified = false;
-
-  // Track whether a block is reachable after an alloca has escaped. Blocks that
-  // contain the escaping instruction will be marked as being visited without an
-  // escaped alloca, since that is how the block began.
-  enum VisitType {
-    UNVISITED,
-    UNESCAPED,
-    ESCAPED
-  };
-  DenseMap<BasicBlock *, VisitType> Visited;
-
-  // We propagate the fact that an alloca has escaped from block to successor.
-  // Visit the blocks that are propagating the escapedness first. To do this, we
-  // maintain two worklists.
-  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
-
-  // We may enter a block and visit it thinking that no alloca has escaped yet,
-  // then see an escape point and go back around a loop edge and come back to
-  // the same block twice. Because of this, we defer setting tail on calls when
-  // we first encounter them in a block. Every entry in this list does not
-  // statically use an alloca via use-def chain analysis, but may find an alloca
-  // through other means if the block turns out to be reachable after an escape
-  // point.
-  SmallVector<CallInst *, 32> DeferredTails;
-
-  BasicBlock *BB = &F.getEntryBlock();
-  VisitType Escaped = UNESCAPED;
-  do {
-    for (auto &I : *BB) {
-      if (Tracker.EscapePoints.count(&I))
-        Escaped = ESCAPED;
-
-      CallInst *CI = dyn_cast<CallInst>(&I);
-      // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is
-      // considered accessing memory and will be marked as a tail call if we
-      // don't bail out here.
-      if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) ||
-          isa<PseudoProbeInst>(&I))
-        continue;
-
-      // Special-case operand bundle "clang.arc.attachedcall".
-      bool IsNoTail =
-          CI->isNoTailCall() || CI->hasOperandBundlesOtherThan(
-                                    LLVMContext::OB_clang_arc_attachedcall);
-
-      if (!IsNoTail && CI->doesNotAccessMemory()) {
-        // A call to a readnone function whose arguments are all things computed
-        // outside this function can be marked tail. Even if you stored the
-        // alloca address into a global, a readnone function can't load the
-        // global anyhow.
-        //
-        // Note that this runs whether we know an alloca has escaped or not. If
-        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
-        bool SafeToTail = true;
-        for (auto &Arg : CI->args()) {
-          if (isa<Constant>(Arg.getUser()))
-            continue;
-          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
-            if (!A->hasByValAttr())
-              continue;
-          SafeToTail = false;
-          break;
-        }
-        if (SafeToTail) {
-          using namespace ore;
-          ORE->emit([&]() {
-            return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI)
-                   << "marked as tail call candidate (readnone)";
-          });
-          CI->setTailCall();
-          Modified = true;
-          continue;
-        }
-      }
-
-      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
-        DeferredTails.push_back(CI);
-    }
-
-    for (auto *SuccBB : successors(BB)) {
-      auto &State = Visited[SuccBB];
-      if (State < Escaped) {
-        State = Escaped;
-        if (State == ESCAPED)
-          WorklistEscaped.push_back(SuccBB);
-        else
-          WorklistUnescaped.push_back(SuccBB);
-      }
-    }
-
-    if (!WorklistEscaped.empty()) {
-      BB = WorklistEscaped.pop_back_val();
-      Escaped = ESCAPED;
-    } else {
-      BB = nullptr;
-      while (!WorklistUnescaped.empty()) {
-        auto *NextBB = WorklistUnescaped.pop_back_val();
-        if (Visited[NextBB] == UNESCAPED) {
-          BB = NextBB;
-          Escaped = UNESCAPED;
-          break;
-        }
-      }
-    }
-  } while (BB);
-
-  for (CallInst *CI : DeferredTails) {
-    if (Visited[CI->getParent()] != ESCAPED) {
-      // If the escape point was part way through the block, calls after the
-      // escape point wouldn't have been put into DeferredTails.
-      LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
-      CI->setTailCall();
-      Modified = true;
-    }
-  }
-
-  return Modified;
-}
 
 /// Return true if it is safe to move the specified
 /// instruction from after the call to before the call, assuming that all
@@ -850,7 +716,6 @@
     return false;
 
   bool MadeChange = false;
-  MadeChange |= markTails(F, ORE);
 
   // If this function is a varargs function, we won't be able to PHI the args
   // right, so don't even try to convert it...
Index: llvm/test/Other/new-pm-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-defaults.ll
+++ llvm/test/Other/new-pm-defaults.ll
@@ -156,6 +156,7 @@
 ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
+; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: llvm/test/Other/new-pm-lto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-lto-defaults.ll
+++ llvm/test/Other/new-pm-lto-defaults.ll
@@ -81,6 +81,7 @@
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
 ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: SROAPass on foo
+; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass on (foo)
 ; CHECK-O23SZ-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
Index: llvm/test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -122,6 +122,7 @@
 ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
+; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -96,6 +96,7 @@
 ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
+; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -105,6 +105,7 @@
 ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
+; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -134,6 +134,7 @@
 ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O2-NEXT: Running pass: PGOMemOPSizeOpt
 ; CHECK-O3-NEXT: Running pass: PGOMemOPSizeOpt
+; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -100,6 +100,7 @@
 ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
 ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
+; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: llvm/test/Transforms/Inline/byval-tail-call.ll
===================================================================
--- llvm/test/Transforms/Inline/byval-tail-call.ll
+++ llvm/test/Transforms/Inline/byval-tail-call.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basic-aa -tailcallelim -inline -instcombine -dse -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s
+; RUN: opt < %s -basic-aa -tailcallmark -tailcallelim -inline -instcombine -dse -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallmark,tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s
 ; PR7272
 
 ; Calls that capture byval parameters cannot be marked as tail calls. Other
Index: llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
+++ llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 ; PR7328
 ; PR7506
 define i32 @test1_constants(i32 %x) {
Index: llvm/test/Transforms/TailCallElim/EraseBB.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/EraseBB.ll
+++ llvm/test/Transforms/TailCallElim/EraseBB.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes=tailcallelim -verify-dom-info -S < %s 2>&1 | FileCheck %s
+; RUN: opt -passes=tailcallmark,tailcallelim -verify-dom-info -S < %s 2>&1 | FileCheck %s
 
 ; CHECK: add nsw i32
 ; CHECK-NEXT: br label
Index: llvm/test/Transforms/TailCallElim/accum_recursion.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/accum_recursion.ll
+++ llvm/test/Transforms/TailCallElim/accum_recursion.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 define i32 @test1_factorial(i32 %x) {
 entry:
Index: llvm/test/Transforms/TailCallElim/ackermann.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/ackermann.ll
+++ llvm/test/Transforms/TailCallElim/ackermann.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; This function contains two tail calls, which should be eliminated
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim"
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim"
 
 define i32 @Ack(i32 %M.1, i32 %N.1) {
 entry:
Index: llvm/test/Transforms/TailCallElim/basic.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/basic.ll
+++ llvm/test/Transforms/TailCallElim/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 declare void @noarg()
 declare void @use(i32*)
Index: llvm/test/Transforms/TailCallElim/debugloc.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/debugloc.ll
+++ llvm/test/Transforms/TailCallElim/debugloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=debugify,tailcallelim -S | FileCheck %s
+; RUN: opt < %s -passes=debugify,tailcallmark,tailcallelim -S | FileCheck %s
 
 define void @foo() {
 entry:
Index: llvm/test/Transforms/TailCallElim/deopt-bundle.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/deopt-bundle.ll
+++ llvm/test/Transforms/TailCallElim/deopt-bundle.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 define i32 @f_1(i32 %x) {
 ; CHECK-LABEL: @f_1(
Index: llvm/test/Transforms/TailCallElim/dont_reorder_load.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/dont_reorder_load.ll
+++ llvm/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | grep call | count 4
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | grep call | count 4
 ; PR4323
 
 ; Several cases where tail call elimination should not move the load above the
Index: llvm/test/Transforms/TailCallElim/dup_tail.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/dup_tail.ll
+++ llvm/test/Transforms/TailCallElim/dup_tail.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; Duplicate the return into if.end to enable TCE.
-; RUN: opt -passes=tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s
+; RUN: opt -passes=tailcallmark,tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s
 
 ; CHECK: Number of return duplicated
 
Index: llvm/test/Transforms/TailCallElim/inf-recursion.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/inf-recursion.ll
+++ llvm/test/Transforms/TailCallElim/inf-recursion.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; Don't turn this into an infinite loop, this is probably the implementation
 ; of fabs and we expect the codegen to lower fabs.
Index: llvm/test/Transforms/TailCallElim/notail.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/notail.ll
+++ llvm/test/Transforms/TailCallElim/notail.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; CHECK: tail call void @callee0()
 ; CHECK: notail call void @callee1()
Index: llvm/test/Transforms/TailCallElim/opt-remarks-recursion.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/opt-remarks-recursion.ll
+++ llvm/test/Transforms/TailCallElim/opt-remarks-recursion.ll
@@ -1,5 +1,5 @@
-; RUN: opt %s -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s
-; RUN: opt %s -o /dev/null -passes='require<opt-remark-emit>,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s
+; RUN: opt %s -tailcallmark -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt %s -o /dev/null -passes='require<opt-remark-emit>,tailcallmark,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s
 
 ; CHECK: /home/davide/pat.c:2:20: transforming tail recursion into loop
 define i32 @fib(i32 %n) nounwind ssp {
Index: llvm/test/Transforms/TailCallElim/reorder_load.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/reorder_load.ll
+++ llvm/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 ; PR4323
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: llvm/test/Transforms/TailCallElim/setjmp.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/setjmp.ll
+++ llvm/test/Transforms/TailCallElim/setjmp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; Test that we don't tail call in a functions that calls returns_twice
 ; functions.
Index: llvm/test/Transforms/TailCallElim/tre-byval-parameter-2.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/tre-byval-parameter-2.ll
+++ llvm/test/Transforms/TailCallElim/tre-byval-parameter-2.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; the test was generated from the following C++ source:
 ;
Index: llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll
+++ llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; the test was generated from the following C++ source:
 ;
Index: llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll
+++ llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; This test checks that TRE would be done for only one recursive call.
 ; The test_multiple_exits function has three recursive calls.
Index: llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll
+++ llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s
 
 ; IR for that test was generated from the following C++ source:
 ;
Index: polly/lib/CodeGen/CodegenCleanup.cpp
===================================================================
--- polly/lib/CodeGen/CodegenCleanup.cpp
+++ polly/lib/CodeGen/CodegenCleanup.cpp
@@ -73,6 +73,7 @@
     FPM->add(createCFGSimplificationPass());
     FPM->add(createInstructionCombiningPass(true));
     FPM->add(createLibCallsShrinkWrapPass());
+    FPM->add(createTailCallMarkingPass());
     FPM->add(createTailCallEliminationPass());
     FPM->add(createCFGSimplificationPass());
     FPM->add(createReassociatePass());
Index: polly/lib/Transform/Canonicalization.cpp
===================================================================
--- polly/lib/Transform/Canonicalization.cpp
+++ polly/lib/Transform/Canonicalization.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Transforms/Scalar/LoopRotation.h"
 #include "llvm/Transforms/Scalar/Reassociate.h"
 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Scalar/TailCallMarking.h"
 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/Mem2Reg.h"
@@ -45,6 +46,7 @@
   PM.add(llvm::createEarlyCSEPass(UseMemSSA));
   PM.add(llvm::createInstructionCombiningPass());
   PM.add(llvm::createCFGSimplificationPass());
+  PM.add(llvm::createTailCallMarkingPass());
   PM.add(llvm::createTailCallEliminationPass());
   PM.add(llvm::createCFGSimplificationPass());
   PM.add(llvm::createReassociatePass());
@@ -100,6 +102,7 @@
   FPM.addPass(EarlyCSEPass(UseMemSSA));
   FPM.addPass(InstCombinePass());
   FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(TailCallMarkPass());
   FPM.addPass(TailCallElimPass());
   FPM.addPass(SimplifyCFGPass());
   FPM.addPass(ReassociatePass());