diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h
--- a/llvm/include/llvm/Transforms/IPO/Inliner.h
+++ b/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -121,7 +121,7 @@
   ModuleInlinerWrapperPass(
       InlineParams Params = getInlineParams(), bool MandatoryFirst = true,
       InliningAdvisorMode Mode = InliningAdvisorMode::Default,
-      unsigned MaxDevirtIterations = 0);
+      unsigned MaxDevirtIterations = 0, bool UseModuleInliner = false);
   ModuleInlinerWrapperPass(ModuleInlinerWrapperPass &&Arg) = default;
 
   PreservedAnalyses run(Module &, ModuleAnalysisManager &);
diff --git a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
@@ -0,0 +1,49 @@
+//===- ModuleInliner.h - Module level Inliner pass --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_MODULEINLINER_H
+#define LLVM_TRANSFORMS_IPO_MODULEINLINER_H
+
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/IR/PassManager.h"
+#include <utility>
+
+namespace llvm {
+
+class AssumptionCacheTracker;
+class ProfileSummaryInfo;
+
+/// The module inliner pass for the new pass manager.
+///
+/// This pass wires together the inlining utilities and the inline cost
+/// analysis into a module pass. Different from SCC inliner, it considers every
+/// call in every function in the whole module and tries to inline if
+/// profitable. With this module level inliner, it is possible to evaluate more
+/// heuristics in the module level such like PrioriInlineOrder. It can be tuned
+/// with a number of parameters to control what cost model is used and what
+/// tradeoffs are made when making the decision.
+class ModuleInlinerPass : public PassInfoMixin<ModuleInlinerPass> {
+public:
+  ModuleInlinerPass(bool OnlyMandatory = false)
+      : OnlyMandatory(OnlyMandatory) {}
+  ModuleInlinerPass(ModuleInlinerPass &&Arg) = default;
+
+  PreservedAnalyses run(Module &, ModuleAnalysisManager &);
+
+private:
+  InlineAdvisor &getAdvisor(const ModuleAnalysisManager &MAM,
+                            FunctionAnalysisManager &FAM, Module &M);
+  std::unique_ptr<InlineAdvisor> OwnedAdvisor;
+  const bool OnlyMandatory;
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_MODULEINLINER_H
\ No newline at end of file
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -109,6 +109,7 @@
 #include "llvm/Transforms/IPO/LoopExtractor.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/ModuleInliner.h"
 #include "llvm/Transforms/IPO/OpenMPOpt.h"
 #include "llvm/Transforms/IPO/PartialInlining.h"
 #include "llvm/Transforms/IPO/SCCP.h"
@@ -274,6 +275,10 @@
     cl::desc("Perform mandatory inlinings module-wide, before performing "
              "inlining."));
 
+static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
+                                         cl::init(false), cl::Hidden,
+                                         cl::desc("Enable module inliner"));
+
 static cl::opt<bool> EnableO3NonTrivialUnswitching(
     "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
     cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
@@ -977,8 +982,12 @@
   if (PGOOpt)
     IP.EnableDeferral = EnablePGOInlineDeferral;
 
+  if (EnableModuleInliner)
+    IP.EnableDeferral = false;
+
   ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
-                                UseInlineAdvisor, MaxDevirtIterations);
+                                UseInlineAdvisor, MaxDevirtIterations,
+                                EnableModuleInliner);
 
   // Require the GlobalsAA analysis for the module so we can query it within
   // the CGSCC pipeline.
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -115,6 +115,7 @@
 MODULE_PASS("dfsan", DataFlowSanitizerPass())
 MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
 MODULE_PASS("msan-module", MemorySanitizerPass({}))
+MODULE_PASS("module-inline", ModuleInlinerPass())
 MODULE_PASS("tsan-module", ThreadSanitizerPass())
 MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
 MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -29,6 +29,7 @@
   LoopExtractor.cpp
   LowerTypeTests.cpp
   MergeFunctions.cpp
+  ModuleInliner.cpp
   OpenMPOpt.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -58,6 +58,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/ModuleInliner.h"
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -1157,7 +1158,8 @@
 ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
                                                    bool MandatoryFirst,
                                                    InliningAdvisorMode Mode,
-                                                   unsigned MaxDevirtIterations)
+                                                   unsigned MaxDevirtIterations,
+                                                   bool UseModuleInliner)
     : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations),
       PM(), MPM() {
   // Run the inliner first. The theory is that we are walking bottom-up and so
@@ -1165,6 +1167,13 @@
   // into the callers so that our optimizations can reflect that.
   // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
   // because it makes profile annotation in the backend inaccurate.
+  if (UseModuleInliner) {
+    if (MandatoryFirst)
+      MPM.addPass(ModuleInlinerPass(/*OnlyMandatory*/ true));
+    MPM.addPass(ModuleInlinerPass());
+    return;
+  }
+
   if (MandatoryFirst)
     PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
   PM.addPass(InlinerPass());
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -0,0 +1,509 @@
+//===- ModuleInliner.cpp - Code related to module inliner -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls in the module level. It doesn't need any infromation about
+// SCC or call graph, which is different from the SCC inliner.  The decisions of
+// which calls are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ModuleInliner.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <sstream>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "module-inline"
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+
+static cl::opt<std::string> ModuleInlineReplayFile(
+    "module-inline-replay", cl::init(""), cl::value_desc("filename"),
+    cl::desc(
+        "Optimization remarks file containing inline remarks to be replayed "
+        "by inlining from module inline remarks."),
+    cl::Hidden);
+
+static cl::opt<bool> InlineEnablePriorityOrder(
+    "module-inline-enable-priority-order", cl::Hidden, cl::init(true),
+    cl::desc("Enable the priority inline order for the module inliner"));
+
+/// Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool inlineHistoryIncludes(
+    Function *F, int InlineHistoryID,
+    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+  while (InlineHistoryID != -1) {
+    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+           "Invalid inline history ID");
+    if (InlineHistory[InlineHistoryID].first == F)
+      return true;
+    InlineHistoryID = InlineHistory[InlineHistoryID].second;
+  }
+  return false;
+}
+
+template <typename T> class InlineOrder {
+public:
+  using reference = T &;
+  using const_reference = const T &;
+
+  virtual ~InlineOrder() {}
+
+  virtual size_t size() = 0;
+
+  virtual void push(const T &Elt) = 0;
+
+  virtual T pop() = 0;
+
+  virtual const_reference front() = 0;
+
+  virtual void erase_if(function_ref<bool(T)> Pred) = 0;
+
+  bool empty() { return !size(); }
+};
+
+template <typename T, typename Container = SmallVector<T, 16>>
+class DefaultInlineOrder : public InlineOrder<T> {
+  using reference = T &;
+  using const_reference = const T &;
+
+public:
+  size_t size() override { return Calls.size() - FirstIndex; }
+
+  void push(const T &Elt) override { Calls.push_back(Elt); }
+
+  T pop() override {
+    assert(size() > 0);
+    return Calls[FirstIndex++];
+  }
+
+  const_reference front() override {
+    assert(size() > 0);
+    return Calls[FirstIndex];
+  }
+
+  void erase_if(function_ref<bool(T)> Pred) override {
+    Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred),
+                Calls.end());
+  }
+
+private:
+  Container Calls;
+  size_t FirstIndex = 0;
+};
+
+class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
+  using T = std::pair<CallBase *, int>;
+  using reference = T &;
+  using const_reference = const T &;
+
+  // Return true if S1 is more desirable than S2.
+  static bool isMoreDesirable(int S1, int S2) { return S1 < S2; }
+
+  static bool cmp(const T &P1, const T &P2) {
+    return isMoreDesirable(P2.second, P1.second);
+  }
+
+  int evaluate(CallBase *CB) {
+    Function *Callee = CB->getCalledFunction();
+    return (int)Callee->getInstructionCount();
+  }
+
+  // A call site could become less desirable for inlining because of the size
+  // growth from prior inlining into the callee. This method is used to lazily
+  // update the desirability of a call site if it's decreasing. It is only
+  // called on pop() or front(), not every time the desirability changes. When
+  // the desirability of the front call site decreases, an updated one would be
+  // pushed right back into the heap. For simplicity, those cases where
+  // the desirability of a call site increases are ignored here.
+  void adjust() {
+    bool Changed = false;
+    do {
+      CallBase *CB = Heap.front().first;
+      const int PreviousGoodness = Heap.front().second;
+      const int CurrentGoodness = evaluate(CB);
+      Changed = isMoreDesirable(PreviousGoodness, CurrentGoodness);
+      if (Changed) {
+        std::pop_heap(Heap.begin(), Heap.end(), cmp);
+        Heap.pop_back();
+        Heap.push_back({CB, CurrentGoodness});
+        std::push_heap(Heap.begin(), Heap.end(), cmp);
+      }
+    } while (Changed);
+  }
+
+public:
+  size_t size() override { return Heap.size(); }
+
+  void push(const T &Elt) override {
+    CallBase *CB = Elt.first;
+    const int InlineHistoryID = Elt.second;
+    const int Goodness = evaluate(CB);
+
+    Heap.push_back({CB, Goodness});
+    std::push_heap(Heap.begin(), Heap.end(), cmp);
+    InlineHistoryMap[CB] = InlineHistoryID;
+  }
+
+  T pop() override {
+    assert(size() > 0);
+    adjust();
+
+    CallBase *CB = Heap.front().first;
+    T Result = std::make_pair(CB, InlineHistoryMap[CB]);
+    InlineHistoryMap.erase(CB);
+    std::pop_heap(Heap.begin(), Heap.end(), cmp);
+    Heap.pop_back();
+    return Result;
+  }
+
+  const_reference front() override {
+    assert(size() > 0);
+    adjust();
+
+    CallBase *CB = Heap.front().first;
+    return *InlineHistoryMap.find(CB);
+  }
+
+  void erase_if(function_ref<bool(T)> Pred) override {
+    Heap.erase(std::remove_if(Heap.begin(), Heap.end(), Pred), Heap.end());
+    std::make_heap(Heap.begin(), Heap.end(), cmp);
+  }
+
+private:
+  SmallVector<T, 16> Heap;
+  DenseMap<CallBase *, int> InlineHistoryMap;
+};
+
+InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
+                                             FunctionAnalysisManager &FAM,
+                                             Module &M) {
+  if (OwnedAdvisor)
+    return *OwnedAdvisor;
+
+  auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
+  if (!IAA) {
+    // It should still be possible to run the inliner as a stand-alone module
+    // pass, for test scenarios. In that case, we default to the
+    // DefaultInlineAdvisor, which doesn't need to keep state between module
+    // pass runs. It also uses just the default InlineParams. In this case, we
+    // need to use the provided FAM, which is valid for the duration of the
+    // inliner pass, and thus the lifetime of the owned advisor. The one we
+    // would get from the MAM can be invalidated as a result of the inliner's
+    // activity.
+    OwnedAdvisor =
+        std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
+
+    if (!ModuleInlineReplayFile.empty())
+      OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
+          M, FAM, M.getContext(), std::move(OwnedAdvisor),
+          ModuleInlineReplayFile,
+          /*EmitRemarks=*/true);
+
+    return *OwnedAdvisor;
+  }
+  assert(IAA->getAdvisor() &&
+         "Expected a present InlineAdvisorAnalysis also have an "
+         "InlineAdvisor initialized");
+  return *IAA->getAdvisor();
+}
+
+static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
+  LibFunc LF;
+
+  // Either this is a normal library function or a "vectorizable"
+  // function.  Not using the VFDatabase here because this query
+  // is related only to libraries handled via the TLI.
+  return TLI.getLibFunc(F, LF) ||
+         TLI.isKnownVectorFunctionInLibrary(F.getName());
+}
+
+PreservedAnalyses ModuleInlinerPass::run(Module &M,
+                                         ModuleAnalysisManager &MAM) {
+  LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n");
+
+  bool Changed = false;
+
+  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+
+  FunctionAnalysisManager &FAM =
+      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+
+  InlineAdvisor &Advisor = getAdvisor(MAM, FAM, M);
+  Advisor.onPassEntry();
+
+  auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
+
+  // In the module inliner, a priority-based worklist is used for calls across
+  // the entire Module. With this module inliner, the inline order is not
+  // limited to bottom-up order. More globally scope inline order is enabled.
+  // Also, the inline deferral logic become unnecessary in this module inliner.
+  // It is possible to use other priority heuristics, e.g. profile-based
+  // heuristic.
+  //
+  // TODO: Here is a huge amount duplicate code between the module inliner and
+  // the SCC inliner, which need some refactoring.
+  std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
+  if (InlineEnablePriorityOrder)
+    Calls = std::make_unique<PriorityInlineOrder>();
+  else
+    Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
+  assert(Calls != nullptr && "Expected an initialized InlineOrder");
+
+  // Populate the initial list of calls in this module.
+  for (Function &F : M) {
+    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+    // We want to generally process call sites top-down in order for
+    // simplifications stemming from replacing the call with the returned value
+    // after inlining to be visible to subsequent inlining decisions.
+    // FIXME: Using instructions sequence is a really bad way to do this.
+    // Instead we should do an actual RPO walk of the function body.
+    for (Instruction &I : instructions(F))
+      if (auto *CB = dyn_cast<CallBase>(&I))
+        if (Function *Callee = CB->getCalledFunction()) {
+          if (!Callee->isDeclaration())
+            Calls->push({CB, -1});
+          else if (!isa<IntrinsicInst>(I)) {
+            using namespace ore;
+            setInlineRemark(*CB, "unavailable definition");
+            ORE.emit([&]() {
+              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
+                     << NV("Callee", Callee) << " will not be inlined into "
+                     << NV("Caller", CB->getCaller())
+                     << " because its definition is unavailable"
+                     << setIsVerbose();
+            });
+          }
+        }
+  }
+  if (Calls->empty())
+    return PreservedAnalyses::all();
+
+  // When inlining a callee produces new call sites, we want to keep track of
+  // the fact that they were inlined from the callee.  This allows us to avoid
+  // infinite inlining in some obscure cases.  To represent this, we use an
+  // index into the InlineHistory vector.
+  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+  // Track a set vector of inlined callees so that we can augment the caller
+  // with all of their edges in the call graph before pruning out the ones that
+  // got simplified away.
+  SmallSetVector<Function *, 4> InlinedCallees;
+
+  // Track the dead functions to delete once finished with inlining calls. We
+  // defer deleting these to make it easier to handle the call graph updates.
+  SmallVector<Function *, 4> DeadFunctions;
+
+  // Loop forward over all of the calls.
+  while (!Calls->empty()) {
+    // We expect the calls to typically be batched with sequences of calls that
+    // have the same caller, so we first set up some shared infrastructure for
+    // this caller. We also do any pruning we can at this layer on the caller
+    // alone.
+    Function &F = *Calls->front().first->getCaller();
+
+    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
+                      << "    Function size: " << F.getInstructionCount()
+                      << "\n");
+
+    auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+      return FAM.getResult<AssumptionAnalysis>(F);
+    };
+
+    // Now process as many calls as we have within this caller in the sequence.
+    // We bail out as soon as the caller has to change so we can
+    // prepare the context of that new caller.
+    bool DidInline = false;
+    while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
+      auto P = Calls->pop();
+      CallBase *CB = P.first;
+      const int InlineHistoryID = P.second;
+      Function &Callee = *CB->getCalledFunction();
+
+      if (InlineHistoryID != -1 &&
+          inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+        setInlineRemark(*CB, "recursive");
+        continue;
+      }
+
+      auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
+      // Check whether we want to inline this callsite.
+      if (!Advice->isInliningRecommended()) {
+        Advice->recordUnattemptedInlining();
+        continue;
+      }
+
+      // Setup the data structure used to plumb customization into the
+      // `InlineFunction` routine.
+      InlineFunctionInfo IFI(
+          /*cg=*/nullptr, GetAssumptionCache, PSI,
+          &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
+          &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+
+      InlineResult IR =
+          InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
+      if (!IR.isSuccess()) {
+        Advice->recordUnsuccessfulInlining(IR);
+        continue;
+      }
+
+      DidInline = true;
+      InlinedCallees.insert(&Callee);
+      ++NumInlined;
+
+      LLVM_DEBUG(dbgs() << "    Size after inlining: "
+                        << F.getInstructionCount() << "\n");
+
+      // Add any new callsites to defined functions to the worklist.
+      if (!IFI.InlinedCallSites.empty()) {
+        int NewHistoryID = InlineHistory.size();
+        InlineHistory.push_back({&Callee, InlineHistoryID});
+
+        for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
+          Function *NewCallee = ICB->getCalledFunction();
+          if (!NewCallee) {
+            // Try to promote an indirect (virtual) call without waiting for
+            // the post-inline cleanup and the next DevirtSCCRepeatedPass
+            // iteration because the next iteration may not happen and we may
+            // miss inlining it.
+            if (tryPromoteCall(*ICB))
+              NewCallee = ICB->getCalledFunction();
+          }
+          if (NewCallee)
+            if (!NewCallee->isDeclaration())
+              Calls->push({ICB, NewHistoryID});
+        }
+      }
+
+      // Merge the attributes based on the inlining.
+      AttributeFuncs::mergeAttributesForInlining(F, Callee);
+
+      // For local functions, check whether this makes the callee trivially
+      // dead. In that case, we can drop the body of the function eagerly
+      // which may reduce the number of callers of other functions to one,
+      // changing inline cost thresholds.
+      bool CalleeWasDeleted = false;
+      if (Callee.hasLocalLinkage()) {
+        // To check this we also need to nuke any dead constant uses (perhaps
+        // made dead by this operation on other functions).
+        Callee.removeDeadConstantUsers();
+        // if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
+        if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) {
+          Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
+            return Call.first->getCaller() == &Callee;
+          });
+          // Clear the body and queue the function itself for deletion when we
+          // finish inlining.
+          // Note that after this point, it is an error to do anything other
+          // than use the callee's address or delete it.
+          Callee.dropAllReferences();
+          assert(!is_contained(DeadFunctions, &Callee) &&
+                 "Cannot put cause a function to become dead twice!");
+          DeadFunctions.push_back(&Callee);
+          CalleeWasDeleted = true;
+        }
+      }
+      if (CalleeWasDeleted)
+        Advice->recordInliningWithCalleeDeleted();
+      else
+        Advice->recordInlining();
+    }
+
+    if (!DidInline)
+      continue;
+    Changed = true;
+
+    InlinedCallees.clear();
+  }
+
+  // Now that we've finished inlining all of the calls across this module,
+  // delete all of the trivially dead functions.
+  //
+  // Note that this walks a pointer set which has non-deterministic order but
+  // that is OK as all we do is delete things and add pointers to unordered
+  // sets.
+  for (Function *DeadF : DeadFunctions) {
+    // Clear out any cached analyses.
+    FAM.clear(*DeadF, DeadF->getName());
+
+    // And delete the actual function from the module.
+    // The Advisor may use Function pointers to efficiently index various
+    // internal maps, e.g. for memoization. Function cleanup passes like
+    // argument promotion create new functions. It is possible for a new
+    // function to be allocated at the address of a deleted function. We could
+    // index using names, but that's inefficient. Alternatively, we let the
+    // Advisor free the functions when it sees fit.
+    DeadF->getBasicBlockList().clear();
+    M.getFunctionList().remove(DeadF);
+
+    ++NumDeleted;
+  }
+
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none();
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/Inline/callbr.ll b/llvm/test/Transforms/Inline/callbr.ll
--- a/llvm/test/Transforms/Inline/callbr.ll
+++ b/llvm/test/Transforms/Inline/callbr.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -inline -S < %s | FileCheck %s
 ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s
+; RUN: opt -passes='module-inline' -S < %s | FileCheck %s
 
 define dso_local i32 @main() #0 {
   %1 = alloca i32, align 4
diff --git a/llvm/test/Transforms/Inline/casts.ll b/llvm/test/Transforms/Inline/casts.ll
--- a/llvm/test/Transforms/Inline/casts.ll
+++ b/llvm/test/Transforms/Inline/casts.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
 ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt < %s -passes='module-inline' -S | FileCheck %s
 
 define i32 @testByte(i8 %X) {
 entry:
diff --git a/llvm/test/Transforms/Inline/comdat-ipo.ll b/llvm/test/Transforms/Inline/comdat-ipo.ll
--- a/llvm/test/Transforms/Inline/comdat-ipo.ll
+++ b/llvm/test/Transforms/Inline/comdat-ipo.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -inline -S < %s | FileCheck %s
 ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s
+; RUN: opt -passes='module-inline' -S < %s | FileCheck %s
 
 define i32 @caller() {
 ; CHECK-LABEL: @caller(
diff --git a/llvm/test/Transforms/Inline/crash-lifetime-marker.ll b/llvm/test/Transforms/Inline/crash-lifetime-marker.ll
--- a/llvm/test/Transforms/Inline/crash-lifetime-marker.ll
+++ b/llvm/test/Transforms/Inline/crash-lifetime-marker.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
 ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt < %s -passes='module-inline' -S | FileCheck %s
 
 ; InlineFunction would assert inside the loop that leaves lifetime markers if
 ; there was an zero-sized AllocaInst. Check that it doesn't assert and doesn't
diff --git a/llvm/test/Transforms/Inline/frameescape.ll b/llvm/test/Transforms/Inline/frameescape.ll
--- a/llvm/test/Transforms/Inline/frameescape.ll
+++ b/llvm/test/Transforms/Inline/frameescape.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -inline -S < %s | FileCheck %s
 ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s
+; RUN: opt -passes='module-inline' -S < %s | FileCheck %s
 
 ; PR23216: We can't inline functions using llvm.localescape.
 
diff --git a/llvm/test/Transforms/Inline/inline-assume.ll b/llvm/test/Transforms/Inline/inline-assume.ll
--- a/llvm/test/Transforms/Inline/inline-assume.ll
+++ b/llvm/test/Transforms/Inline/inline-assume.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -inline -S -o - < %s | FileCheck %s
 ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s
+; RUN: opt -passes='module-inline' -S < %s | FileCheck %s
 
 %0 = type opaque
 %struct.Foo = type { i32, %0* }
diff --git a/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll b/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
--- a/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
+++ b/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -S -inline < %s | FileCheck %s
 ; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s
+; RUN: opt -S -passes='module-inline' < %s | FileCheck %s
 
 target datalayout = "e-p3:32:32-p4:64:64-n32"
 
diff --git a/llvm/test/Transforms/Inline/inline-fast-math-flags.ll b/llvm/test/Transforms/Inline/inline-fast-math-flags.ll
--- a/llvm/test/Transforms/Inline/inline-fast-math-flags.ll
+++ b/llvm/test/Transforms/Inline/inline-fast-math-flags.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -S -inline -inline-threshold=20 | FileCheck %s
 ; RUN: opt < %s -S -passes='cgscc(inline)' -inline-threshold=20 | FileCheck %s
+; RUN: opt < %s -S -passes='module-inline' -inline-threshold=20 | FileCheck %s
 ; Check that we don't drop FastMathFlag when estimating inlining profitability.
 ;
 ; In this test we should inline 'foo'  to 'boo', because it'll fold to a
diff --git a/llvm/test/Transforms/Inline/inline-vla.ll b/llvm/test/Transforms/Inline/inline-vla.ll
--- a/llvm/test/Transforms/Inline/inline-vla.ll
+++ b/llvm/test/Transforms/Inline/inline-vla.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -S -inline %s -o - | FileCheck %s
 ; RUN: opt -S -passes='cgscc(inline)' %s -o - | FileCheck %s
+; RUN: opt -S -passes='module-inline' %s -o - | FileCheck %s
 
 ; Check that memcpy2 is completely inlined away.
 ; CHECK-NOT: memcpy2
diff --git a/llvm/test/Transforms/Inline/invoke-cleanup.ll b/llvm/test/Transforms/Inline/invoke-cleanup.ll
--- a/llvm/test/Transforms/Inline/invoke-cleanup.ll
+++ b/llvm/test/Transforms/Inline/invoke-cleanup.ll
@@ -1,5 +1,6 @@
 ; RUN: opt %s -inline -S | FileCheck %s
 ; RUN: opt %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt %s -passes='module-inline' -S | FileCheck %s
 
 declare void @external_func()
 
diff --git a/llvm/test/Transforms/Inline/invoke-combine-clauses.ll b/llvm/test/Transforms/Inline/invoke-combine-clauses.ll
--- a/llvm/test/Transforms/Inline/invoke-combine-clauses.ll
+++ b/llvm/test/Transforms/Inline/invoke-combine-clauses.ll
@@ -1,4 +1,5 @@
 ; RUN: opt %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt %s -passes='module-inline' -S | FileCheck %s
 
 declare void @external_func()
 declare void @abort()
diff --git a/llvm/test/Transforms/Inline/invoke_test-1.ll b/llvm/test/Transforms/Inline/invoke_test-1.ll
--- a/llvm/test/Transforms/Inline/invoke_test-1.ll
+++ b/llvm/test/Transforms/Inline/invoke_test-1.ll
@@ -3,6 +3,7 @@
 
 ; RUN: opt < %s -inline -S | FileCheck %s
 ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt < %s -passes='module-inline' -S | FileCheck %s
 
 declare void @might_throw()
 
diff --git a/llvm/test/Transforms/Inline/invoke_test-3.ll b/llvm/test/Transforms/Inline/invoke_test-3.ll
--- a/llvm/test/Transforms/Inline/invoke_test-3.ll
+++ b/llvm/test/Transforms/Inline/invoke_test-3.ll
@@ -3,6 +3,7 @@
 
 ; RUN: opt < %s -inline -S | FileCheck %s
 ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt < %s -passes='module-inline' -S | FileCheck %s
 
 declare void @might_throw()
 
diff --git a/llvm/test/Transforms/Inline/nested-inline.ll b/llvm/test/Transforms/Inline/nested-inline.ll
--- a/llvm/test/Transforms/Inline/nested-inline.ll
+++ b/llvm/test/Transforms/Inline/nested-inline.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
 ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt < %s -passes='module-inline' -S | FileCheck %s
 ; Test that bar and bar2 are both inlined throughout and removed.
 @A = weak global i32 0		; <i32*> [#uses=1]
 @B = weak global i32 0		; <i32*> [#uses=1]
diff --git a/llvm/test/Transforms/Inline/nonnull.ll b/llvm/test/Transforms/Inline/nonnull.ll
--- a/llvm/test/Transforms/Inline/nonnull.ll
+++ b/llvm/test/Transforms/Inline/nonnull.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -S -inline %s | FileCheck %s
 ; RUN: opt -S -passes='cgscc(inline)' %s | FileCheck %s
+; RUN: opt -S -passes='module-inline' %s | FileCheck %s
 
 declare void @foo()
 declare void @bar()
diff --git a/llvm/test/Transforms/Inline/pr21206.ll b/llvm/test/Transforms/Inline/pr21206.ll
--- a/llvm/test/Transforms/Inline/pr21206.ll
+++ b/llvm/test/Transforms/Inline/pr21206.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
 ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt < %s -passes='module-inline' -S | FileCheck %s
 
 $c = comdat any
 ; CHECK: $c = comdat any