diff --git a/llvm/include/llvm/Analysis/InliningAdvisor.h b/llvm/include/llvm/Analysis/InliningAdvisor.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/InliningAdvisor.h @@ -0,0 +1,227 @@ +//===- InliningAdvisor.h - Inlining decision making abstraction -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_INLININGADVISOR_H_ +#define LLVM_INLININGADVISOR_H_ + +#include +#include +#include + +#include "llvm/Analysis/InlineCost.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class BasicBlock; +class CallBase; +class Function; +class Module; +class PreservedAnalyses; +class OptimizationRemarkEmitter; + +/// There are 3 scenarios we can use the InliningAdvisor: +/// - Default - use manual heuristics. +/// +/// - Release mode, the expected mode for production, day to day deployments. +/// In this mode, when building the compiler, we also compile a pre-trained ML +/// model to native code, and link it as a static library. This mode has low +/// overhead and no additional dependencies for the compiler runtime. +/// +/// - Development mode, for training new models. +/// In this mode, we trade off runtime performance for flexibility. This mode +/// requires the full C Tensorflow API library, and evaluates models +/// dynamically. This mode also permits generating training logs, for offline +/// training. +enum class InliningAdvisorMode : int { Default, Release, Development }; + +class InliningAdvisor; +/// Capture state between an inlining decision having had been made, and +/// its impact being observable. When collecting model training data, this +/// allows recording features/decisions/partial reward data sets. +/// +/// Derivations of this type are expected to be tightly coupled with their +/// InliningAdvisors. The base type implements the minimal contractual +/// obligations. +class PendingInliningRecord { +public: + PendingInliningRecord(PendingInliningRecord &&) = delete; + PendingInliningRecord(const PendingInliningRecord &) = delete; + virtual ~PendingInliningRecord() { + assert(Recorded && "PendingInliningRecord should have been informed of the " + "inliner's decision in all cases"); + } + + /// Exactly one of the record* APIs must be called. Implementers may extend + /// behavior by implementing the corresponding record*Impl. + /// + /// Call after inlining succeeded, and did not result in deleting the callee. + void recordInlining() { + markRecorded(); + recordInliningImpl(); + } + + /// Call after inlining succeeded, and resulted in deleting the callee. + void recordInliningWithCalleeDeleted(); + + /// Call after the decision for a call site was to not inline. + void recordUnsuccessfulInlining(const InlineResult &Result) { + markRecorded(); + recordUnsuccessfulInliningImpl(Result); + } + + /// Call to indicate inlining was not attempted. + void recordUnattemptedInlining() { + markRecorded(); + recordUnattemptedInliningImpl(); + } + + /// Get the inlining recommendation. 
+ bool isInliningRecommended() const { return IsInliningRecommended; } + +protected: + PendingInliningRecord(InliningAdvisor *Advisor, CallBase &CB, + bool IsInliningRecommended); + + virtual void recordInliningImpl(){}; + virtual void recordInliningWithCalleeDeletedImpl() {} + virtual void recordUnsuccessfulInliningImpl(const InlineResult &Result) {} + virtual void recordUnattemptedInliningImpl() {} + + InliningAdvisor *const Advisor; + /// Caller and Callee are pre-inlining. + Function *const Caller; + Function *const Callee; + const bool IsInliningRecommended; + +private: + void markRecorded() { + assert(!Recorded && "Recording should happen exactly once"); + Recorded = true; + } + + bool Recorded = false; +}; + +/// Interface for deciding whether to inline a call site or not. +class InliningAdvisor { +public: + InliningAdvisor(InliningAdvisor &&) = delete; + virtual ~InliningAdvisor() { freeDeletedFunctions(); }; + + /// Get a PendingInliningRecord containing a recommendation on whether to + /// inline or not. \p CB is assumed to be a direct call. \p FAM is assumed to + /// be up-to-date wrt previous inlining decisions. + /// Returns a PendingInliningRecord with the inlining recommendation. + virtual std::unique_ptr + getInliningAdvice(CallBase &CB, FunctionAnalysisManager &FAM) = 0; + + /// This must be called when the Inliner pass is entered, to allow the + /// InliningAdvisor update internal state, as result of function passes run + /// between Inliner pass runs (for the same module). + virtual void OnPassEntry(){}; + + /// This must be called when the Inliner pass is exited, as function passes + /// may be run subsequently. This allows an implementation of InliningAdvisor + /// to prepare for a partial update. + virtual void OnPassExit(){}; + +protected: + InliningAdvisor() = default; + + /// We may want to defer deleting functions to after the inlining for a whole + /// module has finished. This allows us to reliably use function pointers as + /// unique identifiers, as an efficient implementation detail of the + /// InliningAdvisor. Otherwise, it is possible the memory allocator + /// re-allocate Function objects at the same address of a deleted Function; + /// and Functions are potentially created during the function passes called + /// after each SCC inlining (e.g. argument promotion does that). + void freeDeletedFunctions(); + + bool isFunctionDeleted(Function *F) const { + return DeletedFunctions.count(F); + } + +private: + friend class PendingInliningRecord; + void markFunctionAsDeleted(Function *F); + std::unordered_set DeletedFunctions; +}; + +/// The default (manual heuristics) implementation of the InliningAdvisor. This +/// implementation does not need to keep state between inliner pass runs, and is +/// reusable as-is for inliner pass test scenarios, as well as for regular use. +class DefaultInliningAdvisor : public InliningAdvisor { +public: + DefaultInliningAdvisor(InlineParams Params) : Params(Params) {} + +private: + std::unique_ptr + getInliningAdvice(CallBase &CB, FunctionAnalysisManager &FAM) override; + + void OnPassExit() override { freeDeletedFunctions(); } + InlineParams Params; +}; + +/// The InliningAdvisorAnalysis is a module pass because the InliningAdvisor +/// needs to capture state right before inlining commences over a module. 
+class InliningAdvisorAnalysis + : public AnalysisInfoMixin { +public: + static AnalysisKey Key; + InliningAdvisorAnalysis() = default; + struct Result { + Result(Module &M, ModuleAnalysisManager &MAM) : M(M), MAM(MAM) {} + bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &) { + // InliningAdvisor must be preserved across analysis invalidations. + return false; + } + bool tryCreate(InlineParams Params, InliningAdvisorMode Mode); + InliningAdvisor *getAdvisor() const { return Advisor.get(); } + void clear() { Advisor.reset(); } + + private: + Module &M; + ModuleAnalysisManager &MAM; + std::unique_ptr Advisor; + }; + + Result run(Module &M, ModuleAnalysisManager &MAM) { return Result(M, MAM); } +}; + +// Default (manual policy) decision making helper APIs. Shared with the legacy +// pass manager inliner. + +/// Return true if inlining of CB can block the caller from being +/// inlined which is proved to be more beneficial. \p IC is the +/// estimated inline cost associated with callsite \p CB. +/// \p TotalSecondaryCost will be set to the estimated cost of inlining the +/// caller if \p CB is suppressed for inlining. +bool shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, + function_ref GetInlineCost); + +/// Return the cost only if the inliner should attempt to inline at the given +/// CallSite. If we return the cost, we will emit an optimisation remark later +/// using that cost, so we won't do so from this function. Return None if +/// inlining should not be attempted. +Optional +shouldInline(CallBase &CB, function_ref GetInlineCost, + OptimizationRemarkEmitter &ORE); + +/// Emit ORE message. +void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, + const BasicBlock *Block, const Function &Callee, + const Function &Caller, const InlineCost &IC); + +/// Set the inline-remark attribute. +void setInlineRemark(CallBase &CB, StringRef Message); + +/// Utility for extracting the inline cost message to a string. +std::string inlineCostStr(const InlineCost &IC); +} // namespace llvm +#endif // LLVM_INLININGADVISOR_H_ diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -19,6 +19,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Error.h" +#include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include @@ -345,9 +346,9 @@ /// Construct the module pipeline that performs inlining as well as /// the inlining-driven cleanups. - ModulePassManager buildInlinerPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging = false); + ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, + ThinLTOPhase Phase, + bool DebugLogging = false); /// Construct the core LLVM module optimization pipeline. 
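To make the contract of the new header concrete, here is a minimal sketch of how a client such as InlinerPass is expected to drive InliningAdvisor and PendingInliningRecord. This is reviewer illustration only, distilled from the doc comments above and the Inliner.cpp changes further down in the patch; it is not part of the patch, and CB, FAM, IFI, and CalleeWasDeleted stand in for the caller's own state:

  // Reviewer sketch (not patch content): ask the advisor for a recommendation;
  // exactly one record* call must follow before the record is destroyed.
  std::unique_ptr<PendingInliningRecord> Record =
      Advisor.getInliningAdvice(*CB, FAM);
  if (!Record->isInliningRecommended()) {
    Record->recordUnattemptedInlining();
    return;
  }
  InlineResult IR = InlineFunction(*CB, IFI);
  if (!IR.isSuccess()) {
    Record->recordUnsuccessfulInlining(IR);
    return;
  }
  // Report which of the two success outcomes happened, so the advisor can
  // track deleted callees (and, later, training data) correctly.
  if (CalleeWasDeleted)
    Record->recordInliningWithCalleeDeleted();
  else
    Record->recordInlining();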
/// diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -12,6 +12,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InliningAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" @@ -93,21 +94,47 @@ /// passes be composed to achieve the same end result. class InlinerPass : public PassInfoMixin { public: - InlinerPass(InlineParams Params = getInlineParams()) - : Params(std::move(Params)) {} + InlinerPass() = default; ~InlinerPass(); InlinerPass(InlinerPass &&Arg) - : Params(std::move(Arg.Params)), - ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)) {} + : ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)) {} PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); private: - InlineParams Params; + InliningAdvisor &getAdvisor(const ModuleAnalysisManager &MAM, Module &M); std::unique_ptr ImportedFunctionsStats; + Optional OwnedDefaultAdvisor; }; +/// Module pass, wrapping the inliner pass. This works in conjunction with the +/// InliningAdvisorAnalysis to facilitate inlining decisions taking into account +/// module-wide state, that need to keep track of inter-inliner pass runs, for +/// a given module. An InliningAdvisor is configured and kept alive for the +/// duration of the ModuleInlinerWrapperPass::run. +class ModuleInlinerWrapperPass + : public PassInfoMixin { +public: + ModuleInlinerWrapperPass( + InlineParams Params = getInlineParams(), bool Debugging = false, + InliningAdvisorMode Mode = InliningAdvisorMode::Default, + unsigned MaxDevirtIterations = 0); + ModuleInlinerWrapperPass(ModuleInlinerWrapperPass &&Arg) = default; + + PreservedAnalyses run(Module &, ModuleAnalysisManager &); + + /// Allow adding more CGSCC passes, besides inlining. This should be called + /// before run is called, as part of pass pipeline building. + CGSCCPassManager &getPM() { return PM; } + +private: + const InlineParams Params; + const InliningAdvisorMode Mode; + const unsigned MaxDevirtIterations; + const bool Debugging; + CGSCCPassManager PM; +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_INLINER_H diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -40,6 +40,7 @@ IVUsers.cpp IndirectCallPromotionAnalysis.cpp InlineCost.cpp + InliningAdvisor.cpp InstCount.cpp InstructionPrecedenceTracking.cpp InstructionSimplify.cpp diff --git a/llvm/lib/Analysis/InliningAdvisor.cpp b/llvm/lib/Analysis/InliningAdvisor.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/InliningAdvisor.cpp @@ -0,0 +1,377 @@ +//===- InliningAdvisor.cpp - analysis pass implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements InliningAdvisorAnalysis and DefaultInliningAdvisor, and +// related types. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InliningAdvisor.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/raw_ostream.h" + +#include + +using namespace llvm; +#define DEBUG_TYPE "inline" + +// This weirdly named statistic tracks the number of times that, when attempting +// to inline a function A into B, we analyze the callers of B in order to see +// if those would be more profitable and blocked inline steps. +STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); + +/// Flag to add inline messages as callsite attributes 'inline-remark'. +static cl::opt + InlineRemarkAttribute("inline-remark-attribute", cl::init(false), + cl::Hidden, + cl::desc("Enable adding inline-remark attribute to" + " callsites processed by inliner but decided" + " to be not inlined")); + +// An integer used to limit the cost of inline deferral. The default negative +// number tells shouldBeDeferred to only take the secondary cost into account. +static cl::opt + InlineDeferralScale("inline-deferral-scale", + cl::desc("Scale to limit the cost of inline deferral"), + cl::init(-1), cl::Hidden); + +namespace llvm { +std::basic_ostream &operator<<(std::basic_ostream &R, + const ore::NV &Arg) { + return R << Arg.Val; +} + +template +RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { + using namespace ore; + if (IC.isAlways()) { + R << "(cost=always)"; + } else if (IC.isNever()) { + R << "(cost=never)"; + } else { + R << "(cost=" << ore::NV("Cost", IC.getCost()) + << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; + } + if (const char *Reason = IC.getReason()) + R << ": " << ore::NV("Reason", Reason); + return R; +} +} // namespace llvm + +namespace { +class DefaultPendingRecord : public PendingInliningRecord { +public: + DefaultPendingRecord(DefaultInliningAdvisor *Advisor, CallBase &CB, + Optional OIC, OptimizationRemarkEmitter &ORE) + : PendingInliningRecord(Advisor, CB, OIC.hasValue()), OriginalCB(&CB), + OIC(OIC), ORE(ORE), DLoc(CB.getDebugLoc()), Block(CB.getParent()) {} + +private: + void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { + using namespace ore; + llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) + + "; " + inlineCostStr(*OIC)); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller) << ": " + << NV("Reason", Result.getFailureReason()); + }); + } + + void recordInliningWithCalleeDeletedImpl() override { + emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); + } + + void recordInliningImpl() override { + emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); + } + +private: + CallBase *const OriginalCB; + Optional OIC; + OptimizationRemarkEmitter &ORE; + + // Capture the context of CB before inlining, as a successful inlining may + // change that context, and we want to report success or failure in the + // original context. 
+ const DebugLoc DLoc; + const BasicBlock *const Block; +}; + +} // namespace + +std::unique_ptr +DefaultInliningAdvisor::getInliningAdvice(CallBase &CB, + FunctionAnalysisManager &FAM) { + Function &Callee = *CB.getCalledFunction(); + Function &F = *CB.getCaller(); + ProfileSummaryInfo *PSI = FAM.getResult(F) + .getManager() + .getCachedResult( + *CB.getParent()->getParent()->getParent()); + + auto &ORE = FAM.getResult(F); + std::function GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { + return FAM.getResult(Callee); + }; + auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult(F); + }; + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; + + auto GetInlineCost = [&](CallBase &CB) { + Function &Callee = *CB.getCalledFunction(); + auto &CalleeTTI = FAM.getResult(Callee); + bool RemarksEnabled = + Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( + DEBUG_TYPE); + return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, + GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); + }; + auto OIC = llvm::shouldInline(CB, GetInlineCost, ORE); + std::unique_ptr Ret( + new DefaultPendingRecord(this, CB, OIC, ORE)); + return Ret; +} + +PendingInliningRecord::PendingInliningRecord(InliningAdvisor *Advisor, + CallBase &CB, + bool IsInliningRecommended) + : Advisor(Advisor), Caller(CB.getCaller()), Callee(CB.getCalledFunction()), + IsInliningRecommended(IsInliningRecommended) {} + +void InliningAdvisor::markFunctionAsDeleted(Function *F) { + assert((!DeletedFunctions.count(F)) && + "Cannot put cause a function to become dead twice!"); + DeletedFunctions.insert(F); +} + +void InliningAdvisor::freeDeletedFunctions() { + for (auto *F : DeletedFunctions) + delete (F); + DeletedFunctions.clear(); +} + +void PendingInliningRecord::recordInliningWithCalleeDeleted() { + markRecorded(); + Advisor->markFunctionAsDeleted(Callee); + recordInliningWithCalleeDeletedImpl(); +} + +AnalysisKey InliningAdvisorAnalysis::Key; + +bool InliningAdvisorAnalysis::Result::tryCreate(InlineParams Params, + InliningAdvisorMode Mode) { + switch (Mode) { + case InliningAdvisorMode::Default: + Advisor.reset(new DefaultInliningAdvisor(Params)); + break; + case InliningAdvisorMode::Development: + // To be added subsequently under conditional compilation. + break; + case InliningAdvisorMode::Release: + // To be added subsequently under conditional compilation. + break; + } + return !!Advisor; +} + +void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, + const BasicBlock *Block, const Function &Callee, + const Function &Caller, const InlineCost &IC) { + using namespace ore; + ORE.emit([&]() { + bool AlwaysInline = IC.isAlways(); + StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; + return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block) + << ore::NV("Callee", &Callee) << " inlined into " + << ore::NV("Caller", &Caller) << " with " << IC; + }); +} + +/// Return true if inlining of CB can block the caller from being +/// inlined which is proved to be more beneficial. \p IC is the +/// estimated inline cost associated with callsite \p CB. +/// \p TotalSecondaryCost will be set to the estimated cost of inlining the +/// caller if \p CB is suppressed for inlining. +bool llvm::shouldBeDeferred( + Function *Caller, InlineCost IC, int &TotalSecondaryCost, + function_ref GetInlineCost) { + // For now we only handle local or inline functions. 
+ if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) + return false; + // If the cost of inlining CB is non-positive, it is not going to prevent the + // caller from being inlined into its callers and hence we don't need to + // defer. + if (IC.getCost() <= 0) + return false; + // Try to detect the case where the current inlining candidate caller (call + // it B) is a static or linkonce-ODR function and is an inlining candidate + // elsewhere, and the current candidate callee (call it C) is large enough + // that inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B into + // its callers. + // + // This only applies to static and linkonce-ODR functions because those are + // expected to be available for inlining in the translation units where they + // are used. Thus we will always have the opportunity to make local inlining + // decisions. Importantly the linkonce-ODR linkage covers inline functions + // and templates in C++. + // + // FIXME: All of this logic should be sunk into getInlineCost. It relies on + // the internal implementation of the inline cost metrics rather than + // treating them as truly abstract units etc. + TotalSecondaryCost = 0; + // The candidate cost to be imposed upon the current function. + int CandidateCost = IC.getCost() - 1; + // If the caller has local linkage and can be inlined to all its callers, we + // can apply a huge negative bonus to TotalSecondaryCost. + bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse(); + // This bool tracks what happens if we DO inline C into B. + bool InliningPreventsSomeOuterInline = false; + unsigned NumCallerUsers = 0; + for (User *U : Caller->users()) { + CallBase *CS2 = dyn_cast(U); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2->getCalledFunction() != Caller) { + ApplyLastCallBonus = false; + continue; + } + + InlineCost IC2 = GetInlineCost(*CS2); + ++NumCallerCallersAnalyzed; + if (!IC2) { + ApplyLastCallBonus = false; + continue; + } + if (IC2.isAlways()) + continue; + + // See if inlining of the original callsite would erase the cost delta of + // this callsite. We subtract off the penalty for the call instruction, + // which we would be deleting. + if (IC2.getCostDelta() <= CandidateCost) { + InliningPreventsSomeOuterInline = true; + TotalSecondaryCost += IC2.getCost(); + NumCallerUsers++; + } + } + + if (!InliningPreventsSomeOuterInline) + return false; + + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. + if (ApplyLastCallBonus) + TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; + + // If InlineDeferralScale is negative, then ignore the cost of primary + // inlining -- IC.getCost() multiplied by the number of callers to Caller. 
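  // (Reviewer illustration, not patch content.) Worked example of the check
  // below, under assumed numbers: IC.getCost() == 100, and Caller has two
  // outer call sites, each with cost 80, that inlining CB would block
  // (TotalSecondaryCost == 160, NumCallerUsers == 2). With the default
  // -inline-deferral-scale=-1 the test is 160 < 100, so we do not defer;
  // with -inline-deferral-scale=4 it is 160 + 100*2 = 360 < 100*4 = 400,
  // so we do defer.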
+ if (InlineDeferralScale < 0) + return TotalSecondaryCost < IC.getCost(); + + int TotalCost = TotalSecondaryCost + IC.getCost() * NumCallerUsers; + int Allowance = IC.getCost() * InlineDeferralScale; + return TotalCost < Allowance; +} + +void llvm::setInlineRemark(CallBase &CB, StringRef Message) { + if (!InlineRemarkAttribute) + return; + + Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message); + CB.addAttribute(AttributeList::FunctionIndex, Attr); +} + +/// Return the cost only if the inliner should attempt to inline at the given +/// CallSite. If we return the cost, we will emit an optimisation remark later +/// using that cost, so we won't do so from this function. Return None if +/// inlining should not be attempted. +Optional +llvm::shouldInline(CallBase &CB, + function_ref GetInlineCost, + OptimizationRemarkEmitter &ORE) { + using namespace ore; + + InlineCost IC = GetInlineCost(CB); + Instruction *Call = &CB; + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); + + if (IC.isAlways()) { + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) + << ", Call: " << CB << "\n"); + return IC; + } + + if (!IC) { + LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) + << ", Call: " << CB << "\n"); + if (IC.isNever()) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) << " not inlined into " + << NV("Caller", Caller) << " because it should never be inlined " + << IC; + }); + } else { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " not inlined into " + << NV("Caller", Caller) << " because too costly to inline " + << IC; + }); + } + setInlineRemark(CB, inlineCostStr(IC)); + return None; + } + + int TotalSecondaryCost = 0; + if (shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) { + LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB + << " Cost = " << IC.getCost() + << ", outer Cost = " << TotalSecondaryCost << '\n'); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", + Call) + << "Not inlining. Cost of inlining " << NV("Callee", Callee) + << " increases the cost of inlining " << NV("Caller", Caller) + << " in other contexts"; + }); + setInlineRemark(CB, "deferred"); + // IC does not bool() to false, so get an InlineCost that will. + // This will not be inspected to make an error message. + return None; + } + + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB + << '\n'); + return IC; +} + +std::string llvm::inlineCostStr(const InlineCost &IC) { + std::stringstream Remark; + Remark << IC; + return Remark.str(); +} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/InliningAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" @@ -215,6 +216,16 @@ "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt UseInliningAdvisor( + "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, + cl::desc("Enable ML policy for inliner. 
Currently trained for -Oz only"), + cl::values(clEnumValN(InliningAdvisorMode::Default, "default", + "Heuristics-based inliner version."), + clEnumValN(InliningAdvisorMode::Development, "development", + "Use development mode (runtime-loadable model)."), + clEnumValN(InliningAdvisorMode::Release, "release", + "Use release mode (AOT-compiled model)."))); + static cl::opt EnableGVNSink( "enable-npm-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); @@ -611,10 +622,8 @@ // This should probably be lowered after performance testing. // FIXME: this comment is cargo culted from the old pass manager, revisit). IP.HintThreshold = 325; - - CGSCCPassManager CGPipeline(DebugLogging); - - CGPipeline.addPass(InlinerPass(IP)); + ModuleInlinerWrapperPass MIWP(IP, DebugLogging); + CGSCCPassManager &CGPipeline = MIWP.getPM(); FunctionPassManager FPM; FPM.addPass(SROA()); @@ -625,7 +634,7 @@ CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); + MPM.addPass(std::move(MIWP)); // Delete anything that is now dead to make sure that we don't instrument // dead code. Instrumentation can end up keeping dead code around and @@ -690,33 +699,28 @@ return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } -ModulePassManager PassBuilder::buildInlinerPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging) { - ModulePassManager MPM(DebugLogging); +ModuleInlinerWrapperPass +PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase, + bool DebugLogging) { + InlineParams IP = getInlineParamsFromOptLevel(Level); + if (Phase == PassBuilder::ThinLTOPhase::PreLink && PGOOpt && + PGOOpt->Action == PGOOptions::SampleUse) + IP.HotCallSiteThreshold = 0; + + ModuleInlinerWrapperPass MIWP(IP, DebugLogging, UseInliningAdvisor, + MaxDevirtIterations); // Now begin the main postorder CGSCC pipeline. // FIXME: The current CGSCC pipeline has its origins in the legacy pass // manager and trying to emulate its precise behavior. Much of this doesn't // make a lot of sense and we should revisit the core CGSCC structure. - CGSCCPassManager MainCGPipeline(DebugLogging); + CGSCCPassManager &MainCGPipeline = MIWP.getPM(); // Note: historically, the PruneEH pass was run first to deduce nounwind and // generally clean up exception handling overhead. It isn't clear this is // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. - // Run the inliner first. The theory is that we are walking bottom-up and so - // the callees have already been fully optimized, and we want to inline them - // into the callers so that our optimizations can reflect that. - // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO - // because it makes profile annotation in the backend inaccurate. - InlineParams IP = getInlineParamsFromOptLevel(Level); - if (Phase == ThinLTOPhase::PreLink && PGOOpt && - PGOOpt->Action == PGOOptions::SampleUse) - IP.HotCallSiteThreshold = 0; - MainCGPipeline.addPass(InlinerPass(IP)); - if (AttributorRun & AttributorRunOption::CGSCC) MainCGPipeline.addPass(AttributorCGSCCPass()); @@ -744,15 +748,7 @@ for (auto &C : CGSCCOptimizerLateEPCallbacks) C(MainCGPipeline, Level); - // We wrap the CGSCC pipeline in a devirtualization repeater. 
This will try - // to detect when we devirtualize indirect calls and iterate the SCC passes - // in that case to try and catch knock-on inlining or function attrs - // opportunities. Then we add it to the module pipeline by walking the SCCs - // in postorder (or bottom-up). - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( - std::move(MainCGPipeline), MaxDevirtIterations))); - return MPM; + return MIWP; } ModulePassManager PassBuilder::buildModuleSimplificationPipeline( @@ -1327,8 +1323,8 @@ // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. // Run the inliner now. - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - InlinerPass(getInlineParamsFromOptLevel(Level)))); + MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level), + DebugLogging)); // Optimize globals again after we ran the inliner. MPM.addPass(GlobalOptPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -27,6 +27,7 @@ MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) +MODULE_ANALYSIS("inlining-advisor", InliningAdvisorAnalysis()) #ifndef MODULE_ALIAS_ANALYSIS #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ @@ -57,6 +58,7 @@ MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false)) MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true)) MODULE_PASS("inferattrs", InferFunctionAttrsPass()) +MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) MODULE_PASS("instrprof", InstrProfiling()) diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -29,6 +30,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InliningAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -77,11 +79,6 @@ STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); -// This weirdly named statistic tracks the number of times that, when attempting -// to inline a function A into B, we analyze the callers of B in order to see -// if those would be more profitable and blocked inline steps. -STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); - /// Flag to disable manual alloca merging. /// /// Merging of allocas was originally done as a stack-size saving technique @@ -93,13 +90,6 @@ DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden); -// An integer used to limit the cost of inline deferral. The default negative -// number tells shouldBeDeferred to only take the secondary cost into account. 
-static cl::opt - InlineDeferralScale("inline-deferral-scale", - cl::desc("Scale to limit the cost of inline deferral"), - cl::init(-1), cl::Hidden); - namespace { enum class InlinerFunctionImportStatsOpts { @@ -119,14 +109,6 @@ "printing of statistics for each inlined function")), cl::Hidden, cl::desc("Enable inliner stats for imported functions")); -/// Flag to add inline messages as callsite attributes 'inline-remark'. -static cl::opt - InlineRemarkAttribute("inline-remark-attribute", cl::init(false), - cl::Hidden, - cl::desc("Enable adding inline-remark attribute to" - " callsites processed by inliner but decided" - " to be not inlined")); - LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) @@ -305,197 +287,6 @@ return IR; // success } -/// Return true if inlining of CB can block the caller from being -/// inlined which is proved to be more beneficial. \p IC is the -/// estimated inline cost associated with callsite \p CB. -/// \p TotalSecondaryCost will be set to the estimated cost of inlining the -/// caller if \p CB is suppressed for inlining. -static bool -shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, - function_ref GetInlineCost) { - // For now we only handle local or inline functions. - if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) - return false; - // If the cost of inlining CB is non-positive, it is not going to prevent the - // caller from being inlined into its callers and hence we don't need to - // defer. - if (IC.getCost() <= 0) - return false; - // Try to detect the case where the current inlining candidate caller (call - // it B) is a static or linkonce-ODR function and is an inlining candidate - // elsewhere, and the current candidate callee (call it C) is large enough - // that inlining it into B would make B too big to inline later. In these - // circumstances it may be best not to inline C into B, but to inline B into - // its callers. - // - // This only applies to static and linkonce-ODR functions because those are - // expected to be available for inlining in the translation units where they - // are used. Thus we will always have the opportunity to make local inlining - // decisions. Importantly the linkonce-ODR linkage covers inline functions - // and templates in C++. - // - // FIXME: All of this logic should be sunk into getInlineCost. It relies on - // the internal implementation of the inline cost metrics rather than - // treating them as truly abstract units etc. - TotalSecondaryCost = 0; - // The candidate cost to be imposed upon the current function. - int CandidateCost = IC.getCost() - 1; - // If the caller has local linkage and can be inlined to all its callers, we - // can apply a huge negative bonus to TotalSecondaryCost. - bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse(); - // This bool tracks what happens if we DO inline C into B. - bool InliningPreventsSomeOuterInline = false; - unsigned NumCallerUsers = 0; - for (User *U : Caller->users()) { - CallBase *CS2 = dyn_cast(U); - - // If this isn't a call to Caller (it could be some other sort - // of reference) skip it. Such references will prevent the caller - // from being removed. 
- if (!CS2 || CS2->getCalledFunction() != Caller) { - ApplyLastCallBonus = false; - continue; - } - - InlineCost IC2 = GetInlineCost(*CS2); - ++NumCallerCallersAnalyzed; - if (!IC2) { - ApplyLastCallBonus = false; - continue; - } - if (IC2.isAlways()) - continue; - - // See if inlining of the original callsite would erase the cost delta of - // this callsite. We subtract off the penalty for the call instruction, - // which we would be deleting. - if (IC2.getCostDelta() <= CandidateCost) { - InliningPreventsSomeOuterInline = true; - TotalSecondaryCost += IC2.getCost(); - NumCallerUsers++; - } - } - - if (!InliningPreventsSomeOuterInline) - return false; - - // If all outer calls to Caller would get inlined, the cost for the last - // one is set very low by getInlineCost, in anticipation that Caller will - // be removed entirely. We did not account for this above unless there - // is only one caller of Caller. - if (ApplyLastCallBonus) - TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; - - // If InlineDeferralScale is negative, then ignore the cost of primary - // inlining -- IC.getCost() multiplied by the number of callers to Caller. - if (InlineDeferralScale < 0) - return TotalSecondaryCost < IC.getCost(); - - int TotalCost = TotalSecondaryCost + IC.getCost() * NumCallerUsers; - int Allowance = IC.getCost() * InlineDeferralScale; - return TotalCost < Allowance; -} - -static std::basic_ostream &operator<<(std::basic_ostream &R, - const ore::NV &Arg) { - return R << Arg.Val; -} - -template -RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { - using namespace ore; - if (IC.isAlways()) { - R << "(cost=always)"; - } else if (IC.isNever()) { - R << "(cost=never)"; - } else { - R << "(cost=" << ore::NV("Cost", IC.getCost()) - << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; - } - if (const char *Reason = IC.getReason()) - R << ": " << ore::NV("Reason", Reason); - return R; -} - -static std::string inlineCostStr(const InlineCost &IC) { - std::stringstream Remark; - Remark << IC; - return Remark.str(); -} - -static void setInlineRemark(CallBase &CB, StringRef Message) { - if (!InlineRemarkAttribute) - return; - - Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message); - CB.addAttribute(AttributeList::FunctionIndex, Attr); -} - -/// Return the cost only if the inliner should attempt to inline at the given -/// CallSite. If we return the cost, we will emit an optimisation remark later -/// using that cost, so we won't do so from this function. Return None if -/// inlining should not be attempted. 
-static Optional -shouldInline(CallBase &CB, function_ref GetInlineCost, - OptimizationRemarkEmitter &ORE) { - using namespace ore; - - InlineCost IC = GetInlineCost(CB); - Instruction *Call = &CB; - Function *Callee = CB.getCalledFunction(); - Function *Caller = CB.getCaller(); - - if (IC.isAlways()) { - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) - << ", Call: " << CB << "\n"); - return IC; - } - - if (!IC) { - LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) - << ", Call: " << CB << "\n"); - if (IC.isNever()) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) - << NV("Callee", Callee) << " not inlined into " - << NV("Caller", Caller) << " because it should never be inlined " - << IC; - }); - } else { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) - << NV("Callee", Callee) << " not inlined into " - << NV("Caller", Caller) << " because too costly to inline " - << IC; - }); - } - setInlineRemark(CB, inlineCostStr(IC)); - return None; - } - - int TotalSecondaryCost = 0; - if (shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) { - LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB - << " Cost = " << IC.getCost() - << ", outer Cost = " << TotalSecondaryCost << '\n'); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", - Call) - << "Not inlining. Cost of inlining " << NV("Callee", Callee) - << " increases the cost of inlining " << NV("Caller", Caller) - << " in other contexts"; - }); - setInlineRemark(CB, "deferred"); - // IC does not bool() to false, so get an InlineCost that will. - // This will not be inspected to make an error message. - return None; - } - - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB - << '\n'); - return IC; -} - /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool inlineHistoryIncludes( @@ -523,18 +314,6 @@ return inlineCalls(SCC); } -static void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc, - const BasicBlock *Block, const Function &Callee, - const Function &Caller, const InlineCost &IC) { - ORE.emit([&]() { - bool AlwaysInline = IC.isAlways(); - StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; - return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block) - << ore::NV("Callee", &Callee) << " inlined into " - << ore::NV("Caller", &Caller) << " with " << IC; - }); -} - static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function GetAssumptionCache, @@ -886,6 +665,22 @@ } } +InliningAdvisor &InlinerPass::getAdvisor(const ModuleAnalysisManager &MAM, + Module &M) { + auto *IAA = MAM.getCachedResult(M); + if (!IAA) { + // It should still be possible to run the inliner as a stand-alone SCC pass, + // for test scenarios. In that case, we default to the + // DefaultInliningAdvisor, which doesn't need to keep state between SCC pass + // runs. It also uses just the default InlineParams. 
+ OwnedDefaultAdvisor.emplace(getInlineParams()); + return OwnedDefaultAdvisor.getValue(); + } + assert(IAA->getAdvisor() && "Expected a present InliningAdvisorAnalysis also have an " + "InliningAdvisor initialized"); + return *IAA->getAdvisor(); +} + PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { @@ -897,6 +692,11 @@ Module &M = *InitialC.begin()->getFunction().getParent(); ProfileSummaryInfo *PSI = MAM.getCachedResult(M); + InliningAdvisor &Advisor = getAdvisor(MAM, M); + Advisor.OnPassEntry(); + + auto AdvisorOnExit = make_scope_exit([&] { Advisor.OnPassExit(); }); + if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { ImportedFunctionsStats = @@ -1011,29 +811,10 @@ FunctionAnalysisManager &FAM = AM.getResult(*C, CG).getManager(); - // Get the remarks emission analysis for the caller. - auto &ORE = FAM.getResult(F); - std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { - return FAM.getResult(F); - }; - auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return FAM.getResult(F); - }; - - auto GetInlineCost = [&](CallBase &CB) { - Function &Callee = *CB.getCalledFunction(); - auto &CalleeTTI = FAM.getResult(Callee); - bool RemarksEnabled = - Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( - DEBUG_TYPE); - return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, - GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); - }; // Now process as many calls as we have within this caller in the sequnece. // We bail out as soon as the caller has to change so we can update the @@ -1065,101 +846,88 @@ continue; } - auto OIC = shouldInline(*CB, GetInlineCost, ORE); + auto PendingRecord = Advisor.getInliningAdvice(*CB, FAM); // Check whether we want to inline this callsite. - if (!OIC) + if (!PendingRecord->isInliningRecommended()) { + PendingRecord->recordUnattemptedInlining(); continue; - auto DoInline = [&]() -> InlineResult { - // Setup the data structure used to plumb customization into the - // `InlineFunction` routine. - InlineFunctionInfo IFI( - /*cg=*/nullptr, &GetAssumptionCache, PSI, - &FAM.getResult(*(CB->getCaller())), - &FAM.getResult(Callee)); - - InlineResult IR = InlineFunction(*CB, IFI); - if (!IR.isSuccess()) - return IR; - - DidInline = true; - InlinedCallees.insert(&Callee); - ++NumInlined; + } - // Add any new callsites to defined functions to the worklist. - if (!IFI.InlinedCallSites.empty()) { - int NewHistoryID = InlineHistory.size(); - InlineHistory.push_back({&Callee, InlineHistoryID}); - - for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { - Function *NewCallee = ICB->getCalledFunction(); - if (!NewCallee) { - // Try to promote an indirect (virtual) call without waiting for - // the post-inline cleanup and the next DevirtSCCRepeatedPass - // iteration because the next iteration may not happen and we may - // miss inlining it. - if (tryPromoteCall(*ICB)) - NewCallee = ICB->getCalledFunction(); - } - if (NewCallee) - if (!NewCallee->isDeclaration()) - Calls.push_back({ICB, NewHistoryID}); - } - } + // Setup the data structure used to plumb customization into the + // `InlineFunction` routine. 
+ InlineFunctionInfo IFI( + /*cg=*/nullptr, &GetAssumptionCache, PSI, + &FAM.getResult(*(CB->getCaller())), + &FAM.getResult(Callee)); + + InlineResult IR = InlineFunction(*CB, IFI); + if (!IR.isSuccess()) { + PendingRecord->recordUnsuccessfulInlining(IR); + continue; + } - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) - ImportedFunctionsStats->recordInline(F, Callee); - - // Merge the attributes based on the inlining. - AttributeFuncs::mergeAttributesForInlining(F, Callee); - - // For local functions, check whether this makes the callee trivially - // dead. In that case, we can drop the body of the function eagerly - // which may reduce the number of callers of other functions to one, - // changing inline cost thresholds. - if (Callee.hasLocalLinkage()) { - // To check this we also need to nuke any dead constant uses (perhaps - // made dead by this operation on other functions). - Callee.removeDeadConstantUsers(); - if (Callee.use_empty() && !CG.isLibFunction(Callee)) { - Calls.erase( - std::remove_if(Calls.begin() + I + 1, Calls.end(), - [&](const std::pair &Call) { - return Call.first->getCaller() == &Callee; - }), - Calls.end()); - // Clear the body and queue the function itself for deletion when we - // finish inlining and call graph updates. - // Note that after this point, it is an error to do anything other - // than use the callee's address or delete it. - Callee.dropAllReferences(); - assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && - "Cannot put cause a function to become dead twice!"); - DeadFunctions.push_back(&Callee); + DidInline = true; + InlinedCallees.insert(&Callee); + ++NumInlined; + + // Add any new callsites to defined functions to the worklist. + if (!IFI.InlinedCallSites.empty()) { + int NewHistoryID = InlineHistory.size(); + InlineHistory.push_back({&Callee, InlineHistoryID}); + + for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { + Function *NewCallee = ICB->getCalledFunction(); + if (!NewCallee) { + // Try to promote an indirect (virtual) call without waiting for + // the post-inline cleanup and the next DevirtSCCRepeatedPass + // iteration because the next iteration may not happen and we may + // miss inlining it. + if (tryPromoteCall(*ICB)) + NewCallee = ICB->getCalledFunction(); } + if (NewCallee) + if (!NewCallee->isDeclaration()) + Calls.push_back({ICB, NewHistoryID}); } - return IR; - }; - // Capture the context of CB before inlining, as a successful inlining may - // change that context, and we want to report success or failure in the - // original context. - auto DLoc = CB->getDebugLoc(); - auto *Block = CB->getParent(); - - auto Outcome = DoInline(); - if (!Outcome.isSuccess()) { - using namespace ore; - setInlineRemark(*CB, std::string(Outcome.getFailureReason()) + "; " + - inlineCostStr(*OIC)); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) - << NV("Callee", &Callee) << " will not be inlined into " - << NV("Caller", &F) << ": " - << NV("Reason", Outcome.getFailureReason()); - }); - continue; } - emitInlinedInto(ORE, DLoc, Block, Callee, F, *OIC); + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats->recordInline(F, Callee); + + // Merge the attributes based on the inlining. + AttributeFuncs::mergeAttributesForInlining(F, Callee); + + // For local functions, check whether this makes the callee trivially + // dead. 
In that case, we can drop the body of the function eagerly + // which may reduce the number of callers of other functions to one, + // changing inline cost thresholds. + bool CalleeWasDeleted = false; + if (Callee.hasLocalLinkage()) { + // To check this we also need to nuke any dead constant uses (perhaps + // made dead by this operation on other functions). + Callee.removeDeadConstantUsers(); + if (Callee.use_empty() && !CG.isLibFunction(Callee)) { + Calls.erase( + std::remove_if(Calls.begin() + I + 1, Calls.end(), + [&](const std::pair &Call) { + return Call.first->getCaller() == &Callee; + }), + Calls.end()); + // Clear the body and queue the function itself for deletion when we + // finish inlining and call graph updates. + // Note that after this point, it is an error to do anything other + // than use the callee's address or delete it. + Callee.dropAllReferences(); + assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && + "Cannot put cause a function to become dead twice!"); + DeadFunctions.push_back(&Callee); + CalleeWasDeleted = true; + } + } + if (CalleeWasDeleted) + PendingRecord->recordInliningWithCalleeDeleted(); + else + PendingRecord->recordInlining(); } // Back the call index up by one to put us in a good position to go around @@ -1235,7 +1003,7 @@ // sets. for (Function *DeadF : DeadFunctions) { // Get the necessary information out of the call graph and nuke the - // function there. Also, cclear out any cached analyses. + // function there. Also, clear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FunctionAnalysisManager &FAM = AM.getResult(DeadC, CG).getManager(); @@ -1250,7 +1018,15 @@ UR.InvalidatedRefSCCs.insert(&DeadRC); // And delete the actual function from the module. - M.getFunctionList().erase(DeadF); + // The Advisor may use Function pointers to efficiently index various + // internal maps, e.g. for memoization. Function cleanup passes like + // argument promotion create new functions. It is possible for a new + // function to be allocated at the address of a deleted function. We could + // index using names, but that's inefficient. Alternatively, we let the + // Advisor free the functions when it sees fit. + DeadF->getBasicBlockList().clear(); + M.getFunctionList().remove(DeadF); + ++NumDeleted; } @@ -1263,3 +1039,44 @@ PA.preserve(); return PA; } + +ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, + bool Debugging, + InliningAdvisorMode Mode, + unsigned MaxDevirtIterations) + : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations), + Debugging(Debugging), PM(Debugging) { + // Run the inliner first. The theory is that we are walking bottom-up and so + // the callees have already been fully optimized, and we want to inline them + // into the callers so that our optimizations can reflect that. + // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO + // because it makes profile annotation in the backend inaccurate. + PM.addPass(InlinerPass()); +} + +PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, + ModuleAnalysisManager &MAM) { + auto &IAA = MAM.getResult(M); + if (!IAA.tryCreate(Params, Mode)) { + M.getContext().emitError( + "Could not setup Inlining Advisor for the requested " + "mode and/or options"); + return PreservedAnalyses::all(); + } + + ModulePassManager MPM(Debugging); + // If devirtualization iterations are requested, we wrap the CGSCC pipeline in + // a devirtualization repeater. 
This will try to detect when we devirtualize
+  // indirect calls and iterate the SCC passes in that case to try and catch
+  // knock-on inlining or function attrs opportunities. Then we add it to the
+  // module pipeline by walking the SCCs in postorder (or bottom-up).
+  if (MaxDevirtIterations > 0)
+    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+        createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
+  else
+    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM)));
+  auto Ret = MPM.run(M, MAM);
+
+  IAA.clear();
+  return Ret;
+}
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -132,7 +132,8 @@
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting llvm::Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -73,7 +73,14 @@
 ; CHECK-O2-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
 ; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
-; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass>
+; CHECK-O2-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O2-NEXT: Running analysis: InliningAdvisorAnalysis
+; CHECK-O2-NEXT: Starting llvm::Module pass manager run.
+; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
+; CHECK-O2-NEXT: Starting CGSCC pass manager run.
+; CHECK-O2-NEXT: Running pass: InlinerPass
+; CHECK-O2-NEXT: Finished CGSCC pass manager run.
+; CHECK-O2-NEXT: Finished llvm::Module pass manager run.
 ; CHECK-O2-NEXT: Running pass: GlobalOptPass
 ; CHECK-O2-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -97,7 +97,8 @@
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting llvm::Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -70,7 +70,8 @@
 ; CHECK-O-NEXT: Running analysis: GlobalsAA
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -78,7 +78,8 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
 ; CHECK-O-NEXT: Running analysis: GlobalsAA
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -65,6 +65,9 @@
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
+; CHECK-O123-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O123-NEXT: Running analysis: InliningAdvisorAnalysis
+; CHECK-O123-NEXT: Starting llvm::Module pass manager run.
 ; CHECK-O123-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PassManager<{{.*}}LazyCallGraph::SCC
 ; CHECK-O123-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O123-NEXT: Running analysis: LazyCallGraphAnalysis
@@ -75,6 +78,7 @@
 ; CHECK-O123-NEXT: Running pass: InlinerPass on (foo)
 ; CHECK-O123-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O123-NEXT: Finished CGSCC pass manager run.
+; CHECK-O123-NEXT: Finished {{.*}}Module pass manager run.
 ; CHECK-O123-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O-NEXT: Running pass: PGOInstrumentationUse
 ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
@@ -97,7 +101,9 @@
 ; CHECK-O-NEXT: Running analysis: GlobalsAA
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-Os-NEXT: Running analysis: InliningAdvisorAnalysis
+; CHECK-Oz-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -78,7 +78,8 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
 ; CHECK-O-NEXT: Running analysis: GlobalsAA
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/scc-deleted-printer.ll b/llvm/test/Other/scc-deleted-printer.ll
--- a/llvm/test/Other/scc-deleted-printer.ll
+++ b/llvm/test/Other/scc-deleted-printer.ll
@@ -3,6 +3,11 @@
 ; RUN: opt < %s 2>&1 -disable-output \
 ; RUN: -passes=inline -print-before-all -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
 
+; RUN: opt < %s 2>&1 -disable-output \
+; RUN: -passes=inliner-wrapper -print-before-all -print-after-all | FileCheck %s -check-prefix=INL
+; RUN: opt < %s 2>&1 -disable-output \
+; RUN: -passes=inliner-wrapper -print-before-all -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
+
 ; INL: IR Dump Before {{InlinerPass .*scc: .tester, foo}}
 ; INL-NOT: IR Dump After {{InlinerPass}}
 ; INL: IR Dump Before {{InlinerPass .*scc: .tester}}
diff --git a/llvm/test/Other/scc-pass-printer.ll b/llvm/test/Other/scc-pass-printer.ll
--- a/llvm/test/Other/scc-pass-printer.ll
+++ b/llvm/test/Other/scc-pass-printer.ll
@@ -3,9 +3,13 @@
 ; RUN: opt < %s 2>&1 -disable-output \
 ; RUN: -passes=inline -print-after-all | FileCheck %s -check-prefix=INL
 ; RUN: opt < %s 2>&1 -disable-output \
+; RUN: -passes=inliner-wrapper -print-after-all | FileCheck %s -check-prefix=INL
+; RUN: opt < %s 2>&1 -disable-output \
 ; RUN: -inline -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
 ; RUN: opt < %s 2>&1 -disable-output \
 ; RUN: -passes=inline -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
+; RUN: opt < %s 2>&1 -disable-output \
+; RUN: -passes=inliner-wrapper -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
 
 ; INL: IR Dump After {{Function Integration/Inlining|InlinerPass .*scc: .bar, foo}}
 ; INL: define void @bar()
diff --git a/llvm/test/Transforms/Inline/inline_stats.ll b/llvm/test/Transforms/Inline/inline_stats.ll
--- a/llvm/test/Transforms/Inline/inline_stats.ll
+++ b/llvm/test/Transforms/Inline/inline_stats.ll
@@ -6,6 +6,9 @@
 ; RUN: opt -S -passes=inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK
 ; RUN: opt -S -passes=inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK
 
+; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK
+; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK
+
 ; CHECK: ------- Dumping inliner stats for [] -------
 ; CHECK-BASIC-NOT: -- List of inlined functions:
 ; CHECK-BASIC-NOT: -- Inlined not imported function
diff --git a/llvm/test/Transforms/Inline/inlining-advisor-default.ll b/llvm/test/Transforms/Inline/inlining-advisor-default.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inlining-advisor-default.ll
@@ -0,0 +1,9 @@
+; Check that, in the absence of dependencies, we emit an error message when
+; trying to use ML-driven inlining.
+;
+; RUN: not opt -passes=scc-oz-module-inliner -enable-ml-inliner=development -S < %s 2>&1 | FileCheck %s
+; RUN: not opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -S < %s 2>&1 | FileCheck %s
+
+declare i64 @f1()
+
+; CHECK: Could not setup Inlining Advisor for the requested mode and/or options
\ No newline at end of file
diff --git a/llvm/test/Transforms/Inline/internal-scc-members.ll b/llvm/test/Transforms/Inline/internal-scc-members.ll
--- a/llvm/test/Transforms/Inline/internal-scc-members.ll
+++ b/llvm/test/Transforms/Inline/internal-scc-members.ll
@@ -3,6 +3,7 @@
 ;
 ; RUN: opt < %s -S -inline | FileCheck %s
 ; RUN: opt < %s -S -passes=inline | FileCheck %s
+; RUN: opt < %s -S -passes=inliner-wrapper | FileCheck %s
 
 ; CHECK-LABEL: define internal void @test1_scc0()
 ; CHECK-NOT: call
diff --git a/llvm/test/Transforms/Inline/module-inlining.ll b/llvm/test/Transforms/Inline/module-inlining.ll
--- a/llvm/test/Transforms/Inline/module-inlining.ll
+++ b/llvm/test/Transforms/Inline/module-inlining.ll
@@ -6,6 +6,7 @@
 ; a 'ret 10'
 ;
 ; RUN: opt -passes=inline -S < %s | FileCheck %s --check-prefix=INLINE --check-prefix=CHECK
+; RUN: opt -passes=inliner-wrapper -S < %s | FileCheck %s --check-prefix=INLINE --check-prefix=CHECK
 ; RUN: opt -passes=scc-oz-module-inliner -S < %s | FileCheck %s --check-prefix=MODULE --check-prefix=CHECK
 
 define void @modify_value({i32, float}* %v) {
diff --git a/llvm/test/Transforms/Inline/monster_scc.ll b/llvm/test/Transforms/Inline/monster_scc.ll
--- a/llvm/test/Transforms/Inline/monster_scc.ll
+++ b/llvm/test/Transforms/Inline/monster_scc.ll
@@ -41,6 +41,7 @@
 ;
 ; RUN: opt -S < %s -inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,OLD
 ; RUN: opt -S < %s -passes=inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW
+; RUN: opt -S < %s -passes=inliner-wrapper -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll b/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll
@@ -5,6 +5,10 @@
 ; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=1 2>&1 | \
 ; RUN: FileCheck -allow-empty -check-prefix=THRESHOLD %s
 
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-output=%t -pass-remarks=inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=1 2>&1 | \
+; RUN: FileCheck -allow-empty -check-prefix=THRESHOLD %s
+
 ; Check that when any threshold is specified we ignore remarks with no
 ; hotness -- these are blocks that have not been executed during training.
 
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll b/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll
@@ -8,6 +8,11 @@
 ; RUN: -pass-remarks-with-hotness 2>&1 | FileCheck %s
 ; RUN: cat %t | FileCheck -check-prefix=YAML %s
 
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-output=%t -pass-remarks=inline \
+; RUN: -pass-remarks-missed=inline -pass-remarks-analysis=inline \
+; RUN: -pass-remarks-with-hotness 2>&1 | FileCheck %s
+; RUN: cat %t | FileCheck -check-prefix=YAML %s
+
 ; Check the YAML file for inliner-generated passed and analysis remarks. This
 ; is the input:
 
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
@@ -4,6 +4,9 @@
 ; RUN: opt < %s -passes=inline -pass-remarks=inline -pass-remarks-missed=inline \
 ; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \
 ; RUN: | FileCheck %s
+; RUN: opt < %s -passes=inliner-wrapper -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \
+; RUN: | FileCheck %s
 
 ; CHECK: foo inlined into bar with (cost=always): always inline attribute (hotness: 30)
 ; CHECK: foz not inlined into bar because it should never be inlined (cost=never): noinline function attribute (hotness: 30)
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll b/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll
@@ -34,6 +34,25 @@
 ; RUN: opt < %s -S -passes=inline \
 ; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \
 ; RUN: -pass-remarks-output=%t.threshold
+
+; Inliner - Module Wrapper
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-missed=inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 15 \
+; RUN: -pass-remarks-output=%t 2>&1 | FileCheck %s
+; RUN: cat %t | FileCheck -check-prefix=YAML %s
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-with-hotness -pass-remarks-output=%t
+; RUN: cat %t | FileCheck -check-prefix=YAML %s
+;
+; Verify that remarks that don't meet the hotness threshold are not output.
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-missed=inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \
+; RUN: -pass-remarks-output=%t.threshold 2>&1 | \
+; RUN: FileCheck -check-prefix=THRESHOLD %s
+; RUN: test ! -s %t.threshold
+; RUN: opt < %s -S -passes=inliner-wrapper \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \
+; RUN: -pass-remarks-output=%t.threshold
+
 ; The remarks output file should be empty.
 ; RUN: test ! -s %t.threshold
 
diff --git a/llvm/test/Transforms/Inline/optimization-remarks.ll b/llvm/test/Transforms/Inline/optimization-remarks.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks.ll
@@ -12,6 +12,13 @@
 ; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 | \
 ; RUN: FileCheck -check-prefix=CHECK -check-prefix=HOTNESS %s
 
+; RUN: opt < %s -passes=inliner-wrapper -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN: -pass-remarks-analysis=inline -S 2>&1 | \
+; RUN: FileCheck -check-prefix=CHECK -check-prefix=NO_HOTNESS %s
+; RUN: opt < %s -passes=inliner-wrapper -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 | \
+; RUN: FileCheck -check-prefix=CHECK -check-prefix=HOTNESS %s
+
 ; HOTNESS: fox will not be inlined into bar because its definition is unavailable
 ; NO_HOTNESS-NOT: fox will not be inlined into bar because its definition is unavailable
 ; CHECK: foo inlined into bar with (cost=always): always inline attribute
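
Note for reviewers: the `inliner-wrapper` pipeline exercised by the RUN lines above is the CGSCC inliner wrapped into a module pass, built the same way as the pipeline-construction hunk at the top of this section. Below is a minimal standalone sketch of that wrapping, not the patch's actual ModuleInlinerWrapperPass implementation; it reuses only the existing createModuleToPostOrderCGSCCPassAdaptor, createDevirtSCCRepeatedPass, and InlinerPass APIs, and the helper name buildWrappedInlinerPipeline is hypothetical.

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/Inliner.h"

using namespace llvm;

// Hypothetical helper, for illustration only: wrap a CGSCC pipeline containing
// the inliner into a module pass manager, optionally repeating the SCC passes
// when devirtualization is detected (mirroring the hunk above).
static ModulePassManager
buildWrappedInlinerPipeline(unsigned MaxDevirtIterations) {
  CGSCCPassManager PM;
  PM.addPass(InlinerPass()); // the CGSCC inliner itself

  ModulePassManager MPM;
  if (MaxDevirtIterations > 0)
    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
        createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
  else
    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM)));
  return MPM;
}

This wrapping is why the updated FileCheck expectations above now see ModuleInlinerWrapperPass followed by a nested module and CGSCC pass-manager run around InlinerPass.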