Index: llvm/CMakeLists.txt =================================================================== --- llvm/CMakeLists.txt +++ llvm/CMakeLists.txt @@ -896,6 +896,34 @@ llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2") endif() +set(LLVM_USE_ML_POLICY "" CACHE STRING + "Opt in to using machine learned optimization policies. '' (default) to opt out. Use 'Rel' for using pre-trained policy, 'Dev' for building LLVM for training purposes.") + +string(TOUPPER "${LLVM_USE_ML_POLICY}" LLVM_USE_ML_POLICY) + +if (LLVM_USE_ML_POLICY AND + NOT LLVM_USE_ML_POLICY MATCHES "^(REL|DEV|)$") + message(FATAL_ERROR "Invalid value for LLVM_USE_ML_POLICY: ${LLVM_USE_ML_POLICY}") +endif() + +if (LLVM_USE_ML_POLICY STREQUAL "DEV") + add_definitions("-DLLVM_USE_ML_POLICY_DEV") + set(LLVM_TF_C_LIB "" CACHE PATH "Path to TensorFlow C library install") + include_directories(${LLVM_TF_C_LIB}/include) + find_library(tensorflow_c_api tensorflow PATHS ${LLVM_TF_C_LIB}/lib) +endif () + +if (LLVM_USE_ML_POLICY STREQUAL "REL") + add_definitions("-DLLVM_USE_ML_POLICY_REL") + set(LLVM_TF_AOT_COMPILER "saved_model_cli" CACHE PATH + "Path/command line for TensorFlow AOT compiler") + set(LLVM_TF_AOT_RUNTIME "" CACHE PATH "Path to TensorFlow pip install dir") + include_directories(${LLVM_TF_AOT_RUNTIME}/include) + add_subdirectory(${LLVM_TF_AOT_RUNTIME}/xla_aot_runtime_src + ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/tf_runtime) + add_llvm_library(LLVMtf_aot_runtime $) +endif () + # Put this before tblgen. Else we have a circular dependence. 
add_subdirectory(lib/Demangle) add_subdirectory(lib/Support) Index: llvm/cmake/modules/TensorFlowCompile.cmake =================================================================== --- /dev/null +++ llvm/cmake/modules/TensorFlowCompile.cmake @@ -0,0 +1,18 @@ +function(tfcompile model tag_set signature_def_key fname cpp_class) + if (IS_ABSOLUTE ${model}) + set(LLVM_ML_MODELS_ABSOLUTE ${model}) + else() + set(LLVM_ML_MODELS_ABSOLUTE + ${CMAKE_CURRENT_SOURCE_DIR}/${model}) + endif() + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${fname}.o ${CMAKE_CURRENT_BINARY_DIR}/${fname}.h + COMMAND "XLA_FLAGS=\"--xla_cpu_multi_thread_eigen=false\"" ${LLVM_TF_AOT_COMPILER} aot_compile_cpu + --dir ${LLVM_ML_MODELS_ABSOLUTE} + --tag_set ${tag_set} + --signature_def_key ${signature_def_key} + --output_prefix ${CMAKE_CURRENT_BINARY_DIR}/${fname} + --cpp_class ${cpp_class} + ) + + set(TF_AOT_OBJ ${CMAKE_CURRENT_BINARY_DIR}/${fname}.o PARENT_SCOPE) +endfunction() \ No newline at end of file Index: llvm/include/llvm/Analysis/InlineCost.h =================================================================== --- llvm/include/llvm/Analysis/InlineCost.h +++ llvm/include/llvm/Analysis/InlineCost.h @@ -236,6 +236,16 @@ function_ref GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); +Optional getTrivialInliningDecision( + CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, + function_ref GetTLI); + +Optional getInliningCostEstimate( + CallBase &Call, TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, + Optional> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); /// Minimal filter to detect invalid constructs for inlining. 
InlineResult isInlineViable(Function &Callee); } // namespace llvm Index: llvm/include/llvm/Analysis/ML/IRToNativeSizeLearning.h =================================================================== --- /dev/null +++ llvm/include/llvm/Analysis/ML/IRToNativeSizeLearning.h @@ -0,0 +1,50 @@ +#ifndef LLVM_ML_IRTONATIVESIZELEARNING_H_ +#define LLVM_ML_IRTONATIVESIZELEARNING_H_ + +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class IRToNativeSizeLearning { +public: + struct FunctionFeatures { + static const size_t FeatureCount = 214; + int32_t InitialSize = 0; + int32_t Blocks = 0; + int32_t Loops = 0; + int32_t Calls = 0; + bool IsLocal = false; + bool IsLinkOnceODR = false; + bool IsLinkOnce = false; + int32_t MaxLoopDepth = 0; + int32_t MaxDomTreeLevel = 0; + std::vector InstructionHistogram; + std::vector InstructionPairHistogram; + void FillTensor(int32_t *Ptr) const; + }; + + IRToNativeSizeLearning() = default; + + static FunctionFeatures getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM); + +private: + static unsigned getMaxInstructionID(); +}; + +raw_ostream & +operator<<(raw_ostream &Out, + const IRToNativeSizeLearning::FunctionFeatures &Features); + +} // namespace llvm +#endif // LLVM_ML_IRTONATIVESIZELEARNING_H_ Index: llvm/include/llvm/Analysis/ML/InliningAdvisor.h =================================================================== --- /dev/null +++ llvm/include/llvm/Analysis/ML/InliningAdvisor.h @@ -0,0 +1,87 @@ +//===- InlinerML.h - ML infrastructure for inliner --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_ML_INLINERML_H_ +#define LLVM_ML_INLINERML_H_ + +#include +#include + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class CallBase; +class Function; +class Module; +class PreservedAnalyses; + +struct PendingInliningRecordImpl; +class PendingInliningRecord { +public: + PendingInliningRecord(); + PendingInliningRecord(PendingInliningRecord &&); + PendingInliningRecord &operator=(PendingInliningRecord &&); + + PendingInliningRecord(const PendingInliningRecord &) = delete; + + void recordInlining(bool CalleeWasDeleted, bool SiteWasInlined); + ~PendingInliningRecord(); + operator bool() const { return !!Impl; } + +private: + friend class InliningAdvisorImpl; + PendingInliningRecord(PendingInliningRecordImpl *Impl); + std::unique_ptr Impl; +}; + +class InliningAdvisorImpl; +class InliningAdvisor { +public: + InliningAdvisor(Module &M, ModuleAnalysisManager &MAM); + InliningAdvisor(InliningAdvisor &&); + + ~InliningAdvisor(); + + PendingInliningRecord shouldInline(CallBase *CB, + bool &AlternativeRecommendation, + bool Mandatory, int CostEstimate); + + bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &) { + // InliningAdvisor must be preserved across analysis invalidations. 
+ return false; + } + + void OnPassEntry(); + void OnPassExit(); + void OnSuccessfulInlining(const Function *F); + + void OnAllInliningCompleted(); + void OnFunctionDeleted(Function *F); + +private: + std::unique_ptr Impl; +}; + +class InliningAdvisorAnalysis + : public AnalysisInfoMixin { +public: + using Result = InliningAdvisor; + Result run(Module &M, ModuleAnalysisManager &MAM) { return Result(M, MAM); } + static AnalysisKey Key; +}; + +class InliningAdvisorCleanupPass + : public PassInfoMixin { +public: + InliningAdvisorCleanupPass() = default; + PreservedAnalyses run(Module &, ModuleAnalysisManager &); +}; + +} // namespace llvm +#endif // LLVM_ML_INLINERML_H_ Index: llvm/include/llvm/Passes/PassBuilder.h =================================================================== --- llvm/include/llvm/Passes/PassBuilder.h +++ llvm/include/llvm/Passes/PassBuilder.h @@ -344,6 +344,12 @@ ThinLTOPhase Phase, bool DebugLogging = false); + /// Construct the module pipeline that performs inlining as well as + /// the inlining-driven cleanups. + ModulePassManager buildInlinerPipeline(OptimizationLevel Level, + ThinLTOPhase Phase, + bool DebugLogging = false); + /// Construct the core LLVM module optimization pipeline. /// /// This pipeline focuses on optimizing the execution speed of the IR. It Index: llvm/include/llvm/Transforms/IPO/Inliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/Inliner.h +++ llvm/include/llvm/Transforms/IPO/Inliner.h @@ -94,12 +94,14 @@ /// passes be composed to achieve the same end result. 
class InlinerPass : public PassInfoMixin { public: - InlinerPass(InlineParams Params = getInlineParams()) - : Params(std::move(Params)) {} + InlinerPass(InlineParams Params = getInlineParams(), + bool MandatoryOnly = false) + : Params(std::move(Params)), MandatoryOnly(MandatoryOnly) {} ~InlinerPass(); InlinerPass(InlinerPass &&Arg) : Params(std::move(Arg.Params)), - ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)) {} + ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)), + MandatoryOnly(Arg.MandatoryOnly) {} PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); @@ -107,6 +109,7 @@ private: InlineParams Params; std::unique_ptr ImportedFunctionsStats; + const bool MandatoryOnly; }; } // end namespace llvm Index: llvm/lib/Analysis/CMakeLists.txt =================================================================== --- llvm/lib/Analysis/CMakeLists.txt +++ llvm/lib/Analysis/CMakeLists.txt @@ -1,3 +1,8 @@ +if (NOT LLVM_USE_ML_POLICY STREQUAL "") + add_subdirectory(ML) + set(EXTRADEPS LLVMMLPolicies) +endif() + add_llvm_component_library(LLVMAnalysis AliasAnalysis.cpp AliasAnalysisEvaluator.cpp @@ -39,6 +44,7 @@ IVUsers.cpp IndirectCallPromotionAnalysis.cpp InlineCost.cpp + InliningAdvisor.cpp InstCount.cpp InstructionPrecedenceTracking.cpp InstructionSimplify.cpp @@ -105,4 +111,8 @@ DEPENDS intrinsics_gen + ${EXTRADEPS} + + LINK_LIBS + ${EXTRADEPS} ) Index: llvm/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/lib/Analysis/InlineCost.cpp +++ llvm/lib/Analysis/InlineCost.cpp @@ -401,7 +401,10 @@ /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer -class InlineCostCallAnalyzer final : public CallAnalyzer { +class InlineCostCallAnalyzer : public CallAnalyzer { + /// FIXME(mtrofin): remove the 'protected' here, only needed for + /// 
CompleteInlineCostAnalyzer +protected: const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; const bool ComputeFullInlineCost; int LoadEliminationCost = 0; @@ -704,6 +707,64 @@ int getThreshold() { return Threshold; } int getCost() { return Cost; } }; + +/// FIXME(mtrofin): point in time solution for evaluating the aggregate cost +/// of a call site, and distinguishing between 'too expensive' and 'invalid'. +/// We delegate to InlineCostAnalyzer, which doesn't worry about cost, if +/// InlineParams have ComputeFullInlineCost == true, until finalizeAnalysis. +/// The long term solution we want here is to avoid cost calculation altogether, +/// and, instead, collect the individual features (i.e. DCEd instructions, nr +/// loops, indirect calls - the basic blocks on which cost is calculated) +class CompleteInlineCostAnalyzer final : public InlineCostCallAnalyzer { +public: + CompleteInlineCostAnalyzer( + const TargetTransformInfo &TTI, + std::function &GetAssumptionCache, + Optional> &GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, + CallBase &Call, + const InlineParams Params = {/* DefaultThreshold*/ 0, + /*HintThreshold*/ {}, + /*ColdThreshold*/ {}, + /*OptSizeThreshold*/ {}, + /*OptMinSizeThreshold*/ {}, + /*HotCallSiteThreshold*/ {}, + /*LocallyHotCallSiteThreshold*/ {}, + /*ColdCallSiteThreshold*/ {}, + /* ComputeFullInlineCost*/ true}, + bool BoostIndirect = true) + : InlineCostCallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, + Callee, Call, Params, BoostIndirect) {} + + // Ignore parent's result, and just return success. + InlineResult finalizeAnalysis() override { + InlineCostCallAnalyzer::finalizeAnalysis(); + return InlineResult::success(); + } + + // Same as parent's onLoweredCall, just that it uses itself internally. Not + // worth doing something more elegant, because this implementation will go + // away. 
+ void onLoweredCall(Function *F, CallBase &Call, + bool IsIndirectCall) override { + addCost(Call.arg_size() * InlineConstants::InstrCost); + + if (IsIndirectCall && BoostIndirectCalls) { + auto IndirectCallParams = Params; + IndirectCallParams.DefaultThreshold = + InlineConstants::IndirectCallThreshold; + CompleteInlineCostAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, + *F, Call, IndirectCallParams, false); + if (CA.analyze().isSuccess()) { + // We were able to inline the indirect call! Subtract the cost from the + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + } + } else + // Otherwise simply add the cost for merely making the call. + addCost(InlineConstants::CallPenalty); + } +}; } // namespace /// Test whether the given value is an Alloca-derived function argument. @@ -2210,14 +2271,23 @@ GetAssumptionCache, GetBFI, GetTLI, PSI, ORE); } -InlineCost llvm::getInlineCost( - CallBase &Call, Function *Callee, const InlineParams &Params, - TargetTransformInfo &CalleeTTI, +Optional llvm::getInliningCostEstimate( + CallBase &Call, TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - function_ref GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + CompleteInlineCostAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, + *Call.getCalledFunction(), Call); + auto R = CA.analyze(); + if (!R.isSuccess()) + return {}; + return CA.getCost(); +} +Optional llvm::getTrivialInliningDecision( + CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, + function_ref GetTLI) { // Cannot inline indirect calls. 
if (!Callee) return llvm::InlineCost::getNever("indirect call"); @@ -2272,8 +2342,25 @@ if (Call.isNoInline()) return llvm::InlineCost::getNever("noinline call site attribute"); + return {}; +} + +InlineCost llvm::getInlineCost( + CallBase &Call, Function *Callee, const InlineParams &Params, + TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, + Optional> GetBFI, + function_ref GetTLI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + auto TrivialDecision = getTrivialInliningDecision(Call, Callee, CalleeTTI, + GetAssumptionCache, GetTLI); + + if (TrivialDecision.hasValue()) + return TrivialDecision.getValue(); + LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "... (caller:" << Caller->getName() << ")\n"); + << "... (caller:" << Call.getCaller()->getName() + << ")\n"); InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, Call, Params); Index: llvm/lib/Analysis/InliningAdvisor.cpp =================================================================== --- /dev/null +++ llvm/lib/Analysis/InliningAdvisor.cpp @@ -0,0 +1,63 @@ +//===- InlinerMLDefault.cpp - Common implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements InlinerML APIs for the case we do not want ML heuristics +// The goal is to keep InlinerML.cpp simple, without conditional compilation, +// and oportunisitically factor out some common APIs. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ML/InliningAdvisor.h" + +namespace llvm { + +AnalysisKey InliningAdvisorAnalysis::Key; + +PreservedAnalyses InliningAdvisorCleanupPass::run(Module &M, + ModuleAnalysisManager &MAM) { + InliningAdvisor *Advisor = MAM.getCachedResult(M); + if (Advisor) + Advisor->OnAllInliningCompleted(); + return PreservedAnalyses::all(); +} + +#if !LLVM_USE_ML_POLICY_DEV && !LLVM_USE_ML_POLICY_REL + +class InliningAdvisorImpl {}; +struct PendingInliningRecordImpl {}; + +void PendingInliningRecord::recordInlining(bool, bool) {} + +InliningAdvisor::InliningAdvisor(Module &, ModuleAnalysisManager &) + : Impl(nullptr) {} + +PendingInliningRecord::PendingInliningRecord(PendingInliningRecordImpl *) + : PendingInliningRecord() {} + +PendingInliningRecord InliningAdvisor::shouldInline(CallBase *, bool &, bool, + int) { + return PendingInliningRecord(); +} +void InliningAdvisor::OnPassEntry() {} +void InliningAdvisor::OnPassExit() {} +void InliningAdvisor::OnSuccessfulInlining(const Function *) {} +void InliningAdvisor::OnAllInliningCompleted() {} +void InliningAdvisor::OnFunctionDeleted(Function *) {} + +PendingInliningRecord::~PendingInliningRecord() = default; +PendingInliningRecord::PendingInliningRecord() = default; +PendingInliningRecord::PendingInliningRecord(PendingInliningRecord &&) = + default; +PendingInliningRecord & +PendingInliningRecord::operator=(PendingInliningRecord &&) = default; + +InliningAdvisor::~InliningAdvisor() = default; +InliningAdvisor::InliningAdvisor(InliningAdvisor &&) = default; +#endif +} // namespace llvm Index: llvm/lib/Analysis/ML/CMakeLists.txt =================================================================== --- /dev/null +++ llvm/lib/Analysis/ML/CMakeLists.txt @@ -0,0 +1,31 @@ +set(SRCS InliningAdvisor.cpp) + +if (LLVM_USE_ML_POLICY STREQUAL "DEV") + set(SRCS ${SRCS} + IRToNativeSizeLearning.cpp + TFUtils.cpp + ) + set(EXTRA_DEPS 
${tensorflow_c_api}) +endif () + +if (LLVM_USE_ML_POLICY STREQUAL "REL") + include(TensorFlowCompile) + tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel) + set(SRCS ${SRCS} ${TF_AOT_OBJ}) + set(EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) + set(EXTRA_DEPS LLVMtf_aot_runtime) + set(LLVM_OPTIONAL_SOURCES IRToNativeSizeLearning.cpp TFUtils.cpp) +endif () + +add_llvm_library(LLVMMLPolicies STATIC + ${SRCS} + + ADDITIONAL_HEADER_DIRS + ${EXTRA_INCLUDES} + + DEPS + ${EXTRA_DEPS} + + LINK_LIBS + ${EXTRA_DEPS} + ) Index: llvm/lib/Analysis/ML/IRToNativeSizeLearning.cpp =================================================================== --- /dev/null +++ llvm/lib/Analysis/ML/IRToNativeSizeLearning.cpp @@ -0,0 +1,174 @@ +//===- IRToNativeSizeLearning.cpp - Infra to learn IR to native size model-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements feature and label extraction for offline supervised learning +// of a IR to native size model. 
+// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/ML/IRToNativeSizeLearning.h" + +#include +#include + +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +namespace { +size_t getSize(Function &F, TargetTransformInfo &TTI) { + size_t Ret = 0; + for (auto &BB : F) + for (auto &I : BB) + Ret += TTI.getInstructionCost( + &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); + return Ret; +} + +size_t getSize(Function &F, FunctionAnalysisManager &FAM) { + auto &TTI = FAM.getResult(F); + return getSize(F, TTI); +} + +int getStableID(const Instruction &I) { + switch (I.getOpcode()) { +#define HANDLE_INST(ID, ENUM_NAME, IGNORE) \ + case Instruction::ENUM_NAME: \ + return ID; +#include "llvm/IR/Instruction.def" + default: + return -1; + } +} + +unsigned getMaxDominatorTreeDepth(const Function &F, + const DominatorTree &Tree) { + unsigned Ret = 0; + for (auto &BB : F) + if (auto *TN = Tree.getNode(&BB)) + Ret = std::max(Ret, TN->getLevel()); + return Ret; +} +} // namespace + +llvm::raw_ostream & +llvm::operator<<(llvm::raw_ostream &Out, + const IRToNativeSizeLearning::FunctionFeatures &Features) { + std::vector T(Features.FeatureCount); + Features.FillTensor(T.data()); + for (auto &V : T) + Out << V << ","; + return Out; +} + +unsigned IRToNativeSizeLearning::getMaxInstructionID() { +#define LAST_OTHER_INST(NR) return NR; +#include "llvm/IR/Instruction.def" +} + +IRToNativeSizeLearning::FunctionFeatures +IRToNativeSizeLearning::getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM) { + static std::vector> InterestingSuccessions( + {{1, 34}, {15, 27}, {53, 53}, {53, 
34}, {1, 11}, {32, 2}, {2, 48}, + {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, + {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, + {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, + {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, + {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, + {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, + {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, + {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, + {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, + {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, + {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, + {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, + {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, + {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, + {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, + {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, + {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, + {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, + {1, 27}, {49, 1}, {1, 8}, {56, 2}}); + // We pay for this the first time, then it's sorted and we pay a traversal. 
+ std::sort(InterestingSuccessions.begin(), InterestingSuccessions.end()); + + auto &DomTree = FAM.getResult(F); + FunctionFeatures FF; + size_t InstrCount = getMaxInstructionID() + 1; + FF.InstructionHistogram.resize(InstrCount); + + FF.InstructionPairHistogram.resize(InterestingSuccessions.size()); + + auto StartID = 0; + auto LastID = StartID; + auto getPairIndex = [](size_t a, size_t b) { + auto I = std::find(InterestingSuccessions.begin(), + InterestingSuccessions.end(), std::make_pair(a, b)); + if (I == InterestingSuccessions.end()) + return -1; + return static_cast(std::distance(InterestingSuccessions.begin(), I)); + }; + + // We don't want debug calls, because they'd just add noise. + for (auto &BB : F) { + for (auto I = BB.instructionsWithoutDebug().begin(), + E = BB.instructionsWithoutDebug().end(); + I != E; ++I) { + auto ID = getStableID(*I); + + ++FF.InstructionHistogram[ID]; + int PairIndex = getPairIndex(LastID, ID); + if (PairIndex >= 0) + ++FF.InstructionPairHistogram[PairIndex]; + LastID = ID; + if (isa(*I)) + ++FF.Calls; + } + } + + FF.InitialSize = getSize(F, FAM); + FF.IsLocal = F.hasLocalLinkage(); + FF.IsLinkOnceODR = F.hasLinkOnceODRLinkage(); + FF.IsLinkOnce = F.hasLinkOnceLinkage(); + FF.Blocks = + std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); + auto &LI = FAM.getResult(F); + FF.Loops = std::distance(LI.begin(), LI.end()); + for (auto &L : LI) + FF.MaxLoopDepth = + std::max(FF.MaxLoopDepth, static_cast(L->getLoopDepth())); + FF.MaxDomTreeLevel = getMaxDominatorTreeDepth(F, DomTree); + return FF; +} + +void IRToNativeSizeLearning::FunctionFeatures::FillTensor(int32_t *Ptr) const { + int Pos = 0; + Ptr[Pos++] = InitialSize; + Ptr[Pos++] = Blocks; + Ptr[Pos++] = Calls; + Ptr[Pos++] = IsLocal; + Ptr[Pos++] = IsLinkOnceODR; + Ptr[Pos++] = IsLinkOnce; + Ptr[Pos++] = Loops; + Ptr[Pos++] = MaxLoopDepth; + Ptr[Pos++] = MaxDomTreeLevel; + for (auto Count : InstructionHistogram) + Ptr[Pos++] = Count; + + for (auto Count : 
InstructionPairHistogram) + Ptr[Pos++] = Count; +} Index: llvm/lib/Analysis/ML/InliningAdvisor.cpp =================================================================== --- /dev/null +++ llvm/lib/Analysis/ML/InliningAdvisor.cpp @@ -0,0 +1,548 @@ +//===- InlinerML.cpp - machine learned inlining heuristics ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface between the inliner and a learned model. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include "llvm/Analysis/ML/InliningAdvisor.h" + +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" + +#ifdef LLVM_USE_ML_POLICY_DEV +#include "InliningModelRunnerTraining.h" +#elif defined LLVM_USE_ML_POLICY_REL +#include "InliningModelRunnerProduction.h" +#else +#error Unsupported ML Execution Model +#endif + +using namespace llvm; + +static cl::opt + DebugTrainingLog("debug-training-log", cl::Hidden, + cl::desc("Output a debug training log.")); + +static cl::opt SizeIncreaseThreshold( + "ml-advisor-size-increase-threshold", cl::Hidden, + cl::desc("Maximum factor by which expected native size may increase before " + "blocking any further inlining."), + cl::init(2.0)); + +namespace llvm { + +struct CallSiteInfo { + CallSiteInfo(CallBase *CB, unsigned H) : Call(CB), Height(H) {} + CallBase *const Call; + const unsigned Height; +}; + +struct FuncDesc { + // Optionally store the name for debugging. 
To avoid tying this to any + // Function lifetime considerations, we store it as a std::string + std::string Name; + int BasicBlockCount = 0; + int ConditionallyExecutedBlocks = 0; + int Users = 0; +}; + +static int getNrOfUsers(const Function &F) { + return ((!F.hasLocalLinkage()) ? 1 : 0) + + std::distance(F.user_begin(), F.user_end()); +} + +class InliningAdvisorImpl { +public: + InliningAdvisorImpl(Module &M, ModuleAnalysisManager &MAM); + InliningAdvisorImpl(InliningAdvisorImpl &&) = default; + + ~InliningAdvisorImpl(); + CallGraph *callGraph() const { return CG.get(); } + + PendingInliningRecord shouldInline(CallBase *CB, + bool &AlternativeRecommendation, + bool Mandatory, int CostEstimate) { + return shouldInline(CallSiteInfo(CB, FunctionLevels[CB->getCaller()]), + AlternativeRecommendation, Mandatory, CostEstimate); + } + + PendingInliningRecord shouldInline(const CallSiteInfo &CSI, + bool &AlternativeRecommendation, + bool Mandatory, int CostEstimate); + bool invalidate(Module &, const PreservedAnalyses &, + llvm::ModuleAnalysisManager::Invalidator &) { + // InliningAdvisorImpl must be preserved across analysis invalidations. 
+ return false; + } + bool forceStop() const { return ForceStop; } + size_t getTotalSizeEstimate() const; + + void OnPassEntry() { + for (auto *F : LastInlinedFunctions) { + FuncDescs.reset(F); + NativeSizeEstimates.reset(F); + NrOfCalls.reset(F); + } + LastInlinedFunctions.clear(); + } + + void OnPassExit() { + for (auto *F : LastInlinedFunctions) { + if (DeletedFunctions.find(F) != DeletedFunctions.end()) + continue; + } + } + + void OnSuccessfulInlining(const Function *F) { + LastInlinedFunctions.insert(F); + + FuncDescs.reset(F); + NativeSizeEstimates.reset(F); + NrOfCalls.reset(F); + } + + void OnAllInliningCompleted() { + for (auto *F : DeletedFunctions) + delete (F); + } + + void OnFunctionDeleted(Function *F) { + assert(DeletedFunctions.count(F) == 0 && + "Can only delete a function once."); + DeletedFunctions.insert(F); + } + +private: + friend class InliningAdvisor; + friend PendingInliningRecordImpl; + + template class Memoized { + mutable std::unordered_map Memo; + + public: + Memoized() = default; + Memoized(const Memoized &) = delete; + Memoized(Memoized &&) = default; + void reset(K Key) { Memo.erase(Key); } + // We can't have F as part of the state, because, if it captures state - + // likely the Advisor's - moving or copying the Advisor would result in an + // invalid capture. We could maybe bind the parameters, but this alternative + // is easier for now, since we only need to call get in one place. 
+ V getOrInsert(K Key, std::function F) { + V Default; + auto I = Memo.insert(std::make_pair(Key, Default)); + if (I.second) + I.first->second = F(Key); + return I.first->second; + } + }; + + bool isLogging() const { + return !DebugTrainingLog.empty() || ModelRunner->isLogging(); + } + + unsigned getLocalCalls(const Function &F) const; + size_t getSizeEstimate(const Function &F) const; + size_t getIRSize(const Function &F) const { return F.getInstructionCount(); } + FuncDesc getFuncDesc(const Function &F) const; + size_t getModuleSize() const; + + int64_t NodeCount = 0; + int64_t EdgeCount = 0; + struct InliningRecordForDebugging { + InliningRecordForDebugging(const std::string &CallerName, + const std::string &CalleeName, + const InliningFeatures &Features, + bool SiteWasInlined, int NativeDeltaSize, + bool Mandatory) + : CallerName(CallerName), CalleeName(CalleeName), Features(Features), + SiteWasInlined(SiteWasInlined), NativeDeltaSize(NativeDeltaSize), + Mandatory(Mandatory) {} + const std::string CallerName; + const std::string CalleeName; + const InliningFeatures Features; + const bool SiteWasInlined; + const int NativeDeltaSize; + const bool Mandatory; + }; + std::vector InliningRecords; + std::unordered_set DeletedFunctions; + int32_t InitialNativeSize = 0; + int32_t CurrentNativeSize = 0; + int32_t InitialIRSize = 0; + int32_t CurrentIRSize = 0; + std::unique_ptr CG; + bool ForceStop = false; + FunctionAnalysisManager &FAM; + // Using a unique_ptr to avoid complicated move semantics implementation in + // InliningModelRunner. 
+ std::unique_ptr ModelRunner; + std::unique_ptr SizeEstimator; + + std::map FunctionLevels; + mutable Memoized FuncDescs; + mutable Memoized NativeSizeEstimates; + mutable Memoized NrOfCalls; + std::unordered_set LastInlinedFunctions; +}; + +struct PendingInliningRecordImpl { + PendingInliningRecordImpl(InliningAdvisorImpl *Advisor, + const Function *Caller, const Function *Callee, + bool Mandatory); + void recordInlining(bool CalleeWasDeleted, bool SiteWasInlined); + ~PendingInliningRecordImpl() { + assert(Recorded && "Unrecorded PendingInliningRecord"); + } + InliningAdvisorImpl *const Advisor; + const Function *const Caller; + const Function *const Callee; + const size_t EstimatedNativeSizeCallerBefore; + const size_t EstimatedNativeSizeCalleeBefore; + const bool Mandatory; + const int CallerIRSize; + const int CalleeIRSize; + const unsigned CallerAndCalleeEdges; + +private: + bool Recorded = false; +}; + +PendingInliningRecord::PendingInliningRecord(PendingInliningRecordImpl *Record) + : Impl(Record) {} + +void PendingInliningRecord::recordInlining(bool CalleeWasDeleted, + bool SiteWasInlined) { + Impl->recordInlining(CalleeWasDeleted, SiteWasInlined); +} + +PendingInliningRecordImpl::PendingInliningRecordImpl( + InliningAdvisorImpl *Advisor, const Function *Caller, + const Function *Callee, bool Mandatory) + : Advisor(Advisor), Caller(Caller), Callee(Callee), + EstimatedNativeSizeCallerBefore( + Advisor->isLogging() ? Advisor->getSizeEstimate(*Caller) : 0), + EstimatedNativeSizeCalleeBefore( + Advisor->isLogging() ? 
Advisor->getSizeEstimate(*Callee) : 0), + Mandatory(Mandatory), CallerIRSize(Advisor->getIRSize(*Caller)), + CalleeIRSize(Advisor->getIRSize(*Callee)), + CallerAndCalleeEdges(Advisor->getLocalCalls(*Caller) + + Advisor->getLocalCalls(*Callee)) {} + +void PendingInliningRecordImpl::recordInlining(bool CalleeWasDeleted, + bool SiteWasInlined) { + Recorded = true; + assert(!CalleeWasDeleted || SiteWasInlined); + if (SiteWasInlined) + Advisor->OnSuccessfulInlining(Caller); + + int IRSizeAfter = + Advisor->getIRSize(*Caller) + (CalleeWasDeleted ? 0 : CalleeIRSize); + Advisor->CurrentIRSize += IRSizeAfter - (CallerIRSize + CalleeIRSize); + if (Advisor->CurrentIRSize > SizeIncreaseThreshold * Advisor->InitialIRSize) + Advisor->ForceStop = true; + + unsigned NewCallerAndCalleeEdges = Advisor->getLocalCalls(*Caller); + if (CalleeWasDeleted) + --Advisor->NodeCount; + else + NewCallerAndCalleeEdges += Advisor->getLocalCalls(*Callee); + Advisor->EdgeCount -= CallerAndCalleeEdges; + Advisor->EdgeCount += NewCallerAndCalleeEdges; + assert(Advisor->CurrentIRSize >= 0 && Advisor->EdgeCount >= 0 && + Advisor->NodeCount >= 0); + + // If we don't train or produce a debug log, we don't want to compute native + // size. All adjustments are recorded, so we can return. + if (!Advisor->isLogging()) + return; + + int NativeDeltaSize = std::numeric_limits::max(); + if (!Advisor->ForceStop) { + int NativeSizeAfter = + Advisor->getSizeEstimate(*Caller) + + (CalleeWasDeleted ? 
0 : EstimatedNativeSizeCalleeBefore); + NativeDeltaSize = NativeSizeAfter - (EstimatedNativeSizeCalleeBefore + + EstimatedNativeSizeCallerBefore); + } + if (!Mandatory) { + Advisor->ModelRunner->receiveReward(NativeDeltaSize); + } + + if (!DebugTrainingLog.empty()) { + InliningFeatures FeaturesCopy(FeatureList::NumberOfFeatures); + for (size_t I = 0; I < FeaturesCopy.size(); ++I) + FeaturesCopy[I] = Advisor->ModelRunner->get_feature(I); + Advisor->InliningRecords.emplace_back( + Caller->getName().str(), Callee->getName().str(), FeaturesCopy, + SiteWasInlined, NativeDeltaSize, Mandatory); + } + + // Don't adjust CurrentNativeSize with numeric_limits::max() + if (Advisor->ForceStop) + return; + + Advisor->CurrentNativeSize += NativeDeltaSize; + assert(Advisor->CurrentNativeSize >= 0); +} + +CallBase *getInlinableCS(Instruction &I) { + if (auto *CS = dyn_cast(&I)) + if (Function *Callee = CS->getCalledFunction()) { + if (!Callee->isDeclaration()) { + return CS; + } + } + return nullptr; +} + +InliningAdvisorImpl::InliningAdvisorImpl(Module &M, ModuleAnalysisManager &MAM) + : CG(new CallGraph(M)), + FAM(MAM.getResult(M).getManager()), + ModelRunner(std::make_unique(M.getContext())) { + for (const Function &F : CG->getModule()) { + if (F.isDeclaration()) + continue; + ++NodeCount; + EdgeCount += getLocalCalls(F); + } + + if (isLogging()) { + SizeEstimator = + std::make_unique(M.getContext(), FAM); + if (!SizeEstimator || !SizeEstimator->isValid()) { + SizeEstimator.reset(); + M.getContext().emitError("Could not initialize Size Estimator"); + return; + } + InitialNativeSize = getTotalSizeEstimate(); + CurrentNativeSize = InitialNativeSize; + } + InitialIRSize = getModuleSize(); + CurrentIRSize = InitialIRSize; + + for (auto I = scc_begin(CG.get()); !I.isAtEnd(); ++I) { + const std::vector &CGNodes = *I; + unsigned Level = 0; + for (auto CGNode : CGNodes) { + Function *F = CGNode->getFunction(); + if (F && !F->isDeclaration()) { + for (auto &I : instructions(F)) { + if 
(auto CS = getInlinableCS(I)) { + auto *Called = CS->getCalledFunction(); + auto Pos = FunctionLevels.find(Called); + // In bottom up traversal, an inlinable call is either in the + // same SCC, or to a function in a visited SCC. So not finding its + // level means we haven't visited it yet, meaning it's in this SCC. + if (Pos == FunctionLevels.end()) + continue; + Level = std::max(Level, Pos->second + 1); + } + } + } + } + for (auto CGNode : CGNodes) { + Function *F = CGNode->getFunction(); + if (F && !F->isDeclaration()) + FunctionLevels[F] = Level; + } + } +} + +size_t InliningAdvisorImpl::getModuleSize() const { + size_t Ret = 0; + for (auto &F : CG->getModule()) + if (!F.isDeclaration()) + Ret += getIRSize(F); + return Ret; +} + +InliningAdvisorImpl::~InliningAdvisorImpl() { + if (DebugTrainingLog.empty() || callGraph() == nullptr) + return; + + std::error_code ErrorCode; + raw_fd_ostream OutFile(DebugTrainingLog, ErrorCode); + + OutFile << "ModuleName: " << callGraph()->getModule().getName() << "\n" + << "InitialSize: " << InitialNativeSize << "\n"; + int DecisionID = 0; + for (const auto &Decision : InliningRecords) { + OutFile << "Inlining Decision: " << DecisionID++ << "\n"; + OutFile << "\t" + << "DeltaSize: " << Decision.NativeDeltaSize << "\n"; + OutFile << "\t" + << "Success: " << Decision.SiteWasInlined << "\n"; + OutFile << "\tCallerName: " << Decision.CallerName << "\n"; + OutFile << "\tCalleeName: " << Decision.CalleeName << "\n"; + OutFile << "\tFeatures: \n"; + + for (size_t I = 0; I < Decision.Features.size(); ++I) { + OutFile << "\t\t" << FeatureNameMap[I] << ": " << Decision.Features[I] + << "\n"; + } + } + OutFile << "FinalNodeCount: " << NodeCount << "\n"; + OutFile << "FinalEdgeCount: " << EdgeCount << "\n"; + OutFile << "FinalSize: " << getTotalSizeEstimate() << "\n"; +} + +PendingInliningRecord +InliningAdvisorImpl::shouldInline(const CallSiteInfo &CSI, + bool &AlternativeRecommendation, + bool Mandatory, int CostEstimate) { + if 
(ForceStop) { + AlternativeRecommendation = Mandatory; + return PendingInliningRecord(); + } + + auto &CS = *CSI.Call; + auto &Callee = *CS.getCalledFunction(); + auto &Caller = *CS.getCaller(); + + if (!Mandatory || !DebugTrainingLog.empty()) { + auto NrCtantParams = 0; + for (auto I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) { + NrCtantParams += (isa(*I)); + } + auto CallerBefore = getFuncDesc(Caller); + auto CalleeBefore = getFuncDesc(Callee); + + ModelRunner->set_feature(FeatureList::CalleeBasicBlockCount, + CalleeBefore.BasicBlockCount); + ModelRunner->set_feature(FeatureList::CallSiteHeight, CSI.Height); + ModelRunner->set_feature(FeatureList::NodeCount, NodeCount); + ModelRunner->set_feature(FeatureList::NrCtantParams, NrCtantParams); + ModelRunner->set_feature(FeatureList::CostEstimate, CostEstimate); + ModelRunner->set_feature(FeatureList::EdgeCount, EdgeCount); + ModelRunner->set_feature(FeatureList::CallerUsers, CallerBefore.Users); + ModelRunner->set_feature(FeatureList::CallerConditionallyExecutedBlocks, + CallerBefore.ConditionallyExecutedBlocks); + ModelRunner->set_feature(FeatureList::CallerBasicBlockCount, + CallerBefore.BasicBlockCount); + ModelRunner->set_feature(FeatureList::CalleeConditionallyExecutedBlocks, + CalleeBefore.ConditionallyExecutedBlocks); + ModelRunner->set_feature(FeatureList::CalleeUsers, CalleeBefore.Users); + } + + PendingInliningRecord Ret( + new PendingInliningRecordImpl(this, &Caller, &Callee, Mandatory)); + + if (Mandatory) + return Ret; + + AlternativeRecommendation = static_cast( + ModelRunner->run(static_cast(AlternativeRecommendation))); + + return Ret; +} + +size_t InliningAdvisorImpl::getSizeEstimate(const Function &F) const { + return NativeSizeEstimates.getOrInsert(&F, [this](const Function *F) { + return SizeEstimator->getSizeEstimate(*F); + }); +} + +unsigned InliningAdvisorImpl::getLocalCalls(const Function &F) const { + return NrOfCalls.getOrInsert(&F, [this](const Function *F) { + unsigned Ret = 0; + for 
(const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + if (auto CS = dyn_cast<CallBase>(&I)) { + const Function *Callee = CS->getCalledFunction(); + if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) + ++Ret; + } + } + } + return Ret; + }); +} + +FuncDesc InliningAdvisorImpl::getFuncDesc(const Function &F) const { + return FuncDescs.getOrInsert(&F, [this](const Function *F) { + FuncDesc Ret; + if (!DebugTrainingLog.empty()) + Ret.Name = F->getName().str(); + if (DeletedFunctions.count(F) > 0) { + assert(getNrOfUsers(*F) == 0); + return Ret; + } + Ret.Users = getNrOfUsers(*F); + for (const auto &BB : *F) { + ++Ret.BasicBlockCount; + if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) { + if (BI->isConditional()) { + Ret.ConditionallyExecutedBlocks += BI->getNumSuccessors(); + } + } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) { + Ret.ConditionallyExecutedBlocks += + (SI->getNumCases() + (nullptr != SI->getDefaultDest())); + } + } + return Ret; + }); +} + +size_t InliningAdvisorImpl::getTotalSizeEstimate() const { + size_t Ret = 0; + for (auto const &F : CG->getModule()) { + if (F.isDeclaration()) + continue; + if (DeletedFunctions.count(&F) > 0) + continue; + Ret += getSizeEstimate(F); + } + return Ret; +} + +// Implement pImpl for InliningAdvisor and PendingInliningRecord. 
+InliningAdvisor::InliningAdvisor(Module &M, ModuleAnalysisManager &MAM) + : Impl(std::make_unique(M, MAM)) {} + +PendingInliningRecord +InliningAdvisor::shouldInline(CallBase *CB, bool &AlternateRecommendation, + bool Mandatory, int CostEstimate) { + return PendingInliningRecord(Impl->shouldInline( + CallSiteInfo(CB, Impl->FunctionLevels[CB->getCaller()]), + AlternateRecommendation, Mandatory, CostEstimate)); +} +void InliningAdvisor::OnPassEntry() { Impl->OnPassEntry(); } +void InliningAdvisor::OnPassExit() { Impl->OnPassExit(); } +void InliningAdvisor::OnSuccessfulInlining(const Function *F) { + Impl->OnSuccessfulInlining(F); +} +void InliningAdvisor::OnAllInliningCompleted() { + Impl->OnAllInliningCompleted(); +} +void InliningAdvisor::OnFunctionDeleted(Function *F) { + Impl->OnFunctionDeleted(F); +} + +PendingInliningRecord::~PendingInliningRecord() = default; +PendingInliningRecord::PendingInliningRecord() = default; +PendingInliningRecord::PendingInliningRecord(PendingInliningRecord &&) = + default; +PendingInliningRecord & +PendingInliningRecord::operator=(PendingInliningRecord &&) = default; + +InliningAdvisor::~InliningAdvisor() = default; +InliningAdvisor::InliningAdvisor(InliningAdvisor &&) = default; +} // namespace llvm Index: llvm/lib/Analysis/ML/InliningModelFeatureMaps.h =================================================================== --- /dev/null +++ llvm/lib/Analysis/ML/InliningModelFeatureMaps.h @@ -0,0 +1,58 @@ +//===- InliningModelFeatureMaps.h - common model runner defs ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_INLININGMODELFEATUREMAPS_H +#define LLVM_INLININGMODELFEATUREMAPS_H + +#include + +namespace llvm { +enum FeatureList { + CalleeBasicBlockCount = 0, + CallSiteHeight, + NodeCount, + NrCtantParams, + CostEstimate, + EdgeCount, + CallerUsers, + CallerConditionallyExecutedBlocks, + CallerBasicBlockCount, + CalleeConditionallyExecutedBlocks, + CalleeUsers, + + // Last value tracks the total number of values in the enum - it's not an + // actual feature. + NumberOfFeatures +}; + +static const std::array + FeatureNameMap{ + /*FeatureList::CalleeBasicBlockCount*/ "callee_basic_block_count", + /*FeatureList::CallSiteHeight*/ "callsite_height", + /*FeatureList::NodeCount*/ "node_count", + /*FeatureList::NrCtantParams*/ "nr_ctant_params", + /*FeatureList::CostEstimate*/ "cost_estimate", + /*FeatureList::EdgeCount*/ "edge_count", + /*FeatureList::CallerUsers*/ "caller_users", + /*FeatureList::CallerConditionallyExecutedBlocks*/ + "caller_conditionally_executed_blocks", + /*FeatureList::CallerBasicBlockCount*/ "caller_basic_block_count", + /*FeatureList::CalleeConditionallyExecutedBlocks*/ + "callee_conditionally_executed_blocks", + /*FeatureList::CalleeUsers*/ "callee_users", + }; + +static const char *const DecisionName = "inlining_decision"; +static const char *const DefaultDecisionName = "inlining_default"; +static const char *const RewardName = "delta_size"; + +using InliningFeatures = std::vector; + +} // namespace llvm +#endif // LLVM_INLININGMODELFEATUREMAPS_H Index: llvm/lib/Analysis/ML/InliningModelRunnerProduction.h =================================================================== --- /dev/null +++ llvm/lib/Analysis/ML/InliningModelRunnerProduction.h @@ -0,0 +1,122 @@ +//===- InliningModelRunnerProduction.h - production ML runner ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_INLININGMODELRUNNERPRODUCTION_H +#define LLVM_INLININGMODELRUNNERPRODUCTION_H + +#include "InlinerSizeModel.h" +#include "InliningModelFeatureMaps.h" + +#include "llvm/IR/LLVMContext.h" + +#include +#include + +namespace llvm { + +static const char *const FeedPrefix = "feed_"; +static const char *const FetchPrefix = "fetch_"; + +/// InliningModelRunner - production mode implementation. It uses a AOT-compiled +/// SavedModel for efficient execution. +class InliningModelRunner { +public: + InliningModelRunner(LLVMContext &Ctx); + + ~InliningModelRunner(); + + int64_t run(int64_t DefaultDecision); + + void receiveReward(int64_t Reward); + + // Disallows copy and assign. + InliningModelRunner(const InliningModelRunner &) = delete; + InliningModelRunner &operator=(const InliningModelRunner &) = delete; + + void set_feature(int Index, int64_t Value); + int64_t get_feature(int Index) const; + bool isLogging() const { return false; } + +private: + bool isUsingInference() const; + + LLVMContext &Ctx; + + std::vector FeatureIndices; + int32_t ResultIndex = -1; + std::unique_ptr CompiledModel; +}; + +class FunctionSizeEstimator { +public: + FunctionSizeEstimator(LLVMContext &Ctx, FunctionAnalysisManager &FAM) {} + // Just return the number of blocks. This is interesting for debugging only. + size_t getSizeEstimate(const Function &F) { return F.size(); } + bool isValid() const { return true; } +}; + +InliningModelRunner::InliningModelRunner(LLVMContext &Ctx) + : Ctx(Ctx), CompiledModel(std::make_unique()) { + if (!isUsingInference()) + return; + + // TODO(yundi): CompiledModel->num_args() becomes 11 + 6 = 17 instead of + // 11 after adding loadable weights functionality in AOT, disable this check + // for now. 
+ // assert(CompiledModel->num_args() == FeatureList::NumberOfFeatures && + // "Features in inlining model does not match FeatureNameMap"); + FeatureIndices.reserve(FeatureList::NumberOfFeatures); + + for (int I = 0; I < FeatureList::NumberOfFeatures; ++I) { + const int Index = + CompiledModel->LookupArgIndex(FeedPrefix + FeatureNameMap[I]); + if (Index < 0) { + Ctx.emitError("Cannot find Feature in inlining model"); + CompiledModel.reset(); + return; + } + FeatureIndices[I] = Index; + } + + ResultIndex = + CompiledModel->LookupResultIndex(std::string(FetchPrefix) + DecisionName); + if (ResultIndex < 0) { + Ctx.emitError("Cannot find DecisionName in inlining model"); + CompiledModel.reset(); + } +} + +bool InliningModelRunner::isUsingInference() const { return !!CompiledModel; } + +int64_t InliningModelRunner::get_feature(int Index) const { + return *static_cast<int64_t *>( + CompiledModel->arg_data(FeatureIndices[Index])); +} + +void InliningModelRunner::set_feature(int Index, int64_t Value) { + *static_cast<int64_t *>(CompiledModel->arg_data(FeatureIndices[Index])) = + Value; +} + +int64_t InliningModelRunner::run(int64_t DefaultDecision) { + int64_t Ret = DefaultDecision; + if (isUsingInference()) { + CompiledModel->Run(); + Ret = *static_cast<int64_t *>(CompiledModel->result_data(ResultIndex)); + } + return Ret; +} + +InliningModelRunner::~InliningModelRunner() {} + +void InliningModelRunner::receiveReward(int64_t) {} + +} // namespace llvm + +#endif // LLVM_INLININGMODELRUNNERPRODUCTION_H Index: llvm/lib/Analysis/ML/InliningModelRunnerTraining.h =================================================================== --- /dev/null +++ llvm/lib/Analysis/ML/InliningModelRunnerTraining.h @@ -0,0 +1,263 @@ +//===- InliningModelRunnerTraining.h - training mode ML runner --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_INLININGMODELRUNNERTRAINING_H +#define LLVM_INLININGMODELRUNNERTRAINING_H + +#include "InliningModelFeatureMaps.h" +#include "TFUtils.h" + +#include "llvm/Analysis/ML/IRToNativeSizeLearning.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" + +#include "tensorflow/c/c_api.h" + +using namespace llvm; + +static cl::opt + TrainingLog("training-log", cl::Hidden, + cl::desc("Path where the inlining log is saved.")); + +static cl::opt TFTrainedModelPath( + "ml-inliner-trained-model", cl::Hidden, + cl::desc( + "Path to saved model to use as policy during this training session.")); + +static cl::opt TFIR2NativeModelPath( + "ml-inliner-ir2native-model", cl::Hidden, + cl::desc("Path to saved model evaluating native size from IR.")); + +static cl::opt TFFeedPrefix("ml-inliner-trained-model-feed-prefix", + cl::Hidden, cl::init("action_"), + cl::desc("Prefix for feature names.")); + +static cl::opt TFDecisionName( + "ml-inliner-trained-model-decision-name", cl::Hidden, + cl::init("StatefulPartitionedCall"), + cl::desc("Name of the graph operation representing the decision.")); + +namespace llvm { +/// InliningModelRunner - training mode implementation. It uses TF C APIs to +/// dynamically load and evaluate a TF SavedModel. Runtime performance is +/// sacrificed for ease of use while training. +class InliningModelRunner { +public: + InliningModelRunner(LLVMContext &Ctx); + + ~InliningModelRunner(); + + int64_t run(int64_t DefaultDecision); + + void receiveReward(int64_t Reward); + + // Disallows copy and assign. 
+ InliningModelRunner(const InliningModelRunner &) = delete; + InliningModelRunner &operator=(const InliningModelRunner &) = delete; + + void set_feature(int Index, int64_t Value); + int64_t get_feature(int Index) const; + bool isLogging() const { return !TrainingLog.empty(); } + +private: + bool isUsingInference() const; + + std::vector LoggedFeatures; + std::vector LoggedDefaultDecisions; + std::vector LoggedDecisions; + std::vector LoggedRewards; + + std::unique_ptr Evaluator; + + InliningFeatures FeatureStorage; + + // The training framework needs some additional features, that just need to be + // set to 0. + struct TensorSpec { + std::string Name; + TF_DataType Type; + }; + + const std::vector TrainingOnlyFeatures{ + {"inlining_default", TF_INT64}, + {"discount", TF_FLOAT}, + {"reward", TF_FLOAT}, + {"step_type", TF_INT32}}; +}; + +class FunctionSizeEstimator { +public: + FunctionSizeEstimator(LLVMContext &Ctx, FunctionAnalysisManager &FAM); + size_t getSizeEstimate(const Function &F); + bool isValid() const { return Evaluator && Evaluator->isValid(); } + +private: + std::unique_ptr Evaluator; + FunctionAnalysisManager &FAM; +}; + +InliningModelRunner::InliningModelRunner(LLVMContext &Ctx) { + if (!TFTrainedModelPath.empty()) { + std::vector InputNames; + std::vector OutputNames; + for (auto I = 0; I < FeatureList::NumberOfFeatures; ++I) + InputNames.push_back(TFFeedPrefix + FeatureNameMap[I]); + for (size_t I = 0; I < TrainingOnlyFeatures.size(); ++I) + InputNames.push_back(TFFeedPrefix + TrainingOnlyFeatures[I].Name); + OutputNames.push_back(TFDecisionName); + + Evaluator = std::make_unique(Ctx, TFTrainedModelPath, + InputNames, OutputNames); + if (!Evaluator || !Evaluator->isValid()) { + Ctx.emitError("Failed to create inliner saved model evaluator"); + Evaluator.reset(); + return; + } + } else { + FeatureStorage.resize(FeatureList::NumberOfFeatures); + } + + if (!TrainingLog.empty()) { + for (auto I = 0; I < FeatureList::NumberOfFeatures; ++I) { + 
LoggedFeatures.push_back(InliningFeatures()); + } + } + if (!isUsingInference()) + return; + + static const std::vector Dim{1}; + + size_t InputIndex = 0; + for (; InputIndex < FeatureList::NumberOfFeatures; ++InputIndex) { + Evaluator->initInput(InputIndex, TF_INT64, Dim); + } + + for (; InputIndex < Evaluator->getInput().size(); ++InputIndex) { + auto TFType = + TrainingOnlyFeatures[InputIndex - FeatureList::NumberOfFeatures].Type; + Evaluator->initInput(InputIndex, TFType, Dim); + } +} + +bool InliningModelRunner::isUsingInference() const { return !!Evaluator; } + +int64_t InliningModelRunner::run(int64_t DefaultDecision) { + int64_t Decision = DefaultDecision; + if (isUsingInference()) { + std::vector Output{nullptr}; + if (!Evaluator->evaluate(Output)) + return DefaultDecision; + Decision = *(static_cast(TF_TensorData(Output[0]))); + TF_DeleteTensor(Output[0]); + } + if (!TrainingLog.empty()) { + for (auto I = 0; I < FeatureList::NumberOfFeatures; ++I) { + LoggedFeatures[I].push_back(get_feature(I)); + } + LoggedDefaultDecisions.push_back(DefaultDecision); + LoggedDecisions.push_back(Decision); + } + return Decision; +} + +int64_t InliningModelRunner::get_feature(int Index) const { + if (isUsingInference()) + return *( + static_cast(TF_TensorData(Evaluator->getInput()[Index]))); + return FeatureStorage[Index]; +} + +void InliningModelRunner::set_feature(int Index, int64_t Value) { + if (isUsingInference()) + *(static_cast(TF_TensorData(Evaluator->getInput()[Index]))) = + Value; + else + FeatureStorage[Index] = Value; +} + +#define LOG_DUMP(Name, Feature) \ + do { \ + OutFile << " feature_list: {\n"; \ + OutFile << " key: " \ + << "\"" << Name << "\"" \ + << " \n"; \ + OutFile << " value: {\n"; \ + for (const auto &feature : Feature) { \ + OutFile << " feature: {\n"; \ + OutFile << " int64_list: {\n"; \ + OutFile << " value: [ " << feature << " ]\n"; \ + OutFile << " }\n"; \ + OutFile << " }\n"; \ + } \ + OutFile << " }\n"; \ + OutFile << " }\n"; \ + } while 
(0); + +InliningModelRunner::~InliningModelRunner() { + if (!TrainingLog.empty()) { + std::error_code ErrorCode; + raw_fd_ostream OutFile(TrainingLog, ErrorCode); + + if (!LoggedDefaultDecisions.empty()) { + OutFile << "feature_lists: {\n"; + + for (size_t i = 0; i < LoggedFeatures.size(); i++) { + LOG_DUMP(FeatureNameMap.at(static_cast(i)), + LoggedFeatures[i]); + } + LOG_DUMP(DefaultDecisionName, LoggedDefaultDecisions); + LOG_DUMP(DecisionName, LoggedDecisions); + LOG_DUMP(RewardName, LoggedRewards); + + OutFile << "}\n"; + } + } +} + +#undef LOG_DUMP + +void InliningModelRunner::receiveReward(int64_t Reward) { + if (!TrainingLog.empty()) { + LoggedRewards.push_back(Reward); + } +} + +FunctionSizeEstimator::FunctionSizeEstimator(LLVMContext &Ctx, + FunctionAnalysisManager &FAM) + : FAM(FAM) { + std::vector InputNames{"serving_default_input_1"}; + std::vector OutputName{"StatefulPartitionedCall"}; + Evaluator = std::make_unique( + Ctx, TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); + if (!Evaluator || !Evaluator->isValid()) + Evaluator.reset(); + static const std::vector Dim{ + 1, IRToNativeSizeLearning::FunctionFeatures::FeatureCount}; + + Evaluator->initInput(0, TF_INT32, Dim); +} + +size_t FunctionSizeEstimator::getSizeEstimate(const Function &F) { + auto Features = IRToNativeSizeLearning::getFunctionFeatures( + const_cast(F), FAM); + int32_t *V = static_cast(TF_TensorData(Evaluator->getInput()[0])); + Features.FillTensor(V); + std::vector Output{nullptr}; + if (!Evaluator->evaluate(Output)) + return 0; + float Ret = *(static_cast(TF_TensorData(Output[0]))); + TF_DeleteTensor(Output[0]); + if (Ret < 0) + Ret = 0.0; + return static_cast(Ret); +} +} // namespace llvm + +#endif // LLVM_INLININGMODELRUNNERTRAINING_H Index: llvm/lib/Analysis/ML/TFUtils.h =================================================================== --- /dev/null +++ llvm/lib/Analysis/ML/TFUtils.h @@ -0,0 +1,74 @@ +//===- TFUtils.h - utilities for tensorflow C API 
---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_TFUTILS_H +#define LLVM_TFUTILS_H + +#include "tensorflow/c/c_api.h" +#include "llvm/IR/LLVMContext.h" + +#include +#include + +namespace llvm { + +bool ensureInitTF(); + +using TFGraphPtr = std::unique_ptr; +using TFSessionOptionsPtr = + std::unique_ptr; +using TFStatusPtr = std::unique_ptr; + +TFGraphPtr createTFGraph(); +TFStatusPtr createTFStatus(); +TFSessionOptionsPtr createTFSessionOptions(); + +/// Load a SavedModel, find the given inputs and outputs, and setup storage +/// for input tensors. The user is responsible for allocating, initializing, +/// setting values, and deallocating the input tensors, and for deallocating the +/// output tensors. +class TFModelEvaluator final { +public: + TFModelEvaluator(LLVMContext &Ctx, StringRef SavedModelPath, + const std::vector &InputNames, + const std::vector &OutputNames, + const char *Tags = "serve"); + ~TFModelEvaluator(); + TFModelEvaluator(const TFModelEvaluator &) = delete; + TFModelEvaluator(TFModelEvaluator &&) = delete; + + /// Evaluate the model, assuming it is valid. Returns false if the evaluation + /// fails or the model is invalid, true otherwise. The inputs are assumed to + /// have been already provided via getInput(). When returning false, it also + /// marks the object invalid. + bool evaluate(std::vector &Output); + + /// Provides access to the input vector. It is already dimensioned correctly, + /// but the values need to be allocated by the user. + std::vector &getInput() { return Input; } + + /// Returns true if the tensorflow model was loaded successfully, false + /// otherwise. 
+ bool isValid() const { return !!Session; } + void initInput(int Index, TF_DataType Type, + const std::vector &Dimensions); + +private: + LLVMContext &Ctx; + TF_Session *Session = nullptr; + TFGraphPtr Graph; + TFSessionOptionsPtr Options; + std::vector InputFeed; + std::vector Input; + std::vector OutputFeed; + void DeleteSession(); + bool CheckReportAndReset(const TF_Output &Output, StringRef Name); +}; +} // namespace llvm + +#endif // LLVM_TFUTILS_H Index: llvm/lib/Analysis/ML/TFUtils.cpp =================================================================== --- /dev/null +++ llvm/lib/Analysis/ML/TFUtils.cpp @@ -0,0 +1,138 @@ +//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for interfacing with tensorflow C APIs. 
+// +//===----------------------------------------------------------------------===// + +#include "TFUtils.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" + +#include "tensorflow/c/c_api_experimental.h" + +#include + +namespace llvm { + +struct TFInitializer { + TFInitializer() { + assert(!isInitialized && "TFInitialized should be called only once"); + int Argc = 1; + const char *Name = ""; + const char **NamePtr = &Name; + TF_InitMain(Name, &Argc, const_cast(&NamePtr)); + isInitialized = true; + } + bool isInitialized = false; +}; + +llvm::ManagedStatic TFInitializer; + +bool ensureInitTF() { return TFInitializer->isInitialized; } + +TFGraphPtr createTFGraph() { + return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); +} + +TFStatusPtr createTFStatus() { + return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); +} +TFSessionOptionsPtr createTFSessionOptions() { + return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); +} + +TFModelEvaluator::TFModelEvaluator(LLVMContext &Ctx, StringRef SavedModelPath, + const std::vector &InputNames, + const std::vector &OutputNames, + const char *Tags) + : Ctx(Ctx), Graph(createTFGraph()), Options(createTFSessionOptions()), + InputFeed(InputNames.size()), Input(InputNames.size()), + OutputFeed(OutputNames.size()) { + if (!ensureInitTF()) { + Ctx.emitError("Tensorflow should have been initialized"); + return; + } + auto Status = createTFStatus(); + + Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr, + SavedModelPath.str().c_str(), &Tags, 1, + Graph.get(), nullptr, Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + Ctx.emitError(TF_Message(Status.get())); + DeleteSession(); + } + for (size_t I = 0; I < InputNames.size(); ++I) { + InputFeed[I] = { + TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0}; + if (!CheckReportAndReset(InputFeed[I], InputNames[I])) + return; + } + for 
(size_t I = 0; I < OutputNames.size(); ++I) { + OutputFeed[I] = { + TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0}; + if (!CheckReportAndReset(OutputFeed[I], OutputNames[I])) + return; + } +} + +TFModelEvaluator::~TFModelEvaluator() { + for (auto *T : Input) { + TF_DeleteTensor(T); + } + DeleteSession(); +} + +bool TFModelEvaluator::CheckReportAndReset(const TF_Output &Output, + StringRef Name) { + if (Output.oper) + return true; + Ctx.emitError("Could not find TF_Output named: " + Name); + DeleteSession(); + return false; +} + +void TFModelEvaluator::DeleteSession() { + if (Session == nullptr) + return; + auto Status = createTFStatus(); + TF_DeleteSession(Session, Status.get()); + Session = nullptr; + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) + Ctx.emitError("Could not delete TF session"); +} + +bool TFModelEvaluator::evaluate(std::vector &Output) { + if (!isValid()) + return false; + auto Status = createTFStatus(); + TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), Input.size(), + OutputFeed.data(), Output.data(), Output.size(), nullptr, 0, + nullptr, Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + Ctx.emitError(TF_Message(Status.get())); + return false; + } + return true; +} + +void TFModelEvaluator::initInput(int Index, TF_DataType Type, + const std::vector &Dimensions) { + int64_t TotalSize = TF_DataTypeSize(Type); + for (auto &D : Dimensions) + TotalSize *= D; + + Input[Index] = + TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); + std::memset(TF_TensorData(Input[Index]), 0, TotalSize); +} + +} // namespace llvm Index: llvm/lib/CMakeLists.txt =================================================================== --- llvm/lib/CMakeLists.txt +++ llvm/lib/CMakeLists.txt @@ -34,3 +34,4 @@ add_subdirectory(Testing) endif() add_subdirectory(WindowsManifest) + Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- 
llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -39,6 +39,7 @@ #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopNestAnalysis.h" +#include "llvm/Analysis/ML/InliningAdvisor.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" @@ -215,6 +216,15 @@ "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt EnableMLInliner( + "enable-ml-inliner", cl::init(false), cl::Hidden, + cl::desc("Enable ML policy for inliner. Currently trained for -Oz only")); + +cl::opt PerformMandatoryInliningsFirst( + "mandatory-inlinings-first", cl::init(false), + cl::desc("Perform all mandatory (always-inline) inlinings first, for the " + "whole module.")); + static cl::opt EnableGVNSink( "enable-npm-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); @@ -690,10 +700,95 @@ return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } -ModulePassManager -PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging) { +ModulePassManager PassBuilder::buildInlinerPipeline(OptimizationLevel Level, + ThinLTOPhase Phase, + bool DebugLogging) { + ModulePassManager MPM(DebugLogging); + + if (PerformMandatoryInliningsFirst) { + CGSCCPassManager AlwaysInliningPipeline(DebugLogging); + AlwaysInliningPipeline.addPass( + InlinerPass(getInlineParamsFromOptLevel(Level), true)); + AlwaysInliningPipeline.addPass(AttributorCGSCCPass()); + + if (PTO.Coroutines) + AlwaysInliningPipeline.addPass(CoroSplitPass()); + + AlwaysInliningPipeline.addPass(PostOrderFunctionAttrsPass()); + AlwaysInliningPipeline.addPass(createCGSCCToFunctionPassAdaptor( + buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); + 
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + std::move(AlwaysInliningPipeline))); + } + + // Now begin the main postorder CGSCC pipeline. + // FIXME: The current CGSCC pipeline has its origins in the legacy pass + // manager and trying to emulate its precise behavior. Much of this doesn't + // make a lot of sense and we should revisit the core CGSCC structure. + CGSCCPassManager MainCGPipeline(DebugLogging); + + // Note: historically, the PruneEH pass was run first to deduce nounwind and + // generally clean up exception handling overhead. It isn't clear this is + // valuable as the inliner doesn't currently care whether it is inlining an + // invoke or a call. + + // Run the inliner first. The theory is that we are walking bottom-up and so + // the callees have already been fully optimized, and we want to inline them + // into the callers so that our optimizations can reflect that. + // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO + // because it makes profile annotation in the backend inaccurate. + InlineParams IP = getInlineParamsFromOptLevel(Level); + if (Phase == ThinLTOPhase::PreLink && PGOOpt && + PGOOpt->Action == PGOOptions::SampleUse) + IP.HotCallSiteThreshold = 0; + + if (EnableMLInliner) { + MPM.addPass(RequireAnalysisPass()); + } + + MainCGPipeline.addPass(InlinerPass(IP)); + if (!DisableAttributor) + MainCGPipeline.addPass(AttributorCGSCCPass()); + + if (PTO.Coroutines) + MainCGPipeline.addPass(CoroSplitPass()); + + // Now deduce any function attributes based in the current code. + MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); + + // When at O3 add argument promotion to the pass pipeline. + // FIXME: It isn't at all clear why this should be limited to O3. + if (Level == OptimizationLevel::O3) + MainCGPipeline.addPass(ArgumentPromotionPass()); + + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. 
+ if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) + MainCGPipeline.addPass(OpenMPOptPass()); + + // Lastly, add the core function simplification pipeline nested inside the + // CGSCC walk. + MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( + buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); + + for (auto &C : CGSCCOptimizerLateEPCallbacks) + C(MainCGPipeline, Level); + + // We wrap the CGSCC pipeline in a devirtualization repeater. This will try + // to detect when we devirtualize indirect calls and iterate the SCC passes + // in that case to try and catch knock-on inlining or function attrs + // opportunities. Then we add it to the module pipeline by walking the SCCs + // in postorder (or bottom-up). + MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( + std::move(MainCGPipeline), MaxDevirtIterations))); + if (EnableMLInliner) + MPM.addPass(InliningAdvisorCleanupPass()); + return MPM; +} + +ModulePassManager PassBuilder::buildModuleSimplificationPipeline( + OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { ModulePassManager MPM(DebugLogging); bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); @@ -836,58 +931,7 @@ // make a lot of sense and we should revisit the core CGSCC structure. CGSCCPassManager MainCGPipeline(DebugLogging); - // Note: historically, the PruneEH pass was run first to deduce nounwind and - // generally clean up exception handling overhead. It isn't clear this is - // valuable as the inliner doesn't currently care whether it is inlining an - // invoke or a call. - - // Run the inliner first. The theory is that we are walking bottom-up and so - // the callees have already been fully optimized, and we want to inline them - // into the callers so that our optimizations can reflect that. - // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO - // because it makes profile annotation in the backend inaccurate. 
- InlineParams IP = getInlineParamsFromOptLevel(Level); - if (Phase == ThinLTOPhase::PreLink && PGOOpt && - PGOOpt->Action == PGOOptions::SampleUse) - IP.HotCallSiteThreshold = 0; - MainCGPipeline.addPass(InlinerPass(IP)); - - if (!DisableAttributor) - MainCGPipeline.addPass(AttributorCGSCCPass()); - - if (PTO.Coroutines) - MainCGPipeline.addPass(CoroSplitPass()); - - // Now deduce any function attributes based in the current code. - MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); - - // When at O3 add argument promotion to the pass pipeline. - // FIXME: It isn't at all clear why this should be limited to O3. - if (Level == OptimizationLevel::O3) - MainCGPipeline.addPass(ArgumentPromotionPass()); - - // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if - // there are no OpenMP runtime calls present in the module. - if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) - MainCGPipeline.addPass(OpenMPOptPass()); - - // Lastly, add the core function simplification pipeline nested inside the - // CGSCC walk. - MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( - buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); - - for (auto &C : CGSCCOptimizerLateEPCallbacks) - C(MainCGPipeline, Level); - - // We wrap the CGSCC pipeline in a devirtualization repeater. This will try - // to detect when we devirtualize indirect calls and iterate the SCC passes - // in that case to try and catch knock-on inlining or function attrs - // opportunities. Then we add it to the module pipeline by walking the SCCs - // in postorder (or bottom-up). 
- MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( - std::move(MainCGPipeline), MaxDevirtIterations))); - + MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging)); return MPM; } Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -27,6 +27,7 @@ MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) +MODULE_ANALYSIS("inlining-advisor", InliningAdvisorAnalysis()) #ifndef MODULE_ALIAS_ANALYSIS #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ @@ -65,6 +66,8 @@ MODULE_PASS("ipsccp", IPSCCPPass()) MODULE_PASS("lowertypetests", LowerTypeTestsPass(nullptr, nullptr)) MODULE_PASS("mergefunc", MergeFunctionsPass()) +MODULE_PASS("scc-oz-module-inliner", + buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging)) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("partial-inliner", PartialInlinerPass()) Index: llvm/lib/Transforms/IPO/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/IPO/CMakeLists.txt +++ llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,3 +1,8 @@ +if (${LLVM_ENABLE_ML_HEURISTICS}) + set(LLVM_IPO_ML_DEPS MLHeuristics) + add_subdirectory(ML) +endif() + add_llvm_component_library(LLVMipo AlwaysInliner.cpp ArgumentPromotion.cpp @@ -44,4 +49,7 @@ DEPENDS intrinsics_gen + ${LLVM_IPO_ML_DEPS} + + LINK_LIBS ${LLVM_IPO_ML_DEPS} ) Index: llvm/lib/Transforms/IPO/Inliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/Inliner.cpp +++ llvm/lib/Transforms/IPO/Inliner.cpp @@ -30,12 +30,11 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include 
"llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/ML/InliningAdvisor.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -58,8 +57,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include @@ -159,9 +160,9 @@ /// *actually make it to the backend*, which is really what we want. /// /// Because we don't have this information, we do this simple and useful hack. -static void mergeInlinedArrayAllocas( - Function *Caller, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) { +static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory) { SmallPtrSet UsedAllocas; // When processing our SCC, check to see if CS was inlined from some other @@ -897,6 +898,24 @@ assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); Module &M = *InitialC.begin()->getFunction().getParent(); ProfileSummaryInfo *PSI = MAM.getCachedResult(M); + InliningAdvisor *Advisor = MAM.getCachedResult(M); + assert(!MandatoryOnly || !Advisor); + if (Advisor) + Advisor->OnPassEntry(); + + // Avoid subtle bugs due to alternative exits from this method - if we have + // an advisor, ensure it is always informed when we're done with a scc. 
+ class AdvisorExitCapture final { + InliningAdvisor *const Advisor; + + public: + AdvisorExitCapture(InliningAdvisor *A) : Advisor(A) {} + ~AdvisorExitCapture() { + if (Advisor) + Advisor->OnPassExit(); + } + }; + AdvisorExitCapture Capturer(Advisor); if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { @@ -1010,8 +1029,7 @@ // node however because those functions aren't going to be mutated by this // pass. FunctionAnalysisManager &FAM = - AM.getResult(*C, CG) - .getManager(); + AM.getResult(*C, CG).getManager(); // Get the remarks emission analysis for the caller. auto &ORE = FAM.getResult(F); @@ -1068,17 +1086,58 @@ continue; } - Optional OIC = shouldInline(CS, GetInlineCost, ORE); - // Check whether we want to inline this callsite. - if (!OIC.hasValue()) { - setInlineRemark(CS, "deferred"); + CallBase *CB = dyn_cast(CS.getInstruction()); + auto TrivialDecision = llvm::getTrivialInliningDecision( + *CB, CS.getCalledFunction(), FAM.getResult(Callee), + GetAssumptionCache, GetTLI); + + if (Advisor && + ((TrivialDecision.hasValue() && TrivialDecision->isNever()) || + &Callee == &F)) + continue; + if (MandatoryOnly && + (!TrivialDecision.hasValue() || !TrivialDecision->isAlways())) continue; - } - if (!OIC.getValue()) { - // shouldInline() call returned a negative inline cost that explains - // why this callsite should not be inlined. - setInlineRemark(CS, inlineCostStr(*OIC)); + // TODO(mtrofin): this replicates the already calculated + // TrivialDecision part when we don't do inference. Refactor to avoid. + const bool Mandatory = + TrivialDecision.hasValue() && TrivialDecision->isAlways(); + + // TODO(mtrofin): no need to compute OIC if Advisor is doing inference and + // no logging. 
+ Optional OIC = shouldInline(CS, GetInlineCost, ORE); + assert(!Mandatory || (OIC.hasValue() && OIC.getValue())); + bool ShouldInline = Mandatory || (OIC.hasValue() && OIC.getValue()); + // A deep analysis of the callsite may reveal blocking reasons for not + // inlining, such as VarArgs, or large stack sizes. Stop in that case, as + // inlining would cause a correctness problem. + int CostEstimate = 0; + // If the inlining is mandatory, we won't use the cost, so can set it to 0 + if (!Mandatory) { + auto IsCallsiteInlinable = llvm::getInliningCostEstimate( + *CB, FAM.getResult(Callee), GetAssumptionCache, + {}, nullptr, nullptr); + if (!IsCallsiteInlinable) + continue; + CostEstimate = IsCallsiteInlinable.getValue(); + } + PendingInliningRecord PendingRecord; + if (Advisor) { + PendingRecord = + Advisor->shouldInline(CB, ShouldInline, Mandatory, CostEstimate); + } + if (!ShouldInline) { + // Check whether we want to inline this callsite. + if (!OIC.hasValue()) { + setInlineRemark(CS, "deferred"); + } else if (!OIC.getValue()) { + // shouldInline() call returned a negative inline cost that explains + // why this callsite should not be inlined. + setInlineRemark(CS, inlineCostStr(*OIC)); + } + if (PendingRecord) + PendingRecord.recordInlining(false, false); continue; } @@ -1097,14 +1156,17 @@ InlineResult IR = InlineFunction(CS, IFI); if (!IR.isSuccess()) { - setInlineRemark(CS, std::string(IR.getFailureReason()) + "; " + - inlineCostStr(*OIC)); + setInlineRemark( + CS, std::string(IR.getFailureReason()) + "; " + + (OIC.hasValue() ? 
inlineCostStr(*OIC) : "ML Advisor")); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) << NV("Callee", &Callee) << " will not be inlined into " << NV("Caller", &F) << ": " << NV("Reason", IR.getFailureReason()); }); + if (PendingRecord) + PendingRecord.recordInlining(false, false); continue; } DidInline = true; @@ -1112,7 +1174,10 @@ ++NumInlined; - emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC); + // TODO(mtrofin): OIC may not have value if Advisor decided against + // inlining. We should still emit a remark. + if (OIC.hasValue()) + emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC); // Add any new callsites to defined functions to the worklist. if (!IFI.InlinedCallSites.empty()) { @@ -1144,6 +1209,7 @@ // dead. In that case, we can drop the body of the function eagerly // which may reduce the number of callers of other functions to one, // changing inline cost thresholds. + bool CalleeWasDeleted = false; if (Callee.hasLocalLinkage()) { // To check this we also need to nuke any dead constant uses (perhaps // made dead by this operation on other functions). @@ -1163,8 +1229,13 @@ assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && "Cannot put cause a function to become dead twice!"); DeadFunctions.push_back(&Callee); + CalleeWasDeleted = true; + if (Advisor) + Advisor->OnFunctionDeleted(&Callee); } } + if (PendingRecord) + PendingRecord.recordInlining(CalleeWasDeleted, true); } // Back the call index up by one to put us in a good position to go around @@ -1243,8 +1314,7 @@ // function there. Also, cclear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FunctionAnalysisManager &FAM = - AM.getResult(DeadC, CG) - .getManager(); + AM.getResult(DeadC, CG).getManager(); FAM.clear(*DeadF, DeadF->getName()); AM.clear(DeadC, DeadC.getName()); auto &DeadRC = DeadC.getOuterRefSCC(); @@ -1256,7 +1326,19 @@ UR.InvalidatedRefSCCs.insert(&DeadRC); // And delete the actual function from the module. 
- M.getFunctionList().erase(DeadF); + // If we use the Advisor, it uses Function pointers to index various + // maps, e.g. memoization. Function cleanup passes like argument promotion + // create new functions. It is possible for a new function to be allocated + // at the address of a deleted function. + // We could index using names, but that's inefficient. Alternatively, + // we let the Advisor free the functions. + if (Advisor) { + DeadF->getBasicBlockList().clear(); + M.getFunctionList().remove(DeadF); + } else { + M.getFunctionList().erase(DeadF); + } + ++NumDeleted; } Index: llvm/test/Bindings/Go/lit.local.cfg =================================================================== --- llvm/test/Bindings/Go/lit.local.cfg +++ llvm/test/Bindings/Go/lit.local.cfg @@ -9,6 +9,9 @@ if not config.root.include_go_tests: config.unsupported = True +if config.use_ml_policies != '': + config.unsupported = True + def find_executable(executable, path=None): if path is None: path = os.environ['PATH'] Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -132,6 +132,8 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -226,6 +228,7 @@ ; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. +; CHECK-O-NEXT: Finished llvm::Module pass manager run. 
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -97,6 +97,8 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -196,6 +198,7 @@ ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. +; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-POSTLINK-O-NEXT: Starting llvm::Module pass manager run. Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -69,6 +69,8 @@ ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. 
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -167,6 +169,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -77,6 +77,8 @@ ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -178,6 +180,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. 
; CHECK-O-NEXT: Running pass: GlobalOptPass Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -96,6 +96,8 @@ ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -216,6 +218,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar ; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis on bar Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -77,6 +77,8 @@ ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. 
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -177,6 +179,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: NameAnonGlobalPass Index: llvm/test/Transforms/Inline/ML/ensure-delete.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/ML/ensure-delete.ll @@ -0,0 +1,31 @@ +; RUN: opt -passes=scc-oz-module-inliner -ml-advisor-size-increase-threshold=0.5 -enable-ml-inliner=1 -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +declare i64 @f1() + +define internal i64 @f2() #0 { + %r = call i64 @f1() + %r2 = add i64 13, %r + ret i64 %r2 +} + +define internal i64 @indirect_caller(i1 %which) { + %f = select i1 %which, i64 ()* @f1, i64 ()* @f2 + %r = call i64 %f(), !callees !1 + ret i64 %r +} + +define i64 @top() { + %r = call i64 @indirect_caller(i1 1) + %r2 = call i64 @f2() + %r3 = add i64 %r, %r2 + ret i64 %r3 +} + +!1 = !{i64 ()* @f1, i64()* @f2} + +attributes #0 = { alwaysinline } + +; CHECK: !0 = distinct !{i64 ()* @f1, null} \ No newline at end of file Index: llvm/test/Transforms/Inline/ML/func-features.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/ML/func-features.ll @@ -0,0 +1,61 @@ +; RUN: opt -enable-ml-inliner -mandatory-inlinings-first -passes=scc-oz-module-inliner -debug-training-log=- %{model_path} %{ir2native_path} -S < %s | FileCheck -check-prefix=CHECK 
-check-prefix=%{mode} %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare i32 @f1(i32) +declare i32 @f2(i32) + +define dso_local i32 @branches(i32) { + %cond = icmp slt i32 %0, 3 + br i1 %cond, label %then, label %else + +then: + %ret.1 = call i32 @f1(i32 %0) + br label %last.block + +else: + %ret.2 = call i32 @f2(i32 %0) + br label %last.block + +last.block: + %ret = phi i32 [%ret.1, %then], [%ret.2, %else] + ret i32 %ret +} + +define dso_local i32 @top() { + %1 = call i32 @branches(i32 2) + ret i32 %1 +} + + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 7.0.0-6 (tags/RELEASE_700/final)"} + +; CHECK-LABEL: ModuleName: +; DEV-NEXT: InitialSize: 51 +; REL-NEXT: InitialSize: 5 +; CHECK-NEXT: Inlining Decision: 0 +; CHECK-NEXT: DeltaSize: 0 +; CHECK-NEXT: Success: 1 +; CHECK-NEXT: CallerName: top +; CHECK-NEXT: CalleeName: branches +; CHECK-NEXT: Features: +; CHECK-NEXT: callee_basic_block_count: 4 +; CHECK-NEXT: callsite_height: 1 +; CHECK-NEXT: node_count: 2 +; CHECK-NEXT: nr_ctant_params: 1 +; CHECK-NEXT: cost_estimate: 0 +; CHECK-NEXT: edge_count: 1 +; CHECK-NEXT: caller_users: 1 +; CHECK-NEXT: caller_conditionally_executed_blocks: 0 +; CHECK-NEXT: caller_basic_block_count: 1 +; CHECK-NEXT: callee_conditionally_executed_blocks: 2 +; CHECK-NEXT: callee_users: 2 +; CHECK-NEXT: FinalNodeCount: 2 +; CHECK-NEXT: FinalEdgeCount: 0 +; DEV-NEXT: FinalSize: 51 +; REL-NEXT: FinalSize: 5 \ No newline at end of file Index: llvm/test/Transforms/Inline/ML/graph-structure.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/ML/graph-structure.ll @@ -0,0 +1,131 @@ +; RUN: opt -passes=scc-oz-module-inliner -debug-training-log=- -enable-ml-inliner=1 %{model_path} %{ir2native_path} -S < %s | FileCheck -check-prefix=CHECK -check-prefix=%{mode} %s +; Test that we can collect a log in 'dev' 
mode, and that the log captures both successful and unsuccessful decisions +; RUN: opt -passes=scc-oz-module-inliner -debug-training-log=- -enable-ml-inliner=1 %{ir2native_path} -S < %s | FileCheck -check-prefix=CHECK-LOG-%{mode} %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +declare void @external_fct(i32) + +define dso_local i32 @top() { + %a = call i32 @multiplier(i32 5) + %b = call i32 @adder(i32 10) + %ret = add nsw i32 %a, %b + call void @external_fct(i32 %ret) + ret i32 %ret +} + +define internal dso_local i32 @adder(i32) { + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = call i32 @multiplier(i32 %3) + %5 = load i32, i32* %2, align 4 + %6 = call i32 @switcher(i32 1) + %7 = add nsw i32 %4, %6 + ret i32 %7 +} + +define internal i32 @multiplier(i32) { + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = load i32, i32* %2, align 4 + %5 = mul nsw i32 %3, %4 + ret i32 %5 +} + +define i32 @switcher(i32) { + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + store i32 %0, i32* %3, align 4 + %4 = load i32, i32* %3, align 4 + switch i32 %4, label %11 [ + i32 1, label %5 + i32 2, label %6 + ] + +;