diff --git a/clang/test/Frontend/optimization-remark-line-directive.c b/clang/test/Frontend/optimization-remark-line-directive.c
--- a/clang/test/Frontend/optimization-remark-line-directive.c
+++ b/clang/test/Frontend/optimization-remark-line-directive.c
@@ -2,11 +2,11 @@
 // directives. We cannot map #line directives back to
 // a SourceLocation.
 
-// RUN: %clang_cc1 %s -Rpass=inline -debug-info-kind=line-tables-only -emit-llvm-only -verify -fno-experimental-new-pass-manager
+// RUN: %clang_cc1 %s -Rpass=inline -debug-info-kind=line-tables-only -emit-llvm-only -verify -fno-experimental-new-pass-manager -mllvm -mandatory-inlining-first=0
 
 // The new PM inliner is not added to the default pipeline at O0, so we add
 // some optimizations to trigger it.
-// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -debug-info-kind=line-tables-only -emit-llvm-only -verify
+// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -debug-info-kind=line-tables-only -emit-llvm-only -verify -mllvm -mandatory-inlining-first=0
 
 int foo(int x, int y) __attribute__((always_inline));
 int foo(int x, int y) { return x + y; }
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -27,6 +27,8 @@
-/// There are 3 scenarios we can use the InlineAdvisor:
+/// There are 4 scenarios in which we can use the InlineAdvisor:
 /// - Default - use manual heuristics.
 ///
+/// - MandatoryOnly - only mandatory inlinings (i.e. AlwaysInline).
+///
 /// - Release mode, the expected mode for production, day to day deployments.
 /// In this mode, when building the compiler, we also compile a pre-trained ML
 /// model to native code, and link it as a static library. This mode has low
@@ -37,7 +39,12 @@
 /// requires the full C Tensorflow API library, and evaluates models
 /// dynamically. This mode also permits generating training logs, for offline
 /// training.
-enum class InliningAdvisorMode : int { Default, Release, Development };
+enum class InliningAdvisorMode : int {
+  Default,       ///< Manual heuristics.
+  MandatoryOnly, ///< Only mandatory inlinings (i.e. AlwaysInline).
+  Release,       ///< Pre-trained ML model compiled to native code.
+  Development    ///< Models evaluated dynamically; can emit training logs.
+};
 
 class InlineAdvisor;
 /// Capture state between an inlining decision having had been made, and
@@ -178,6 +185,33 @@
   InlineParams Params;
 };
 
+/// Advisor recommending only mandatory (AlwaysInline) cases.
+///
+/// This is the advisor created for InliningAdvisorMode::MandatoryOnly.
+class MandatoryInlineAdvisor final : public InlineAdvisor {
+  /// Advise inlining \p CB only when getMandatoryKind() classifies it as
+  /// MandatoryInliningKind::Always and the caller is not the callee itself.
+  std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
+
+public:
+  // Explicit: an analysis manager must not silently convert into an advisor.
+  explicit MandatoryInlineAdvisor(FunctionAnalysisManager &FAM)
+      : InlineAdvisor(FAM) {}
+
+  /// Classification of a call site by the attribute-based inlining decision.
+  enum class MandatoryInliningKind {
+    NotMandatory, ///< No attribute-based decision exists.
+    Always,       ///< The attribute-based decision is a success.
+    Never         ///< The attribute-based decision is a failure.
+  };
+
+  /// Classify \p CB according to llvm::getAttributeBasedInliningDecision.
+  /// \p ORE is not used by the current implementation.
+  static MandatoryInliningKind getMandatoryKind(CallBase &CB,
+                                                FunctionAnalysisManager &FAM,
+                                                OptimizationRemarkEmitter &ORE);
+};
+
 /// The InlineAdvisorAnalysis is a module pass because the InlineAdvisor
 /// needs to capture state right before inlining commences over a module.
 class InlineAdvisorAnalysis : public AnalysisInfoMixin<InlineAdvisorAnalysis> {
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -344,7 +344,12 @@
   /// Construct the module pipeline that performs inlining as well as
   /// the inlining-driven cleanups.
+  ///
+  /// If \p MandatoryOnly is true, the inliner is driven by
+  /// InliningAdvisorMode::MandatoryOnly and the passes added after
+  /// PostOrderFunctionAttrsPass (e.g. argument promotion at O3) are skipped.
   ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level,
-                                                ThinLTOPhase Phase);
+                                                ThinLTOPhase Phase,
+                                                bool MandatoryOnly);
 
   /// Construct the core LLVM module optimization pipeline.
   ///
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -158,6 +158,9 @@
   case InliningAdvisorMode::Default:
     Advisor.reset(new DefaultInlineAdvisor(FAM, Params));
     break;
+  case InliningAdvisorMode::MandatoryOnly:
+    Advisor.reset(new MandatoryInlineAdvisor(FAM));
+    break;
   case InliningAdvisorMode::Development:
 #ifdef LLVM_HAVE_TF_API
     Advisor =
@@ -437,3 +440,45 @@
     return Remark;
   });
 }
+
+/// Recommend inlining \p CB only if its attribute-based classification is
+/// MandatoryInliningKind::Always and the call is not directly recursive.
+std::unique_ptr<InlineAdvice> MandatoryInlineAdvisor::getAdvice(CallBase &CB) {
+  // NOTE(review): getCalledFunction() is dereferenced unchecked here and in
+  // getMandatoryKind(); this assumes only direct calls are passed - confirm.
+  auto &Caller = *CB.getCaller();
+  auto &Callee = *CB.getCalledFunction();
+  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
+
+  bool Advice = MandatoryInliningKind::Always ==
+                    MandatoryInlineAdvisor::getMandatoryKind(CB, FAM, ORE) &&
+                &Caller != &Callee;
+  return std::make_unique<InlineAdvice>(this, CB, ORE, Advice);
+}
+
+/// Map the result of llvm::getAttributeBasedInliningDecision for \p CB onto a
+/// MandatoryInliningKind: Always when a decision exists and is a success,
+/// Never when it exists and is not, NotMandatory when there is no decision.
+/// \p ORE is currently unused.
+MandatoryInlineAdvisor::MandatoryInliningKind
+MandatoryInlineAdvisor::getMandatoryKind(CallBase &CB,
+                                         FunctionAnalysisManager &FAM,
+                                         OptimizationRemarkEmitter &ORE) {
+  auto &Callee = *CB.getCalledFunction();
+
+  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+
+  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
+
+  auto TrivialDecision =
+      llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI);
+
+  // No attribute-based decision was produced for this call site.
+  if (!TrivialDecision.hasValue())
+    return MandatoryInliningKind::NotMandatory;
+  if (TrivialDecision->isSuccess())
+    return MandatoryInliningKind::Always;
+  return MandatoryInliningKind::Never;
+}
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -175,25 +175,20 @@
   auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
     return FAM.getResult<AssumptionAnalysis>(F);
   };
-  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
-    return FAM.getResult<TargetLibraryAnalysis>(F);
-  };
-
   auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
 
-  auto TrivialDecision =
-      llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI);
-
+  auto MandatoryKind = MandatoryInlineAdvisor::getMandatoryKind(CB, FAM, ORE);
   // If this is a "never inline" case, there won't be any changes to internal
   // state we need to track, so we can just return the base InlineAdvice, which
   // will do nothing interesting.
   // Same thing if this is a recursive case.
-  if ((TrivialDecision.hasValue() && !TrivialDecision->isSuccess()) ||
+  if (MandatoryKind == MandatoryInlineAdvisor::MandatoryInliningKind::Never ||
       &Caller == &Callee)
     return std::make_unique<InlineAdvice>(this, CB, ORE, false);
 
-  bool Mandatory = TrivialDecision.hasValue() && TrivialDecision->isSuccess();
+  bool Mandatory =
+      MandatoryKind == MandatoryInlineAdvisor::MandatoryInliningKind::Always;
 
   // If we need to stop, we won't want to track anymore any state changes, so
   // we just return the base InlineAdvice, which acts as a noop.
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -299,6 +299,11 @@
                                        cl::Hidden, cl::ZeroOrMore,
                                        cl::desc("Enable memory profiler"));
 
+static cl::opt<bool> PerformMandatoryInliningsFirst(
+    "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("Perform mandatory inlinings module-wide, before performing "
+             "inlining."));
+
 PipelineTuningOptions::PipelineTuningOptions() {
   LoopInterleaving = true;
   LoopVectorization = true;
@@ -906,7 +911,8 @@
 }
 
 ModuleInlinerWrapperPass
-PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase) {
+PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase,
+                                  bool MandatoryOnly) {
   InlineParams IP = getInlineParamsFromOptLevel(Level);
   if (Phase == PassBuilder::ThinLTOPhase::PreLink && PGOOpt &&
       PGOOpt->Action == PGOOptions::SampleUse)
@@ -915,8 +921,10 @@
   if (PGOOpt)
     IP.EnableDeferral = EnablePGOInlineDeferral;
 
-  ModuleInlinerWrapperPass MIWP(IP, DebugLogging, UseInlineAdvisor,
-                                MaxDevirtIterations);
+  ModuleInlinerWrapperPass MIWP(
+      IP, DebugLogging,
+      (MandatoryOnly ? InliningAdvisorMode::MandatoryOnly : UseInlineAdvisor),
+      MaxDevirtIterations);
 
   // Require the GlobalsAA analysis for the module so we can query it within
   // the CGSCC pipeline.
@@ -946,6 +954,9 @@
   // Now deduce any function attributes based in the current code.
   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
 
+  if (MandatoryOnly)
+    return MIWP;
+
   // When at O3 add argument promotion to the pass pipeline.
   // FIXME: It isn't at all clear why this should be limited to O3.
   if (Level == OptimizationLevel::O3)
@@ -1104,7 +1115,9 @@
   if (EnableSyntheticCounts && !PGOOpt)
     MPM.addPass(SyntheticCountsPropagation());
 
-  MPM.addPass(buildInlinerPipeline(Level, Phase));
+  if (PerformMandatoryInliningsFirst)
+    MPM.addPass(buildInlinerPipeline(Level, Phase, /*MandatoryOnly=*/true));
+  MPM.addPass(buildInlinerPipeline(Level, Phase, /*MandatoryOnly=*/false));
 
   if (EnableMemProfiler && Phase != ThinLTOPhase::PreLink) {
     MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -62,6 +62,10 @@
 MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
 MODULE_PASS("inferattrs", InferFunctionAttrsPass())
 MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
+MODULE_PASS("always-inliner-wrapper", ModuleInlinerWrapperPass(
+  getInlineParams(),
+  DebugLogging,
+  InliningAdvisorMode::MandatoryOnly))
 MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
 MODULE_PASS("instrorderfile", InstrOrderFilePass())
 MODULE_PASS("instrprof", InstrProfiling())
@@ -95,7 +99,8 @@
 MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass())
 MODULE_PASS("sample-profile", SampleProfileLoaderPass())
 MODULE_PASS("scc-oz-module-inliner",
-  buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None))
+  buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None,
+  /*MandatoryOnly=*/false))
 MODULE_PASS("loop-extract-single", LoopExtractorPass(1))
 MODULE_PASS("oz-module-optimizer",
   buildModuleOptimizationPipeline(OptimizationLevel::Oz, /*LTOPreLink*/false))
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -92,11 +92,6 @@
     DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                 cl::init(false), cl::Hidden);
 
-/// Flag to disable adding AlwaysInlinerPass to ModuleInlinerWrapperPass.
-/// TODO: remove this once this has is baked in for long enough.
-static cl::opt<bool> DisableAlwaysInlinerInModuleWrapper(
-    "disable-always-inliner-in-module-wrapper", cl::init(false), cl::Hidden);
-
 namespace {
 
 enum class InlinerFunctionImportStatsOpts {
@@ -1046,8 +1041,6 @@
     return PreservedAnalyses::all();
   }
 
-  if (!DisableAlwaysInlinerInModuleWrapper)
-    MPM.addPass(AlwaysInlinerPass());
   // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
   // to detect when we devirtualize indirect calls and iterate the SCC passes
   // in that case to try and catch knock-on inlining or function attrs
diff --git a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
--- a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
+++ b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
@@ -7,18 +7,18 @@
 ; REQUIRES: have_tf_api
 ;
 ; When the bounds are very wide ("no bounds"), all inlinings happen.
-; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS
 ;
 ; When the bounds are very restrictive, the first inlining happens but it's
 ; considered as "bad" (since it trips over the bounds) and its reward is a
 ; penalty. However, the mandatory inlining, which is considered next, happens.
 ; No other inlinings happend.
-; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
 ;
 ; With more restrictive bounds, the first inlining happens and is OK. The
 ; mandatory inlining happens next, and it trips over the bounds, which then
 ; forces no further inlinings.
-; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.1 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=RELAXED-BOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.1 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=RELAXED-BOUNDS
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-grtev4-linux-gnu"
diff --git a/llvm/test/Transforms/Inline/ML/bounds-checks.ll b/llvm/test/Transforms/Inline/ML/bounds-checks.ll
--- a/llvm/test/Transforms/Inline/ML/bounds-checks.ll
+++ b/llvm/test/Transforms/Inline/ML/bounds-checks.ll
@@ -4,7 +4,7 @@
 ; factor, we don't inline anymore.
 ; REQUIRES: have_tf_aot
 ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS
-; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=1.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-grtev4-linux-gnu"
diff --git a/llvm/test/Transforms/Inline/inline_stats.ll b/llvm/test/Transforms/Inline/inline_stats.ll
--- a/llvm/test/Transforms/Inline/inline_stats.ll
+++ b/llvm/test/Transforms/Inline/inline_stats.ll
@@ -6,8 +6,11 @@
 ; RUN: opt -S -passes=inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK
 ; RUN: opt -S -passes=inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK
 
-; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-BASIC -check-prefix=WRAPPER
-; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-VERBOSE -check-prefix=WRAPPER
+; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK
+; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK
+
+; RUN: opt -S -passes=always-inliner-wrapper,inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-BASIC -check-prefix=WRAPPER
+; RUN: opt -S -passes=always-inliner-wrapper,inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-VERBOSE -check-prefix=WRAPPER
 
 ; CHECK: ------- Dumping inliner stats for [<stdin>] -------
 ; CHECK-BASIC-NOT: -- List of inlined functions: