Index: include/llvm/Analysis/CGSCCPassManager.h
===================================================================
--- include/llvm/Analysis/CGSCCPassManager.h
+++ include/llvm/Analysis/CGSCCPassManager.h
@@ -331,7 +331,9 @@
           InvalidSCCSet, nullptr, nullptr};
 
       PreservedAnalyses PA = PreservedAnalyses::all();
-      for (LazyCallGraph::RefSCC &InitialRC : CG.postorder_ref_sccs()) {
+      for (auto RCI = CG.postorder_ref_scc_begin(),
+                RCE = CG.postorder_ref_scc_end();
+           RCI != RCE;) {
         assert(RCWorklist.empty() &&
                "Should always start with an empty RefSCC worklist");
         // The postorder_ref_sccs range we are walking is lazily constructed, so
@@ -342,7 +344,10 @@
         // to update as the program is simplified and allows us to have greater
         // cache locality as forming a RefSCC touches all the parts of all the
         // functions within that RefSCC.
-        RCWorklist.insert(&InitialRC);
+        //
+        // We also eagerly increment the iterator to the next position because
+        // the CGSCC passes below may delete the current RefSCC.
+        RCWorklist.insert(&*RCI++);
 
         do {
           LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
@@ -419,6 +424,10 @@
               dbgs() << "Re-running SCC passes after a refinement of the "
                         "current SCC: "
                      << *UR.UpdatedC << "\n";
+
+            // Note that both `C` and `RC` may at this point refer to deleted,
+            // invalid SCCs and RefSCCs, respectively. But we will short-circuit
+            // the processing when we check them in the loop above.
           } while (UR.UpdatedC);
         } while (!CWorklist.empty());
Index: include/llvm/Transforms/IPO/Inliner.h
===================================================================
--- include/llvm/Transforms/IPO/Inliner.h
+++ include/llvm/Transforms/IPO/Inliner.h
@@ -1,4 +1,4 @@
-//===- InlinerPass.h - Code common to all inliners --------------*- C++ -*-===//
+//===- Inliner.h - Inliner pass and infrastructure --------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -6,19 +6,14 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines a simple policy-based bottom-up inliner. This file
-// implements all of the boring mechanics of the bottom-up inlining, while the
-// subclass determines WHAT to inline, which is the much more interesting
-// component.
-//
-//===----------------------------------------------------------------------===//
 
-#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H
-#define LLVM_TRANSFORMS_IPO_INLINERPASS_H
+#ifndef LLVM_TRANSFORMS_IPO_INLINER_H
+#define LLVM_TRANSFORMS_IPO_INLINER_H
 
+#include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
 
@@ -29,13 +24,13 @@
 class InlineCost;
 class OptimizationRemarkEmitter;
 class ProfileSummaryInfo;
-template <class PtrType, unsigned SmallSize> class SmallPtrSet;
 
 /// This class contains all of the helper code which is used to perform the
-/// inlining operations that do not depend on the policy.
-struct Inliner : public CallGraphSCCPass {
-  explicit Inliner(char &ID);
-  explicit Inliner(char &ID, bool InsertLifetime);
+/// inlining operations that do not depend on the policy. It contains the core
+/// bottom-up inlining infrastructure that specific inliner passes use.
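+///
+/// As a hedged illustration (this sketch is editorial, not part of the
+/// patch, and `MyInliner` is a hypothetical subclass), a derived legacy
+/// inliner typically supplies only the policy by overriding the
+/// getInlineCost hook:
+///
+///   struct MyInliner : public LegacyInlinerBase {
+///     static char ID;
+///     MyInliner() : LegacyInlinerBase(ID) {}
+///
+///     // Policy hook: decide what to inline. Everything else (the SCC walk,
+///     // call graph updates, dead function removal) lives in the base class.
+///     InlineCost getInlineCost(CallSite CS) override {
+///       return InlineCost::getAlways();
+///     }
+///   };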
+struct LegacyInlinerBase : public CallGraphSCCPass {
+  explicit LegacyInlinerBase(char &ID);
+  explicit LegacyInlinerBase(char &ID, bool InsertLifetime);
 
   /// For this class, we declare that we require and preserve the call graph.
   /// If the derived class implements this method, it should always explicitly
@@ -82,6 +77,32 @@
   ImportedFunctionsInliningStatistics ImportedFunctionsStats;
 };
 
+/// The inliner pass for the new pass manager.
+///
+/// This pass wires together the inlining utilities and the inline cost
+/// analysis into a CGSCC pass. It considers every call in every function in
+/// the SCC and tries to inline if profitable. It can be tuned with a number of
+/// parameters to control what cost model is used and what tradeoffs are made
+/// when making the decision.
+///
+/// It should be noted that the legacy inliners do considerably more than this
+/// inliner pass does. They provide logic for manually merging allocas, and
+/// doing considerable DCE, including the DCE of dead functions. This pass
+/// makes every attempt to be simpler. DCE of functions requires complex
+/// reasoning about comdat groups, etc. Instead, it is expected that other more
+/// focused passes be composed to achieve the same end result.
+class InlinerPass : public PassInfoMixin<InlinerPass> {
+public:
+  InlinerPass(InlineParams Params = getInlineParams())
+      : Params(std::move(Params)) {}
+
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+private:
+  InlineParams Params;
+};
+
 } // End llvm namespace
 
 #endif
Index: include/llvm/Transforms/Utils/Cloning.h
===================================================================
--- include/llvm/Transforms/Utils/Cloning.h
+++ include/llvm/Transforms/Utils/Cloning.h
@@ -194,9 +194,17 @@
   /// inlined from the callee. This is only filled in if CG is non-null.
   SmallVector<WeakVH, 8> InlinedCalls;
 
+  /// All of the new call sites inlined into the caller.
+  ///
+  /// `InlineFunction` fills this in by scanning the inlined instructions, but
+  /// only if CG is null. If CG is non-null, the value-handle vector
+  /// `InlinedCalls` above is used instead.
+  SmallVector<CallSite, 8> InlinedCallSites;
+
   void reset() {
     StaticAllocas.clear();
     InlinedCalls.clear();
+    InlinedCallSites.clear();
   }
 };
 
@@ -210,6 +218,10 @@
 /// exists in the instruction stream. Similarly this will inline a recursive
 /// function by one level.
 ///
+/// Note that while this routine is allowed to clean up and optimize the
+/// *inlined* code to minimize the actual inserted code, it must not delete
+/// code in the caller as users of this routine may have pointers to
+/// instructions in the caller that need to remain stable.
 bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
                     AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
 bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
Index: lib/Analysis/InlineCost.cpp
===================================================================
--- lib/Analysis/InlineCost.cpp
+++ lib/Analysis/InlineCost.cpp
@@ -638,7 +638,7 @@
   bool HotCallsite = false;
   uint64_t TotalWeight;
-  if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
+  if (PSI && CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
       PSI->isHotCount(TotalWeight)) {
     HotCallsite = true;
   }
@@ -647,14 +647,14 @@
   // when it would increase the threshold and the caller does not need to
   // minimize its size.
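+  // (Editorial note, hedged: with this change PSI may legitimately be null,
+  // e.g. when the new pass manager's inliner finds no cached
+  // ProfileSummaryAnalysis result, so every PSI use in this function is now
+  // guarded.)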
   bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
-                    PSI->isFunctionEntryHot(&Callee);
+                    (PSI && PSI->isFunctionEntryHot(&Callee));
   if (InlineHint && !Caller->optForMinSize())
     Threshold = MaxIfValid(Threshold, Params.HintThreshold);
 
   if (HotCallsite && !Caller->optForMinSize())
     Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
 
-  bool ColdCallee = PSI->isFunctionEntryCold(&Callee);
+  bool ColdCallee = PSI && PSI->isFunctionEntryCold(&Callee);
   // For cold callees, use the ColdThreshold knob if it is available and reduces
   // the threshold.
   if (ColdCallee)
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -72,6 +72,7 @@
 #include "llvm/Transforms/IPO/GlobalOpt.h"
 #include "llvm/Transforms/IPO/GlobalSplit.h"
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
 #include "llvm/Transforms/IPO/PartialInlining.h"
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -87,6 +87,7 @@
 #endif
 CGSCC_PASS("invalidate", InvalidateAllAnalysesPass())
 CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
+CGSCC_PASS("inline", InlinerPass())
 CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
 #undef CGSCC_PASS
Index: lib/Transforms/IPO/AlwaysInliner.cpp
===================================================================
--- lib/Transforms/IPO/AlwaysInliner.cpp
+++ lib/Transforms/IPO/AlwaysInliner.cpp
@@ -26,7 +26,8 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 
 using namespace llvm;
@@ -62,14 +63,15 @@
 ///
 /// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner
 /// base class to provide several facilities such as array alloca merging.
-class AlwaysInlinerLegacyPass : public Inliner {
+class AlwaysInlinerLegacyPass : public LegacyInlinerBase {
 public:
-  AlwaysInlinerLegacyPass() : Inliner(ID, /*InsertLifetime*/ true) {
+  AlwaysInlinerLegacyPass() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) {
     initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
-  AlwaysInlinerLegacyPass(bool InsertLifetime) : Inliner(ID, InsertLifetime) {
+  AlwaysInlinerLegacyPass(bool InsertLifetime)
+      : LegacyInlinerBase(ID, InsertLifetime) {
     initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
   }
Index: lib/Transforms/IPO/InlineSimple.cpp
===================================================================
--- lib/Transforms/IPO/InlineSimple.cpp
+++ lib/Transforms/IPO/InlineSimple.cpp
@@ -25,7 +25,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO/Inliner.h"
 
 using namespace llvm;
 
@@ -38,16 +38,17 @@
 /// The common implementation of the inlining logic is shared between this
 /// inliner pass and the always inliner pass. The two passes use different cost
 /// analyses to determine when to inline.
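+///
+/// (Editorial sketch: SimpleInliner overrides the getInlineCost hook with the
+/// InlineParams-driven cost analysis, while AlwaysInlinerLegacyPass's
+/// override effectively returns InlineCost::getAlways() for viable callees
+/// marked alwaysinline; the shared mechanics live in LegacyInlinerBase.)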
-class SimpleInliner : public Inliner {
+class SimpleInliner : public LegacyInlinerBase {
 
   InlineParams Params;
 
 public:
-  SimpleInliner() : Inliner(ID), Params(llvm::getInlineParams()) {
+  SimpleInliner() : LegacyInlinerBase(ID), Params(llvm::getInlineParams()) {
     initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
   }
 
-  explicit SimpleInliner(InlineParams Params) : Inliner(ID), Params(Params) {
+  explicit SimpleInliner(InlineParams Params)
+      : LegacyInlinerBase(ID), Params(Params) {
     initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
   }
 
@@ -101,10 +102,10 @@
 bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
   TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
-  return Inliner::runOnSCC(SCC);
+  return LegacyInlinerBase::runOnSCC(SCC);
 }
 
 void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetTransformInfoWrapperPass>();
-  Inliner::getAnalysisUsage(AU);
+  LegacyInlinerBase::getAnalysisUsage(AU);
 }
Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -26,12 +27,12 @@
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
@@ -76,15 +77,16 @@
     cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
 } // namespace
 
-Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
+LegacyInlinerBase::LegacyInlinerBase(char &ID)
+    : CallGraphSCCPass(ID), InsertLifetime(true) {}
 
-Inliner::Inliner(char &ID, bool InsertLifetime)
+LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
     : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
 
 /// For this class, we declare that we require and preserve the call graph.
 /// If the derived class implements this method, it should
 /// always explicitly call the implementation here.
-void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<AssumptionCacheTracker>();
   AU.addRequired<ProfileSummaryInfoWrapperPass>();
   AU.addRequired<TargetLibraryInfoWrapperPass>();
@@ -409,13 +411,13 @@
   return false;
 }
 
-bool Inliner::doInitialization(CallGraph &CG) {
+bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
   if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
     ImportedFunctionsStats.setModuleInfo(CG.getModule());
   return false; // No changes to CallGraph.
 }
 
-bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
   if (skipSCC(SCC))
     return false;
   return inlineCalls(SCC);
@@ -630,7 +632,7 @@
   return Changed;
 }
 
-bool Inliner::inlineCalls(CallGraphSCC &SCC) {
+bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
   ACT = &getAnalysis<AssumptionCacheTracker>();
   PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
@@ -655,7 +657,7 @@
 
 /// Remove now-dead linkonce functions at the end of
 /// processing to avoid breaking the SCC traversal.
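+/// (Editorial note: deletion is deferred to doFinalization because erasing
+/// functions while the pass manager is still iterating over the CallGraphSCC
+/// would invalidate the traversal; collecting dead linkonce functions and
+/// removing them after the walk keeps the SCC iteration stable.)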
-bool Inliner::doFinalization(CallGraph &CG) {
+bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
   if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
     ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
                                 InlinerFunctionImportStatsOpts::Verbose);
@@ -663,7 +665,8 @@
 }
 
 /// Remove dead functions that are not included in DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
+                                            bool AlwaysInlineOnly) {
   SmallVector<CallGraphNode *, 16> FunctionsToRemove;
   SmallVector<Function *, 16> DeadFunctionsInComdats;
   SmallDenseMap<const Comdat *, int, 32> ComdatEntriesAlive;
@@ -765,3 +768,175 @@
   }
   return true;
 }
+
+PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
+                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
+                                   CGSCCUpdateResult &UR) {
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
+          .getManager();
+  const ModuleAnalysisManager &MAM =
+      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
+  bool Changed = false;
+
+  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
+  Module &M = *InitialC.begin()->getFunction().getParent();
+  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+
+  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+      [&](Function &F) -> AssumptionCache & {
+    return FAM.getResult<AssumptionAnalysis>(F);
+  };
+
+  // Set up the data structure used to plumb customization into the
+  // `InlineFunction` routine.
+  //
+  // FIXME: We should make the `GetAssumptionCache` below a passed-by-value
+  // lambda that is converted to a function_ref instead of the address of
+  // a std::function.
+  InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+
+  auto GetInlineCost = [&](CallSite CS) {
+    Function &Callee = *CS.getCalledFunction();
+    auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
+    return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
+  };
+
+  // We use a worklist of nodes to process so that we can handle the case where
+  // the SCC structure changes and some nodes are no longer part of the current
+  // SCC. We also need to use an updatable pointer for the SCC as a
+  // consequence.
+  SmallVector<LazyCallGraph::Node *, 16> Nodes;
+  for (auto &N : InitialC)
+    Nodes.push_back(&N);
+  auto *C = &InitialC;
+  auto *RC = &C->getOuterRefSCC();
+
+  // We also use a secondary worklist of calls to allow quickly continuing to
+  // inline through newly inlined call sites where possible.
+  SmallVector<CallSite, 16> Calls;
+
+  // Track a set vector of inlined callees so that we can augment the caller
+  // with all of their edges in the call graph before pruning out the ones that
+  // got simplified away.
+  SmallSetVector<Function *, 4> InlinedCallees;
+
+  // Track a set of dead functions to delete once finished with inlining calls.
+  // We defer deleting these to make it easier to handle the call graph
+  // updates.
+  SmallPtrSet<Function *, 16> DeadFunctions;
+
+  do {
+    auto &N = *Nodes.pop_back_val();
+    if (CG.lookupSCC(N) != C)
+      continue;
+    Function &F = N.getFunction();
+    if (F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+
+    // Get the remarks emission analysis for the caller.
+    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+    // We want to generally process call sites top-down in order for
+    // simplifications stemming from replacing the call with the returned value
+    // after inlining to be visible to subsequent inlining decisions. So we
+    // walk the function backwards and then process the back of the vector.
+    // FIXME: Using reverse is a really bad way to do this. Instead we should
+    // do an actual PO walk of the function body.
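+    //
+    // A hedged sketch of what that FIXME suggests (editorial, not part of
+    // the patch): walk the CFG in RPO so call sites are gathered in a true
+    // top-down order, e.g.:
+    //
+    //   ReversePostOrderTraversal<Function *> RPOT(&F);
+    //   for (BasicBlock *BB : RPOT)
+    //     for (Instruction &I : *BB)
+    //       if (auto CS = CallSite(&I))
+    //         ...collect CS...
+    //
+    // with the worklist then consumed from the front (or the collection
+    // reversed) to preserve the top-down processing described above.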
+    for (Instruction &I : reverse(instructions(F)))
+      if (auto CS = CallSite(&I))
+        if (Function *Callee = CS.getCalledFunction())
+          if (!Callee->isDeclaration())
+            Calls.push_back(CS);
+
+    bool DidInline = false;
+    while (!Calls.empty()) {
+      CallSite CS = Calls.pop_back_val();
+      Function &Callee = *CS.getCalledFunction();
+
+      // Check whether we want to inline this callsite.
+      if (!shouldInline(CS, GetInlineCost, ORE))
+        continue;
+
+      if (!InlineFunction(CS, IFI))
+        continue;
+      DidInline = true;
+      InlinedCallees.insert(&Callee);
+
+      // Add any new call sites to defined functions to the worklist.
+      for (CallSite &CS : reverse(IFI.InlinedCallSites))
+        if (Function *NewCallee = CS.getCalledFunction())
+          if (!NewCallee->isDeclaration())
+            Calls.push_back(CS);
+
+      // For local functions, check whether this makes the callee trivially
+      // dead. In that case, we can drop the body of the function eagerly
+      // which may reduce the number of callers of other functions to one,
+      // changing inline cost thresholds.
+      if (Callee.hasLocalLinkage()) {
+        // To check this we also need to nuke any dead constant uses (perhaps
+        // made dead by this operation on other functions).
+        Callee.removeDeadConstantUsers();
+        if (Callee.use_empty())
+          // Note that after this point, it is an error to do anything other
+          // than use the callee's address or delete it.
+          Callee.dropAllReferences();
+      }
+    }
+
+    if (!DidInline)
+      continue;
+    Changed = true;
+
+    // Add all the inlined callees' edges to the caller. These are by
+    // definition trivial edges as we already had a transitive call edge to the
+    // callee.
+    for (Function *InlinedCallee : InlinedCallees) {
+      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
+      for (LazyCallGraph::Edge &E : CalleeN)
+        if (E.isCall())
+          RC->insertTrivialCallEdge(N, *E.getNode());
+        else
+          RC->insertTrivialRefEdge(N, *E.getNode());
+    }
+
+    // At this point, since we have made changes we have at least removed
+    // a call instruction. However, in the process we do some incremental
+    // simplification of the surrounding code. This simplification can
+    // essentially do all of the same things as a function pass and we can
+    // re-use the exact same logic for updating the call graph to reflect the
+    // change.
+    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+    RC = &C->getOuterRefSCC();
+
+    // And remember all of the inlined callees that ended up dead after
+    // inlining.
+    for (Function *InlinedCallee : InlinedCallees)
+      if (InlinedCallee->hasLocalLinkage() && InlinedCallee->use_empty())
+        DeadFunctions.insert(InlinedCallee);
+    InlinedCallees.clear();
+  } while (!Nodes.empty());
+
+  // Now that we've finished inlining all of the calls across this SCC, delete
+  // all of the trivially dead functions, updating the call graph and the CGSCC
+  // pass manager in the process.
+  //
+  // Note that this walks a pointer set which has non-deterministic order but
+  // that is OK as all we do is delete things and add pointers to unordered
+  // sets.
+  for (Function *DeadF : DeadFunctions) {
+    // Get the necessary information out of the call graph and nuke the
+    // function there.
+    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
+    auto &DeadRC = DeadC.getOuterRefSCC();
+    CG.removeDeadFunction(*DeadF);
+
+    // Mark the relevant parts of the call graph as invalid so we don't visit
+    // them.
+    UR.InvalidatedSCCs.insert(&DeadC);
+    UR.InvalidatedRefSCCs.insert(&DeadRC);
+
+    // And delete the actual function from the module.
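+    // (Editorial note: erase() unlinks the function from the module's list
+    // and destroys it, so DeadF must not be dereferenced past this point.)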
+    M.getFunctionList().erase(DeadF);
+  }
+
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
Index: lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- lib/Transforms/Utils/InlineFunction.cpp
+++ lib/Transforms/Utils/InlineFunction.cpp
@@ -1644,8 +1644,16 @@
   }
 
   // Update the callgraph if requested.
-  if (IFI.CG)
+  if (IFI.CG) {
     UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+  } else {
+    // Otherwise just collect the raw call sites that were inlined.
+    for (BasicBlock &NewBB :
+         make_range(FirstNewBlock->getIterator(), Caller->end()))
+      for (Instruction &I : NewBB)
+        if (auto CS = CallSite(&I))
+          IFI.InlinedCallSites.push_back(CS);
+  }
 
   // For 'nodebug' functions, the associated DISubprogram is always null.
   // Conservatively avoid propagating the callsite debug location to
Index: test/Transforms/Inline/basictest.ll
===================================================================
--- test/Transforms/Inline/basictest.ll
+++ test/Transforms/Inline/basictest.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -inline -sroa -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline,function(sroa))' -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 define i32 @test1f(i32 %i) {
Index: test/Transforms/Inline/cgscc-update.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/cgscc-update.ll
@@ -0,0 +1,145 @@
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs,inline)' -S | FileCheck %s
+; This test runs the inliner and the function attribute deduction. It ensures
+; that when the inliner mutates the call graph it correctly updates the CGSCC
+; iteration so that we can compute refined function attributes. In this way it
+; is leveraging function attribute computation to observe correct call graph
+; updates.
+
+; Boring unknown external function call.
+; CHECK: declare void @unknown()
+declare void @unknown()
+
+; Sanity check: this should get annotated as readnone.
+; CHECK: Function Attrs: readnone
+; CHECK-NEXT: declare void @readnone()
+declare void @readnone() readnone
+
+; The 'test1_' prefixed functions are designed to trigger forming a new direct
+; call in the inlined body of the function. After that, we form a new SCC and
+; using that we can deduce precise function attrs.
+
+; This function should no longer exist.
+; CHECK-NOT: @test1_f()
+define internal void @test1_f(void()* %p) {
+entry:
+  call void %p()
+  ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test1_g()
+define void @test1_g() noinline {
+entry:
+  call void @test1_f(void()* @test1_h)
+  ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test1_h()
+define void @test1_h() noinline {
+entry:
+  call void @test1_g()
+  call void @readnone()
+  ret void
+}
+
+
+; The 'test2_' prefixed functions are designed to trigger forming a new direct
+; call due to RAUW-ing the returned value of a called function into the caller.
+; This too should form a new SCC which can then be reasoned about to compute
+; precise function attrs.
+
+; This function should no longer exist.
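+; (Editorial note: @test2_f simply returns @test2_h, so inlining it RAUWs the
+; returned function pointer into the indirect call in @test2_g, turning it
+; into a new direct call that the CGSCC update logic must pick up.)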
+; CHECK-NOT: @test2_f()
+define internal void()* @test2_f() {
+entry:
+  ret void()* @test2_h
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test2_g()
+define void @test2_g() noinline {
+entry:
+  %p = call void()* @test2_f()
+  call void %p()
+  ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test2_h()
+define void @test2_h() noinline {
+entry:
+  call void @test2_g()
+  call void @readnone()
+  ret void
+}
+
+
+; The 'test3_' prefixed functions are designed to inline in a way that causes
+; call sites to become trivially dead in the middle of inlining the call sites
+; of a single function, to make sure that the inliner does not get confused by
+; this pattern.
+
+; CHECK-NOT: @test3_maybe_unknown(
+define internal void @test3_maybe_unknown(i1 %b) {
+entry:
+  br i1 %b, label %then, label %exit
+
+then:
+  call void @unknown()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-NOT: @test3_f(
+define internal i1 @test3_f() {
+entry:
+  ret i1 false
+}
+
+; CHECK-NOT: @test3_g(
+define internal i1 @test3_g(i1 %b) {
+entry:
+  br i1 %b, label %then1, label %if2
+
+then1:
+  call void @test3_maybe_unknown(i1 true)
+  br label %if2
+
+if2:
+  %f = call i1 @test3_f()
+  br i1 %f, label %then2, label %exit
+
+then2:
+  call void @test3_maybe_unknown(i1 true)
+  br label %exit
+
+exit:
+  ret i1 false
+}
+
+; FIXME: Currently the inliner doesn't successfully mark this as readnone
+; because while it simplifies trivially dead CFGs when inlining callees it
+; doesn't simplify the caller's trivially dead CFG, and so we end up with a
+; dead block calling @unknown.
+; CHECK-NOT: Function Attrs: readnone
+; CHECK: define void @test3_h()
define void @test3_h() {
+entry:
+  %g = call i1 @test3_g(i1 false)
+  br i1 %g, label %then, label %exit
+
+then:
+  call void @test3_maybe_unknown(i1 true)
+  br label %exit
+
+exit:
+  call void @test3_maybe_unknown(i1 false)
+  ret void
+}
Index: test/Transforms/Inline/last-callsite.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/last-callsite.ll
@@ -0,0 +1,270 @@
+; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=0 -S | FileCheck %s
+
+; The 'test1_' prefixed functions test the basic 'last callsite' inline
+; threshold adjustment where we specifically inline the last call site of an
+; internal function regardless of cost.
+
+define internal void @test1_f() {
+entry:
+  %p = alloca i32
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  ret void
+}
+
+; Identical to @test1_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test1_f
+; isn't a good test for the different threshold based on the last call.
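+; (Editorial note: with -inline-threshold=0 the eight volatile stores make
+; this body unprofitable on cost alone; @test1_f above is inlined only via
+; the bonus applied to the sole remaining call to an internal function,
+; while @test1_g below keeps two call sites and thus gets no bonus.)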
+define internal void @test1_g() {
+entry:
+  %p = alloca i32
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  ret void
+}
+
+define void @test1() {
+; CHECK-LABEL: define void @test1()
+entry:
+  call void @test1_f()
+; CHECK-NOT: @test1_f
+
+  call void @test1_g()
+  call void @test1_g()
+; CHECK: call void @test1_g()
+; CHECK: call void @test1_g()
+
+  ret void
+}
+
+
+; The 'test2_' prefixed functions test that we can discover the last callsite
+; bonus after having inlined the prior call site. For this to work, we need
+; a callsite-dependent cost, so we have a trivial predicate guarding all of
+; the cost and set that in a particular direction.
+
+define internal void @test2_f(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Identical to @test2_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test2_f
+; isn't a good test for the different threshold based on the last call.
+define internal void @test2_g(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: define void @test2()
+entry:
+  ; The first call is trivial to inline due to the argument.
+  call void @test2_f(i1 false)
+; CHECK-NOT: @test2_f
+
+  ; The second call is too expensive to inline unless we update the number of
+  ; calls after inlining the first.
+  call void @test2_f(i1 true)
+; CHECK-NOT: @test2_f
+
+  ; Sanity check that two calls with the hard predicate remain uninlined.
+  call void @test2_g(i1 true)
+  call void @test2_g(i1 true)
+; CHECK: call void @test2_g(i1 true)
+; CHECK: call void @test2_g(i1 true)
+
+  ret void
+}
+
+
+; The 'test3_' prefixed functions are similar to the 'test2_' functions but the
+; relative order of the trivial and hard-to-inline callsites is reversed. This
+; checks that the order of calls isn't significant to whether we observe the
+; "last callsite" threshold difference because the next-to-last gets inlined.
+; FIXME: We don't currently catch this case.
+
+define internal void @test3_f(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Identical to @test3_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test3_f
+; isn't a good test for the different threshold based on the last call.
+define internal void @test3_g(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @test3() {
+; CHECK-LABEL: define void @test3()
+entry:
+  ; The first call is too expensive to inline unless we update the number of
+  ; calls after inlining the second.
+  call void @test3_f(i1 true)
+; FIXME: We should inline this call without iteration.
+; CHECK: call void @test3_f(i1 true)
+
+  ; But the second call is trivial to inline due to the argument.
+  call void @test3_f(i1 false)
+; CHECK-NOT: @test3_f
+
+  ; Sanity check that two calls with the hard predicate remain uninlined.
+  call void @test3_g(i1 true)
+  call void @test3_g(i1 true)
+; CHECK: call void @test3_g(i1 true)
+; CHECK: call void @test3_g(i1 true)
+
+  ret void
+}
+
+
+; The 'test4_' prefixed functions test that we can discover the last callsite
+; bonus after having inlined the prior call site. For this to work, we need
+; a callsite-dependent cost, so we have a trivial predicate guarding all of
+; the cost and set that in a particular direction.
+
+define internal void @test4_f(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Identical to @test4_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test4_f
+; isn't a good test for the different threshold based on the last call.
+define internal void @test4_g(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @test4() {
+; CHECK-LABEL: define void @test4()
+entry:
+  ; The first call is trivial to inline due to the argument. However this
+  ; argument also carefully uses the function being called as part of a complex
+  ; constant expression. Merely inlining and deleting the call isn't enough to
+  ; drop the use count here; we need to GC the dead constant expression as
+  ; well.
+  call void @test4_f(i1 icmp ne (i64 ptrtoint (void (i1)* @test4_f to i64), i64 ptrtoint (void (i1)* @test4_f to i64)))
+; CHECK-NOT: @test4_f
+
+  ; The second call is too expensive to inline unless we update the number of
+  ; calls after inlining the first.
+  call void @test4_f(i1 true)
+; CHECK-NOT: @test4_f
+
+  ; And check that a single call to a function which is used by a complex
+  ; constant expression cannot be inlined because the constant expression forms
+  ; a second use. If this part starts failing we need to use more complex
+  ; constant expressions to reference a particular function with them.
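+  ; (Editorial note: the icmp ne of two identical ptrtoint expressions
+  ; constant-folds to i1 false, which is why the store below is checked as
+  ; 'store volatile i1 false'; until it is folded away, the expression is a
+  ; second use of @test4_g that blocks the last-call-site bonus.)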
+  %sink = alloca i1
+  store volatile i1 icmp ne (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint (void (i1)* @test4_g to i64)), i1* %sink
+  call void @test4_g(i1 true)
+; CHECK: store volatile i1 false
+; CHECK: call void @test4_g(i1 true)
+
+  ret void
+}
Index: test/Transforms/Inline/nested-inline.ll
===================================================================
--- test/Transforms/Inline/nested-inline.ll
+++ test/Transforms/Inline/nested-inline.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
 ; Test that bar and bar2 are both inlined throughout and removed.
 @A = weak global i32 0		; <i32*> [#uses=1]
 @B = weak global i32 0		; <i32*> [#uses=1]