Index: include/llvm/Analysis/CGSCCPassManager.h
===================================================================
--- include/llvm/Analysis/CGSCCPassManager.h
+++ include/llvm/Analysis/CGSCCPassManager.h
@@ -91,7 +91,10 @@
 
 #include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueHandle.h"
 
 namespace llvm {
 
@@ -607,6 +610,199 @@
   return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass),
                                                    DebugLogging);
 }
+
+/// A helper that repeats an SCC pass each time an indirect call is refined to
+/// a direct call by that pass.
+///
+/// While the CGSCC pass manager works to re-visit SCCs and RefSCCs as they
+/// change shape, we may also want to repeat an SCC pass if it simply refines
+/// an indirect call to a direct call, even if doing so does not alter the
+/// shape of the graph. Note that this only pertains to direct calls to
+/// functions where IPO across the SCC may be able to compute more precise
+/// results. For intrinsics, we assume scalar optimizations already can fully
+/// reason about them.
+///
+/// This repeatition has the potential to be very large however, as each one
+/// might refine a single call site. As a consequence, in practice we use an
+/// upper bound on the number of repetitions to limit things.
+template <typename PassT>
+class DevirtSCCRepeatedPass
+    : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> {
+public:
+  explicit DevirtSCCRepeatedPass(PassT Pass, int MaxRepetitions,
+                                  bool DebugLogging = false)
+      : Pass(std::move(Pass)), MaxRepetitions(MaxRepetitions),
+        DebugLogging(DebugLogging) {}
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
+  DevirtSCCRepeatedPass(const DevirtSCCRepeatedPass &Arg)
+      : Pass(Arg.Pass), MaxRepetitions(Arg.MaxRepetitions),
+        DebugLogging(Arg.DebugLogging) {}
+  DevirtSCCRepeatedPass(DevirtSCCRepeatedPass &&Arg)
+      : Pass(std::move(Arg.Pass)), MaxRepetitions(Arg.MaxRepetitions),
+        DebugLogging(Arg.DebugLogging) {}
+  friend void swap(DevirtSCCRepeatedPass &LHS,
+                   DevirtSCCRepeatedPass &RHS) {
+    using std::swap;
+    swap(LHS.Pass, RHS.Pass);
+    swap(LHS.MaxRepetitions, RHS.MaxRepetitions);
+    swap(LHS.DebugLogging, RHS.DebugLogging);
+  }
+  DevirtSCCRepeatedPass &operator=(DevirtSCCRepeatedPass RHS) {
+    swap(*this, RHS);
+    return *this;
+  }
+
+  /// Runs the wrapped pass up to \c MaxRepetitions on the SCC, iterating
+  /// whenever an indirect call is refined.
+  PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR) {
+    PreservedAnalyses PA = PreservedAnalyses::all();
+
+    // The SCC may be refined while we are running passes over it, so set up
+    // a pointer that we can update.
+    LazyCallGraph::SCC *C = &InitialC;
+
+    // Collect value handles for all of the indirect call sites.
+    SmallVector<WeakVH, 8> CallHandles;
+
+    // Struct to track the counts of direct and indirect calls in each function
+    // of the SCC.
+    struct CallCount {
+      int Direct;
+      int Indirect;
+    };
+
+    // Put value handles on all of the indirect calls and return the number of
+    // direct calls for each function in the SCC.
+    auto ScanSCC = [](LazyCallGraph::SCC &C,
+                      SmallVectorImpl<WeakVH> &CallHandles) {
+      assert(CallHandles.empty() && "Must start with a clear set of handles.");
+
+      SmallVector<CallCount, 4> CallCounts;
+      for (LazyCallGraph::Node &N : C) {
+        CallCounts.push_back({0, 0});
+        CallCount &Count = CallCounts.back();
+        for (Instruction &I : instructions(N.getFunction()))
+          if (auto CS = CallSite(&I)) {
+            if (CS.getCalledFunction()) {
+              ++Count.Direct;
+            } else {
+              ++Count.Indirect;
+              CallHandles.push_back(WeakVH(&I));
+            }
+          }
+      }
+
+      return CallCounts;
+    };
+
+    // Populate the initial call handles and get the initial call counts.
+    auto CallCounts = ScanSCC(*C, CallHandles);
+
+    for (int i = 0;; ++i) {
+      PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR);
+
+      // If the SCC structure has changed, bail immediately and let the outer
+      // CGSCC layer handle any iteration to reflect the refined structure.
+      if (UR.UpdatedC && UR.UpdatedC != C) {
+        PA.intersect(std::move(PassPA));
+        break;
+      }
+
+      // Check that we didn't miss any update scenario.
+      assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
+      assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+      assert((int)CallCounts.size() == C->size() &&
+             "Cannot have changed the size of the SCC!");
+
+      // Check whether any of the handles were devirtualized.
+      auto IsDevirtualizedHandle = [](WeakVH &CallH) {
+        if (!CallH)
+          return false;
+        auto CS = CallSite(CallH);
+        if (!CS)
+          return false;
+
+        // If the call is still indirect, leave it alone.
+        Function *F = CS.getCalledFunction();
+        if (!F)
+          return false;
+
+        // We now have a direct call where previously we had an indirect call,
+        // so iterate to process this devirtualization site.
+        return true;
+      };
+      bool Devirt = any_of(CallHandles, IsDevirtualizedHandle);
+
+      // Rescan to build up a new set of handles and count how many direct
+      // calls remain. If we decide to iterate, this also sets up the input to
+      // the next iteration.
+      CallHandles.clear();
+      auto NewCallCounts = ScanSCC(*C, CallHandles);
+
+      // If we haven't found an explicit devirtualization already see if we
+      // have decreased the number of indirect calls and increased the number
+      // of direct calls for any function in the SCC. This can be fooled by all
+      // manner of transformations such as DCE and other things, but seems to
+      // work well in practice.
+      if (!Devirt)
+        for (int i = 0, Size = C->size(); i < Size; ++i)
+          if (CallCounts[i].Indirect > NewCallCounts[i].Indirect &&
+              CallCounts[i].Direct < NewCallCounts[i].Direct) {
+            Devirt = true;
+            break;
+          }
+
+      if (!Devirt) {
+        PA.intersect(std::move(PassPA));
+        break;
+      }
+
+      // Otherwise, if we've already hit our max, we're done.
+      if (i >= MaxRepetitions) {
+        if (DebugLogging)
+          dbgs() << "Found another devirtualization after hitting the max "
+                    "number of repetitions ("
+                 << MaxRepetitions << ") on SCC: " << *C << "\n";
+        PA.intersect(std::move(PassPA));
+        break;
+      }
+
+      if (DebugLogging)
+        dbgs() << "Repeating an SCC pass after finding a devirtualization in: "
+               << *C << "\n";
+
+      // Move over the new call counts in preparation for iterating.
+      CallCounts = std::move(NewCallCounts);
+
+      // Update the analysis manager with each run and intersect the total set
+      // of preserved analyses so we're ready to iterate.
+      AM.invalidate(*C, PassPA);
+      PA.intersect(std::move(PassPA));
+    }
+
+    // Note that we don't add any preserved entries here unlike a more normal
+    // "pass manager" because we only handle invalidation *between* iterations,
+    // not after the last iteration.
+    return PA;
+  }
+
+private:
+  PassT Pass;
+  int MaxRepetitions;
+  bool DebugLogging;
+};
+
+/// \brief A function to deduce a function pass type and wrap it in the
+/// templated adaptor.
+template <typename PassT>
+DevirtSCCRepeatedPass<PassT>
+createDevirtSCCRepeatedPass(PassT Pass, int MaxRepetitions,
+                             bool DebugLogging = false) {
+  return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxRepetitions,
+                                       DebugLogging);
+}
 }
 
 #endif
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -582,6 +582,15 @@
   return Count;
 }
 
+static Optional<int> parseDevirtPassName(StringRef Name) {
+  if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
+    return None;
+  int Count;
+  if (Name.getAsInteger(0, Count) || Count <= 0)
+    return None;
+  return Count;
+}
+
 static bool isModulePassName(StringRef Name) {
   // Manually handle aliases for pre-configured pipeline fragments.
   if (Name.startswith("default") || Name.startswith("lto"))
@@ -620,6 +629,8 @@
   // Explicitly handle custom-parsed pass names.
   if (parseRepeatPassName(Name))
     return true;
+  if (parseDevirtPassName(Name))
+    return true;
 
 #define CGSCC_PASS(NAME, CREATE_PASS)                                          \
   if (Name == NAME)                                                            \
@@ -863,6 +874,15 @@
       CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
       return true;
     }
+    if (auto MaxRepetitions = parseDevirtPassName(Name)) {
+      CGSCCPassManager NestedCGPM(DebugLogging);
+      if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
+                                  DebugLogging))
+        return false;
+      CGPM.addPass(createDevirtSCCRepeatedPass(std::move(NestedCGPM),
+                                               *MaxRepetitions, DebugLogging));
+      return true;
+    }
     // Normal passes can't have pipelines.
     return false;
   }
Index: test/Other/cgscc-devirt-iteration.ll
===================================================================
--- /dev/null
+++ test/Other/cgscc-devirt-iteration.ll
@@ -0,0 +1,107 @@
+; The CGSCC pass manager includes an SCC iteration utility that tracks indirect
+; calls that are turned into direct calls (devirtualization) and re-visits the
+; SCC to expose those calls to the SCC-based IPO passes. We trigger
+; devirtualization here with GVN which forwards a store through a load and to
+; an indirect call.
+;
+; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn,instcombine))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=BEFORE
+; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(devirt<1>(function-attrs,function(gvn,instcombine)))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER1
+; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(devirt<2>(function-attrs,function(gvn,instcombine)))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER2
+
+declare void @readnone() readnone
+; CHECK: Function Attrs: readnone
+; CHECK: declare void @readnone()
+
+declare void @unknown()
+; CHECK-NOT: Function Attrs
+; CHECK: declare void @unknown()
+
+; The @test1 function checks that when we refine an indirect call to a direct
+; call we revisit the SCC passes to reflect the more precise information. This
+; is the basic functionality.
+
+define void @test1() {
+; BEFORE-NOT: Function Attrs
+; AFTER: Function Attrs: readnone
+; CHECK: define void @test1()
+entry:
+  %fptr = alloca void ()*
+  store void ()* @readnone, void ()** %fptr
+  %f = load void ()*, void ()** %fptr
+  call void %f()
+  ret void
+}
+
+; The @test2_* functions check that when we need multiple (in this case 2)
+; repeatitions to compute some state that is incrementally exposed with each
+; one, the limit on repeatitions is enforced. So we make progress with
+; one repeatition but not as much as with three.
+;
+; This is somewhat awkward to test because we have to contrive to have a state
+; repeatition triggered and observed with very few passes. The technique here
+; is to have one indirect call that can only be resolved when the entire SCC is
+; deduced as readonce, and mark that indirect call at the call site as readonce
+; to make that possible.
+
+declare void @readnone_with_arg(void ()**) readnone
+; CHECK: Function Attrs: readnone
+; CHECK: declare void @readnone_with_arg(void ()**)
+
+define void @test2_a(void ()** %ignore) {
+; BEFORE-NOT: Function Attrs
+; AFTER1: Function Attrs: readonly
+; AFTER2: Function Attrs: readnone
+; BEFORE: define void @test2_a(void ()** %ignore)
+; AFTER: define void @test2_a(void ()** readnone %ignore)
+entry:
+  %f1ptr = alloca void (void ()**)*
+  store void (void ()**)* @readnone_with_arg, void (void ()**)** %f1ptr
+  %f1 = load void (void ()**)*, void (void ()**)** %f1ptr
+  call void %f1(void ()** %ignore)
+; CHECK: call void @readnone_with_arg(void ()** %ignore)
+
+  ; Bogus call to test2_b to make this a cycle.
+  call void @test2_b()
+
+  ret void
+}
+
+define void @test2_b() {
+; BEFORE-NOT: Function Attrs
+; AFTER1: Function Attrs: readonly
+; AFTER2: Function Attrs: readnone
+; CHECK: define void @test2_b()
+entry:
+  %f2ptr = alloca void ()*
+  store void ()* @readnone, void ()** %f2ptr
+  ; Call the other function here to prevent forwarding until the SCC has had
+  ; function attrs deduced.
+  call void @test2_a(void ()** %f2ptr)
+
+  %f2 = load void ()*, void ()** %f2ptr
+  call void %f2() readonly
+; BEFORE: call void %f2()
+; AFTER: call void @readnone()
+
+  ret void
+}
+
+declare i8* @memcpy(i8*, i8*, i64)
+; CHECK-NOT: Function Attrs
+; CHECK: declare i8* @memcpy(i8*, i8*, i64)
+
+; The @test3 function checks that when we refine an indirect call to an
+; intrinsic we still revisit the SCC pass. This also covers cases where the
+; value handle itself doesn't persist due to the nature of how instcombine
+; creates the memcpy intrinsic call, and we rely on the count of indirect calls
+; decreasing and the count of direct calls increasing.
+define void @test3(i8* %src, i8* %dest, i64 %size) {
+; CHECK-NOT: Function Attrs
+; BEFORE: define void @test3(i8* %src, i8* %dest, i64 %size)
+; AFTER: define void @test3(i8* nocapture readonly %src, i8* nocapture %dest, i64 %size)
+  %fptr = alloca i8* (i8*, i8*, i64)*
+  store i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)** %fptr
+  %f = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** %fptr
+  call i8* %f(i8* %dest, i8* %src, i64 %size)
+  ret void
+}
Index: test/Other/cgscc-observe-devirt.ll
===================================================================
--- test/Other/cgscc-observe-devirt.ll
+++ test/Other/cgscc-observe-devirt.ll
@@ -1,130 +1,99 @@
+; Make sure that even without some external devirtualization iteration tool,
+; the CGSCC pass manager correctly observes and re-visits SCCs that change
+; structure due to devirtualization. We trigger devirtualization here with GVN
+; which forwards a store through a load and to an indirect call.
+;
 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S < %s | FileCheck %s --check-prefix=BEFORE
 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn))' -S < %s | FileCheck %s --check-prefix=AFTER
-;
-; Also check that adding an extra CGSCC pass after the function update but
-; without requiring the outer manager to iterate doesn't break any invariant.
-; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn),function-attrs)' -S < %s | FileCheck %s --check-prefix=AFTER2
 
 declare void @readnone() readnone
 declare void @unknown()
 
-; The @test1_* functions check that when we refine an indirect call to a direct
-; call, even if it doesn't change the call graph structure, we revisit the SCC
-; passes to reflect the more precise information.
-; FIXME: Currently, this isn't implemented in the new pass manager and so we
-; only get this with AFTER2, not with AFTER.
-
-; BEFORE: define void @test1_a() {
-; AFTER: define void @test1_a() {
-; AFTER2: define void @test1_a() {
-define void @test1_a() {
-  %fptr = alloca void()*
-  store void()* @unknown, void()** %fptr
-  %f = load void()*, void()** %fptr
-  call void %f()
-  ret void
-}
-
-; BEFORE: define void @test1_b() {
-; AFTER: define void @test1_b() {
-; AFTER2: define void @test1_b() #0 {
-define void @test1_b() {
-  %fptr = alloca void()*
-  store void()* @readnone, void()** %fptr
-  %f = load void()*, void()** %fptr
-  call void %f()
-  ret void
-}
-
-; The @test2_* checks that if we refine an indirect call to a direct call and
+; The @test1_* checks that if we refine an indirect call to a direct call and
 ; in the process change the very structure of the call graph we also revisit
 ; that component of the graph and do so in an up-to-date fashion.
 
-; BEFORE: define void @test2_a1() {
-; AFTER: define void @test2_a1() {
-; AFTER2: define void @test2_a1() {
-define void @test2_a1() {
+; BEFORE: define void @test1_a1() {
+; AFTER: define void @test1_a1() {
+define void @test1_a1() {
   %fptr = alloca void()*
-  store void()* @test2_b2, void()** %fptr
-  store void()* @test2_b1, void()** %fptr
+  store void()* @test1_b2, void()** %fptr
+  store void()* @test1_b1, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
 }
 
-; BEFORE: define void @test2_b1() {
-; AFTER: define void @test2_b1() {
-; AFTER2: define void @test2_b1() {
-define void @test2_b1() {
+; BEFORE: define void @test1_b1() {
+; AFTER: define void @test1_b1() {
+define void @test1_b1() {
   call void @unknown()
-  call void @test2_a1()
+  call void @test1_a1()
   ret void
 }
 
-; BEFORE: define void @test2_a2() {
-; AFTER: define void @test2_a2() #0 {
-; AFTER2: define void @test2_a2() #0 {
-define void @test2_a2() {
+; BEFORE: define void @test1_a2() {
+; AFTER: define void @test1_a2() #0 {
+define void @test1_a2() {
   %fptr = alloca void()*
-  store void()* @test2_b1, void()** %fptr
-  store void()* @test2_b2, void()** %fptr
+  store void()* @test1_b1, void()** %fptr
+  store void()* @test1_b2, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
 }
 
-; BEFORE: define void @test2_b2() {
-; AFTER: define void @test2_b2() #0 {
-; AFTER2: define void @test2_b2() #0 {
-define void @test2_b2() {
+; BEFORE: define void @test1_b2() {
+; AFTER: define void @test1_b2() #0 {
+define void @test1_b2() {
   call void @readnone()
-  call void @test2_a2()
+  call void @test1_a2()
   ret void
 }
 
 
-; The @test3_* set of functions exercise a case where running function passes
+; The @test2_* set of functions exercise a case where running function passes
 ; introduces a new post-order relationship that was not present originally and
 ; makes sure we walk across the SCCs in that order.
 
-; CHECK: define void @test3_a() {
-define void @test3_a() {
-  call void @test3_b1()
-  call void @test3_b2()
-  call void @test3_b3()
+; CHECK: define void @test2_a() {
+define void @test2_a() {
+  call void @test2_b1()
+  call void @test2_b2()
+  call void @test2_b3()
   call void @unknown()
   ret void
 }
 
-; CHECK: define void @test3_b1() #0 {
-define void @test3_b1() {
+; CHECK: define void @test2_b1() #0 {
+define void @test2_b1() {
   %fptr = alloca void()*
-  store void()* @test3_a, void()** %fptr
+  store void()* @test2_a, void()** %fptr
   store void()* @readnone, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
 }
 
-; CHECK: define void @test3_b2() #0 {
-define void @test3_b2() {
+; CHECK: define void @test2_b2() #0 {
+define void @test2_b2() {
   %fptr = alloca void()*
-  store void()* @test3_a, void()** %fptr
-  store void()* @test3_b2, void()** %fptr
-  store void()* @test3_b3, void()** %fptr
-  store void()* @test3_b1, void()** %fptr
+  store void()* @test2_a, void()** %fptr
+  store void()* @test2_b2, void()** %fptr
+  store void()* @test2_b3, void()** %fptr
+  store void()* @test2_b1, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
 }
 
-; CHECK: define void @test3_b3() #0 {
-define void @test3_b3() {
+; CHECK: define void @test2_b3() #0 {
+define void @test2_b3() {
   %fptr = alloca void()*
-  store void()* @test3_a, void()** %fptr
-  store void()* @test3_b2, void()** %fptr
-  store void()* @test3_b3, void()** %fptr
-  store void()* @test3_b1, void()** %fptr
+  store void()* @test2_a, void()** %fptr
+  store void()* @test2_b2, void()** %fptr
+  store void()* @test2_b3, void()** %fptr
+  store void()* @test2_b1, void()** %fptr
   %f = load void()*, void()** %fptr
   call void %f()
   ret void
Index: test/Transforms/Inline/devirtualize-2.ll
===================================================================
--- test/Transforms/Inline/devirtualize-2.ll
+++ test/Transforms/Inline/devirtualize-2.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(devirt<4>(inline))' -S | FileCheck %s
 ; PR4834
 
 define i32 @test1() {