Index: llvm/include/llvm/ADT/SCCIterator.h
===================================================================
--- llvm/include/llvm/ADT/SCCIterator.h
+++ llvm/include/llvm/ADT/SCCIterator.h
@@ -90,6 +90,7 @@
   /// Compute the next SCC using the DFS traversal.
   void GetNextSCC();
 
+public:
   scc_iterator(NodeRef entryN) : visitNum(0) {
     DFSVisitOne(entryN);
     GetNextSCC();
@@ -98,12 +99,6 @@
   /// End is when the DFS stack is empty.
   scc_iterator() = default;
 
-public:
-  static scc_iterator begin(const GraphT &G) {
-    return scc_iterator(GT::getEntryNode(G));
-  }
-  static scc_iterator end(const GraphT &) { return scc_iterator(); }
-
   /// Direct loop termination test which is more efficient than
   /// comparison with \c end().
   bool isAtEnd() const {
@@ -222,16 +217,25 @@
     return false;
   }
 
-/// Construct the begin iterator for a deduced graph type T.
+/// Construct the begin iterator for a deduced graph type T, starting from its
+/// entry node.
 template <class T> scc_iterator<T> scc_begin(const T &G) {
-  return scc_iterator<T>::begin(G);
+  return scc_iterator<T>(GraphTraits<T>::getEntryNode(G));
 }
 
-/// Construct the end iterator for a deduced graph type T.
+/// Construct the begin iterator for a graph type T, starting from the specified
+/// node.
+template <class T, class U = GraphTraits<T>>
+scc_iterator<T, U> scc_begin(typename U::NodeRef N) {
+  return scc_iterator<T>(N);
+}
+
+  /// Construct the end iterator for a deduced graph type T.
 template <class T> scc_iterator<T> scc_end(const T &G) {
-  return scc_iterator<T>::end(G);
+  return scc_iterator<T>();
 }
 
+
 } // end namespace llvm
 
 #endif // LLVM_ADT_SCCITERATOR_H
Index: llvm/lib/Analysis/CallGraphSCCPass.cpp
===================================================================
--- llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -17,6 +17,7 @@
 
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CallGraph.h"
@@ -63,6 +64,10 @@
   /// whether any of the passes modifies the module, and if so, return true.
   bool runOnModule(Module &M) override;
 
+  /// Execute all of the passes scheduled for execution on a given SCC. Return
+  /// true if any passes modify the module.
+  bool runOnSCC(CallGraphSCC &SCC, CallGraph &CG);
+
   using ModulePass::doInitialization;
   using ModulePass::doFinalization;
 
@@ -448,51 +453,77 @@
 bool CGPassManager::runOnModule(Module &M) {
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
   bool Changed = doInitialization(CG);
-  
-  // Walk the callgraph in bottom-up SCC order.
-  scc_iterator<CallGraph*> CGI = scc_begin(&CG);
-
-  CallGraphSCC CurSCC(CG, &CGI);
-  while (!CGI.isAtEnd()) {
-    // Copy the current SCC and increment past it so that the pass can hack
-    // on the SCC if it wants to without invalidating our iterator.
-    const std::vector<CallGraphNode *> &NodeVec = *CGI;
-    CurSCC.initialize(NodeVec);
-    ++CGI;
-
-    // At the top level, we run all the passes in this pass manager on the
-    // functions in this SCC.  However, we support iterative compilation in the
-    // case where a function pass devirtualizes a call to a function.  For
-    // example, it is very common for a function pass (often GVN or instcombine)
-    // to eliminate the addressing that feeds into a call.  With that improved
-    // information, we would like the call to be an inline candidate, infer
-    // mod-ref information etc.
-    //
-    // Because of this, we allow iteration up to a specified iteration count.
-    // This only happens in the case of a devirtualized call, so we only burn
-    // compile time in the case that we're making progress.  We also have a hard
-    // iteration count limit in case there is crazy code.
-    unsigned Iteration = 0;
-    bool DevirtualizedCall = false;
-    do {
-      LLVM_DEBUG(if (Iteration) dbgs()
-                 << "  SCCPASSMGR: Re-visiting SCC, iteration #" << Iteration
-                 << '\n');
-      DevirtualizedCall = false;
-      Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
-    } while (Iteration++ < MaxIterations && DevirtualizedCall);
-    
-    if (DevirtualizedCall)
-      LLVM_DEBUG(dbgs() << "  CGSCCPASSMGR: Stopped iteration after "
-                        << Iteration
-                        << " times, due to -max-cg-scc-iterations\n");
 
-    MaxSCCIterations.updateMax(Iteration);
+  DenseSet<const Function *> Visited;
+
+  CallGraph::iterator INext;
+  for (auto I = CG.begin(), E = CG.end(); I != E;) {
+    // Walk the callgraph in bottom-up SCC order.
+    auto CGI = scc_begin<CallGraph *>(I->second.get());
+
+    CallGraphSCC CurSCC(CG, &CGI);
+    while (!CGI.isAtEnd()) {
+      const std::vector<CallGraphNode *> &NodeVec = *CGI;
+
+      // Record that we've seen this set of functions so we don't run the pass
+      // twice.
+      for (auto *Elt : NodeVec)
+        Visited.insert(Elt->getFunction());
+
+      // Copy the current SCC and increment past it so that the pass can hack
+      // on the SCC if it wants to without invalidating our iterator.
+      CurSCC.initialize(NodeVec);
+      ++CGI;
+
+      // If we've got all functions reachable from our chosen initial node,
+      // calculate the next node we need to search from now too.
+      if (CGI.isAtEnd()) {
+        do
+          ++I;
+        while (I != E && Visited.count(I->first));
+      }
+
+      Changed |= runOnSCC(CurSCC, CG);
+    }
   }
+
   Changed |= doFinalization(CG);
   return Changed;
 }
 
+bool CGPassManager::runOnSCC(CallGraphSCC &CurSCC, CallGraph &CG) {
+  bool Changed = false;
+
+  // At the top level, we run all the passes in this pass manager on the
+  // functions in this SCC.  However, we support iterative compilation in the
+  // case where a function pass devirtualizes a call to a function.  For
+  // example, it is very common for a function pass (often GVN or instcombine)
+  // to eliminate the addressing that feeds into a call.  With that improved
+  // information, we would like the call to be an inline candidate, infer
+  // mod-ref information etc.
+  //
+  // Because of this, we allow iteration up to a specified iteration count.
+  // This only happens in the case of a devirtualized call, so we only burn
+  // compile time in the case that we're making progress.  We also have a hard
+  // iteration count limit in case there is crazy code.
+  unsigned Iteration = 0;
+  bool DevirtualizedCall = false;
+  do {
+    LLVM_DEBUG(if (Iteration) dbgs()
+               << "  SCCPASSMGR: Re-visiting SCC, iteration #" << Iteration
+               << '\n');
+    DevirtualizedCall = false;
+    Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
+  } while (Iteration++ < MaxIterations && DevirtualizedCall);
+
+  if (DevirtualizedCall)
+    LLVM_DEBUG(dbgs() << "  CGSCCPASSMGR: Stopped iteration after " << Iteration
+                      << " times, due to -max-cg-scc-iterations\n");
+
+  MaxSCCIterations.updateMax(Iteration);
+  return Changed;
+}
+
 /// Initialize CG
 bool CGPassManager::doInitialization(CallGraph &CG) {
   bool Changed = false;
Index: llvm/test/Transforms/Inline/always-inline.ll
===================================================================
--- llvm/test/Transforms/Inline/always-inline.ll
+++ llvm/test/Transforms/Inline/always-inline.ll
@@ -316,3 +316,10 @@
   call void @inner14()
   ret void
 }
+
+define internal i32 @outer15() {
+; CHECK-LABEL: @outer15
+; CHECK: ret i32 1
+  %res = call i32 @inner1()
+  ret i32 %res
+}