Index: llvm/lib/Transforms/IPO/Inliner.cpp
===================================================================
--- llvm/lib/Transforms/IPO/Inliner.cpp
+++ llvm/lib/Transforms/IPO/Inliner.cpp
@@ -959,7 +959,35 @@
               // inline through the SCC the function will end up being
               // self-recursive which the inliner bails out on, and inlining
               // within an SCC is necessary for performance.
-              if (CalleeSCC != C &&
+
+              // Was there an edge from the callee to the new callee? if there
+              // wasn't, this call was devirtualised.
+              bool WasEdge = false;
+              auto &Node = CG.get(Callee);
+              for (llvm::LazyCallGraph::Edge &E : *Node)
+                if (&E.getNode() == &CG.get(*NewCallee))
+                  WasEdge = true;
+
+              if (!WasEdge && CalleeSCC != C &&
+                  CalleeSCC != CG.lookupSCC(CG.get(*NewCallee)))  {
+                // This newly devirtualised function call might form a cycle in
+                // our RefSCC, don't repeatedly inline it as mentioned above.
+                // Take the cost multiplier from the new call-site function:
+                // we do not get the SCC-post-order traversal guarantee as
+                // NewCallee is in a different SCC, thus a different SCCs
+                // inlining NewCallee might give it different multipliers each
+                // time.
+                auto OptCost =
+                    getStringFnAttrAsInt(
+                        *ICB, InlineConstants::
+                                 FunctionInlineCostMultiplierAttributeName);
+                CBCostMult = (OptCost) ? *OptCost : 1;
+                Attribute NewCBCostMult = Attribute::get(
+                    M.getContext(),
+                    InlineConstants::FunctionInlineCostMultiplierAttributeName,
+                    itostr(std::min(CBCostMult * IntraSCCCostMultiplier, 10000)));
+                NewCallee->addFnAttr(NewCBCostMult);
+              } else if (CalleeSCC != C &&
                   CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
                 Attribute NewCBCostMult = Attribute::get(
                     M.getContext(),
Index: llvm/test/Transforms/Inline/devirtualize-7.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Inline/devirtualize-7.ll
@@ -0,0 +1,75 @@
+; RUN: opt %s -o - --passes='cgscc(inline)' -S | FileCheck %s --implicit-check-not=@extern2
+
+; The mutual recursion between @longname and @rec_call_to_longname grows
+; exponentially due to the two devirtualised calls -- the inliner should
+; annotate one of the functions in the SCC (rec_call_to_longname) with the
+; "this is part of a cycle" flag, function-inline-cost-multiplier.
+;
+; In addition, check that there are only three instances of calls to @extern2
+; inlined into the outer RefSCC (which has all the internal functions inlined
+; then stripped). Without the call-site being annotated, there's an extra
+; round of inlining and the number of calls grows to seven.
+;
+; NB: add extra direct function calls in the chain from @e to @a to add an
+; additional round of potential inlining, pre-patch this causes exponential
+; growth in the size of @e.
+
+; CHECK: declare void @extern2()
+; CHECK: define internal void @rec_call_to_longname() #0
+; CHECK-LABEL: define internal void @longname
+; CHECK: call void @extern2
+; CHECK-LABEL: define void @e()
+; CHECK: call void @extern2
+; CHECK: call void @extern2
+; CHECK: call void @extern2
+
+; CHECK: attributes #0 = { "function-inline-cost-multiplier"="64" }
+
+target triple = "x86_64-unknown-unknown"
+
+@extern = external global ptr
+
+declare void @extern1()
+declare void @extern2()
+declare i32 @extern3()
+
+define internal void @rec_call_to_longname() {
+  store ptr @extern, ptr @e
+  br i1 undef, label %1, label %2, !prof !0
+
+1:
+  call void @longname(ptr @rec_call_to_longname, ptr undef, ptr undef)
+  br label %2
+
+2:
+  ret void
+}
+
+define internal void @longname(ptr %0, ptr %1, ptr %2) {
+  call void @extern1()
+  call void @extern2()
+  %4 = call i32 @extern3()
+  br label %5
+
+5:          
+  call void %0()  ; Exponential growth occurs between these two devirt calls.
+  call void %0()
+  ret void
+}
+
+define void @e() {
+  call void @c()
+  ret void
+}
+
+define internal void @c() {
+  call void @a()
+  ret void
+
+}
+define internal void @a() {
+  call void @rec_call_to_longname()
+  ret void
+}
+
+!0 = !{!"branch_weights", i32 8, i32 592480}