Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -1145,6 +1145,7 @@
       // re-use the exact same logic for updating the call graph to reflect the
       // change.
       LazyCallGraph::SCC *OldC = C;
+      unsigned CWSizeBeforeUpdate = UR.CWorklist.size();
       C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
       LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
       RC = &C->getOuterRefSCC();
@@ -1159,9 +1160,20 @@
       // node and the SCC containing the call edge. This is a slight over
       // approximation of the possible inlining decisions that must be avoided,
       // but is relatively efficient to store.
+      //
+      // It is possible that even if no new SCC is generated (i.e., C == OldC),
+      // the original SCC could be split and then merged back into the same
+      // SCC. During this process, the original SCC will be added to
+      // UR.CWorklist again, so we want to cache that case as well.
+      //
+      // Whenever a split has happened, the size of UR.CWorklist will be
+      // larger than it was before the SCC update, and we record the inline
+      // history in that case.
+      //
       // FIXME: This seems like a very heavyweight way of retaining the inline
       // history, we should look for a more efficient way of tracking it.
-      if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
+      if (CWSizeBeforeUpdate < UR.CWorklist.size() &&
+          llvm::any_of(InlinedCallees, [&](Function *Callee) {
             return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
           })) {
         LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
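
Note: the key observation behind the CWSizeBeforeUpdate check is that
updateCGAndAnalysisManagerForFunctionPass re-enqueues an SCC onto UR.CWorklist
whenever a split occurs, even when the split pieces merge back so that
C == OldC afterwards. The following standalone sketch (not LLVM code; the
types and the updateSCC stand-in are illustrative) shows why comparing
worklist sizes catches the split-and-merge-back case that the old pointer
comparison misses:

```cpp
#include <cassert>
#include <vector>

struct SCC { int Id; };

// Stand-in for updateCGAndAnalysisManagerForFunctionPass: when the current
// SCC is split and later merged back into itself, it returns the same SCC
// pointer but still re-enqueues it onto the worklist.
SCC *updateSCC(SCC *OldC, std::vector<SCC *> &CWorklist,
               bool SplitAndMergeBack) {
  if (SplitAndMergeBack)
    CWorklist.push_back(OldC); // OldC will be visited again later.
  return OldC;                 // Same current SCC either way.
}

int main() {
  SCC A{0};
  std::vector<SCC *> CWorklist;

  unsigned CWSizeBeforeUpdate = CWorklist.size();
  SCC *C = updateSCC(&A, CWorklist, /*SplitAndMergeBack=*/true);

  assert(C == &A);                               // The old check sees nothing,
  assert(CWSizeBeforeUpdate < CWorklist.size()); // the new check sees the split.
  return 0;
}
```
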
Index: test/Transforms/Inline/cgscc-cycle-2.ll
===================================================================
--- test/Transforms/Inline/cgscc-cycle-2.ll
+++ test/Transforms/Inline/cgscc-cycle-2.ll
@@ -0,0 +1,102 @@
+; This test contains another tricky call graph structure for the inliner to
+; handle correctly. The call graph looks like this:
+;
+; foo <---> goo
+;  |         ^
+;  v         |
+; moo <---> noo
+;
+; Among all the call edges in the call graph, only moo and noo can be inlined
+; into foo; no other call edge can be inlined.
+;
+; After moo is inlined into foo, the original call edge foo->moo is removed
+; and a new call edge foo->noo is added, so the call graph becomes:
+;
+; foo <---> goo
+;   \        ^
+;    v      /
+; moo <---> noo
+; But foo, goo, moo and noo still belong to the same SCC.
+;
+; Then, after foo->noo is inlined and that call edge is converted to a ref
+; edge, the original SCC is split into two, {moo, noo} and {foo, goo};
+; immediately afterwards the newly added ref edge foo->moo is converted back
+; to a call edge, and the two SCCs are merged into the original one again.
+; During this cycle, the original SCC is re-added to UR.CWorklist, which
+; creates an infinite loop.
+;
+; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=50 -S
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = dso_local local_unnamed_addr global i64 0, align 8
+
+define dso_local void @_Z3mooi(i32 %i) local_unnamed_addr #1 {
+entry:
+  %cmp = icmp eq i32 %i, 5
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call i64 @random() #2
+  %t0 = load i64, i64* @a, align 8
+  %add = add nsw i64 %t0, %call
+  store i64 %add, i64* @a, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  tail call void @_Z3nooi(i32 %i)
+  %t6 = load i64, i64* @a, align 8
+  %add85 = add nsw i64 %t6, 1
+  store i64 %add85, i64* @a, align 8
+  ret void
+}
+
+; Function Attrs: nounwind
+declare dso_local i64 @random() local_unnamed_addr
+
+define dso_local void @_Z3nooi(i32 %i) local_unnamed_addr #1 {
+entry:
+  %cmp = icmp eq i32 %i, 5
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call i64 @random() #2
+  %t0 = load i64, i64* @a, align 8
+  %add = add nsw i64 %t0, %call
+  store i64 %add, i64* @a, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  tail call void @_Z3mooi(i32 %i)
+  tail call void @_Z3goov()
+  %t6 = load i64, i64* @a, align 8
+  %add79 = add nsw i64 %t6, 3
+  store i64 %add79, i64* @a, align 8
+  ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @_Z3goov() local_unnamed_addr #0 {
+entry:
+  tail call void @_Z3foov()
+  %t0 = load i64, i64* @a, align 8
+  %add = add nsw i64 %t0, 2
+  store i64 %add, i64* @a, align 8
+  ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @_Z3foov() local_unnamed_addr #0 {
+entry:
+  tail call void @_Z3goov()
+  tail call void @_Z3mooi(i32 5)
+  %t0 = load i64, i64* @a, align 8
+  %add = add nsw i64 %t0, 5
+  store i64 %add, i64* @a, align 8
+  ret void
+}
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
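
For illustration, here is a toy simulation (all names hypothetical, not tied
to the LLVM sources) of the revisit cycle the test describes: without
recording inline history on the split-and-merge-back path, the same SCC keeps
being re-enqueued and re-inlined; with the size-based check, the second visit
consults the recorded history and the loop terminates:

```cpp
#include <iostream>
#include <set>
#include <vector>

int main() {
  std::vector<int> CWorklist{0};  // SCC 0 stands in for {foo, goo, moo, noo}.
  std::set<int> InlineHistory;    // SCCs whose inlining has been recorded.
  int Visits = 0;

  while (!CWorklist.empty() && Visits < 100) {
    int C = CWorklist.back();
    CWorklist.pop_back();
    ++Visits;

    unsigned SizeBefore = CWorklist.size();
    // Inlining an internal call edge splits the SCC and merges it back,
    // re-enqueueing the same SCC -- unless the history tells us to stop.
    if (!InlineHistory.count(C))
      CWorklist.push_back(C);

    // The patch's check: the worklist grew even though the current SCC is
    // unchanged, so record the inline history. Remove this and the loop
    // runs until the visit cap.
    if (SizeBefore < CWorklist.size())
      InlineHistory.insert(C);
  }
  std::cout << "visits: " << Visits << "\n"; // Prints 2 with the check on.
  return 0;
}
```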