Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -1145,6 +1145,7 @@
       // re-use the exact same logic for updating the call graph to reflect the
       // change.
       LazyCallGraph::SCC *OldC = C;
+      unsigned CWSizeBeforeUpdate = UR.CWorklist.size();
       C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
       LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
       RC = &C->getOuterRefSCC();
@@ -1159,9 +1160,20 @@
       // node and the SCC containing the call edge. This is a slight over
       // approximation of the possible inlining decisions that must be avoided,
       // but is relatively efficient to store.
+      //
+      // It is possible that even if no new SCC is generated (i.e., C == OldC),
+      // the original SCC could be split and then merged back into the same
+      // SCC. During this process, the original SCC will be added to
+      // UR.CWorklist again, so we want to cache that case as well.
+      //
+      // Whenever a split has happened, the size of UR.CWorklist will be
+      // larger than it was before the SCC update, and we record the inline
+      // history in that case.
+      //
       // FIXME: This seems like a very heavyweight way of retaining the inline
       // history, we should look for a more efficient way of tracking it.
-      if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
+      if (CWSizeBeforeUpdate < UR.CWorklist.size() &&
+          llvm::any_of(InlinedCallees, [&](Function *Callee) {
             return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
           })) {
         LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
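
Note: the key observation behind the CWSizeBeforeUpdate check is that
updateCGAndAnalysisManagerForFunctionPass re-enqueues an SCC onto UR.CWorklist
whenever a split occurs, even when the split pieces merge back so that
C == OldC afterwards. The following standalone sketch (not LLVM code; the
types and the updateSCC stand-in are illustrative) shows why comparing
worklist sizes catches the split-and-merge-back case that the old pointer
comparison misses:

```cpp
#include <cassert>
#include <vector>

struct SCC { int Id; };

// Stand-in for updateCGAndAnalysisManagerForFunctionPass: when the current
// SCC is split and later merged back into itself, it returns the same SCC
// pointer but still re-enqueues it onto the worklist.
SCC *updateSCC(SCC *OldC, std::vector<SCC *> &CWorklist,
               bool SplitAndMergeBack) {
  if (SplitAndMergeBack)
    CWorklist.push_back(OldC); // OldC will be visited again later.
  return OldC;                 // Same current SCC either way.
}

int main() {
  SCC A{0};
  std::vector<SCC *> CWorklist;

  unsigned CWSizeBeforeUpdate = CWorklist.size();
  SCC *C = updateSCC(&A, CWorklist, /*SplitAndMergeBack=*/true);

  assert(C == &A);                               // The old check sees nothing,
  assert(CWSizeBeforeUpdate < CWorklist.size()); // the new check sees the split.
  return 0;
}
```
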
Index: test/Transforms/Inline/cgscc-cycle-2.ll
===================================================================
--- test/Transforms/Inline/cgscc-cycle-2.ll
+++ test/Transforms/Inline/cgscc-cycle-2.ll
@@ -0,0 +1,102 @@
+; This test contains another tricky call graph structure for the inliner to
+; handle correctly. The call graph looks like this:
+;
+; foo <---> goo
+;  |         ^
+;  v         |
+; moo <---> noo
+;
+; Among all the call edges in the call graph, only moo and noo can be inlined
+; into foo; no other call edge can be inlined.
+;
+; After moo is inlined into foo, the original call edge foo->moo is removed
+; and a new call edge foo->noo is added, so the call graph becomes:
+;
+; foo <---> goo
+;   \        ^
+;    v      /
+; moo <---> noo
+; But foo, goo, moo and noo still belong to the same SCC.
+;
+; Then, after foo->noo is inlined and that call edge is converted to a ref
+; edge, the original SCC is split into two, {moo, noo} and {foo, goo};
+; immediately afterwards the newly added ref edge foo->moo is converted back
+; to a call edge, and the two SCCs are merged into the original one again.
+; During this cycle, the original SCC is re-added to UR.CWorklist, which
+; creates an infinite loop.
+;
+; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=50 -S
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = dso_local local_unnamed_addr global i64 0, align 8
+
+define dso_local void @_Z3mooi(i32 %i) local_unnamed_addr #1 {
+entry:
+  %cmp = icmp eq i32 %i, 5
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call i64 @random() #2
+  %t0 = load i64, i64* @a, align 8
+  %add = add nsw i64 %t0, %call
+  store i64 %add, i64* @a, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  tail call void @_Z3nooi(i32 %i)
+  %t6 = load i64, i64* @a, align 8
+  %add85 = add nsw i64 %t6, 1
+  store i64 %add85, i64* @a, align 8
+  ret void
+}
+
+; Function Attrs: nounwind
+declare dso_local i64 @random() local_unnamed_addr
+
+define dso_local void @_Z3nooi(i32 %i) local_unnamed_addr #1 {
+entry:
+  %cmp = icmp eq i32 %i, 5
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call i64 @random() #2
+  %t0 = load i64, i64* @a, align 8
+  %add = add nsw i64 %t0, %call
+  store i64 %add, i64* @a, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  tail call void @_Z3mooi(i32 %i)
+  tail call void @_Z3goov()
+  %t6 = load i64, i64* @a, align 8
+  %add79 = add nsw i64 %t6, 3
+  store i64 %add79, i64* @a, align 8
+  ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @_Z3goov() local_unnamed_addr #0 {
+entry:
+  tail call void @_Z3foov()
+  %t0 = load i64, i64* @a, align 8
+  %add = add nsw i64 %t0, 2
+  store i64 %add, i64* @a, align 8
+  ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @_Z3foov() local_unnamed_addr #0 {
+entry:
+  tail call void @_Z3goov()
+  tail call void @_Z3mooi(i32 5)
+  %t0 = load i64, i64* @a, align 8
+  %add = add nsw i64 %t0, 5
+  store i64 %add, i64* @a, align 8
+  ret void
+}
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
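
For illustration, here is a toy simulation (all names hypothetical, not tied
to the LLVM sources) of the revisit cycle the test describes: without
recording inline history on the split-and-merge-back path, the same SCC keeps
being re-enqueued and re-inlined; with the size-based check, the second visit
consults the recorded history and the loop terminates:

```cpp
#include <iostream>
#include <set>
#include <vector>

int main() {
  std::vector<int> CWorklist{0};  // SCC 0 stands in for {foo, goo, moo, noo}.
  std::set<int> InlineHistory;    // SCCs whose inlining has been recorded.
  int Visits = 0;

  while (!CWorklist.empty() && Visits < 100) {
    int C = CWorklist.back();
    CWorklist.pop_back();
    ++Visits;

    unsigned SizeBefore = CWorklist.size();
    // Inlining an internal call edge splits the SCC and merges it back,
    // re-enqueueing the same SCC -- unless the history tells us to stop.
    if (!InlineHistory.count(C))
      CWorklist.push_back(C);

    // The patch's check: the worklist grew even though the current SCC is
    // unchanged, so record the inline history. Remove this and the loop
    // runs until the visit cap.
    if (SizeBefore < CWorklist.size())
      InlineHistory.insert(C);
  }
  std::cout << "visits: " << Visits << "\n"; // Prints 2 with the check on.
  return 0;
}
```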