Index: llvm/lib/Transforms/IPO/Inliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/Inliner.cpp +++ llvm/lib/Transforms/IPO/Inliner.cpp @@ -959,7 +959,35 @@ // inline through the SCC the function will end up being // self-recursive which the inliner bails out on, and inlining // within an SCC is necessary for performance. - if (CalleeSCC != C && + + // Was there an edge from the callee to the new callee? if there + // wasn't, this call was devirtualised. + bool WasEdge = false; + auto &Node = CG.get(Callee); + for (llvm::LazyCallGraph::Edge &E : *Node) + if (&E.getNode() == &CG.get(*NewCallee)) + WasEdge = true; + + if (!WasEdge && CalleeSCC != C && + CalleeSCC != CG.lookupSCC(CG.get(*NewCallee))) { + // This newly devirtualised function call might form a cycle in + // our RefSCC, don't repeatedly inline it as mentioned above. + // Take the cost multiplier from the new call-site function: + // we do not get the SCC-post-order traversal guarantee as + // NewCallee is in a different SCC, thus a different SCCs + // inlining NewCallee might give it different multipliers each + // time. + auto OptCost = + getStringFnAttrAsInt( + *ICB, InlineConstants:: + FunctionInlineCostMultiplierAttributeName); + CBCostMult = (OptCost) ? *OptCost : 1; + Attribute NewCBCostMult = Attribute::get( + M.getContext(), + InlineConstants::FunctionInlineCostMultiplierAttributeName, + itostr(std::min(CBCostMult * IntraSCCCostMultiplier, 10000))); + NewCallee->addFnAttr(NewCBCostMult); + } else if (CalleeSCC != C && CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) { Attribute NewCBCostMult = Attribute::get( M.getContext(), Index: llvm/test/Transforms/Inline/devirtualize-7.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/devirtualize-7.ll @@ -0,0 +1,75 @@ +; RUN: opt %s -o - --passes='cgscc(inline)' -S | FileCheck %s --implicit-check-not=@extern2 + +; The mutual recursion between @longname and @rec_call_to_longname grows +; exponentially due to the two devirtualised calls -- the inliner should +; annotate one of the functions in the SCC (rec_call_to_longname) with the +; "this is part of a cycle" flag, function-inline-cost-multiplier. +; +; In addition, check that there are only three instances of calls to @extern2 +; inlined into the outer RefSCC (which has all the internal functions inlined +; then stripped). Without the call-site being annotated, there's an extra +; round of inlining and the number of calls grows to seven. +; +; NB: add extra direct function calls in the chain from @e to @a to add an +; additional round of potential inlining, pre-patch this causes exponential +; growth in the size of @e. + +; CHECK: declare void @extern2() +; CHECK: define internal void @rec_call_to_longname() #0 +; CHECK-LABEL: define internal void @longname +; CHECK: call void @extern2 +; CHECK-LABEL: define void @e() +; CHECK: call void @extern2 +; CHECK: call void @extern2 +; CHECK: call void @extern2 + +; CHECK: attributes #0 = { "function-inline-cost-multiplier"="64" } + +target triple = "x86_64-unknown-unknown" + +@extern = external global ptr + +declare void @extern1() +declare void @extern2() +declare i32 @extern3() + +define internal void @rec_call_to_longname() { + store ptr @extern, ptr @e + br i1 undef, label %1, label %2, !prof !0 + +1: + call void @longname(ptr @rec_call_to_longname, ptr undef, ptr undef) + br label %2 + +2: + ret void +} + +define internal void @longname(ptr %0, ptr %1, ptr %2) { + call void @extern1() + call void @extern2() + %4 = call i32 @extern3() + br label %5 + +5: + call void %0() ; Exponential growth occurs between these two devirt calls. + call void %0() + ret void +} + +define void @e() { + call void @c() + ret void +} + +define internal void @c() { + call void @a() + ret void + +} +define internal void @a() { + call void @rec_call_to_longname() + ret void +} + +!0 = !{!"branch_weights", i32 8, i32 592480}