diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -220,6 +220,8 @@ Optional AllowRecursiveCall = false; }; +Optional getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind); + /// Generate the parameters to tune the inline cost analysis based only on the /// commandline options. InlineParams getInlineParams(); diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -133,8 +133,6 @@ cl::desc("Disables evaluation of GetElementPtr with constant operands")); namespace { -class InlineCostCallAnalyzer; - /// This function behaves more like CallBase::hasFnAttr: when it looks for the /// requested attribute, it check both the call instruction and the called /// function (if it's available and operand bundles don't prohibit that). @@ -151,7 +149,9 @@ return {}; } +} // namespace +namespace llvm { Optional getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) { Attribute Attr = getFnAttr(CB, AttrKind); int AttrValue; @@ -159,6 +159,10 @@ return None; return AttrValue; } +} // namespace llvm + +namespace { +class InlineCostCallAnalyzer; // This struct is used to store information about inline cost of a // particular instruction @@ -904,6 +908,10 @@ getStringFnAttrAsInt(CandidateCall, "function-inline-cost")) Cost = *AttrCost; + if (Optional AttrCostMult = getStringFnAttrAsInt( + CandidateCall, "function-inline-cost-multiplier")) + Cost *= *AttrCostMult; + if (Optional AttrThreshold = getStringFnAttrAsInt(CandidateCall, "function-inline-threshold")) Threshold = *AttrThreshold; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -877,8 +878,8 @@ // trigger infinite inlining, much like is prevented within the inliner // itself by the InlineHistory above, but spread across CGSCC iterations // and thus hidden from the full inline history. - if (CG.lookupSCC(*CG.lookup(Callee)) == C && - UR.InlinedInternalEdges.count({&N, C})) { + LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee)); + if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) { LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " "previously split out of this SCC by inlining: " << F.getName() << " -> " << Callee.getName() << "\n"); @@ -898,6 +899,10 @@ continue; } + int CBCostMult = + getStringFnAttrAsInt(*CB, "function-inline-cost-multiplier") + .getValueOr(1); + // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( @@ -936,9 +941,27 @@ if (tryPromoteCall(*ICB)) NewCallee = ICB->getCalledFunction(); } - if (NewCallee) - if (!NewCallee->isDeclaration()) + if (NewCallee) { + if (!NewCallee->isDeclaration()) { Calls->push({ICB, NewHistoryID}); + // Continually inlining through an SCC can result in huge compile + // times and bloated code since we arbitrarily stop at some point + // when the inliner decides it's not profitable to inline anymore. + // We attempt to mitigate this by making these calls exponentially + // more expensive. + // This doesn't apply to calls in the same SCC since if we do + // inline through the SCC the function will end up being + // self-recursive which the inliner bails out on, and inlining + // within an SCC is necessary for performance. + if (CalleeSCC != C && + CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) { + Attribute NewCBCostMult = Attribute::get( + M.getContext(), "function-inline-cost-multiplier", + itostr(CBCostMult * 2)); + ICB->addFnAttr(NewCBCostMult); + } + } + } } } diff --git a/llvm/test/Transforms/Inline/inline-cost-attributes.ll b/llvm/test/Transforms/Inline/inline-cost-attributes.ll --- a/llvm/test/Transforms/Inline/inline-cost-attributes.ll +++ b/llvm/test/Transforms/Inline/inline-cost-attributes.ll @@ -11,8 +11,9 @@ define void @fn2() "function-inline-threshold"="41" { ; INLINER-LABEL: Inlining calls in: fn2 -; INLINER-NEXT: Function size: 6 +; INLINER-NEXT: Function size: 7 ; INLINER-NEXT: NOT Inlining (cost=321, threshold=123), Call: call void @fn1() +; INLINER-NEXT: NOT Inlining (cost=963, threshold=123), Call: call void @fn1() ; INLINER-NEXT: NOT Inlining (cost=321, threshold=321), Call: call void @fn1() ; INLINER-NEXT: NOT Inlining (cost=197, threshold=123), Call: call void @fn1() ; INLINER-NEXT: Inlining (cost=197, threshold=321), Call: call void @fn1() @@ -23,6 +24,8 @@ ; COST-NEXT: call void @extern() ; COST-NEXT: cost delta = 132, threshold delta = 193 ; COST-NEXT: call void @fn1() +; COST-NEXT: cost delta = 132, threshold delta = 193 +; COST-NEXT: call void @fn1() ; COST-NEXT: cost delta = 0 ; COST-NEXT: call void @fn1() ; COST-NEXT: cost delta = 271, threshold delta = 17 @@ -33,6 +36,7 @@ entry: call void @extern() call void @fn1() "call-inline-cost"="132" "call-threshold-bonus"="193" + call void @fn1() "call-inline-cost"="132" "call-threshold-bonus"="193" "function-inline-cost-multiplier"="3" call void @fn1() "call-inline-cost"="0" "function-inline-threshold"="321" call void @fn1() "call-threshold-bonus"="17" "function-inline-cost"="197" call void @fn1() "call-inline-cost"="473" "function-inline-cost"="197" "function-inline-threshold"="321" @@ -44,7 +48,7 @@ ; INLINER-NEXT: Function size: 3 ; INLINER-NEXT: Inlining (cost=386, threshold=849), Call: call void @fn1() ; INLINER-NEXT: Size after inlining: 2 -; INLINER-NEXT: NOT Inlining (cost=403, threshold=41), Call: call void @fn2() +; INLINER-NEXT: NOT Inlining (cost=535, threshold=41), Call: call void @fn2() entry: call void @fn1() "function-inline-cost"="386" "function-inline-threshold"="849" diff --git a/llvm/test/Transforms/Inline/mut-rec-scc-2.ll b/llvm/test/Transforms/Inline/mut-rec-scc-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/mut-rec-scc-2.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -passes='inline' < %s | FileCheck %s + +; Make sure we don't mark calls within the same SCC as original function with noinline. +; CHECK-NOT: function-inline-cost-multiplier + +define void @samescc1() { + call void @samescc2() + ret void +} + +define void @samescc2() { + call void @samescc3() + ret void +} + +define void @samescc3() { + call void @samescc1() + ret void +} diff --git a/llvm/test/Transforms/Inline/mut-rec-scc.ll b/llvm/test/Transforms/Inline/mut-rec-scc.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/mut-rec-scc.ll @@ -0,0 +1,72 @@ +; RUN: opt -S -passes='cgscc(inline,instcombine)' < %s | FileCheck %s + +; We use call to a dummy function to avoid inlining test1 into test2 or vice +; versa, such that we aren't left with a trivial cycle, as trivial cycles are +; special-cased to never be inlined. +; However, InstCombine will eliminate these calls after inlining, and thus +; make the functions eligible for inlining in their callers. +declare void @dummy() readnone nounwind willreturn + +define void @test1() { +; CHECK-LABEL: define void @test1( +; CHECK-NEXT: call void @test2() +; CHECK-NEXT: call void @test2() +; CHECK-NEXT: ret void +; + call void @test2() + call void @test2() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + ret void +} + +define void @test2() { +; CHECK-LABEL: define void @test2( +; CHECK-NEXT: call void @test1() +; CHECK-NEXT: call void @test1() +; CHECK-NEXT: ret void +; + call void @test1() + call void @test1() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + ret void +} + +; We should inline the @test2 calls and mark the inlined @test1 calls with function-inline-cost-multiplier +define void @test3() { +; CHECK-LABEL: define void @test3( +; CHECK-NEXT: call void @test2() #[[COSTMULT:[0-9]+]] +; CHECK-NEXT: call void @test2() #[[COSTMULT]] +; CHECK-NEXT: call void @test2() #[[COSTMULT]] +; CHECK-NEXT: call void @test2() #[[COSTMULT]] +; CHECK-NEXT: call void @test2() #[[COSTMULT]] +; CHECK-NEXT: call void @test2() #[[COSTMULT]] +; CHECK-NEXT: call void @test2() #[[COSTMULT]] +; CHECK-NEXT: call void @test2() #[[COSTMULT]] +; CHECK-NEXT: ret void +; + call void @test2() + call void @test2() + ret void +} + +; CHECK: [[COSTMULT]] = { "function-inline-cost-multiplier"="4" }