Index: llvm/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/lib/Analysis/InlineCost.cpp +++ llvm/lib/Analysis/InlineCost.cpp @@ -377,6 +377,10 @@ /// Attempt to evaluate indirect calls to boost its inline cost. const bool BoostIndirectCalls; + /// If true, inlining may be more conservative to take the caller's + /// inlineability into account. + const bool CallerMayBeInlined; + /// Inlining cost measured in abstract units, accounts for all the /// instructions expected to be executed for a given function invocation. /// Instructions that are statically proven to be dead based on call-site @@ -626,12 +630,14 @@ std::function &GetAssumptionCache, Optional> &GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, - CallBase &Call, const InlineParams &Params, bool BoostIndirect = true) + CallBase &Call, const InlineParams &Params, bool BoostIndirect = true, + bool CallerMayBeInlined = true) : CallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, Callee, Call), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE), Params(Params), Threshold(Params.DefaultThreshold), - BoostIndirectCalls(BoostIndirect) {} + BoostIndirectCalls(BoostIndirect), + CallerMayBeInlined(CallerMayBeInlined) {} void dump(); virtual ~InlineCostCallAnalyzer() {} @@ -1164,11 +1170,14 @@ int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; - // Lambda to set all the above bonus and bonus percentages to 0. - auto DisallowAllBonuses = [&]() { - SingleBBBonusPercent = 0; - VectorBonusPercent = 0; - LastCallToStaticBonus = 0; + // Lambda to set all the above bonus and bonus percentages to 0, if doing so + // might help us inline the caller elsewhere. + auto DisallowAllBonusesForCallerInlinability = [&]() { + if (CallerMayBeInlined) { + SingleBBBonusPercent = 0; + VectorBonusPercent = 0; + LastCallToStaticBonus = 0; + } }; // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available @@ -1212,7 +1221,7 @@ // LastCallToStatic bonus. While this bonus might result in code size // reduction, it can cause the size of a non-cold caller to increase // preventing it from being inlined. - DisallowAllBonuses(); + DisallowAllBonusesForCallerInlinability(); Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); } else if (PSI) { // Use callee's global profile information only if we have no way of @@ -1229,7 +1238,7 @@ // LastCallToStatic bonus. While this bonus might result in code size // reduction, it can cause the size of a non-cold caller to increase // preventing it from being inlined. - DisallowAllBonuses(); + DisallowAllBonusesForCallerInlinability(); Threshold = MinIfValid(Threshold, Params.ColdThreshold); } } @@ -2173,11 +2182,17 @@ if (Call.isNoInline()) return llvm::InlineCost::getNever("noinline call site attribute"); + // FIXME: use_begin() == use_end() only implies inlining isn't possible if + // we're not doing some form of LTO later. + bool CallerMayBeInlined = !(Caller->hasFnAttribute(Attribute::NoInline) || + Caller->use_begin() == Caller->use_end()); + LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Caller->getName() << ")\n"); InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, - *Callee, Call, Params); + *Callee, Call, Params, /*BoostIndirect=*/true, + CallerMayBeInlined); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); Index: llvm/test/Transforms/Inline/bpi-cold-inlining.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/bpi-cold-inlining.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -passes=inline -inline-cold-callsite-threshold=0 -S | FileCheck %s + +declare void @foo(i32) + +@a = external global i1 + +define internal void @callee1() { + call void @foo(i32 1) + ret void +} + +; CHECK-LABEL: define void @not_inlined_if_cold +define void @not_inlined_if_cold() { +entry: + %a = load i1, i1* @a + br i1 %a, label %if.then, label %if.end, !prof !0 + +if.then: + ; CHECK: call void @callee1() + call void @callee1() + br label %if.end + +if.end: + ; CHECK: ret void + ret void +} + +@not_inlined_if_cold_addr = global void ()* @not_inlined_if_cold + +define internal void @callee2() { + call void @foo(i32 2) + ret void +} + +; CHECK-LABEL: define void @gets_inlined_noinline_and_cold +define void @gets_inlined_noinline_and_cold() #0 { +entry: + %a = load i1, i1* @a + br i1 %a, label %if.then, label %if.end, !prof !0 + +if.then: + ; CHECK: call void @foo(i32 2) + call void @callee2() + br label %if.end + +if.end: + ; CHECK: ret void + ret void +} + +@gets_inlined_noinline_and_cold_addr = global void ()* @gets_inlined_noinline_and_cold + +define internal void @callee3() { + call void @foo(i32 3) + ret void +} + +; CHECK-LABEL: define void @gets_inlined_no_uses_and_cold +define void @gets_inlined_no_uses_and_cold() { +entry: + %a = load i1, i1* @a + br i1 %a, label %if.then, label %if.end, !prof !0 + +if.then: + ; CHECK: call void @foo(i32 3) + call void @callee3() + br label %if.end + +if.end: + ; CHECK: ret void + ret void +} + +attributes #0 = { noinline } + +!0 = !{!"branch_weights", i32 1, i32 2000}