diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -80,7 +80,7 @@ cl::desc("Multiplier to multiply cycle savings by during inlining")); static cl::opt - InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), + InlineCostAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::ZeroOrMore, cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings")); @@ -101,6 +101,12 @@ "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore, cl::desc("Threshold for locally hot callsites ")); +static cl::opt ImpossibleCodeRelFreq( + "impossible-code-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, + cl::desc("Maximum block frequency, expressed as a per-million of functions " + "entry frequency, for a block to be considered to be impossible " + "to execute in the absence of profile information.")); + static cl::opt ColdCallSiteRelFreq( "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::desc("Maximum block frequency, expressed as a percentage of caller's " @@ -485,9 +491,8 @@ // the size of that basic block. int CostAtBBStart = 0; - // The static size of live but cold basic blocks. This is "static" in the - // sense that it's not weighted by profile counts at all. - int ColdSize = 0; + // As per the block frequency info, does this block basically never execute? + bool IsImpossibleBB = false; // Whether inlining is decided by cost-benefit analysis. bool DecidedByCostBenefit = false; @@ -517,7 +522,8 @@ /// Handle a capped 'int' increment for Cost. void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) { assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound"); - Cost = (int)std::min(UpperBound, Cost + Inc); + if (!IsImpossibleBB) + Cost = (int)std::min(UpperBound, Cost + Inc); } void onDisableSROA(AllocaInst *Arg) override { @@ -629,21 +635,21 @@ SROACostSavings += InlineConstants::InstrCost; } - void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; } + void onBlockStart(const BasicBlock *BB) override { + CostAtBBStart = Cost; - void onBlockAnalyzed(const BasicBlock *BB) override { - if (CostBenefitAnalysisEnabled) { - // Keep track of the static size of live but cold basic blocks. For now, - // we define a cold basic block to be one that's never executed. - assert(GetBFI && "GetBFI must be available"); + IsImpossibleBB = false; + if (GetBFI) { BlockFrequencyInfo *BFI = &(GetBFI(F)); assert(BFI && "BFI must be available"); - auto ProfileCount = BFI->getBlockProfileCount(BB); - assert(ProfileCount.hasValue()); - if (ProfileCount.getValue() == 0) - ColdSize += Cost - CostAtBBStart; + const BranchProbability ImpossibleProb(ImpossibleCodeRelFreq, 1'000'000); + auto Freq = BFI->getBlockFreq(BB); + auto EntryFreq = BFI->getBlockFreq(&BB->getParent()->getEntryBlock()); + IsImpossibleBB = Freq < EntryFreq * ImpossibleProb; } + } + void onBlockAnalyzed(const BasicBlock *BB) override { auto *TI = BB->getTerminator(); // If we had any successors at this point, than post-inlining is likely to // have them as well. Note that we assume any basic blocks which existed @@ -782,26 +788,23 @@ CycleSavings += getCallsiteCost(this->CandidateCall, DL); CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue(); - // Remove the cost of the cold basic blocks. - int Size = Cost - ColdSize; - // Allow tiny callees to be inlined regardless of whether they meet the // savings threshold. - Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1; + Cost = Cost > InlineCostAllowance ? Cost - InlineCostAllowance : 1; // Return true if the savings justify the cost of inlining. Specifically, // we evaluate the following inequality: // // CycleSavings PSI->getOrCompHotCountThreshold() // -------------- >= ----------------------------------- - // Size InlineSavingsMultiplier + // Cost InlineSavingsMultiplier // // Note that the left hand side is specific to a call site. The right hand // side is a constant for the entire executable. APInt LHS = CycleSavings; LHS *= InlineSavingsMultiplier; APInt RHS(128, PSI->getOrCompHotCountThreshold()); - RHS *= Size; + RHS *= Cost; return LHS.uge(RHS); } diff --git a/llvm/test/Transforms/Inline/X86/impossible-block.ll b/llvm/test/Transforms/Inline/X86/impossible-block.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/X86/impossible-block.ll @@ -0,0 +1,70 @@ +; RUN: opt < %s -inline -debug-only=inline-cost -impossible-code-rel-freq=2 -print-instruction-comments -S -mtriple=x86_64-unknown-linux-gnu 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-GOOD %s +; RUN: opt < %s -passes='cgscc(inline)' -impossible-code-rel-freq=2 -debug-only=inline-cost -print-instruction-comments -S -mtriple=x86_64-unknown-linux-gnu 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-GOOD %s + +; RUN: opt < %s -inline -debug-only=inline-cost -impossible-code-rel-freq=1 -print-instruction-comments -S -mtriple=x86_64-unknown-linux-gnu 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-BAD %s +; RUN: opt < %s -passes='cgscc(inline)' -impossible-code-rel-freq=1 -debug-only=inline-cost -print-instruction-comments -S -mtriple=x86_64-unknown-linux-gnu 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-BAD %s + +; REQUIRES: asserts + +; Check the threshold for inlining cold callsites. + +; CHECK: Analyzing call of callee... (caller:caller) +; CHECK-NEXT: define void @callee(i1 %c) { +; CHECK-NEXT: entry: +; CHECK-NEXT: ; cost before = -35, cost after = -30, threshold before = 674, threshold after = 674, cost delta = 5 +; CHECK-NEXT: br i1 %c, label %hot, label %cold, !prof !0 +; CHECK: hot: ; preds = %entry +; CHECK-NEXT: ; cost before = -30, cost after = 0, threshold before = 562, threshold after = 562, cost delta = 30 +; CHECK-NEXT: call void @hot_callee() +; CHECK-NEXT: ; cost before = 0, cost after = 0, threshold before = 562, threshold after = 562, cost delta = 0 +; CHECK-NEXT: br label %end +; CHECK: cold: ; preds = %entry +; CHECK-GOOD-NEXT: ; cost before = 0, cost after = 0, threshold before = 562, threshold after = 562, cost delta = 0 +; CHECK-BAD-NEXT: ; cost before = 0, cost after = 30, threshold before = 562, threshold after = 562, cost delta = 30 +; CHECK-NEXT: call void @cold_callee() +; CHECK-GOOD-NEXT: ; cost before = 0, cost after = 0, threshold before = 562, threshold after = 562, cost delta = 0 +; CHECK-BAD-NEXT: ; cost before = 30, cost after = 30, threshold before = 562, threshold after = 562, cost delta = 0 +; CHECK-NEXT: br label %end +; CHECK: end: ; preds = %cold, %hot +; CHECK-GOOD-NEXT: ; cost before = 0, cost after = 0, threshold before = 562, threshold after = 562, cost delta = 0 +; CHECK-BAD-NEXT: ; cost before = 30, cost after = 30, threshold before = 562, threshold after = 562, cost delta = 0 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +declare void @hot_callee() +declare void @cold_callee() +declare void @terminating_callee() noreturn nounwind + +define void @callee(i1 %c) { +entry: + br i1 %c, label %hot, label %cold, !prof !0 + +hot: + call void @hot_callee() + br label %end + +cold: + call void @cold_callee() + br label %end + +end: + ret void +} + +define void @caller(i1 %c) { +entry: + br i1 %c, label %hot, label %cold + +hot: + call void @callee(i1 %c) + br label %end + +cold: + call void @cold_callee() + br label %end + +end: + ret void +} + +!0 = !{!"branch_weights", i32 1000000, i32 1}