Index: lib/Analysis/InlineCost.cpp
===================================================================
--- lib/Analysis/InlineCost.cpp
+++ lib/Analysis/InlineCost.cpp
@@ -66,6 +66,15 @@
                          cl::ZeroOrMore,
                          cl::desc("Threshold for hot callsites "));
 
+static cl::opt<int>
+    ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(1),
+                        cl::ZeroOrMore,
+                        cl::desc("Maximum block frequency, expressed as"
+                                 " a percentage of caller's entry"
+                                 " frequency, for a callsite to be"
+                                 " cold in the absence of profile"
+                                 " information."));
+
 namespace {
 
 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -172,6 +181,11 @@
   /// Return true if size growth is allowed when inlining the callee at CS.
   bool allowSizeGrowth(CallSite CS);
 
+  /// Return true if \p CS is a cold callsite. Uses the profile summary when
+  /// one is available; otherwise compares the callsite's block frequency in
+  /// \p CallerBFI against the caller's entry frequency.
+  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+
   // Custom analysis routines.
   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
 
@@ -631,6 +645,26 @@
   return true;
 }
 
+bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+  // If global profile summary is available, then callsite's coldness is
+  // determined based on that.
+  if (PSI->hasProfileSummary())
+    return PSI->isColdCallSite(CS, CallerBFI);
+  if (!CallerBFI)
+    return false;
+
+  // In the absence of global profile summary, determine if the callsite is cold
+  // relative to caller's entry. The entry frequency is scaled through
+  // BranchProbability (ColdCallSiteRelFreq is a percentage, expected to be in
+  // [0, 100]) rather than by multiplying raw 64-bit frequencies, which could
+  // wrap around for very hot blocks and misclassify them as cold.
+  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
+  auto CallSiteBB = CS.getInstruction()->getParent();
+  BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
+  BlockFrequency CallerEntryFreq(CallerBFI->getEntryFreq());
+  return CallSiteFreq <= CallerEntryFreq * ColdProb;
+}
+
 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
   // If no size growth is allowed for this inlining, set Threshold to 0.
   if (!allowSizeGrowth(CS)) {
@@ -676,7 +710,7 @@
     if (PSI->isHotCallSite(CS, CallerBFI)) {
       DEBUG(dbgs() << "Hot callsite.\n");
       Threshold = Params.HotCallSiteThreshold.getValue();
-    } else if (PSI->isColdCallSite(CS, CallerBFI)) {
+    } else if (isColdCallSite(CS, CallerBFI)) {
       DEBUG(dbgs() << "Cold callsite.\n");
       Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
     }
Index: test/Transforms/Inline/inline-cold-callsite.ll
===================================================================
--- test/Transforms/Inline/inline-cold-callsite.ll
+++ test/Transforms/Inline/inline-cold-callsite.ll
@@ -1,54 +1,47 @@
+
 ; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
 
 ; This tests that a cold callsite gets the inline-cold-callsite-threshold
 ; and does not get inlined. Another callsite to an identical callee that
 ; is not cold gets inlined because cost is below the inline-threshold.
 
-define i32 @callee1(i32 %x) !prof !21 {
-  %x1 = add i32 %x, 1
-  %x2 = add i32 %x1, 1
-  %x3 = add i32 %x2, 1
+define void @callee() {
+  call void @extern()
   call void @extern()
-  ret i32 %x3
+  ret void
 }
 
-define i32 @caller(i32 %n) !prof !22 {
-; CHECK-LABEL: @caller(
-  %cond = icmp sle i32 %n, 100
-  br i1 %cond, label %cond_true, label %cond_false, !prof !0
-
-cond_true:
-; CHECK-LABEL: cond_true:
-; CHECK-NOT: call i32 @callee1
-; CHECK: ret i32 %x3.i
-  %i = call i32 @callee1(i32 %n)
-  ret i32 %i
-cond_false:
-; CHECK-LABEL: cond_false:
-; CHECK: call i32 @callee1
-; CHECK: ret i32 %j
-  %j = call i32 @callee1(i32 %n)
-  ret i32 %j
-}
 declare void @extern()
+declare i1 @ext(i32)
+
+; CHECK-LABEL: caller
+define i32 @caller(i32 %n) {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret i32 0
+
+for.body:
+  %i.05 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+; CHECK: %call = tail call
+  %call = tail call zeroext i1 @ext(i32 %i.05)
+; CHECK-NOT: call void @callee
+; CHECK-NEXT: call void @extern
+  call void @callee()
+  br i1 %call, label %cold, label %for.inc, !prof !0
+
+cold:
+; CHECK: call void @callee
+  call void @callee()
+  br label %for.inc
+
+for.inc:
+  %inc = add nuw nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
 
-!0 = !{!"branch_weights", i32 200, i32 1}
-
-!llvm.module.flags = !{!1}
-!21 = !{!"function_entry_count", i64 200}
-!22 = !{!"function_entry_count", i64 200}
-
-!1 = !{i32 1, !"ProfileSummary", !2}
-!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"InstrProf"}
-!4 = !{!"TotalCount", i64 10000}
-!5 = !{!"MaxCount", i64 1000}
-!6 = !{!"MaxInternalCount", i64 1}
-!7 = !{!"MaxFunctionCount", i64 1000}
-!8 = !{!"NumCounts", i64 3}
-!9 = !{!"NumFunctions", i64 3}
-!10 = !{!"DetailedSummary", !11}
-!11 = !{!12, !13, !14}
-!12 = !{i32 10000, i64 1000, i32 1}
-!13 = !{i32 999000, i64 1000, i32 1}
-!14 = !{i32 999999, i64 1, i32 2}
+!0 = !{!"branch_weights", i32 1, i32 2000}