Index: llvm/trunk/include/llvm/Analysis/InlineCost.h =================================================================== --- llvm/trunk/include/llvm/Analysis/InlineCost.h +++ llvm/trunk/include/llvm/Analysis/InlineCost.h @@ -23,6 +23,7 @@ class CallSite; class DataLayout; class Function; +class ProfileSummaryInfo; class TargetTransformInfo; namespace InlineConstants { @@ -111,7 +112,7 @@ /// inlining the callsite. It is an expensive, heavyweight call. InlineCost getInlineCost(CallSite CS, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT); + AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI); /// \brief Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a @@ -120,7 +121,7 @@ // InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT); + AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI); int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel); Index: llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h =================================================================== --- llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h +++ llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h @@ -24,6 +24,7 @@ class CallSite; class DataLayout; class InlineCost; +class ProfileSummaryInfo; template <class PtrType, unsigned SmallSize> class SmallPtrSet; /// Inliner - This class contains all of the helper code which is used to @@ -85,6 +86,7 @@ protected: AssumptionCacheTracker *ACT; + ProfileSummaryInfo *PSI; }; } // End llvm namespace Index: llvm/trunk/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp +++ llvm/trunk/lib/Analysis/InlineCost.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include 
"llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" @@ -77,6 +78,9 @@ /// The cache of @llvm.assume intrinsics. AssumptionCacheTracker *ACT; + /// Profile summary information. + ProfileSummaryInfo *PSI; + // The called function. Function &F; @@ -200,17 +204,19 @@ public: CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, - Function &Callee, int Threshold, CallSite CSArg) - : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold), - Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + ProfileSummaryInfo *PSI, Function &Callee, int Threshold, + CallSite CSArg) + : TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg), + Threshold(Threshold), Cost(0), IsCallerRecursive(false), + IsRecursiveCall(false), ExposesReturnsTwice(false), + HasDynamicAlloca(false), ContainsNoDuplicateCall(false), + HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -626,35 +632,15 @@ Threshold = OptSizeThreshold; } - // If profile information is available, use that to adjust threshold of hot - 
// and cold functions. - // FIXME: The heuristic used below for determining hotness and coldness are - // based on preliminary SPEC tuning and may not be optimal. Replace this with - // a well-tuned heuristic based on *callsite* hotness and not callee hotness. - uint64_t FunctionCount = 0, MaxFunctionCount = 0; - bool HasPGOCounts = false; - if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) { - HasPGOCounts = true; - FunctionCount = Callee.getEntryCount().getValue(); - MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue(); - } - // Listen to the inlinehint attribute or profile based hotness information // when it would increase the threshold and the caller does not need to // minimize its size. - bool InlineHint = - Callee.hasFnAttribute(Attribute::InlineHint) || - (HasPGOCounts && - FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)); + bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) || + PSI->isHotFunction(&Callee); if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize()) Threshold = HintThreshold; - // Listen to the cold attribute or profile based coldness information - // when it would decrease the threshold. - bool ColdCallee = - Callee.hasFnAttribute(Attribute::Cold) || - (HasPGOCounts && - FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)); + bool ColdCallee = PSI->isColdFunction(&Callee); // Command line argument for DefaultInlineThreshold will override the default // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, // do not use the default cold threshold even if it is smaller. @@ -963,7 +949,8 @@ // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. 
- CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); + CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold, + CS); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. @@ -1451,9 +1438,10 @@ InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT) { + AssumptionCacheTracker *ACT, + ProfileSummaryInfo *PSI) { return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI, - ACT); + ACT, PSI); } int llvm::computeThresholdFromOptLevels(unsigned OptLevel, @@ -1472,7 +1460,8 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT) { + AssumptionCacheTracker *ACT, + ProfileSummaryInfo *PSI) { // Cannot inline indirect calls. if (!Callee) @@ -1506,7 +1495,7 @@ DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS); + CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); Index: llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp +++ llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" @@ -65,6 +66,7 @@ "Inliner for always_inline functions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) Index: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp +++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" @@ -60,7 +61,7 @@ InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); - return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT); + return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, PSI); } bool runOnSCC(CallGraphSCC &SCC) override; @@ -77,6 +78,7 @@ "Function Integration/Inlining", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(SimpleInliner, "inline", Index: llvm/trunk/lib/Transforms/IPO/Inliner.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/Inliner.cpp +++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -56,6 +57,7 @@ /// 
always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); getAAResultsAnalysisUsage(AU); CallGraphSCCPass::getAnalysisUsage(AU); @@ -374,6 +376,7 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); ACT = &getAnalysis<AssumptionCacheTracker>(); + PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule()); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); SmallPtrSet<Function*, 8> SCCFunctions; Index: llvm/trunk/test/Transforms/Inline/inline-cold-callee.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/inline-cold-callee.ll +++ llvm/trunk/test/Transforms/Inline/inline-cold-callee.ll @@ -5,7 +5,7 @@ ; A callee with identical body does gets inlined because cost fits within the ; inline-threshold -define i32 @callee1(i32 %x) !prof !1 { +define i32 @callee1(i32 %x) !prof !21 { %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 %x3 = add i32 %x2, 1 @@ -13,7 +13,7 @@ ret i32 %x3 } -define i32 @callee2(i32 %x) !prof !2 { +define i32 @callee2(i32 %x) !prof !22 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 @@ -22,7 +22,7 @@ ret i32 %x3 } -define i32 @caller2(i32 %y1) !prof !2 { +define i32 @caller2(i32 %y1) !prof !22 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 @@ -32,8 +32,21 @@ ret i32 %y3 } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"MaxFunctionCount", i32 1000} -!1 = !{!"function_entry_count", i64 100} -!2 = !{!"function_entry_count", i64 1} - +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 100} +!22 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = 
!{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} Index: llvm/trunk/test/Transforms/Inline/inline-hot-callee.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/inline-hot-callee.ll +++ llvm/trunk/test/Transforms/Inline/inline-hot-callee.ll @@ -5,7 +5,7 @@ ; A cold callee with identical body does not get inlined because cost exceeds the ; inline-threshold -define i32 @callee1(i32 %x) !prof !1 { +define i32 @callee1(i32 %x) !prof !21 { %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 %x3 = add i32 %x2, 1 @@ -13,7 +13,7 @@ ret i32 %x3 } -define i32 @callee2(i32 %x) !prof !2 { +define i32 @callee2(i32 %x) !prof !22 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 @@ -22,7 +22,7 @@ ret i32 %x3 } -define i32 @caller2(i32 %y1) !prof !2 { +define i32 @caller2(i32 %y1) !prof !22 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 @@ -32,8 +32,21 @@ ret i32 %y3 } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"MaxFunctionCount", i32 10} -!1 = !{!"function_entry_count", i64 10} -!2 = !{!"function_entry_count", i64 1} - +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 300} +!22 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2}