diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -183,10 +183,14 @@ cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size.")); +// Since profiles are consumed by many passes, turning on this option has +// side effects. For instance, pre-link SCC inliner would see merged profiles +// and inline the hot functions (that are skipped in this pass). static cl::opt DisableSampleLoaderInlining( "disable-sample-loader-inlining", cl::Hidden, cl::init(false), - cl::desc("If true, turn off inliner in sample profile loader. Used for " - "evaluation or debugging.")); + cl::desc("If true, artifically skip inline transformation in sample-loader " + "pass, and merge (or scale) profiles (as configured by " + "--sample-profile-merge-inlinee).")); cl::opt ProfileInlineGrowthLimit( "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), @@ -936,10 +940,15 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate( Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, SmallVector *InlinedCallSite) { + // Bail out early if sample-loader inliner is disabled. + if (DisableSampleLoaderInlining) + return false; + // Bail out early if MaxNumPromotions is zero. // This prevents allocating an array of zero length in callees below. if (MaxNumPromotions == 0) return false; + auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName(); auto R = SymbolMap.find(CalleeFunctionName); if (R == SymbolMap.end() || !R->getValue()) @@ -1116,11 +1125,20 @@ /// Iteratively inline hot callsites of a function. /// -/// Iteratively traverse all callsites of the function \p F, and find if -/// the corresponding inlined instance exists and is hot in profile. If -/// it is hot enough, inline the callsites and adds new callsites of the -/// callee into the caller. 
If the call is an indirect call, first promote -/// it to direct call. Each indirect call is limited with a single target. +/// Iteratively traverse all callsites of the function \p F, so as to +/// find out callsites with corresponding inline instances. +/// +/// For such callsites, +/// - If it is hot enough, inline the callsites and add callsites of the callee +/// into the caller. If the call is an indirect call, first promote +/// it to direct call. Each indirect call is limited with a single target. +/// +/// - If a callsite is not inlined, merge its profile to the outline +/// version (if --sample-profile-merge-inlinee is true), or scale the +/// counters of standalone function based on the profile of inlined +/// instances (if --sample-profile-merge-inlinee is false). +/// +/// Later passes may consume the updated profiles. /// /// \param F function to perform iterative inlining. /// \param InlinedGUIDs a set to be updated to include all GUIDs that are @@ -1129,8 +1147,6 @@ /// \returns True if there is any inline happened. bool SampleProfileLoader::inlineHotFunctions( Function &F, DenseSet &InlinedGUIDs) { - if (DisableSampleLoaderInlining) - return false; // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure // Profile symbol list is ignored when profile-sample-accurate is on. assert((!ProfAccForSymsInList || @@ -1194,6 +1210,7 @@ PSI->getOrCompHotCountThreshold()); continue; } + if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) continue; @@ -1227,6 +1244,10 @@ bool SampleProfileLoader::tryInlineCandidate( InlineCandidate &Candidate, SmallVector *InlinedCallSites) { + // Do not attempt to inline a candidate if + // --disable-sample-loader-inlining is true. 
+ if (DisableSampleLoaderInlining) + return false; CallBase &CB = *Candidate.CallInstr; Function *CalledFunction = CB.getCalledFunction(); @@ -1401,8 +1422,6 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( Function &F, DenseSet &InlinedGUIDs) { - if (DisableSampleLoaderInlining) - return false; // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure // Profile symbol list is ignored when profile-sample-accurate is on. assert((!ProfAccForSymsInList || @@ -1481,11 +1500,13 @@ // fixed, but we generate different types). if (!PSI->isHotCount(EntryCountDistributed)) break; + SmallVector InlinedCallSites; // Attach function profile for promoted indirect callee, and update // call site count for the promoted inline candidate too. Candidate = {I, FS, EntryCountDistributed, Candidate.CallsiteDistribution}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { diff --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll --- a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll +++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll @@ -11,6 +11,10 @@ ; when the profile uses md5. 
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.md5.prof -sample-profile-merge-inlinee=true -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s +; Test we properly merge not inlined profile with '--sample-profile-merge-inlinee' even if '--disable-sample-loader-inlining' is true +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.md5.prof -sample-profile-merge-inlinee=true --disable-sample-loader-inlining -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true --disable-sample-loader-inlining -use-profiled-call-graph=0 -enable-new-pm=0 -S | FileCheck -check-prefix=SCALE %s + @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 define i32 @main() #0 !dbg !6 {