diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -185,8 +185,14 @@ static cl::opt DisableSampleLoaderInlining( "disable-sample-loader-inlining", cl::Hidden, cl::init(false), - cl::desc("If true, turn off inliner in sample profile loader. Used for " - "evaluation or debugging.")); + cl::desc( + "If true, skip inline transformation in sample-loader pass. " + "Note profile merging or scaling (as configured by " + "--sample-profile-merge-inlinee) " + "still happens, for the outlining versions. See D121862 for " + "cross-pass side effects when this option is true and take " + "actions accordingly. For instance, adding an option to skip " + "profile merging might serve the purpose of a specific use case.")); cl::opt ProfileInlineGrowthLimit( "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), @@ -1116,11 +1122,20 @@ /// Iteratively inline hot callsites of a function. /// -/// Iteratively traverse all callsites of the function \p F, and find if -/// the corresponding inlined instance exists and is hot in profile. If -/// it is hot enough, inline the callsites and adds new callsites of the -/// callee into the caller. If the call is an indirect call, first promote -/// it to direct call. Each indirect call is limited with a single target. +/// Iteratively traverse all callsites of the function \p F, so as to +/// find out callsites with corresponding inline instances. +/// +/// For such callsites, +/// - If it is hot enough, inline the callsites and add callsites of the callee +/// into the caller. If the call is an indirect call, first promote +/// it to direct call. Each indirect call is limited to a single target. 
+/// +/// - If a callsite is not inlined, merge its profile into the outline +/// version (if --sample-profile-merge-inlinee is true), or scale the +/// counters of standalone function based on the profile of inlined +/// instances (if --sample-profile-merge-inlinee is false). +/// +/// Later passes may consume the updated profiles. /// /// \param F function to perform iterative inlining. /// \param InlinedGUIDs a set to be updated to include all GUIDs that are @@ -1129,8 +1144,6 @@ /// \returns True if there is any inline happened. bool SampleProfileLoader::inlineHotFunctions( Function &F, DenseSet &InlinedGUIDs) { - if (DisableSampleLoaderInlining) - return false; // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure // Profile symbol list is ignored when profile-sample-accurate is on. assert((!ProfAccForSymsInList || @@ -1194,6 +1207,10 @@ PSI->getOrCompHotCountThreshold()); continue; } + + if (DisableSampleLoaderInlining) + continue; + if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) continue; @@ -1205,6 +1222,9 @@ } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { + if (DisableSampleLoaderInlining) + continue; + if (tryInlineCandidate(Candidate)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; @@ -1401,8 +1421,6 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( Function &F, DenseSet &InlinedGUIDs) { - if (DisableSampleLoaderInlining) - return false; // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure // Profile symbol list is ignored when profile-sample-accurate is on. assert((!ProfAccForSymsInList || @@ -1481,12 +1499,16 @@ // fixed, but we generate different types). if (!PSI->isHotCount(EntryCountDistributed)) break; + SmallVector InlinedCallSites; // Attach function profile for promoted indirect callee, and update // call site count for the promoted inline candidate too. 
Candidate = {I, FS, EntryCountDistributed, Candidate.CallsiteDistribution}; - if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + // Do not attempt to promote and inline candidate if + // --disable-sample-loader-inlining is true. + if ((!DisableSampleLoaderInlining) && + tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { if (getInlineCandidate(&NewCandidate, CB)) @@ -1501,7 +1523,10 @@ } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { SmallVector InlinedCallSites; - if (tryInlineCandidate(Candidate, &InlinedCallSites)) { + // Do not attempt to inline a candidate if + // --disable-sample-loader-inlining is true. + if ((!DisableSampleLoaderInlining) && + tryInlineCandidate(Candidate, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { if (getInlineCandidate(&NewCandidate, CB)) CQueue.emplace(NewCandidate); diff --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll --- a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll +++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll @@ -11,6 +11,10 @@ ; when the profile uses md5. 
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.md5.prof -sample-profile-merge-inlinee=true -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s +; Test we properly merge not inlined profile with '--sample-profile-merge-inlinee' even if '--disable-sample-loader-inlining' is true +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.md5.prof -sample-profile-merge-inlinee=true --disable-sample-loader-inlining -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true --disable-sample-loader-inlining -use-profiled-call-graph=0 -enable-new-pm=0 -S | FileCheck -check-prefix=SCALE %s + @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 define i32 @main() #0 !dbg !6 {