diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/ProfileData/SampleProfReader.h" #include "llvm/Transforms/IPO/SampleContextTracker.h" #include #include @@ -40,40 +41,41 @@ class ProfiledCallGraph { public: using iterator = std::set::iterator; - ProfiledCallGraph(StringMap &ProfileMap, - SampleContextTracker &ContextTracker) { - // Add all profiled functions into profiled call graph. - // We only add function with actual context profile - for (auto &FuncSample : ProfileMap) { - FunctionSamples *FSamples = &FuncSample.second; - addProfiledFunction(FSamples->getName()); + + // Constructor for non-CS profile. + ProfiledCallGraph(StringMap &ProfileMap) { + assert(!FunctionSamples::ProfileIsCS && "CS profile is not handled here"); + for (const auto &Samples : ProfileMap) { + addProfiledCalls(Samples.second); } + } - // BFS traverse the context profile trie to add call edges for - // both samples calls as well as calls shown in context. + // Constructor for CS profile. + ProfiledCallGraph(SampleContextTracker &ContextTracker) { + // BFS traverse the context profile trie to add call edges for calls shown + // in context. std::queue Queue; - Queue.push(&ContextTracker.getRootContext()); + for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) { + ContextTrieNode *Callee = &Child.second; + addProfiledFunction(Callee->getFuncName()); + Queue.push(Callee); + } + while (!Queue.empty()) { ContextTrieNode *Caller = Queue.front(); Queue.pop(); - FunctionSamples *CallerSamples = Caller->getFunctionSamples(); - - // Add calls for context, if both caller and callee has context profile. + // Add calls for context. When AddNodeWithSamplesOnly is true, both caller + // and callee need to have context profile. + // Note that callsite target samples are completely ignored since they can + // conflict with the context edges, which are formed by context + // compression during profile generation, for cyclic SCCs. This may + // further result in an SCC order incompatible with the purely + // context-based one, which may in turn block context-based inlining. for (auto &Child : Caller->getAllChildContext()) { ContextTrieNode *Callee = &Child.second; + addProfiledFunction(Callee->getFuncName()); Queue.push(Callee); - if (CallerSamples && Callee->getFunctionSamples()) { - addProfiledCall(Caller->getFuncName(), Callee->getFuncName()); - } - } - - // Add calls from call site samples - if (CallerSamples) { - for (auto &LocCallSite : CallerSamples->getBodySamples()) { - for (auto &NameCallSite : LocCallSite.second.getCallTargets()) { - addProfiledCall(Caller->getFuncName(), NameCallSite.first()); - } - } + addProfiledCall(Caller->getFuncName(), Callee->getFuncName()); } } } @@ -89,6 +91,7 @@ ProfiledFunctions[Name] = ProfiledCallGraphNode(Name); } } + void addProfiledCall(StringRef CallerName, StringRef CalleeName) { assert(ProfiledFunctions.count(CallerName)); auto CalleeIt = ProfiledFunctions.find(CalleeName); @@ -98,6 +101,25 @@ ProfiledFunctions[CallerName].Callees.insert(&CalleeIt->second); } + void addProfiledCalls(const FunctionSamples &Samples) { + addProfiledFunction(Samples.getFuncName()); + + for (const auto &Sample : Samples.getBodySamples()) { + for (const auto &Target : Sample.second.getCallTargets()) { + addProfiledFunction(Target.first()); + addProfiledCall(Samples.getFuncName(), Target.first()); + } + } + + for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { + for (const auto &InlinedSamples : CallsiteSamples.second) { + addProfiledFunction(InlinedSamples.first); + addProfiledCall(Samples.getFuncName(), InlinedSamples.first); + addProfiledCalls(InlinedSamples.second); + } + } + } + private: ProfiledCallGraphNode Root; StringMap ProfiledFunctions; diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/CallGraph.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Instructions.h" #include "llvm/ProfileData/SampleProf.h" @@ -124,7 +123,6 @@ ContextTrieNode &getRootContext(); void promoteMergeContextSamplesTree(const Instruction &Inst, StringRef CalleeName); - void addCallGraphEdges(CallGraph &CG, StringMap &SymbolMap); // Dump the internal context profile trie. void dump(); diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -568,26 +568,4 @@ return *ToNode; } - -// Replace call graph edges with dynamic call edges from the profile. -void SampleContextTracker::addCallGraphEdges(CallGraph &CG, - StringMap &SymbolMap) { - // Add profile call edges to the call graph. - std::queue NodeQueue; - NodeQueue.push(&RootContext); - while (!NodeQueue.empty()) { - ContextTrieNode *Node = NodeQueue.front(); - NodeQueue.pop(); - Function *F = SymbolMap.lookup(Node->getFuncName()); - for (auto &I : Node->getAllChildContext()) { - ContextTrieNode *ChildNode = &I.second; - NodeQueue.push(ChildNode); - if (F && !F->isDeclaration()) { - Function *Callee = SymbolMap.lookup(ChildNode->getFuncName()); - if (Callee && !Callee->isDeclaration()) - CG[F]->addCalledFunction(nullptr, CG[Callee]); - } - } - } -} } // namespace llvm diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -77,6 +77,7 @@ #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ProfiledCallGraph.h" #include "llvm/Transforms/IPO/SampleContextTracker.h" #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/Transforms/Instrumentation.h" @@ -160,15 +161,11 @@ "order of call graph during sample profile loading. It only " "works for new pass manager. ")); -static cl::opt UseProfileIndirectCallEdges( - "use-profile-indirect-call-edges", cl::init(true), cl::Hidden, - cl::desc("Considering indirect call samples from profile when top-down " - "processing functions. Only CSSPGO is supported.")); - -static cl::opt UseProfileTopDownOrder( - "use-profile-top-down-order", cl::init(false), cl::Hidden, - cl::desc("Process functions in one SCC in a top-down order " - "based on the input profile.")); +static cl::opt + UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, + cl::desc("Process functions in a top-down order " + "defined by the profiled call graph when " + "-sample-profile-top-down-load is on.")); static cl::opt ProfileSizeInline( "sample-profile-inline-size", cl::Hidden, cl::init(false), @@ -395,8 +392,7 @@ const SmallVectorImpl &Candidates, const Function &F, bool Hot); std::vector buildFunctionOrder(Module &M, CallGraph *CG); - void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples); - void replaceCallGraphEdges(CallGraph &CG, StringMap &SymbolMap); + std::unique_ptr buildProfiledCallGraph(CallGraph &CG); void generateMDProfMetadata(Function &F); /// Map from function name to Function *. Used to find the function from @@ -1579,43 +1575,25 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) -// Add inlined profile call edges to the call graph. -void SampleProfileLoader::addCallGraphEdges(CallGraph &CG, - const FunctionSamples &Samples) { - Function *Caller = SymbolMap.lookup(Samples.getFuncName()); - if (!Caller || Caller->isDeclaration()) - return; - - // Skip non-inlined call edges which are not important since top down inlining - // for non-CS profile is to get more precise profile matching, not to enable - // more inlining. - - for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { - for (const auto &InlinedSamples : CallsiteSamples.second) { - Function *Callee = SymbolMap.lookup(InlinedSamples.first); - if (Callee && !Callee->isDeclaration()) - CG[Caller]->addCalledFunction(nullptr, CG[Callee]); - addCallGraphEdges(CG, InlinedSamples.second); - } +std::unique_ptr +SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { + std::unique_ptr ProfiledCG; + if (ProfileIsCS) + ProfiledCG = std::make_unique(*ContextTracker); + else + ProfiledCG = std::make_unique(Reader->getProfiles()); + + // Add all functions into the profiled call graph even if they are not in + // the profile. This makes sure functions missing from the profile still + // gets a chance to be processed. + for (auto &Node : CG) { + const auto *F = Node.first; + if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile")) + continue; + ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F)); } -} -// Replace call graph edges with dynamic call edges from the profile. -void SampleProfileLoader::replaceCallGraphEdges( - CallGraph &CG, StringMap &SymbolMap) { - // Remove static call edges from the call graph except for the ones from the - // root which make the call graph connected. - for (const auto &Node : CG) - if (Node.second.get() != CG.getExternalCallingNode()) - Node.second->removeAllCalledFunctions(); - - // Add profile call edges to the call graph. - if (ProfileIsCS) { - ContextTracker->addCallGraphEdges(CG, SymbolMap); - } else { - for (const auto &Samples : Reader->getProfiles()) - addCallGraphEdges(CG, Samples.second); - } + return ProfiledCG; } std::vector @@ -1623,6 +1601,10 @@ std::vector FunctionOrderList; FunctionOrderList.reserve(M.size()); + if (!ProfileTopDownLoad && UseProfiledCallGraph) + errs() << "WARNING: -use-profiled-call-graph ignored, should be used " + "together with -sample-profile-top-down-load.\n"; + if (!ProfileTopDownLoad || CG == nullptr) { if (ProfileMergeInlinee) { // Disable ProfileMergeInlinee if profile is not loaded in top down order, @@ -1641,89 +1623,78 @@ assert(&CG->getModule() == &M); - // Add indirect call edges from profile to augment the static call graph. - // Functions will be processed in a top-down order defined by the static call - // graph. Adjusting the order by considering indirect call edges from the - // profile (which don't exist in the static call graph) can enable the - // inlining of indirect call targets by processing the caller before them. - // TODO: enable this for non-CS profile and fix the counts returning logic to - // have a full support for indirect calls. - if (UseProfileIndirectCallEdges && ProfileIsCS) { - for (auto &Entry : *CG) { - const auto *F = Entry.first; - if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile")) - continue; - auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName()); - if (AllContexts.empty()) - continue; - - for (const auto &BB : *F) { - for (const auto &I : BB.getInstList()) { - const auto *CB = dyn_cast(&I); - if (!CB || !CB->isIndirectCall()) - continue; - const DebugLoc &DLoc = I.getDebugLoc(); - if (!DLoc) - continue; - auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc); - for (FunctionSamples *Samples : AllContexts) { - if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) { - for (const auto &Target : CallTargets.get()) { - Function *Callee = SymbolMap.lookup(Target.first()); - if (Callee && !Callee->isDeclaration()) - Entry.second->addCalledFunction(nullptr, (*CG)[Callee]); - } - } - } - } + if (UseProfiledCallGraph || + (ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) { + // Use profiled call edges to augment the top-down order. There are cases + // that the top-down order computed based on the static call graph doesn't + // reflect real execution order. For example + // + // 1. Incomplete static call graph due to unknown indirect call targets. + // Adjusting the order by considering indirect call edges from the + // profile can enable the inlining of indirect call targets by allowing + // the caller processed before them. + // 2. Mutual call edges in an SCC. The static processing order computed for + // an SCC may not reflect the call contexts in the context-sensitive + // profile, thus may cause potential inlining to be overlooked. The + // function order in one SCC is being adjusted to a top-down order based + // on the profile to favor more inlining. This is only a problem with CS + // profile. + // 3. Transitive indirect call edges due to inlining. When a callee function + // (say B) is inlined into into a caller function (say A) in LTO prelink, + // every call edge originated from the callee B will be transferred to + // the caller A. If any transferred edge (say A->C) is indirect, the + // original profiled indirect edge B->C, even if considered, would not + // enforce a top-down order from the caller A to the potential indirect + // call target C in LTO postlink since the inlined callee B is gone from + // the static call graph. + // 4. #3 can happen even for direct call targets, due to functions defined + // in header files. A header function (say A), when included into source + // files, is defined multiple times but only one definition survives due + // to ODR. Therefore, the LTO prelink inlining done on those dropped + // definitions can be useless based on a local file scope. More + // importantly, the inlinee (say B), once fully inlined to a + // to-be-dropped A, will have no profile to consume when its outlined + // version is compiled. This can lead to a profile-less prelink + // compilation for the outlined version of B which may be called from + // external modules. while this isn't easy to fix, we rely on the + // postlink AutoFDO pipeline to optimize B. Since the survived copy of + // the A can be inlined in its local scope in prelink, it may not exist + // in the merged IR in postlink, and we'll need the profiled call edges + // to enforce a top-down order for the rest of the functions. + // + // Considering those cases, a profiled call graph completely independent of + // the static call graph is constructed based on profile data, where + // function objects are not even needed to handle case #3 and case 4. + // + // Note that static callgraph edges are completely ignored since they + // can be conflicting with profiled edges for cyclic SCCs and may result in + // an SCC order incompatible with profile-defined one. Using strictly + // profile order ensures a maximum inlining experience. On the other hand, + // static call edges are not so important when they don't correspond to a + // context in the profile. + + std::unique_ptr ProfiledCG = buildProfiledCallGraph(*CG); + scc_iterator CGI = scc_begin(ProfiledCG.get()); + while (!CGI.isAtEnd()) { + for (ProfiledCallGraphNode *Node : *CGI) { + Function *F = SymbolMap.lookup(Node->Name); + if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) + FunctionOrderList.push_back(F); } + ++CGI; } - } - - // Compute a top-down order the profile which is used to sort functions in - // one SCC later. The static processing order computed for an SCC may not - // reflect the call contexts in the context-sensitive profile, thus may cause - // potential inlining to be overlooked. The function order in one SCC is being - // adjusted to a top-down order based on the profile to favor more inlining. - DenseMap ProfileOrderMap; - if (UseProfileTopDownOrder || - (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) { - // Create a static call graph. The call edges are not important since they - // will be replaced by dynamic edges from the profile. - CallGraph ProfileCG(M); - replaceCallGraphEdges(ProfileCG, SymbolMap); - scc_iterator CGI = scc_begin(&ProfileCG); - uint64_t I = 0; + } else { + scc_iterator CGI = scc_begin(CG); while (!CGI.isAtEnd()) { for (CallGraphNode *Node : *CGI) { - if (auto *F = Node->getFunction()) - ProfileOrderMap[F] = ++I; + auto *F = Node->getFunction(); + if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) + FunctionOrderList.push_back(F); } ++CGI; } } - scc_iterator CGI = scc_begin(CG); - while (!CGI.isAtEnd()) { - uint64_t Start = FunctionOrderList.size(); - for (CallGraphNode *Node : *CGI) { - auto *F = Node->getFunction(); - if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) - FunctionOrderList.push_back(F); - } - - // Sort nodes in SCC based on the profile top-down order. - if (!ProfileOrderMap.empty()) { - std::stable_sort(FunctionOrderList.begin() + Start, - FunctionOrderList.end(), - [&ProfileOrderMap](Function *Left, Function *Right) { - return ProfileOrderMap[Left] < ProfileOrderMap[Right]; - }); - } - - ++CGI; - } - LLVM_DEBUG({ dbgs() << "Function processing order:\n"; for (auto F : reverse(FunctionOrderList)) { diff --git a/llvm/test/Transforms/SampleProfile/ctxsplit.ll b/llvm/test/Transforms/SampleProfile/ctxsplit.ll --- a/llvm/test/Transforms/SampleProfile/ctxsplit.ll +++ b/llvm/test/Transforms/SampleProfile/ctxsplit.ll @@ -1,14 +1,14 @@ ; Check the nonflattened part of the ctxsplit profile will be read in thinlto ; postlink phase while flattened part of the ctxsplit profile will not be read. -; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=POSTLINK +; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -use-profiled-call-graph=0 -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=POSTLINK ; ; Check both the flattened and nonflattened parts of the ctxsplit profile will ; be read in thinlto prelink phase. -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=PRELINK +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -use-profiled-call-graph=0 -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=PRELINK ; ; Check both the flattened and nonflattened parts of the ctxsplit profile will ; be read in non-thinlto mode. -; RUN: opt < %s -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=NOTHINLTO +; RUN: opt < %s -passes='default' -pgo-kind=pgo-sample-use-pipeline -use-profiled-call-graph=0 -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=NOTHINLTO ; POSTLINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { ; POSTLINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { diff --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll --- a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll +++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll @@ -1,14 +1,14 @@ ; Test we lose details of not inlined profile without '-sample-profile-merge-inlinee' -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -enable-new-pm=0 -S | FileCheck -check-prefix=SCALE %s -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -enable-new-pm=0 -S | FileCheck -check-prefix=SCALE %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -S | FileCheck -check-prefix=SCALE %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -use-profiled-call-graph=0 -enable-new-pm=0 -S | FileCheck -check-prefix=SCALE %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -use-profiled-call-graph=0 -enable-new-pm=0 -S | FileCheck -check-prefix=SCALE %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -use-profiled-call-graph=0 -S | FileCheck -check-prefix=SCALE %s ; Test we properly merge not inlined profile with '-sample-profile-merge-inlinee' -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -S | FileCheck -check-prefix=MERGE %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s ; Test we properly merge not inlined profile with '-sample-profile-merge-inlinee' ; when the profile uses md5. -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.md5.prof -sample-profile-merge-inlinee=true -S | FileCheck -check-prefix=MERGE %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.md5.prof -sample-profile-merge-inlinee=true -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 diff --git a/llvm/test/Transforms/SampleProfile/profile-context-order.ll b/llvm/test/Transforms/SampleProfile/profile-context-order.ll --- a/llvm/test/Transforms/SampleProfile/profile-context-order.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-order.ll @@ -1,21 +1,21 @@ ;; Test for different function processing orders affecting inlining in sample profile loader. ;; There is an SCC _Z5funcAi -> _Z8funcLeafi -> _Z5funcAi in the program. -;; With -use-profile-top-down-order=0, the top-down processing order of +;; With -use-profiled-call-graph=0, the top-down processing order of ;; that SCC is (_Z8funcLeafi, _Z5funcAi), which is determinined based on -;; the static call graph. With -use-profile-top-down-order=1, call edges +;; the static call graph. With -use-profiled-call-graph=1, call edges ;; from profile are considered, thus the order becomes (_Z5funcAi, _Z8funcLeafi) ;; which leads to _Z8funcLeafi inlined into _Z5funcAi. -; RUN: opt < %s -passes=sample-profile -use-profile-top-down-order=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=INLINE -; RUN: opt < %s -passes=sample-profile -use-profile-top-down-order=0 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=NOINLINE +; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=INLINE +; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=0 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=NOINLINE ;; There is an indirect call _Z5funcAi -> _Z3fibi in the program. -;; With -use-profile-indirect-call-edges=0, the processing order computed +;; With -use-profiled-call-graph=0, the processing order computed ;; based on the static call graph is (_Z3fibi, _Z5funcAi). With -;; -use-profile-top-down-order=1, the indirect call edge from profile is +;; -use-profiled-call-graph=1, the indirect call edge from profile is ;; considered, thus the order becomes (_Z5funcAi, _Z3fibi) which leads to ;; _Z3fibi inlined into _Z5funcAi. -; RUN: opt < %s -passes=sample-profile -use-profile-indirect-call-edges=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=ICALL-INLINE +; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=ICALL-INLINE @factor = dso_local global i32 3, align 4, !dbg !0 @fp = dso_local global i32 (i32)* null, align 8 diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll --- a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll @@ -25,8 +25,6 @@ ; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; INLINE-ALL-NEXT: Getting callee context for instr: %call.i1 = tail call i32 @_Z3fibi ; INLINE-ALL-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi -; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcAi -; INLINE-ALL-NEXT: Merging context profile into base profile: _Z5funcAi ; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcBi ; INLINE-ALL-NEXT: Merging context profile into base profile: _Z5funcBi ; INLINE-ALL-NEXT: Found context tree root to promote: external:10 @ _Z5funcBi @@ -41,6 +39,8 @@ ; INLINE-ALL-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi ; INLINE-ALL-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi ; INLINE-ALL-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi +; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcAi +; INLINE-ALL-NEXT: Merging context profile into base profile: _Z5funcAi ; INLINE-ALL-NEXT: Getting base profile for function: _Z8funcLeafi ; INLINE-ALL-NEXT: Merging context profile into base profile: _Z8funcLeafi @@ -55,16 +55,6 @@ ; INLINE-HOT-NEXT: Callee context found: main:3.1 @ _Z5funcBi ; INLINE-HOT-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi ; INLINE-HOT-NEXT: Callee context found: main:3 @ _Z5funcAi -; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcAi -; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcAi -; INLINE-HOT-NEXT: Found context tree root to promote: main:3 @ _Z5funcAi -; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi -; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi:1 @ _Z8funcLeafi -; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50 -; INLINE-HOT-NEXT: Callee context found: _Z5funcAi:1 @ _Z8funcLeafi -; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcAi:1 @ _Z8funcLeafi -; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62 -; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69 ; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcBi ; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcBi ; INLINE-HOT-NEXT: Found context tree root to promote: external:10 @ _Z5funcBi @@ -79,6 +69,16 @@ ; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi ; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi +; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcAi +; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcAi +; INLINE-HOT-NEXT: Found context tree root to promote: main:3 @ _Z5funcAi +; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi +; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50 +; INLINE-HOT-NEXT: Callee context found: _Z5funcAi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcAi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62 +; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69 ; INLINE-HOT-NEXT: Getting base profile for function: _Z8funcLeafi ; INLINE-HOT-NEXT: Merging context profile into base profile: _Z8funcLeafi diff --git a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll --- a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll +++ b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll @@ -1,14 +1,14 @@ ;; Test for different function processing orders affecting inlining in sample profile loader. ;; There is an SCC _Z5funcAi -> _Z8funcLeafi -> _Z5funcAi in the program. -;; With -use-profile-top-down-order=0, the top-down processing order of +;; With -use-profiled-call-graph=0, the top-down processing order of ;; that SCC is (_Z8funcLeafi, _Z5funcAi), which is determinined based on -;; the static call graph. With -use-profile-top-down-order=1, call edges +;; the static call graph. With -use-profiled-call-graph=1, call edges ;; from profile are considered, thus the order becomes (_Z5funcAi, _Z8funcLeafi). ;; While _Z8funcLeafi is not supposed to be inlined, the outlined entry counts ;; are affected. -; RUN: opt < %s -passes=sample-profile -use-profile-top-down-order=0 -sample-profile-file=%S/Inputs/profile-topdown-order.prof -S | FileCheck %s -check-prefix=STATIC -; RUN: opt < %s -passes=sample-profile -use-profile-top-down-order=1 -sample-profile-file=%S/Inputs/profile-topdown-order.prof -S | FileCheck %s -check-prefix=DYNAMIC +; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=0 -sample-profile-file=%S/Inputs/profile-topdown-order.prof -S | FileCheck %s -check-prefix=STATIC +; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=1 -sample-profile-file=%S/Inputs/profile-topdown-order.prof -S | FileCheck %s -check-prefix=DYNAMIC ; STATIC: define dso_local i32 @_Z8funcLeafi{{.*}} !prof ![[#PROF:]] diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -42,7 +42,7 @@ std::vector CSPreInliner::buildTopDownOrder() { std::vector Order; - ProfiledCallGraph ProfiledCG(ProfileMap, ContextTracker); + ProfiledCallGraph ProfiledCG(ContextTracker); // Now that we have a profiled call graph, construct top-down order // by building up SCC and reversing SCC order.