diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -15,6 +15,7 @@ #define LLVM_PROFILEDATA_SAMPLEPROF_H #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -96,6 +97,12 @@ SPF_Binary = 0xff }; +enum SampleProfileLayout { + SPL_None = 0, + SPL_Nest = 0x1, + SPL_CS = 0x2, +}; + static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | @@ -358,6 +365,19 @@ : sampleprof_error::success; } + /// Remove called function and return its count if it exists. + Optional<uint64_t> removeCalledTarget(StringRef F) { + Optional<uint64_t> Count; + auto I = CallTargets.find(F); + if (I != CallTargets.end()) { + Count = I->second; + CallTargets.erase(I); + // Deduct the target's count from NumSamples, saturating at zero. + NumSamples = NumSamples > *Count ? NumSamples - *Count : 0; + } + return Count; + } + /// Return true if this sample record contains function calls. bool hasCalls() const { return !CallTargets.empty(); } @@ -737,6 +757,22 @@ FName, Num, Weight); } + /// Remove call target \p FName; deduct the removed body samples from total. + void removeCalledTargetAndUpdateTotalSamples(uint32_t LineOffset, + uint32_t Discriminator, + StringRef FName) { + auto I = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (I == BodySamples.end()) + return; + const uint64_t Before = I->second.getSamples(); + if (!I->second.removeCalledTarget(FName)) + return; + const uint64_t Removed = Before - I->second.getSamples(); + TotalSamples = TotalSamples > Removed ? TotalSamples - Removed : 0; + if (!I->second.getSamples()) + BodySamples.erase(I); + } + sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num, uint64_t Weight = 1) { SampleRecord S; @@ -878,6 +914,8 @@ return CallsiteSamples; } + CallsiteSampleMap &getCallsiteSamples() { return CallsiteSamples; } + /// Return the maximum of sample counts in a function body including functions /// inlined in it. 
uint64_t getMaxCountInside() const { @@ -1200,11 +1238,14 @@ }; // CSProfileConverter converts a full context-sensitive flat sample profile into -// a nested context-sensitive sample profile. +// a nested sample profile, or the other way around from a nested profile to a +// CS flat profile. class CSProfileConverter { public: - CSProfileConverter(SampleProfileMap &Profiles); - void convertProfiles(); + CSProfileConverter(SampleProfileMap &Profiles, + std::list *Contexts = nullptr); + void convertToCSNestedProfiles(); + void convertToCSFlatProfiles(); struct FrameNode { FrameNode(StringRef FName = StringRef(), FunctionSamples *FSamples = nullptr, @@ -1226,10 +1267,18 @@ private: // Nest all children profiles into the profile of Node. - void convertProfiles(FrameNode &Node); + void convertToCSNestedProfiles(FrameNode &Node); + // Flatten moving all children profiles of `FProfile` out to `NewProfileMap` + void convertToCSFlatProfiles(FunctionSamples &FProfile, + SampleProfileMap &NewProfileMap, + SampleContextFrameVector &Context); FrameNode *getOrCreateContextPath(const SampleContext &Context); SampleProfileMap &ProfileMap; + + // Underlying context table serves for sample profile writer when + // converting a nested profile to a CS profile. 
+ std::list *Contexts; FrameNode RootFrame; }; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include using namespace llvm; using namespace sampleprof; @@ -39,6 +40,12 @@ "generate extra base profile for function with all its context " "profiles merged into it.")); +cl::opt PreserveContextAttributes( + "preserve-context-attributes", cl::init(true), cl::ZeroOrMore, + cl::desc("When generating nested context-sensitive profiles, always " + "preserve the attributes of each context.")); + + namespace llvm { namespace sampleprof { bool FunctionSamples::ProfileIsProbeBased = false; @@ -217,7 +224,7 @@ unsigned FunctionSamples::getOffset(const DILocation *DIL) { return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & - 0xffff; + 0xffff; } LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL, @@ -478,15 +485,9 @@ return &AllChildFrames[Hash]; } -CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) - : ProfileMap(Profiles) { - for (auto &FuncSample : Profiles) { - FunctionSamples *FSamples = &FuncSample.second; - auto *NewNode = getOrCreateContextPath(FSamples->getContext()); - assert(!NewNode->FuncSamples && "New node cannot have sample profile"); - NewNode->FuncSamples = FSamples; - } -} +CSProfileConverter::CSProfileConverter( + SampleProfileMap &Profiles, std::list *Contexts) + : ProfileMap(Profiles), Contexts(Contexts) {} CSProfileConverter::FrameNode * CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { @@ -499,17 +500,22 @@ return Node; } -void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { +void CSProfileConverter::convertToCSNestedProfiles( + CSProfileConverter::FrameNode &Node) { // Process each child profile. 
Add each child profile to callsite profile map // of the current node `Node` if `Node` comes with a profile. Otherwise // promote the child profile to a standalone profile. auto *NodeProfile = Node.FuncSamples; for (auto &It : Node.AllChildFrames) { auto &ChildNode = It.second; - convertProfiles(ChildNode); + convertToCSNestedProfiles(ChildNode); auto *ChildProfile = ChildNode.FuncSamples; if (!ChildProfile) continue; + + if (!PreserveContextAttributes) + ChildProfile->getContext().setAllAttributes(0); + SampleContext OrigChildContext = ChildProfile->getContext(); // Reset the child context to be contextless. ChildProfile->getContext().setName(OrigChildContext.getName()); @@ -518,6 +524,10 @@ auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc); SamplesMap.emplace(OrigChildContext.getName().str(), *ChildProfile); NodeProfile->addTotalSamples(ChildProfile->getTotalSamples()); + // Remove the corresponding body sample for the callsite and update the total + // weight. + NodeProfile->removeCalledTargetAndUpdateTotalSamples( + ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator, OrigChildContext.getName()); } // Separate child profile to be a standalone profile, if the current parent @@ -545,4 +555,74 @@ } } -void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); } +void CSProfileConverter::convertToCSNestedProfiles() { + for (auto &FuncSample : ProfileMap) { + FunctionSamples *FSamples = &FuncSample.second; + auto *NewNode = getOrCreateContextPath(FSamples->getContext()); + assert(!NewNode->FuncSamples && "New node cannot have sample profile"); + NewNode->FuncSamples = FSamples; + } + convertToCSNestedProfiles(RootFrame); +} + +void CSProfileConverter::convertToCSFlatProfiles( + FunctionSamples &FProfile, SampleProfileMap &NewProfileMap, + SampleContextFrameVector &ParentContext) { + // Push current frame. 
+ ParentContext.emplace_back(FProfile.getName(), LineLocation(0, 0)); + + // Recursively promote callee profiles to standalone top-level profiles with + // a flattened context. + while (!FProfile.getCallsiteSamples().empty()) { + auto I = FProfile.getCallsiteSamples().begin(); + ParentContext.back().Location = I->first; + for (auto &Callee : I->second) { + auto &CalleeProfile = Callee.second; + // Add body sample. + FProfile.addBodySamples(I->first.LineOffset, I->first.Discriminator, + CalleeProfile.getEntrySamples()); + // Add callsite sample. + FProfile.addCalledTargetSamples( + I->first.LineOffset, I->first.Discriminator, CalleeProfile.getName(), + CalleeProfile.getEntrySamples()); + // Update total samples. + FProfile.addTotalSamples(CalleeProfile.getEntrySamples()); + // Withdraw callee samples from total samples, saturating at zero so an + // inconsistent input profile cannot make the total wrap around. + const uint64_t CalleeTotal = CalleeProfile.getTotalSamples(); + const uint64_t ParentTotal = FProfile.getTotalSamples(); + FProfile.setTotalSamples( + ParentTotal > CalleeTotal ? ParentTotal - CalleeTotal : 0); + // Recursively convert callee profile. + convertToCSFlatProfiles(CalleeProfile, NewProfileMap, ParentContext); + } + + // Remove callee profile + FProfile.getCallsiteSamples().erase(I); + ParentContext.back().Location = LineLocation(0, 0); + } + + // Update head samples for callee profiles. + if (ParentContext.size() > 1) + FProfile.addHeadSamples(FProfile.getEntrySamples()); + + // Update current context. + Contexts->push_back(ParentContext); + SampleContext NewContext(Contexts->back()); + FProfile.setContext(NewContext); + NewProfileMap.emplace(NewContext, FProfile); + + // Pop current frame. 
+ ParentContext.pop_back(); +} + +void CSProfileConverter::convertToCSFlatProfiles() { + // Every flattened context is built from an entry appended to *Contexts, so + // the converter must have been constructed with a context table. + assert(Contexts && "Context table is required to build flat CS profiles"); + SampleProfileMap NewProfileMap; + SampleContextFrameVector Context; + for (auto &FuncSample : ProfileMap) + convertToCSFlatProfiles(FuncSample.second, NewProfileMap, Context); + ProfileMap = std::move(NewProfileMap); +} diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -10,7 +10,7 @@ ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile %S/Inputs/profile-context-tracker.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --sample-profile-layout=nest %S/Inputs/profile-context-tracker.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline 
-profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --sample-profile-layout=nest -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline-probe.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline-probe.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline-probe.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline-probe.proftext @@ -2,7 +2,7 @@ 0: 6 1: 6 3: 287884 - 4: 287864 _Z3fibi:315608 + 4: 287864 _Z3fibi:287864 15: 23 !CFGChecksum: 281479271677951 !Attributes: 2 @@ -10,21 +10,21 @@ 0: 15 1: 15 3: 74946 - 4: 74941 _Z3fibi:82359 + 4: 74941 _Z3fibi:74941 10: 23324 - 11: 23327 _Z3fibi:25228 + 11: 23327 _Z3fibi:23327 15: 11 !CFGChecksum: 281479271677951 !Attributes: 2 [external:12 @ main]:154:12 2: 12 3: 10 _Z5funcAi:7 - 3.1: 10 _Z5funcBi:11 + 3.1: 10 _Z5funcBi:10 !CFGChecksum: 563125815542069 [main]:154:0 2: 12 3: 18 _Z5funcAi:11 - 3.1: 18 _Z5funcBi:19 + 3.1: 18 _Z5funcBi:18 
!CFGChecksum: 563125815542069 [external:10 @ _Z5funcBi]:120:10 0: 10 @@ -36,13 +36,13 @@ !CFGChecksum: 563022570642068 [main:3.1 @ _Z5funcBi]:120:19 0: 19 - 1: 19 _Z8funcLeafi:20 + 1: 19 _Z8funcLeafi:19 3: 12 !CFGChecksum: 563022570642068 !Attributes: 2 [main:3 @ _Z5funcAi]:99:11 0: 10 - 1: 10 _Z8funcLeafi:11 + 1: 10 _Z8funcLeafi:10 3: 24 !CFGChecksum: 844530426352218 !Attributes: 2 diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext @@ -2,26 +2,26 @@ 0: 6 1: 6 3: 287884 - 4: 287864 _Z3fibi:315608 + 4: 287864 _Z3fibi:287864 15: 23 !Attributes: 2 [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 0: 15 1: 15 3: 74946 - 4: 74941 _Z3fibi:82359 + 4: 74941 _Z3fibi:74941 10: 23324 - 11: 23327 _Z3fibi:25228 + 11: 23327 _Z3fibi:23327 15: 11 !Attributes: 2 [external:12 @ main]:154:12 2: 12 3: 10 _Z5funcAi:7 - 3.1: 10 _Z5funcBi:11 + 3.1: 10 _Z5funcBi:10 [main]:154:0 2: 12 3: 18 _Z5funcAi:11 - 3.1: 18 _Z5funcBi:19 + 3.1: 18 _Z5funcBi:18 [external:10 @ _Z5funcBi]:120:10 0: 10 1: 10 @@ -30,11 +30,11 @@ 1: 3 [main:3.1 @ _Z5funcBi]:120:19 0: 19 - 1: 19 _Z8funcLeafi:20 + 1: 19 _Z8funcLeafi:19 3: 12 !Attributes: 2 [main:3 @ _Z5funcAi]:99:11 0: 10 - 1: 10 _Z8funcLeafi:11 + 1: 10 _Z8funcLeafi:10 3: 24 !Attributes: 2 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-flat-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-flat-profile.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/cs-sample-flat-profile.test @@ -0,0 +1,23 @@ +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/sample-profile.proftext --sample-profile-layout=cs +RUN: FileCheck %s < %t.proftext +RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.proftext --sample-profile-layout=nest --preserve-context-attributes=0 
-generate-merged-base-profiles=0 +RUN: diff %t2.proftext %S/Inputs/sample-profile.proftext + + +; CHECK: [main]:184019:0 +; CHECK-NEXT: 4: 534 +; CHECK-NEXT: 4.2: 534 +; CHECK-NEXT: 5: 1075 +; CHECK-NEXT: 5.1: 1075 +; CHECK-NEXT: 6: 2080 +; CHECK-NEXT: 7: 534 +; CHECK-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +; CHECK-NEXT: 10: 3000 inline2:2000 inline1:1000 +; CHECK-NEXT: [_Z3bari]:20301:1437 +; CHECK-NEXT: 1: 1437 +; CHECK-NEXT: [_Z3fooi]:7711:610 +; CHECK-NEXT: 1: 610 +; CHECK-NEXT: [main:10 @ inline2]:2000:2000 +; CHECK-NEXT: 1: 2000 +; CHECK-NEXT: [main:10 @ inline1]:1000:1000 +; CHECK-NEXT: 1: 1000 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test --- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -1,46 +1,44 @@ -RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.probe.proftext --match-full-lines --strict-whitespace -check-prefix=PROBE -RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin 
%S/Inputs/cs-sample-preinline.proftext --sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE -RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT -RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --extbinary -output=%t3.profbin %S/Inputs/cs-sample-preinline.proftext --sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY -RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY -RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY +RUN: llvm-profdata show -sample -detailed-summary %t3.profbin | FileCheck %s -check-prefix=SUMMARY-NEST +RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST -; CHECK:main:1968679:12 +; CHECK:main:1968621:12 ; CHECK-NEXT: 2: 24 -; CHECK-NEXT: 3: 28 _Z5funcAi:18 -; CHECK-NEXT: 3.1: 28 _Z5funcBi:30 -; CHECK-NEXT: 3: _Z5funcAi:1467398 +; CHECK-NEXT: 3: 17 _Z5funcAi:7 +; CHECK-NEXT: 3.1: 10 _Z5funcBi:10 +; CHECK-NEXT: 3: _Z5funcAi:1467388 ; CHECK-NEXT: 0: 10 -; CHECK-NEXT: 1: 10 _Z8funcLeafi:11 ; CHECK-NEXT: 3: 24 ; 
CHECK-NEXT: 1: _Z8funcLeafi:1467299 ; CHECK-NEXT: 0: 6 ; CHECK-NEXT: 1: 6 ; CHECK-NEXT: 3: 287884 -; CHECK-NEXT: 4: 287864 _Z3fibi:315608 +; CHECK-NEXT: 4: 287864 _Z3fibi:287864 ; CHECK-NEXT: 15: 23 ; CHECK-NEXT: !Attributes: 2 ; CHECK-NEXT: !Attributes: 2 -; CHECK-NEXT: 3.1: _Z5funcBi:500973 +; CHECK-NEXT: 3.1: _Z5funcBi:500954 ; CHECK-NEXT: 0: 19 -; CHECK-NEXT: 1: 19 _Z8funcLeafi:20 ; CHECK-NEXT: 3: 12 ; CHECK-NEXT: 1: _Z8funcLeafi:500853 ; CHECK-NEXT: 0: 15 ; CHECK-NEXT: 1: 15 ; CHECK-NEXT: 3: 74946 -; CHECK-NEXT: 4: 74941 _Z3fibi:82359 +; CHECK-NEXT: 4: 74941 _Z3fibi:74941 ; CHECK-NEXT: 10: 23324 -; CHECK-NEXT: 11: 23327 _Z3fibi:25228 +; CHECK-NEXT: 11: 23327 _Z3fibi:23327 ; CHECK-NEXT: 15: 11 ; CHECK-NEXT: !Attributes: 2 ; CHECK-NEXT: !Attributes: 2 @@ -49,34 +47,31 @@ ; CHECK-NEXT: 1: 13 - -; RECOUNT:main:1968679:12 +; RECOUNT:main:1968621:12 ; RECOUNT-NEXT: 2: 24 -; RECOUNT-NEXT: 3: 28 _Z5funcAi:18 -; RECOUNT-NEXT: 3.1: 28 _Z5funcBi:30 -; RECOUNT-NEXT: 3: _Z5funcAi:1467398 +; RECOUNT-NEXT: 3: 17 _Z5funcAi:7 +; RECOUNT-NEXT: 3.1: 10 _Z5funcBi:10 +; RECOUNT-NEXT: 3: _Z5funcAi:1467388 ; RECOUNT-NEXT: 0: 10 -; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11 ; RECOUNT-NEXT: 3: 24 ; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299 ; RECOUNT-NEXT: 0: 6 ; RECOUNT-NEXT: 1: 6 ; RECOUNT-NEXT: 3: 287884 -; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608 +; RECOUNT-NEXT: 4: 287864 _Z3fibi:287864 ; RECOUNT-NEXT: 15: 23 ; RECOUNT-NEXT: !Attributes: 6 ; RECOUNT-NEXT: !Attributes: 6 -; RECOUNT-NEXT: 3.1: _Z5funcBi:500973 +; RECOUNT-NEXT: 3.1: _Z5funcBi:500954 ; RECOUNT-NEXT: 0: 19 -; RECOUNT-NEXT: 1: 19 _Z8funcLeafi:20 ; RECOUNT-NEXT: 3: 12 ; RECOUNT-NEXT: 1: _Z8funcLeafi:500853 ; RECOUNT-NEXT: 0: 15 ; RECOUNT-NEXT: 1: 15 ; RECOUNT-NEXT: 3: 74946 -; RECOUNT-NEXT: 4: 74941 _Z3fibi:82359 +; RECOUNT-NEXT: 4: 74941 _Z3fibi:74941 ; RECOUNT-NEXT: 10: 23324 -; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:23327 ; RECOUNT-NEXT: 15: 11 ; RECOUNT-NEXT: !Attributes: 6 ; RECOUNT-NEXT: 
!Attributes: 6 @@ -84,66 +79,63 @@ ; RECOUNT-NEXT: 0: 21 ; RECOUNT-NEXT: 1: 21 ; RECOUNT-NEXT: 3: 362830 -; RECOUNT-NEXT: 4: 362805 _Z3fibi:397967 +; RECOUNT-NEXT: 4: 362805 _Z3fibi:362805 ; RECOUNT-NEXT: 10: 23324 -; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:23327 ; RECOUNT-NEXT: 15: 34 ; RECOUNT-NEXT: !Attributes: 2 -; RECOUNT-NEXT:_Z5funcAi:1467398:11 +; RECOUNT-NEXT:_Z5funcAi:1467388:11 ; RECOUNT-NEXT: 0: 10 -; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11 ; RECOUNT-NEXT: 3: 24 ; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299 ; RECOUNT-NEXT: 0: 6 ; RECOUNT-NEXT: 1: 6 ; RECOUNT-NEXT: 3: 287884 -; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608 +; RECOUNT-NEXT: 4: 287864 _Z3fibi:287864 ; RECOUNT-NEXT: 15: 23 ; RECOUNT-NEXT: !Attributes: 6 ; RECOUNT-NEXT: !Attributes: 2 -; RECOUNT-NEXT:_Z5funcBi:501213:32 +; RECOUNT-NEXT:_Z5funcBi:501194:32 ; RECOUNT-NEXT: 0: 32 -; RECOUNT-NEXT: 1: 32 _Z8funcLeafi:20 +; RECOUNT-NEXT: 1: 13 ; RECOUNT-NEXT: 3: 12 ; RECOUNT-NEXT: 1: _Z8funcLeafi:500853 ; RECOUNT-NEXT: 0: 15 ; RECOUNT-NEXT: 1: 15 ; RECOUNT-NEXT: 3: 74946 -; RECOUNT-NEXT: 4: 74941 _Z3fibi:82359 +; RECOUNT-NEXT: 4: 74941 _Z3fibi:74941 ; RECOUNT-NEXT: 10: 23324 -; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:23327 ; RECOUNT-NEXT: 15: 11 ; RECOUNT-NEXT: !Attributes: 6 -; PROBE:main:1968679:12 +; PROBE:main:1968621:12 ; PROBE-NEXT: 2: 24 -; PROBE-NEXT: 3: 28 _Z5funcAi:18 -; PROBE-NEXT: 3.1: 28 _Z5funcBi:30 -; PROBE-NEXT: 3: _Z5funcAi:1467398 +; PROBE-NEXT: 3: 17 _Z5funcAi:7 +; PROBE-NEXT: 3.1: 10 _Z5funcBi:10 +; PROBE-NEXT: 3: _Z5funcAi:1467388 ; PROBE-NEXT: 0: 10 -; PROBE-NEXT: 1: 10 _Z8funcLeafi:11 ; PROBE-NEXT: 3: 24 ; PROBE-NEXT: 1: _Z8funcLeafi:1467299 ; PROBE-NEXT: 0: 6 ; PROBE-NEXT: 1: 6 ; PROBE-NEXT: 3: 287884 -; PROBE-NEXT: 4: 287864 _Z3fibi:315608 +; PROBE-NEXT: 4: 287864 _Z3fibi:287864 ; PROBE-NEXT: 15: 23 ; PROBE-NEXT: !CFGChecksum: 281479271677951 ; PROBE-NEXT: !Attributes: 2 ; PROBE-NEXT: !CFGChecksum: 844530426352218 ; 
PROBE-NEXT: !Attributes: 2 -; PROBE-NEXT: 3.1: _Z5funcBi:500973 +; PROBE-NEXT: 3.1: _Z5funcBi:500954 ; PROBE-NEXT: 0: 19 -; PROBE-NEXT: 1: 19 _Z8funcLeafi:20 ; PROBE-NEXT: 3: 12 ; PROBE-NEXT: 1: _Z8funcLeafi:500853 ; PROBE-NEXT: 0: 15 ; PROBE-NEXT: 1: 15 ; PROBE-NEXT: 3: 74946 -; PROBE-NEXT: 4: 74941 _Z3fibi:82359 +; PROBE-NEXT: 4: 74941 _Z3fibi:74941 ; PROBE-NEXT: 10: 23324 -; PROBE-NEXT: 11: 23327 _Z3fibi:25228 +; PROBE-NEXT: 11: 23327 _Z3fibi:23327 ; PROBE-NEXT: 15: 11 ; PROBE-NEXT: !CFGChecksum: 281479271677951 ; PROBE-NEXT: !Attributes: 2 @@ -181,3 +173,27 @@ ; SUMMARY-NEXT: 11 blocks with count >= 24 account for 99.99 percentage of the total counts. ; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.999 percentage of the total counts. ; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.9999 percentage of the total counts. + + +; SUMMARY-NEST: Total functions: 4 +; SUMMARY-NEST-NEXT: Maximum function count: 32 +; SUMMARY-NEST-NEXT: Maximum block count: 362830 +; SUMMARY-NEST-NEXT: Total number of blocks: 15 +; SUMMARY-NEST-NEXT: Total count: 772504 +; SUMMARY-NEST-NEXT: Detailed summary: +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 1 percentage of the total counts. +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 10 percentage of the total counts. +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 20 percentage of the total counts. +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 30 percentage of the total counts. +; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 40 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 50 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 60 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 70 percentage of the total counts. 
+; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 80 percentage of the total counts. +; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 90 percentage of the total counts. +; SUMMARY-NEST-NEXT: 3 blocks with count >= 23327 account for 95 percentage of the total counts. +; SUMMARY-NEST-NEXT: 4 blocks with count >= 23324 account for 99 percentage of the total counts. +; SUMMARY-NEST-NEXT: 4 blocks with count >= 23324 account for 99.9 percentage of the total counts. +; SUMMARY-NEST-NEXT: 10 blocks with count >= 21 account for 99.99 percentage of the total counts. +; SUMMARY-NEST-NEXT: 15 blocks with count >= 10 account for 99.999 percentage of the total counts. +; SUMMARY-NEST-NEXT: 15 blocks with count >= 10 account for 99.9999 percentage of the total counts. diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test --- a/llvm/test/tools/llvm-profgen/cs-preinline.test +++ b/llvm/test/tools/llvm-profgen/cs-preinline.test @@ -57,10 +57,9 @@ ; CHECK-TRIM-NEXT: 1: 14 ; CHECK-TRIM-NEXT: !Attributes: 3 -; CHECK-PREINL-NEST: foo:393:0 +; CHECK-PREINL-NEST: foo:379:0 ; CHECK-PREINL-NEST-NEXT: 2.1: 14 ; CHECK-PREINL-NEST-NEXT: 3: 15 -; CHECK-PREINL-NEST-NEXT: 3.1: 14 bar:14 ; CHECK-PREINL-NEST-NEXT: 3.2: 1 ; CHECK-PREINL-NEST-NEXT: 65526: 14 ; CHECK-PREINL-NEST-NEXT: 3.1: bar:84 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -22,6 +22,7 @@ #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/RawMemProfReader.h" +#include "llvm/ProfileData/SampleProf.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/CommandLine.h" @@ -732,7 +733,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef 
OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, + bool UseMD5, bool GenPartialProfile, SampleProfileLayout ProfileLayout, bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode) { using namespace sampleprof; @@ -809,10 +810,19 @@ SampleMergeColdContext, SampleColdContextFrameDepth, false); } - if (ProfileIsCSFlat && GenCSNestedProfile) { + + // Underlying context table serves for sample profile writer when + // converting a nested profile to a CS profile. + std::list Contexts; + + if (ProfileIsCSFlat && ProfileLayout == llvm::sampleprof::SPL_Nest) { CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + CSConverter.convertToCSNestedProfiles(); ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat = false; + } else if (!ProfileIsCSFlat.getValue() && ProfileLayout == llvm::sampleprof::SPL_CS) { + CSProfileConverter CSConverter(ProfileMap, &Contexts); + CSConverter.convertToCSFlatProfiles(); + ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat = true; } auto WriterOrErr = @@ -994,15 +1004,17 @@ "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " "override the cold threshold got from profile summary. 
")); - cl::opt GenCSNestedProfile( - "gen-cs-nested-profile", cl::Hidden, cl::init(false), - cl::desc("Generate nested function profiles for CSSPGO")); cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); cl::opt ProfiledBinary( "profiled-binary", cl::init(""), cl::desc("Path to binary from which the profile was collected.")); + cl::opt ProfileLayout( + "sample-profile-layout", cl::desc("Convert the generated profile to"), cl::init(SPL_None), + cl::values( + clEnumValN(SPL_Nest, "nest", "Nested profile"), + clEnumValN(SPL_CS, "cs", "Context-sensitive flat profile"))); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -1049,7 +1061,7 @@ else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, - UseMD5, GenPartialProfile, GenCSNestedProfile, + UseMD5, GenPartialProfile, ProfileLayout, SampleMergeColdContext, SampleTrimColdContext, SampleColdContextFrameDepth, FailureMode); return 0; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -905,7 +905,7 @@ calculateAndShowDensity(ContextLessProfiles); if (GenCSNestedProfile) { CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + CSConverter.convertToCSNestedProfiles(); FunctionSamples::ProfileIsCSFlat = false; FunctionSamples::ProfileIsCSNested = EnableCSPreInliner; }