diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -161,6 +161,12 @@ coverage for the optimized target. This option can only be used with sample-based profile in extbinary format. +.. option:: --convert-sample-profile-layout=[nest|flat] + + Convert the merged profile into a profile with a new layout. Supported + layouts are ``nest`` (nested profile; the input should be a CS flat profile) + and ``flat`` (profile with nested inlinees flattened out). + .. option:: --supplement-instr-with-sample= Supplement an instrumentation profile with sample profile. The sample profile diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -96,6 +96,12 @@ SPF_Binary = 0xff }; +enum SampleProfileLayout { + SPL_None = 0, + SPL_Nest = 0x1, + SPL_Flat = 0x2, +}; + static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | @@ -747,6 +753,8 @@ void setTotalSamples(uint64_t Num) { TotalSamples = Num; } + void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; } + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; TotalHeadSamples = @@ -934,6 +942,8 @@ return CallsiteSamples; } + CallsiteSampleMap &getCallsiteSamples() { return CallsiteSamples; } + /// Return the maximum of sample counts in a function body. When SkipCallSite /// is false, which is the default, the return count includes samples in the /// inlined functions. 
When SkipCallSite is true, the return count only @@ -1274,12 +1284,16 @@ SampleProfileMap &ProfileMap; }; -// CSProfileConverter converts a full context-sensitive flat sample profile into -// a nested context-sensitive sample profile. -class CSProfileConverter { +/// Helper class for profile conversion. +/// +/// It supports full context-sensitive profile to nested profile conversion, +/// nested profile to flattened profile conversion, etc. +class ProfileConverter { public: - CSProfileConverter(SampleProfileMap &Profiles); - void convertProfiles(); + ProfileConverter(SampleProfileMap &Profiles); + // Convert a full context-sensitive flat sample profile into a nested sample + // profile. + void convertCSProfiles(); struct FrameNode { FrameNode(StringRef FName = StringRef(), FunctionSamples *FSamples = nullptr, @@ -1299,9 +1313,85 @@ StringRef CalleeName); }; + static void flattenProfile(SampleProfileMap &ProfileMap, + bool ProfileIsCS = false) { + SampleProfileMap TmpProfiles; + flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS); + ProfileMap = std::move(TmpProfiles); + } + + static void flattenProfile(const SampleProfileMap &InputProfiles, + SampleProfileMap &OutputProfiles, + bool ProfileIsCS = false) { + if (ProfileIsCS) { + for (const auto &I : InputProfiles) + OutputProfiles[I.second.getName()].merge(I.second); + // Retain the profile name and clear the full context for each function + // profile. + for (auto &I : OutputProfiles) + I.second.setContext(SampleContext(I.first)); + } else { + for (const auto &I : InputProfiles) + flattenNestedProfile(OutputProfiles, I.second); + } + } + private: + static void flattenNestedProfile(SampleProfileMap &OutputProfiles, + const FunctionSamples &FS) { + // To retain the context, checksum, attributes of the original profile, make + // a copy of it if no profile is found. 
+ SampleContext &Context = FS.getContext(); + auto Ret = OutputProfiles.emplace(Context, FS); + FunctionSamples &Profile = Ret.first->second; + if (Ret.second) { + // When it's the copy of the old profile, just clear all the inlinees' + // samples. + Profile.getCallsiteSamples().clear(); + // We recompute TotalSamples later, so here set to zero. + Profile.setTotalSamples(0); + } else { + for (const auto &Line : FS.getBodySamples()) { + Profile.addBodySamples(Line.first.LineOffset, Line.first.Discriminator, + Line.second.getSamples()); + } + } + + assert(Profile.getCallsiteSamples().empty() && + "There should be no inlinees' profiles after flattening."); + + // TotalSamples might not be equal to the sum of all samples from + // BodySamples and CallsiteSamples. So here we use "TotalSamples = + // Original_TotalSamples - All_of_Callsite_TotalSamples + + // All_of_Callsite_HeadSamples" to compute the new TotalSamples. + uint64_t TotalSamples = FS.getTotalSamples(); + + for (const auto &I : FS.getCallsiteSamples()) { + for (const auto &Callee : I.second) { + const auto &CalleeProfile = Callee.second; + // Add body sample. + Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator, + CalleeProfile.getHeadSamplesEstimate()); + // Add callsite sample. + Profile.addCalledTargetSamples( + I.first.LineOffset, I.first.Discriminator, CalleeProfile.getName(), + CalleeProfile.getHeadSamplesEstimate()); + // Update total samples. + TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples() + ? TotalSamples - CalleeProfile.getTotalSamples() + : 0; + TotalSamples += CalleeProfile.getHeadSamplesEstimate(); + // Recursively convert callee profile. + flattenNestedProfile(OutputProfiles, CalleeProfile); + } + } + Profile.addTotalSamples(TotalSamples); + + Profile.setHeadSamples(Profile.getHeadSamplesEstimate()); + } + // Nest all children profiles into the profile of Node. 
- void convertProfiles(FrameNode &Node); + void convertCSProfiles(FrameNode &Node); FrameNode *getOrCreateContextPath(const SampleContext &Context); SampleProfileMap &ProfileMap; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -461,9 +461,9 @@ OS << Sym << "\n"; } -CSProfileConverter::FrameNode * -CSProfileConverter::FrameNode::getOrCreateChildFrame( - const LineLocation &CallSite, StringRef CalleeName) { +ProfileConverter::FrameNode * +ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite, + StringRef CalleeName) { uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); auto It = AllChildFrames.find(Hash); if (It != AllChildFrames.end()) { @@ -476,7 +476,7 @@ return &AllChildFrames[Hash]; } -CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) +ProfileConverter::ProfileConverter(SampleProfileMap &Profiles) : ProfileMap(Profiles) { for (auto &FuncSample : Profiles) { FunctionSamples *FSamples = &FuncSample.second; @@ -486,8 +486,8 @@ } } -CSProfileConverter::FrameNode * -CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { +ProfileConverter::FrameNode * +ProfileConverter::getOrCreateContextPath(const SampleContext &Context) { auto Node = &RootFrame; LineLocation CallSiteLoc(0, 0); for (auto &Callsite : Context.getContextFrames()) { @@ -497,14 +497,14 @@ return Node; } -void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { +void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) { // Process each child profile. Add each child profile to callsite profile map // of the current node `Node` if `Node` comes with a profile. Otherwise // promote the child profile to a standalone profile. 
auto *NodeProfile = Node.FuncSamples; for (auto &It : Node.AllChildFrames) { auto &ChildNode = It.second; - convertProfiles(ChildNode); + convertCSProfiles(ChildNode); auto *ChildProfile = ChildNode.FuncSamples; if (!ChildProfile) continue; @@ -544,4 +544,4 @@ } } -void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); } +void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -139,6 +139,11 @@ cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section).")); +static cl::opt FlattenProfileForMatching( + "flatten-profile-for-matching", cl::Hidden, cl::init(true), + cl::desc( + "Use flattened profile for stale profile detection and matching.")); + static cl::opt ProfileSampleAccurate( "profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " @@ -434,6 +439,7 @@ Module &M; SampleProfileReader &Reader; const PseudoProbeManager *ProbeManager; + SampleProfileMap FlattenedProfiles; // Profile mismatching statstics. 
uint64_t TotalProfiledCallsites = 0; @@ -448,7 +454,21 @@ public: SampleProfileMatcher(Module &M, SampleProfileReader &Reader, const PseudoProbeManager *ProbeManager) - : M(M), Reader(Reader), ProbeManager(ProbeManager) {} + : M(M), Reader(Reader), ProbeManager(ProbeManager) { + if (FlattenProfileForMatching) { + ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, + FunctionSamples::ProfileIsCS); + } + } + + FunctionSamples *getFlattenedSamplesFor(const Function &F) { + StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); + auto It = FlattenedProfiles.find(CanonFName); + if (It != FlattenedProfiles.end()) + return &It->second; + return nullptr; + } + void detectProfileMismatch(); void detectProfileMismatch(const Function &F, const FunctionSamples &FS); }; @@ -2156,7 +2176,11 @@ for (auto &F : M) { if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; - FunctionSamples *FS = Reader.getSamplesFor(F); + FunctionSamples *FS = nullptr; + if (FlattenProfileForMatching) + FS = getFlattenedSamplesFor(F); + else + FS = Reader.getSamplesFor(F); if (!FS) continue; detectProfileMismatch(F, *FS); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof @@ -0,0 +1,18 @@ +[main]:30:0 + 0: 0 + 1.1: 0 + 3: 10 matched:10 + 4: 10 + 5: 10 bar_mismatch:10 + 7: 5 foo:5 + 8: 0 +[main:7 @ foo]:15:5 + 1: 5 + 2: 5 + 3: 5 inlinee_mismatch:5 +[bar]:10:10 + 1: 10 +[matched]:10:10 + 1: 10 +[main:7 @ foo:3 @ inlinee_mismatch]:5:5 + 1: 5 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof --- a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof @@ -5,9 +5,11 @@ 4: 10 5: 10 
bar_mismatch:10 8: 0 - 7: foo:10 + 7: foo:15 1: 5 2: 5 + 3: inlinee_mismatch:5 + 1: 5 bar:10:10 1: 10 matched:10:10 diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -10,7 +10,7 @@ ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile %S/Inputs/profile-context-tracker.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest %S/Inputs/profile-context-tracker.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof 
-pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll @@ -0,0 +1,13 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + +; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + + +; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location 
mismatch. + +; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30} diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll --- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll @@ -1,5 +1,5 @@ ; REQUIRES: x86_64-linux -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=0 -S 2>%t -o %t.ll ; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD ; RUN: llc < %t.ll -filetype=obj -o %t.obj diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext @@ -0,0 +1,20 @@ +[baz]:150:10 + 1: 10 + 3: 20 + 5: 20 foo:20 +[foo]:102:1 + 1: 1 + 3: 1 +[main]:91:1 + 4: 1 + 4.2: 1 + 7: 1 + 9: 3 bar:2 foo:1 + 10: 3 baz:2 foo:1 +[main:10 @ foo]:2:1 + 3: 1 bar:1 + 4: 1 +[bar]:1:1 + 1: 1 +[main:10 @ foo:3 @ bar]:1:1 + 1: 1 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext @@ -0,0 +1,44 @@ +baz:160:10 + 1: 10 + 3: 20 + 5: foo:30 + 1: 20 + 3: bar:10 + 1: 10 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 3 + !Attributes: 3 + !CFGChecksum: 1 + !Attributes: 1 +main:110:1 + 4: 1 + 4.2: 1 + 7: 1 + 9: 3 bar:2 foo:1 
+ 10: foo:2 + 4: 1 + 3: bar:1 + 1: 1 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 3 + !Attributes: 3 + 10: baz:20 + 10: 1 + 6: bar:3 + 1: 2 + 7: 1 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 2 + !Attributes: 2 +foo:102:1 + 1: 1 + 3: 1 + !CFGChecksum: 3 + !Attributes: 3 +bar:1:1 + 1: 1 + !CFGChecksum: 4 + !Attributes: 4 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test --- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -1,14 +1,14 @@ -RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.probe.proftext --match-full-lines --strict-whitespace -check-prefix=PROBE -RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace RUN: llvm-profdata show --sample 
-show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE -RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT -RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY-NEST RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST diff --git a/llvm/test/tools/llvm-profdata/sample-flatten-profile.test b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test @@ -0,0 +1,50 @@ +; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace + +; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile-cs.proftext -o - | FileCheck %s 
--match-full-lines --strict-whitespace --check-prefix=CHECK-CS +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile-cs.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS + +; CHECK:baz:169:10 +; CHECK-NEXT: 1: 10 +; CHECK-NEXT: 3: 20 +; CHECK-NEXT: 5: 20 foo:20 +; CHECK-NEXT: 6: 2 bar:2 +; CHECK-NEXT: 10: 1 +; CHECK-NEXT: !CFGChecksum: 1 +; CHECK-NEXT: !Attributes: 1 +; CHECK-NEXT:foo:134:21 +; CHECK-NEXT: 1: 21 +; CHECK-NEXT: 3: 12 bar:11 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: !CFGChecksum: 3 +; CHECK-NEXT: !Attributes: 3 +; CHECK-NEXT:main:91:1 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: 4.2: 1 +; CHECK-NEXT: 7: 1 +; CHECK-NEXT: 9: 3 bar:2 foo:1 +; CHECK-NEXT: 10: 3 baz:2 foo:1 +; CHECK-NEXT: !CFGChecksum: 2 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT:bar:15:14 +; CHECK-NEXT: 1: 14 +; CHECK-NEXT: 7: 1 +; CHECK-NEXT: !CFGChecksum: 4 +; CHECK-NEXT: !Attributes: 4 + +; CHECK-CS:baz:150:10 +; CHECK-CS-NEXT: 1: 10 +; CHECK-CS-NEXT: 3: 20 +; CHECK-CS-NEXT: 5: 20 foo:20 +; CHECK-CS-NEXT:foo:104:2 +; CHECK-CS-NEXT: 1: 1 +; CHECK-CS-NEXT: 3: 2 bar:1 +; CHECK-CS-NEXT: 4: 1 +; CHECK-CS-NEXT:main:91:1 +; CHECK-CS-NEXT: 4: 1 +; CHECK-CS-NEXT: 4.2: 1 +; CHECK-CS-NEXT: 7: 1 +; CHECK-CS-NEXT: 9: 3 bar:2 foo:1 +; CHECK-CS-NEXT: 10: 3 baz:2 foo:1 +; CHECK-CS-NEXT:bar:2:2 +; CHECK-CS-NEXT: 1: 2 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -968,7 +968,8 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, + bool UseMD5, bool GenPartialProfile, + SampleProfileLayout ProfileLayout, bool 
SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode, bool DropProfileSymbolList, size_t OutputSizeLimit) { @@ -1048,9 +1049,12 @@ SampleMergeColdContext, SampleColdContextFrameDepth, false); } - if (ProfileIsCS && GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + if (ProfileLayout == llvm::sampleprof::SPL_Flat) { + ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS); + ProfileIsCS = FunctionSamples::ProfileIsCS = false; + } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) { + ProfileConverter CSConverter(ProfileMap); + CSConverter.convertCSProfiles(); ProfileIsCS = FunctionSamples::ProfileIsCS = false; } @@ -1241,9 +1245,15 @@ "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " "override the cold threshold got from profile summary. ")); - cl::opt GenCSNestedProfile( - "gen-cs-nested-profile", cl::Hidden, cl::init(false), - cl::desc("Generate nested function profiles for CSSPGO")); + cl::opt ProfileLayout( + "convert-sample-profile-layout", + cl::desc("Convert the generated profile to a profile with a new layout"), + cl::init(SPL_None), + cl::values( + clEnumValN(SPL_Nest, "nest", + "Nested profile, the input should be CS flat profile"), + clEnumValN(SPL_Flat, "flat", + "Profile with nested inlinee flatten out"))); cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); @@ -1298,12 +1308,12 @@ OutputFilename, OutputFormat, OutputSparse, NumThreads, FailureMode, ProfiledBinary); else - mergeSampleProfile( - WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, - ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile, - GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext, - SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList, - 
OutputSizeLimit); + mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, + OutputFormat, ProfileSymbolListFile, CompressAllSections, + UseMD5, GenPartialProfile, ProfileLayout, + SampleMergeColdContext, SampleTrimColdContext, + SampleColdContextFrameDepth, FailureMode, + DropProfileSymbolList, OutputSizeLimit); return 0; } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -1026,8 +1026,8 @@ calculateAndShowDensity(ContextLessProfiles); if (GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + ProfileConverter CSConverter(ProfileMap); + CSConverter.convertCSProfiles(); FunctionSamples::ProfileIsCS = false; } }