diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -95,6 +95,12 @@ SPF_Binary = 0xff }; +enum SampleProfileLayout { + SPL_None = 0, + SPL_Nest = 0x1, + SPL_Flat = 0x2, +}; + static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | @@ -923,6 +929,7 @@ const CallsiteSampleMap &getCallsiteSamples() const { return CallsiteSamples; } + CallsiteSampleMap &getCallsiteSamples() { return CallsiteSamples; } /// Return the maximum of sample counts in a function body including functions /// inlined in it. @@ -1246,11 +1253,14 @@ }; // CSProfileConverter converts a full context-sensitive flat sample profile into -// a nested context-sensitive sample profile. +// a nested sample profile, or the other way around from a nested profile to a +// CS flat profile. class CSProfileConverter { public: - CSProfileConverter(SampleProfileMap &Profiles); - void convertProfiles(); + CSProfileConverter(SampleProfileMap &Profiles, + std::list<SampleContextFrameVector> *Contexts = nullptr); + void convertToCSNestedProfiles(); + void convertToCSFlatProfiles(); struct FrameNode { FrameNode(StringRef FName = StringRef(), FunctionSamples *FSamples = nullptr, @@ -1272,10 +1282,18 @@ private: // Nest all children profiles into the profile of Node. 
- void convertProfiles(FrameNode &Node); + void convertToCSNestedProfiles(FrameNode &Node); + // Flatten all children profiles of `FProfile` out to `NewProfileMap` + void convertToCSFlatProfiles(FunctionSamples &FProfile, + SampleProfileMap &NewProfileMap, + SampleContextFrameVector &Context); FrameNode *getOrCreateContextPath(const SampleContext &Context); SampleProfileMap &ProfileMap; + + // Underlying context table serves for sample profile writer when + // converting a nested profile to a flat profile. + std::list<SampleContextFrameVector> *Contexts; FrameNode RootFrame; }; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -39,6 +39,11 @@ "generate extra base profile for function with all its context " "profiles merged into it.")); +cl::opt<bool> PreserveContextAttributes( + "preserve-context-attributes", cl::init(true), cl::ZeroOrMore, + cl::desc("When generating nested context-sensitive profiles, always " + "preserve the attributes of each context.")); + namespace llvm { namespace sampleprof { bool FunctionSamples::ProfileIsProbeBased = false; @@ -478,15 +483,9 @@ return &AllChildFrames[Hash]; } -CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) - : ProfileMap(Profiles) { - for (auto &FuncSample : Profiles) { - FunctionSamples *FSamples = &FuncSample.second; - auto *NewNode = getOrCreateContextPath(FSamples->getContext()); - assert(!NewNode->FuncSamples && "New node cannot have sample profile"); - NewNode->FuncSamples = FSamples; - } -} +CSProfileConverter::CSProfileConverter( + SampleProfileMap &Profiles, std::list<SampleContextFrameVector> *Contexts) + : ProfileMap(Profiles), Contexts(Contexts) {} CSProfileConverter::FrameNode * CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { @@ -499,17 +498,22 @@ return Node; } -void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { +void CSProfileConverter::convertToCSNestedProfiles( + 
CSProfileConverter::FrameNode &Node) { // Process each child profile. Add each child profile to callsite profile map // of the current node `Node` if `Node` comes with a profile. Otherwise // promote the child profile to a standalone profile. auto *NodeProfile = Node.FuncSamples; for (auto &It : Node.AllChildFrames) { auto &ChildNode = It.second; - convertProfiles(ChildNode); + convertToCSNestedProfiles(ChildNode); auto *ChildProfile = ChildNode.FuncSamples; if (!ChildProfile) continue; + + if (!PreserveContextAttributes) + ChildProfile->getContext().setAllAttributes(ContextNone); + SampleContext OrigChildContext = ChildProfile->getContext(); // Reset the child context to be contextless. ChildProfile->getContext().setName(OrigChildContext.getName()); @@ -546,4 +550,68 @@ } } -void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); } +void CSProfileConverter::convertToCSNestedProfiles() { + for (auto &FuncSample : ProfileMap) { + FunctionSamples *FSamples = &FuncSample.second; + auto *NewNode = getOrCreateContextPath(FSamples->getContext()); + assert(!NewNode->FuncSamples && "New node cannot have sample profile"); + NewNode->FuncSamples = FSamples; + } + convertToCSNestedProfiles(RootFrame); +} + +void CSProfileConverter::convertToCSFlatProfiles( + FunctionSamples &FProfile, SampleProfileMap &NewProfileMap, + SampleContextFrameVector &ParentContext) { + // Push current frame. + ParentContext.emplace_back(FProfile.getName(), LineLocation(0, 0)); + + // Recursively promote callee profiles to standalone top-level profiles with + // a flattened context. + while (!FProfile.getCallsiteSamples().empty()) { + auto I = FProfile.getCallsiteSamples().begin(); + ParentContext.back().Location = I->first; + for (auto &Callee : I->second) { + auto &CalleeProfile = Callee.second; + // Add body sample. + FProfile.addBodySamples(I->first.LineOffset, I->first.Discriminator, + CalleeProfile.getEntrySamples()); + // Add callsite sample. 
+ FProfile.addCalledTargetSamples( + I->first.LineOffset, I->first.Discriminator, CalleeProfile.getName(), + CalleeProfile.getEntrySamples()); + // Update total samples. + FProfile.addTotalSamples(CalleeProfile.getEntrySamples()); + // Withdraw callee samples from total samples. + FProfile.setTotalSamples(FProfile.getTotalSamples() - + CalleeProfile.getTotalSamples()); + // Recursively convert callee profile. + convertToCSFlatProfiles(CalleeProfile, NewProfileMap, ParentContext); + } + + // Remove callee profile + FProfile.getCallsiteSamples().erase(I); + ParentContext.back().Location = LineLocation(0, 0); + } + + // Update head samples for callee profiles. + if (ParentContext.size() > 1) + FProfile.addHeadSamples(FProfile.getEntrySamples()); + + // Update current context. + Contexts->push_back(ParentContext); + SampleContext NewContext(Contexts->back()); + FProfile.setContext(NewContext); + NewProfileMap.emplace(NewContext, FProfile); + + // Pop current frame. + ParentContext.pop_back(); +} + +void CSProfileConverter::convertToCSFlatProfiles() { + SampleProfileMap NewProfileMap; + SampleContextFrameVector Context; + for (auto &FuncSample : ProfileMap) + convertToCSFlatProfiles(FuncSample.second, NewProfileMap, Context); + ProfileMap = std::move(NewProfileMap); +} diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -10,7 +10,7 @@ ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile %S/Inputs/profile-context-tracker.prof -o 
%t.prof +; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest %S/Inputs/profile-context-tracker.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline 
-profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE diff --git a/llvm/test/tools/llvm-profdata/cs-sample-flat-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-flat-profile.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/cs-sample-flat-profile.test @@ -0,0 +1,23 @@ +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/sample-profile.proftext --convert-sample-profile-layout=cs +RUN: FileCheck %s < %t.proftext +RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.proftext --convert-sample-profile-layout=nest --preserve-context-attributes=0 -generate-merged-base-profiles=0 +RUN: diff %t2.proftext %S/Inputs/sample-profile.proftext + + +; CHECK: [main]:184019:0 +; CHECK-NEXT: 4: 534 +; CHECK-NEXT: 4.2: 534 +; CHECK-NEXT: 5: 1075 +; CHECK-NEXT: 5.1: 1075 +; CHECK-NEXT: 6: 2080 +; CHECK-NEXT: 7: 534 +; CHECK-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +; CHECK-NEXT: 10: 3000 inline2:2000 inline1:1000 +; CHECK-NEXT: [_Z3bari]:20301:1437 +; CHECK-NEXT: 1: 1437 +; CHECK-NEXT: [_Z3fooi]:7711:610 +; CHECK-NEXT: 1: 610 +; CHECK-NEXT: [main:10 @ inline2]:2000:2000 +; CHECK-NEXT: 1: 2000 +; CHECK-NEXT: [main:10 @ inline1]:1000:1000 +; CHECK-NEXT: 1: 1000 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test --- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -1,14 +1,14 @@ -RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -RUN: llvm-profdata merge --sample --text 
-output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.probe.proftext --match-full-lines --strict-whitespace -check-prefix=PROBE -RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE -RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT -RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY-NEST RUN: 
llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -22,6 +22,7 @@ #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/RawMemProfReader.h" +#include "llvm/ProfileData/SampleProf.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/CommandLine.h" @@ -746,7 +747,8 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, + bool UseMD5, bool GenPartialProfile, + SampleProfileLayout ProfileLayout, bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode) { using namespace sampleprof; @@ -807,7 +809,7 @@ WriterList.merge(*ReaderList); } - if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) { + if (*ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) { // Use threshold calculated from profile summary unless specified. SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); auto Summary = Builder.computeSummaryForProfiles(ProfileMap); @@ -822,10 +824,18 @@ SampleMergeColdContext, SampleColdContextFrameDepth, false); } - if (ProfileIsCS && GenCSNestedProfile) { + // Underlying context table serves for sample profile writer when + // converting a nested profile to a CS profile. 
+ std::list<SampleContextFrameVector> Contexts; + + if (*ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) { CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + CSConverter.convertToCSNestedProfiles(); ProfileIsCS = FunctionSamples::ProfileIsCS = false; + } else if (!*ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Flat) { + CSProfileConverter CSConverter(ProfileMap, &Contexts); + CSConverter.convertToCSFlatProfiles(); + ProfileIsCS = FunctionSamples::ProfileIsCS = true; } auto WriterOrErr = @@ -1007,15 +1017,17 @@ "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " "override the cold threshold got from profile summary. ")); - cl::opt<bool> GenCSNestedProfile( - "gen-cs-nested-profile", cl::Hidden, cl::init(false), - cl::desc("Generate nested function profiles for CSSPGO")); cl::opt<std::string> DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); cl::opt<std::string> ProfiledBinary( "profiled-binary", cl::init(""), cl::desc("Path to binary from which the profile was collected.")); + cl::opt<SampleProfileLayout> ProfileLayout( + "convert-sample-profile-layout", + cl::desc("Convert the generated profile to"), cl::init(SPL_None), + cl::values(clEnumValN(SPL_Nest, "nest", "Nested profile"), + clEnumValN(SPL_Flat, "cs", "Context-sensitive flat profile"))); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -1062,7 +1074,7 @@ else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, - UseMD5, GenPartialProfile, GenCSNestedProfile, + UseMD5, GenPartialProfile, ProfileLayout, SampleMergeColdContext, SampleTrimColdContext, SampleColdContextFrameDepth, FailureMode); return 0; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp 
@@ -959,7 +959,7 @@ calculateAndShowDensity(ContextLessProfiles); if (GenCSNestedProfile) { CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + CSConverter.convertToCSNestedProfiles(); FunctionSamples::ProfileIsCS = false; } }