diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -161,6 +161,10 @@ coverage for the optimized target. This option can only be used with sample-based profile in extbinary format. +.. option:: --gen-flattened-profile=[true|false] + + Generate a profile with nested inlinees flattened out. + .. option:: --supplement-instr-with-sample= Supplement an instrumentation profile with sample profile. The sample profile diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -747,6 +747,8 @@ void setTotalSamples(uint64_t Num) { TotalSamples = Num; } + void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; } + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; TotalHeadSamples = @@ -934,6 +936,8 @@ return CallsiteSamples; } + CallsiteSampleMap &getCallsiteSamples() { return CallsiteSamples; } + /// Return the maximum of sample counts in a function body. When SkipCallSite /// is false, which is the default, the return count includes samples in the /// inlined functions. When SkipCallSite is true, the return count only @@ -1274,12 +1278,16 @@ SampleProfileMap &ProfileMap; }; -// CSProfileConverter converts a full context-sensitive flat sample profile into -// a nested context-sensitive sample profile. -class CSProfileConverter { +/// Helper class for profile conversion. +/// +/// It supports full context-sensitive profile to nested profile conversion, +/// nested profile to flattened profile conversion, etc. +class ProfileConverter { public: - CSProfileConverter(SampleProfileMap &Profiles); - void convertProfiles(); + ProfileConverter(SampleProfileMap &Profiles); + // Convert a full context-sensitive flat sample profile into a nested sample + // profile.
+ void convertCSProfiles(); struct FrameNode { FrameNode(StringRef FName = StringRef(), FunctionSamples *FSamples = nullptr, @@ -1299,9 +1307,82 @@ StringRef CalleeName); }; + static void flattenProfile(SampleProfileMap &ProfileMap, + bool ProfileIsCS = false) { + SampleProfileMap TmpProfiles; + flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS); + ProfileMap = std::move(TmpProfiles); + } + + static void flattenProfile(const SampleProfileMap &InputProfiles, + SampleProfileMap &OutputProfiles, + bool ProfileIsCS = false) { + if (ProfileIsCS) { + for (const auto &I : InputProfiles) + OutputProfiles[I.second.getName()].merge(I.second); + // Retain the profile name and clear the full context for each function + // profile. + for (auto &I : OutputProfiles) + I.second.setContext(SampleContext(I.first)); + } else { + for (const auto &I : InputProfiles) + flattenNestedProfile(OutputProfiles, I.second); + } + } + + static void flattenNestedProfile(SampleProfileMap &OutputProfiles, + const FunctionSamples &FS) { + // To retain the context, checksum, attributes of the original profile, make + // a copy of it if no profile is found. + SampleContext &Context = FS.getContext(); + auto Ret = OutputProfiles.emplace(Context, FS); + FunctionSamples &Profile = Ret.first->second; + if (Ret.second) { + // When it's the copy of the old profile, just clear all the inlinees' + // samples. + Profile.getCallsiteSamples().clear(); + // We recompute TotalSamples later, so here set to zero. + Profile.setTotalSamples(0); + } else { + for (const auto &Line : FS.getBodySamples()) { + Profile.addBodySamples(Line.first.LineOffset, Line.first.Discriminator, + Line.second.getSamples()); + } + } + + // TotalSamples might not be equal to the sum of all samples from + // BodySamples and CallsiteSamples. So here we use "TotalSamples = + // Original_TotalSamples - All_of_Callsite_TotalSamples + + // All_of_Callsite_HeadSamples" to compute the new TotalSamples. 
+ uint64_t TotalSamples = FS.getTotalSamples(); + + for (const auto &I : FS.getCallsiteSamples()) { + for (const auto &Callee : I.second) { + const auto &CalleeProfile = Callee.second; + // Add body sample. + Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator, + CalleeProfile.getHeadSamplesEstimate()); + // Add callsite sample. + Profile.addCalledTargetSamples( + I.first.LineOffset, I.first.Discriminator, CalleeProfile.getName(), + CalleeProfile.getHeadSamplesEstimate()); + // Update total samples. + TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples() + ? TotalSamples - CalleeProfile.getTotalSamples() + : 0; + TotalSamples += CalleeProfile.getHeadSamplesEstimate(); + // Recursively convert callee profile. + flattenNestedProfile(OutputProfiles, CalleeProfile); + } + } + Profile.addTotalSamples(TotalSamples); + + Profile.setHeadSamples(Profile.getHeadSamplesEstimate()); + } + private: // Nest all children profiles into the profile of Node. - void convertProfiles(FrameNode &Node); + void convertCSProfiles(FrameNode &Node); FrameNode *getOrCreateContextPath(const SampleContext &Context); SampleProfileMap &ProfileMap; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -461,9 +461,9 @@ OS << Sym << "\n"; } -CSProfileConverter::FrameNode * -CSProfileConverter::FrameNode::getOrCreateChildFrame( - const LineLocation &CallSite, StringRef CalleeName) { +ProfileConverter::FrameNode * +ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite, + StringRef CalleeName) { uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); auto It = AllChildFrames.find(Hash); if (It != AllChildFrames.end()) { @@ -476,7 +476,7 @@ return &AllChildFrames[Hash]; } -CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) +ProfileConverter::ProfileConverter(SampleProfileMap &Profiles) : 
ProfileMap(Profiles) { for (auto &FuncSample : Profiles) { FunctionSamples *FSamples = &FuncSample.second; @@ -486,8 +486,8 @@ } } -CSProfileConverter::FrameNode * -CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { +ProfileConverter::FrameNode * +ProfileConverter::getOrCreateContextPath(const SampleContext &Context) { auto Node = &RootFrame; LineLocation CallSiteLoc(0, 0); for (auto &Callsite : Context.getContextFrames()) { @@ -497,14 +497,14 @@ return Node; } -void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { +void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) { // Process each child profile. Add each child profile to callsite profile map // of the current node `Node` if `Node` comes with a profile. Otherwise // promote the child profile to a standalone profile. auto *NodeProfile = Node.FuncSamples; for (auto &It : Node.AllChildFrames) { auto &ChildNode = It.second; - convertProfiles(ChildNode); + convertCSProfiles(ChildNode); auto *ChildProfile = ChildNode.FuncSamples; if (!ChildProfile) continue; @@ -544,4 +544,4 @@ } } -void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); } +void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -139,6 +139,11 @@ cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section).")); +static cl::opt FlattenProfileForMatching( + "flatten-profile-for-matching", cl::Hidden, cl::init(true), + cl::desc( + "Use flattened profile for stale profile detection and matching.")); + static cl::opt ProfileSampleAccurate( "profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " @@ -434,6 +439,7 @@ Module 
&M; SampleProfileReader &Reader; const PseudoProbeManager *ProbeManager; + SampleProfileMap FlattenedProfiles; // Profile mismatching statstics. uint64_t TotalProfiledCallsites = 0; @@ -448,7 +454,21 @@ public: SampleProfileMatcher(Module &M, SampleProfileReader &Reader, const PseudoProbeManager *ProbeManager) - : M(M), Reader(Reader), ProbeManager(ProbeManager) {} + : M(M), Reader(Reader), ProbeManager(ProbeManager) { + if (FlattenProfileForMatching) { + ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, + FunctionSamples::ProfileIsCS); + } + } + + FunctionSamples *getFlattenedSamplesFor(const Function &F) { + StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); + auto It = FlattenedProfiles.find(CanonFName); + if (It != FlattenedProfiles.end()) + return &It->second; + return nullptr; + } + void detectProfileMismatch(); void detectProfileMismatch(const Function &F, const FunctionSamples &FS); }; @@ -2157,7 +2177,11 @@ for (auto &F : M) { if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; - FunctionSamples *FS = Reader.getSamplesFor(F); + FunctionSamples *FS = nullptr; + if (FlattenProfileForMatching) + FS = getFlattenedSamplesFor(F); + else + FS = Reader.getSamplesFor(F); if (!FS) continue; detectProfileMismatch(F, *FS); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof @@ -0,0 +1,18 @@ +[main]:30:0 + 0: 0 + 1.1: 0 + 3: 10 matched:10 + 4: 10 + 5: 10 bar_mismatch:10 + 7: 5 foo:5 + 8: 0 +[main:7 @ foo]:15:5 + 1: 5 + 2: 5 + 3: 5 inlinee_mismatch:5 +[bar]:10:10 + 1: 10 +[matched]:10:10 + 1: 10 +[main:7 @ foo:3 @ inlinee_mismatch]:5:5 + 1: 5 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof --- 
a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof @@ -5,9 +5,11 @@ 4: 10 5: 10 bar_mismatch:10 8: 0 - 7: foo:10 + 7: foo:15 1: 5 2: 5 + 3: inlinee_mismatch:5 + 1: 5 bar:10:10 1: 10 matched:10:10 diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll @@ -0,0 +1,13 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + +; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + + +; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch. 
+ +; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30} diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll --- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll @@ -1,5 +1,5 @@ ; REQUIRES: x86_64-linux -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=0 -S 2>%t -o %t.ll ; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD ; RUN: llc < %t.ll -filetype=obj -o %t.obj diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext @@ -0,0 +1,20 @@ +[baz]:150:10 + 1: 10 + 3: 20 + 5: 20 foo:20 +[foo]:102:1 + 1: 1 + 3: 1 +[main]:91:1 + 4: 1 + 4.2: 1 + 7: 1 + 9: 3 bar:2 foo:1 + 10: 3 baz:2 foo:1 +[main:10 @ foo]:2:1 + 3: 1 bar:1 + 4: 1 +[bar]:1:1 + 1: 1 +[main:10 @ foo:3 @ bar]:1:1 + 1: 1 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext @@ -0,0 +1,44 @@ +baz:160:10 + 1: 10 + 3: 20 + 5: foo:30 + 1: 20 + 3: bar:10 + 1: 10 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 3 + !Attributes: 3 + !CFGChecksum: 1 + !Attributes: 1 +main:110:1 + 4: 1 + 4.2: 1 + 7: 1 + 9: 3 bar:2 foo:1 + 10: 
foo:2 + 4: 1 + 3: bar:1 + 1: 1 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 3 + !Attributes: 3 + 10: baz:20 + 10: 1 + 6: bar:3 + 1: 2 + 7: 1 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 2 + !Attributes: 2 +foo:102:1 + 1: 1 + 3: 1 + !CFGChecksum: 3 + !Attributes: 3 +bar:1:1 + 1: 1 + !CFGChecksum: 4 + !Attributes: 4 diff --git a/llvm/test/tools/llvm-profdata/sample-flatten-profile.test b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test @@ -0,0 +1,50 @@ +; RUN: llvm-profdata merge --sample --gen-flattened-profile --text %S/Inputs/sample-flatten-profile.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile.proftext -o %t2 && llvm-profdata merge --sample --gen-flattened-profile --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace + +; RUN: llvm-profdata merge --sample --gen-flattened-profile --text %S/Inputs/sample-flatten-profile-cs.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile-cs.proftext -o %t2 && llvm-profdata merge --sample --gen-flattened-profile --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS + +; CHECK:baz:169:10 +; CHECK-NEXT: 1: 10 +; CHECK-NEXT: 3: 20 +; CHECK-NEXT: 5: 20 foo:20 +; CHECK-NEXT: 6: 2 bar:2 +; CHECK-NEXT: 10: 1 +; CHECK-NEXT: !CFGChecksum: 1 +; CHECK-NEXT: !Attributes: 1 +; CHECK-NEXT:foo:134:21 +; CHECK-NEXT: 1: 21 +; CHECK-NEXT: 3: 12 bar:11 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: !CFGChecksum: 3 +; CHECK-NEXT: !Attributes: 3 +; CHECK-NEXT:main:91:1 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: 4.2: 1 +; CHECK-NEXT: 7: 1 +; CHECK-NEXT: 9: 3 bar:2 foo:1 +; CHECK-NEXT: 10: 3 baz:2 foo:1 +; CHECK-NEXT: !CFGChecksum: 2 +; CHECK-NEXT: !Attributes: 2 +; 
CHECK-NEXT:bar:15:14 +; CHECK-NEXT: 1: 14 +; CHECK-NEXT: 7: 1 +; CHECK-NEXT: !CFGChecksum: 4 +; CHECK-NEXT: !Attributes: 4 + +; CHECK-CS:baz:150:10 +; CHECK-CS-NEXT: 1: 10 +; CHECK-CS-NEXT: 3: 20 +; CHECK-CS-NEXT: 5: 20 foo:20 +; CHECK-CS-NEXT:foo:104:2 +; CHECK-CS-NEXT: 1: 1 +; CHECK-CS-NEXT: 3: 2 bar:1 +; CHECK-CS-NEXT: 4: 1 +; CHECK-CS-NEXT:main:91:1 +; CHECK-CS-NEXT: 4: 1 +; CHECK-CS-NEXT: 4.2: 1 +; CHECK-CS-NEXT: 7: 1 +; CHECK-CS-NEXT: 9: 3 bar:2 foo:1 +; CHECK-CS-NEXT: 10: 3 baz:2 foo:1 +; CHECK-CS-NEXT:bar:2:2 +; CHECK-CS-NEXT: 1: 2 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -969,9 +969,10 @@ StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, - bool SampleMergeColdContext, bool SampleTrimColdContext, - bool SampleColdContextFrameDepth, FailureMode FailMode, - bool DropProfileSymbolList, size_t OutputSizeLimit) { + bool GenFlattenedProfile, bool SampleMergeColdContext, + bool SampleTrimColdContext, bool SampleColdContextFrameDepth, + FailureMode FailMode, bool DropProfileSymbolList, + size_t OutputSizeLimit) { using namespace sampleprof; SampleProfileMap ProfileMap; SmallVector, 5> Readers; @@ -1033,6 +1034,12 @@ } } + if (GenFlattenedProfile) { + ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS); + if (FunctionSamples::ProfileIsCS) + ProfileIsCS = FunctionSamples::ProfileIsCS = false; + } + if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) { // Use threshold calculated from profile summary unless specified. 
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); @@ -1049,8 +1056,8 @@ } if (ProfileIsCS && GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + ProfileConverter CSConverter(ProfileMap); + CSConverter.convertCSProfiles(); ProfileIsCS = FunctionSamples::ProfileIsCS = false; } @@ -1244,6 +1251,9 @@ cl::opt GenCSNestedProfile( "gen-cs-nested-profile", cl::Hidden, cl::init(false), cl::desc("Generate nested function profiles for CSSPGO")); + cl::opt GenFlattenedProfile( + "gen-flattened-profile", cl::init(false), + cl::desc("Generate a profile with nested inlinees flattened out.")); cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); @@ -1298,12 +1308,12 @@ OutputFilename, OutputFormat, OutputSparse, NumThreads, FailureMode, ProfiledBinary); else - mergeSampleProfile( - WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, - ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile, - GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext, - SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList, - OutputSizeLimit); + mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, + OutputFormat, ProfileSymbolListFile, CompressAllSections, + UseMD5, GenPartialProfile, GenCSNestedProfile, + GenFlattenedProfile, SampleMergeColdContext, + SampleTrimColdContext, SampleColdContextFrameDepth, + FailureMode, DropProfileSymbolList, OutputSizeLimit); return 0; } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -1026,8 +1026,8 @@ calculateAndShowDensity(ContextLessProfiles); if (GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + ProfileConverter CSConverter(ProfileMap); + 
CSConverter.convertCSProfiles(); FunctionSamples::ProfileIsCS = false; } }