diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -430,6 +430,29 @@
     return ContextStr.split(" @ ");
   }
 
+  // Reconstruct a new context that keeps only the last K frames of
+  // `FullContext`; frames are separated by the literal " @ ". Returns the
+  // context-less name if K <= 1 (K == 0 would otherwise produce an empty
+  // context) and the whole `FullContext` if it has no more than K frames.
+  StringRef getContextWithLastKFrames(uint32_t K) {
+    if (K <= 1)
+      return getNameWithoutContext();
+
+    // Walk backwards over K separators. Match the whole " @ " token rather
+    // than any single ' ' or '@' character, so a stray one inside a frame is
+    // not mistaken for a frame boundary and no index arithmetic can wrap.
+    const StringRef Separator = " @ ";
+    StringRef Prefix = FullContext;
+    size_t SepPos = StringRef::npos;
+    while (K--) {
+      SepPos = Prefix.rfind(Separator);
+      if (SepPos == StringRef::npos)
+        return FullContext;
+      Prefix = Prefix.substr(0, SepPos);
+    }
+    return FullContext.substr(SepPos + Separator.size());
+  }
+
   // Decode context string for a frame to get function name and location.
   // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
   static void decodeContextString(StringRef ContextStr, StringRef &FName,
@@ -994,7 +1017,8 @@
   // Trim and merge cold context profile when requested.
   void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
                                        bool TrimColdContext = true,
-                                       bool MergeColdContext = true);
+                                       bool MergeColdContext = true,
+                                       uint32_t KeepLastKFrames = 1);
 
   // Canonicalize context profile name and attributes.
void canonicalizeContextProfiles(); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -324,7 +324,8 @@ } void SampleContextTrimmer::trimAndMergeColdContextProfiles( - uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext) { + uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext, + uint32_t KeepLastKFrames) { if (!TrimColdContext && !MergeColdContext) return; @@ -342,21 +343,23 @@ ColdProfiles.emplace_back(I.getKey(), &I.second); } - // Remove the cold profile from ProfileMap and merge them into BaseProileMap - StringMap BaseProfileMap; + // Remove the cold profile from ProfileMap and merge them into + // MergedProfileMap by the last K frames of context + StringMap MergedProfileMap; for (const auto &I : ColdProfiles) { if (MergeColdContext) { - auto Ret = BaseProfileMap.try_emplace( - I.second->getContext().getNameWithoutContext(), FunctionSamples()); - FunctionSamples &BaseProfile = Ret.first->second; - BaseProfile.merge(*I.second); + auto Ret = MergedProfileMap.try_emplace( + I.second->getContext().getContextWithLastKFrames(KeepLastKFrames), + FunctionSamples()); + FunctionSamples &MergedProfile = Ret.first->second; + MergedProfile.merge(*I.second); } ProfileMap.erase(I.first); } - // Merge the base profiles into ProfileMap; - for (const auto &I : BaseProfileMap) { - // Filter the cold base profile + // Move the merged profiles into ProfileMap; + for (const auto &I : MergedProfileMap) { + // Filter the cold merged profile if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold && ProfileMap.find(I.getKey()) == ProfileMap.end()) continue; diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test --- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -10,6 +10,12 
@@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --profile-summary-cold-count=10 --csprof-merge-cold-context=0 ; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED +; Test --csprof-keep-last-k-frames +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-cold-count=100 --csprof-trim-cold-context=0 --csprof-keep-last-k-frames=2 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-K-COLD + + + ; CHECK: [fa]:14:4 ; CHECK-NEXT: 1: 4 ; CHECK-NEXT: 2: 18446744073709551615 @@ -56,6 +62,38 @@ ; CHECK-UNMERGED-NOT: [fa] ; CHECK-UNMERGED-NOT: [fb] +; CHECK-KEEP-K-COLD: [fb:5 @ fb]:13:4 +; CHECK-KEEP-K-COLD-NEXT: 1: 4 +; CHECK-KEEP-K-COLD-NEXT: 2: 3 +; CHECK-KEEP-K-COLD-NEXT: 3: 1 +; CHECK-KEEP-K-COLD-NEXT: 5: 4 fb:4 +; CHECK-KEEP-K-COLD-NEXT: 6: 1 fa:1 +; CHECK-KEEP-K-COLD-NEXT: !CFGChecksum: 72617220756 +; CHECK-KEEP-K-COLD-NEXT: !Attributes: 0 +; CHECK-KEEP-K-COLD-NEXT:[fb:6 @ fa]:10:3 +; CHECK-KEEP-K-COLD-NEXT: 1: 3 +; CHECK-KEEP-K-COLD-NEXT: 2: 18446744073709551615 +; CHECK-KEEP-K-COLD-NEXT: 3: 3 +; CHECK-KEEP-K-COLD-NEXT: 4: 1 +; CHECK-KEEP-K-COLD-NEXT: 5: 1 +; CHECK-KEEP-K-COLD-NEXT: 7: 1 fb:1 +; CHECK-KEEP-K-COLD-NEXT: 8: 1 fa:1 +; CHECK-KEEP-K-COLD-NEXT: !CFGChecksum: 120515930909 +; CHECK-KEEP-K-COLD-NEXT: !Attributes: 0 +; CHECK-KEEP-K-COLD-NEXT:[fa:7 @ fb]:6:2 +; CHECK-KEEP-K-COLD-NEXT: 1: 2 +; CHECK-KEEP-K-COLD-NEXT: 3: 2 +; CHECK-KEEP-K-COLD-NEXT: 6: 2 fa:2 +; CHECK-KEEP-K-COLD-NEXT: !CFGChecksum: 72617220756 +; CHECK-KEEP-K-COLD-NEXT: !Attributes: 0 +; CHECK-KEEP-K-COLD-NEXT:[fa:8 @ fa]:4:1 +; CHECK-KEEP-K-COLD-NEXT: 1: 1 +; CHECK-KEEP-K-COLD-NEXT: 2: 18446744073709551615 +; CHECK-KEEP-K-COLD-NEXT: 3: 1 +; 
CHECK-KEEP-K-COLD-NEXT: 4: 1
+; CHECK-KEEP-K-COLD-NEXT: 7: 1 fb:1
+; CHECK-KEEP-K-COLD-NEXT: !CFGChecksum: 120515930909
+; CHECK-KEEP-K-COLD-NEXT: !Attributes: 0
 
 ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
 ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -689,7 +689,7 @@
                    StringRef ProfileSymbolListFile, bool CompressAllSections,
                    bool UseMD5, bool GenPartialProfile,
                    bool SampleMergeColdContext, bool SampleTrimColdContext,
-                   FailureMode FailMode) {
+                   uint32_t SampleKeepLastKFrames, FailureMode FailMode) {
   using namespace sampleprof;
   StringMap<FunctionSamples> ProfileMap;
   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@@ -758,9 +758,9 @@
 
     // Trim and merge cold context profile using cold threshold above;
     SampleContextTrimmer(ProfileMap)
-        .trimAndMergeColdContextProfiles(SampleProfColdThreshold,
-                                         SampleTrimColdContext,
-                                         SampleMergeColdContext);
+        .trimAndMergeColdContextProfiles(
+            SampleProfColdThreshold, SampleTrimColdContext,
+            SampleMergeColdContext, SampleKeepLastKFrames);
   }
 
   auto WriterOrErr =
@@ -914,6 +914,10 @@
       "sample-trim-cold-context", cl::init(false), cl::Hidden,
       cl::desc(
           "Trim context sample profiles whose count is below cold threshold"));
+  cl::opt<uint32_t> SampleKeepLastKFrames(
+      "sample-keep-last-k-frames", cl::init(1), cl::ZeroOrMore,
+      cl::desc("Keep the last K frames while merging cold profile.
1 means the "
+               "context-less base profile"));
   cl::opt<bool> GenPartialProfile(
       "gen-partial-profile", cl::init(false), cl::Hidden,
       cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@@ -985,7 +989,8 @@
     mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
                        OutputFormat, ProfileSymbolListFile, CompressAllSections,
                        UseMD5, GenPartialProfile, SampleMergeColdContext,
-                       SampleTrimColdContext, FailureMode);
+                       SampleTrimColdContext, SampleKeepLastKFrames,
+                       FailureMode);
 
   return 0;
 }
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -43,6 +43,11 @@
     cl::desc("If the total count of the profile after all merge is done "
              "is still smaller than threshold, it will be trimmed."));
 
+static cl::opt<uint32_t> CSProfKeepLastKFrames(
+    "csprof-keep-last-k-frames", cl::init(1), cl::ZeroOrMore,
+    cl::desc("Keep the last K frames while merging cold profile. 1 means the "
+             "context-less base profile"));
+
 extern cl::opt<int> ProfileSummaryCutoffCold;
 
 using namespace llvm;
@@ -401,7 +406,8 @@
   // Trim and merge cold context profile using cold threshold above;
   SampleContextTrimmer(ProfileMap)
       .trimAndMergeColdContextProfiles(
-          ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
+          ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
+          CSProfKeepLastKFrames);
 }
 
 void CSProfileGenerator::computeSummaryAndThreshold() {