diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -38,7 +38,7 @@ // units. This would require making this depend on BFI. class ProfileSummaryInfo { private: - const Module &M; + const Module *M; std::unique_ptr Summary; void computeThresholds(); // Count thresholds to answer isHotCount and isColdCount queries. @@ -58,7 +58,9 @@ mutable DenseMap ThresholdCache; public: - ProfileSummaryInfo(const Module &M) : M(M) { refresh(); } + ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); } + ProfileSummaryInfo(std::unique_ptr PSI) + : M(nullptr), Summary(std::move(PSI)) {} ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default; diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -95,13 +95,13 @@ if (hasProfileSummary()) return; // First try to get context sensitive ProfileSummary. - auto *SummaryMD = M.getProfileSummary(/* IsCS */ true); + auto *SummaryMD = M->getProfileSummary(/* IsCS */ true); if (SummaryMD) Summary.reset(ProfileSummary::getFromMD(SummaryMD)); if (!hasProfileSummary()) { // This will actually return PSK_Instr or PSK_Sample summary. 
- SummaryMD = M.getProfileSummary(/* IsCS */ false); + SummaryMD = M->getProfileSummary(/* IsCS */ false); if (SummaryMD) Summary.reset(ProfileSummary::getFromMD(SummaryMD)); } diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test --- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -2,10 +2,14 @@ ; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=8 ; RUN: FileCheck %s --input-file %t -; Test --csprof-keep-cold -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-keep-cold +; Test --csprof-trim-cold-context=0 +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-trim-cold-context=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-KEEP-COLD +; Test --csprof-merge-cold-context=0 +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=10 --csprof-merge-cold-context=0 +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNMERGED + ; CHECK: [fa]:14:4 ; CHECK-NEXT: 1: 4 ; CHECK-NEXT: 3: 4 @@ -40,6 +44,14 @@ ; CHECK-KEEP-COLD-NEXT: 8: 1 fa:1 ; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 120515930909 +; CHECK-UNMERGED: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4 +; CHECK-UNMERGED-NEXT: 1: 4 +; CHECK-UNMERGED-NEXT: 2: 3 +; CHECK-UNMERGED-NEXT: 3: 1 +; CHECK-UNMERGED-NEXT: 5: 4 fb:4 +; CHECK-UNMERGED-NEXT: 6: 1 
fa:1 +; CHECK-UNMERGED-NEXT: !CFGChecksum: 72617220756 + ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -11,7 +11,9 @@ #include "ErrorHandling.h" #include "PerfReader.h" #include "ProfiledBinary.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/ProfileData/SampleProfWriter.h" +#include using namespace llvm; using namespace sampleprof; @@ -179,6 +181,7 @@ // Merge cold context profile whose total sample is below threshold // into base profile. void mergeAndTrimColdProfile(StringMap &ProfileMap); + void computeSummaryAndThreshold(); void write(std::unique_ptr Writer, StringMap &ProfileMap) override; @@ -197,6 +200,9 @@ ProfiledBinary *Binary); void populateInferredFunctionSamples(); + // Profile summary to answer isHotCount and isColdCount queries. + std::unique_ptr PSI; + public: // Deduplicate adjacent repeated context sequences up to a given sequence // length. -1 means no size limit. 
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "ProfileGenerator.h" +#include "llvm/ProfileData/ProfileCommon.h" static cl::opt OutputFilename("output", cl::value_desc("output"), cl::Required, @@ -31,18 +32,23 @@ cl::Hidden, cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); -static cl::opt CSProfColdThres( +static cl::opt CSProfColdThreshold( "csprof-cold-thres", cl::init(100), cl::ZeroOrMore, cl::desc("Specify the total samples threshold for a context profile to " "be considered cold, any cold profiles will be merged into " "context-less base profiles")); -static cl::opt CSProfKeepCold( - "csprof-keep-cold", cl::init(false), cl::ZeroOrMore, +static cl::opt CSProfMergeColdContext( + "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, cl::desc("This works together with --csprof-cold-thres. If the total count " - "of the profile after all merge is done is still smaller than the " - "csprof-cold-thres, it will be trimmed unless csprof-keep-cold " - "flag is specified.")); + "of context profile is smaller than the threshold, it will be " + "merged into context-less base profile.")); + +static cl::opt CSProfTrimColdContext( + "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore, + cl::desc("This works together with --csprof-cold-thres. 
If the total count " + "of the profile after all merge is done is still smaller than " + "threshold, it will be trimmed.")); using namespace llvm; using namespace sampleprof; @@ -197,6 +203,7 @@ FContext.setAttribute(ContextWasInlined); FunctionSamples &FProfile = Ret.first->second; FProfile.setContext(FContext); + FProfile.setName(FContext.getNameWithoutContext()); } return Ret.first->second; } @@ -226,6 +233,10 @@ // functions, we estimate it from inlinee's profile using the entry of the // body sample. populateInferredFunctionSamples(); + + // Compute hot/cold threshold based on profile. This will be used for cold + // context profile merging/trimming. + computeSummaryAndThreshold(); } void CSProfileGenerator::updateBodySamplesforFunctionProfile( @@ -381,36 +392,54 @@ } } +void CSProfileGenerator::computeSummaryAndThreshold() { + SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); + auto Summary = Builder.computeSummaryForProfiles(ProfileMap); + PSI.reset(new ProfileSummaryInfo(std::move(Summary))); +} + void CSProfileGenerator::mergeAndTrimColdProfile( StringMap &ProfileMap) { + if (!CSProfMergeColdContext && !CSProfTrimColdContext) + return; + + // Use the profile summary's cold threshold unless --csprof-cold-thres is set + uint64_t ColdThreshold = PSI->getColdCountThreshold(); + if (CSProfColdThreshold.getNumOccurrences()) { + ColdThreshold = CSProfColdThreshold; + } + // Nothing to merge if sample threshold is zero - if (!CSProfColdThres) + if (ColdThreshold == 0) return; // Filter the cold profiles from ProfileMap and move them into a tmp // container - std::vector> ToRemoveVec; + std::vector> ColdProfiles; for (const auto &I : ProfileMap) { const FunctionSamples &FunctionProfile = I.second; - if (FunctionProfile.getTotalSamples() >= CSProfColdThres) + if (FunctionProfile.getTotalSamples() >= ColdThreshold) continue; - ToRemoveVec.emplace_back(I.getKey(), &I.second); + ColdProfiles.emplace_back(I.getKey(), &I.second); } // Remove the code 
profile from ProfileMap and merge them into BaseProileMap StringMap BaseProfileMap; - for (const auto &I : ToRemoveVec) { - auto Ret = BaseProfileMap.try_emplace( - I.second->getContext().getNameWithoutContext(), FunctionSamples()); - FunctionSamples &BaseProfile = Ret.first->second; - BaseProfile.merge(*I.second); + for (const auto &I : ColdProfiles) { + if (CSProfMergeColdContext) { + auto Ret = BaseProfileMap.try_emplace( + I.second->getContext().getNameWithoutContext(), FunctionSamples()); + FunctionSamples &BaseProfile = Ret.first->second; + BaseProfile.merge(*I.second); + } ProfileMap.erase(I.first); } // Merge the base profiles into ProfileMap; for (const auto &I : BaseProfileMap) { // Filter the cold base profile - if (!CSProfKeepCold && I.second.getTotalSamples() < CSProfColdThres && + if (CSProfTrimColdContext && + I.second.getTotalSamples() < ColdThreshold && ProfileMap.find(I.getKey()) == ProfileMap.end()) continue; // Merge the profile if the original profile exists, otherwise just insert @@ -470,6 +499,10 @@ ContextStrStack, Binary); } } + + // Compute hot/cold threshold based on profile. This will be used for cold + // context profile merging/trimming. + computeSummaryAndThreshold(); } void PseudoProbeCSProfileGenerator::extractProbesFromRange(