diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
--- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -3,7 +3,7 @@
 ; RUN: FileCheck %s --input-file %t
 
-; Test --csprof-keep-cold
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-keep-cold
+; Test --csprof-trim-cold-context=0
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-trim-cold-context=0
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-KEEP-COLD
 
 ; CHECK: [fa]:14:4
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -179,6 +179,7 @@
   // Merge cold context profile whose total sample is below threshold
   // into base profile.
   void mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap);
+  void computeSummaryAndThreshold();
 
   void write(std::unique_ptr<SampleProfileWriter> Writer,
              StringMap<FunctionSamples> &ProfileMap) override;
@@ -197,6 +198,12 @@
                                        ProfiledBinary *Binary);
   void populateInferredFunctionSamples();
 
+  // Count thresholds to answer isHotCount and isColdCount queries.
+  // Mirrors the threshold in ProfileSummaryInfo. Both stay zero until
+  // computeSummaryAndThreshold() has run.
+  uint64_t HotCountThreshold = 0;
+  uint64_t ColdCountThreshold = 0;
+
 public:
   // Deduplicate adjacent repeated context sequences up to a given sequence
   // length. -1 means no size limit.
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ProfileGenerator.h"
+#include "llvm/ProfileData/ProfileCommon.h"
 
 static cl::opt OutputFilename("output", cl::value_desc("output"),
                               cl::Required,
@@ -31,18 +32,46 @@
     cl::Hidden,
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
 
-static cl::opt CSProfColdThres(
+static cl::opt CSProfColdThreshold(
     "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
     cl::desc("Specify the total samples threshold for a context profile to "
              "be considered cold, any cold profiles will be merged into "
              "context-less base profiles"));
 
-static cl::opt CSProfKeepCold(
-    "csprof-keep-cold", cl::init(false), cl::ZeroOrMore,
+static cl::opt CSProfMergeColdContext(
+    "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
     cl::desc("This works together with --csprof-cold-thres. If the total count "
-             "of the profile after all merge is done is still smaller than the "
-             "csprof-cold-thres, it will be trimmed unless csprof-keep-cold "
-             "flag is specified."));
+             "of context profile is smaller than the threshold, it will be "
+             "merged into context-less base profile."));
+
+static cl::opt CSProfTrimColdContext(
+    "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
+    cl::desc("This works together with --csprof-cold-thres. If the total count "
+             "of the profile after all merge is done is still smaller than "
+             "threshold, it will be trimmed."));
+
+// The 4 switches below mirror those in ProfileSummaryInfo.cpp.
+// We need them here so the pre-inliner in llvm-profgen is tunable
+// and can better align with the compiler.
+static cl::opt ProfileSummaryCutoffHot(
+    "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
+    cl::desc("A count is hot if it exceeds the minimum count to"
+             " reach this percentile of total counts."));
+
+static cl::opt ProfileSummaryCutoffCold(
+    "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
+    cl::desc("A count is cold if it is below the minimum count"
+             " to reach this percentile of total counts."));
+
+static cl::opt ProfileSummaryHotCount(
+    "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
+    cl::desc("A fixed hot count that overrides the count derived from"
+             " profile-summary-cutoff-hot"));
+
+static cl::opt ProfileSummaryColdCount(
+    "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
+    cl::desc("A fixed cold count that overrides the count derived from"
+             " profile-summary-cutoff-cold"));
 
 using namespace llvm;
 using namespace sampleprof;
@@ -197,6 +226,7 @@
       FContext.setAttribute(ContextWasInlined);
     FunctionSamples &FProfile = Ret.first->second;
     FProfile.setContext(FContext);
+    FProfile.setName(FContext.getNameWithoutContext());
   }
   return Ret.first->second;
 }
@@ -226,6 +256,10 @@
   // functions, we estimate it from inlinee's profile using the entry of the
   // body sample.
   populateInferredFunctionSamples();
+
+  // Compute hot/cold threshold based on profile. This will be used for cold
+  // context profile merging/trimming.
+  computeSummaryAndThreshold();
 }
 
 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
@@ -381,36 +415,68 @@
   }
 }
 
+// Build a profile summary over ProfileMap and derive HotCountThreshold and
+// ColdCountThreshold from it. A fixed --profile-summary-hot-count or
+// --profile-summary-cold-count, when given, overrides the derived value.
+void CSProfileGenerator::computeSummaryAndThreshold() {
+  SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
+  auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
+  auto &DetailedSummary = Summary->getDetailedSummary();
+  auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile(
+      DetailedSummary, ProfileSummaryCutoffHot);
+  HotCountThreshold = HotEntry.MinCount;
+  if (ProfileSummaryHotCount.getNumOccurrences() > 0)
+    HotCountThreshold = ProfileSummaryHotCount;
+  auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
+      DetailedSummary, ProfileSummaryCutoffCold);
+  ColdCountThreshold = ColdEntry.MinCount;
+  if (ProfileSummaryColdCount.getNumOccurrences() > 0)
+    ColdCountThreshold = ProfileSummaryColdCount;
+}
+
 void CSProfileGenerator::mergeAndTrimColdProfile(
     StringMap<FunctionSamples> &ProfileMap) {
+  // Nothing to do when both merging and trimming are disabled.
+  if (!CSProfMergeColdContext && !CSProfTrimColdContext)
+    return;
+
+  // Use threshold calculated from profile summary unless specified
+  uint64_t ColdThreshold = ColdCountThreshold;
+  if (CSProfColdThreshold.getNumOccurrences()) {
+    ColdThreshold = CSProfColdThreshold;
+  }
+
   // Nothing to merge if sample threshold is zero
-  if (!CSProfColdThres)
+  if (ColdThreshold == 0)
     return;
 
   // Filter the cold profiles from ProfileMap and move them into a tmp
   // container
-  std::vector<std::pair<StringRef, const FunctionSamples *>> ToRemoveVec;
+  std::vector<std::pair<StringRef, const FunctionSamples *>> ColdProfiles;
   for (const auto &I : ProfileMap) {
     const FunctionSamples &FunctionProfile = I.second;
-    if (FunctionProfile.getTotalSamples() >= CSProfColdThres)
+    if (FunctionProfile.getTotalSamples() >= ColdThreshold)
       continue;
-    ToRemoveVec.emplace_back(I.getKey(), &I.second);
+    ColdProfiles.emplace_back(I.getKey(), &I.second);
   }
 
   // Remove the code profile from ProfileMap and merge them into BaseProileMap
   StringMap<FunctionSamples> BaseProfileMap;
-  for (const auto &I : ToRemoveVec) {
-    auto Ret = BaseProfileMap.try_emplace(
-        I.second->getContext().getNameWithoutContext(), FunctionSamples());
-    FunctionSamples &BaseProfile = Ret.first->second;
-    BaseProfile.merge(*I.second);
+  for (const auto &I : ColdProfiles) {
+    if (CSProfMergeColdContext) {
+      auto Ret = BaseProfileMap.try_emplace(
+          I.second->getContext().getNameWithoutContext(), FunctionSamples());
+      FunctionSamples &BaseProfile = Ret.first->second;
+      BaseProfile.merge(*I.second);
+    }
     ProfileMap.erase(I.first);
   }
 
   // Merge the base profiles into ProfileMap;
   for (const auto &I : BaseProfileMap) {
     // Filter the cold base profile
-    if (!CSProfKeepCold && I.second.getTotalSamples() < CSProfColdThres &&
+    // Use ColdThreshold here too so trimming agrees with the merge above.
+    if (CSProfTrimColdContext && I.second.getTotalSamples() < ColdThreshold &&
         ProfileMap.find(I.getKey()) == ProfileMap.end())
       continue;
     // Merge the profile if the original profile exists, otherwise just insert
@@ -470,6 +536,10 @@
                                         ContextStrStack, Binary);
     }
   }
+
+  // Compute hot/cold threshold based on profile. This will be used for cold
+  // context profile merging/trimming.
+  computeSummaryAndThreshold();
 }
 
 void PseudoProbeCSProfileGenerator::extractProbesFromRange(