diff --git a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
--- a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
@@ -1,7 +1,8 @@
 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0 --use-offset=0
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --show-density &> %t1
 ; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-DENSITY
 
 ; CHECK: [main:2 @ foo]:74:0
 ; CHECK-NEXT: 1: 0
@@ -19,6 +20,9 @@
 ; CHECK-NEXT: 4: 14
 ; CHECK-NEXT: !CFGChecksum: 72617220756
 
+; CHECK-DENSITY: AutoFDO is estimated to optimize better with 1675.7x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
+; CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 0.6
+
 ; CHECK-UNWINDER: 3
 ; CHECK-UNWINDER-NEXT: 201800-201858:1
 ; CHECK-UNWINDER-NEXT: 20180e-20182b:1
diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
--- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test
+++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
@@ -2,8 +2,9 @@
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-ARTIFICIAL-BRANCH
 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-noprobe2.perfscript --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t --skip-symbolization --use-offset=0
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE
-; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --show-density -hot-function-density-threshold=1 &> %t2
 ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-DENSITY
 ; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/inline-noprobe2.perfscript --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t --populate-profile-symbol-list=1
 ; RUN: llvm-profdata show -show-prof-sym-list -sample %t | FileCheck %s --check-prefix=CHECK-SYM-LIST
 
@@ -98,6 +99,9 @@
 ;CHECK-NEXT:  11: 0
 ;CHECK-NEXT:  14: 0
 
+;CHECK-DENSITY: AutoFDO is estimated to optimize better with 4.9x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
+;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 0.2
+
 ; original code:
 ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
 #include <stdio.h>
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -13,6 +13,7 @@
 #include "PerfReader.h"
 #include "ProfiledBinary.h"
 #include "llvm/ProfileData/SampleProfWriter.h"
+#include "llvm/Support/raw_ostream.h"
 #include <memory>
 #include <unordered_set>
 
@@ -75,7 +76,32 @@
                                            const SampleContextFrame &LeafLoc,
                                            uint64_t Count);
   void updateTotalSamples();
+
   StringRef getCalleeNameForOffset(uint64_t TargetOffset);
+
+  // Build the profile summary for ProfileMap and refresh the summary-derived
+  // count thresholds declared below.
+  void computeSummaryAndThreshold();
+
+  // Compute the density of \p Profiles and report it. Each generator decides
+  // how the profiles are prepared before they are measured.
+  virtual void calculateAndShowDensity(const SampleProfileMap &Profiles) = 0;
+
+  // Return the minimum total-samples / function-size ratio among the
+  // functions in \p Profiles whose total samples reach \p HotCntThreshold, or
+  // 0.0 when no such function has a known, non-zero size.
+  double calculateDensity(const SampleProfileMap &Profiles,
+                          uint64_t HotCntThreshold);
+
+  // Print a hint to \p OS when \p Density is zero or below the configured
+  // threshold, plus the density itself when -show-density is set.
+  void showDensitySuggestion(double Density, raw_fd_ostream &OS);
+
+  // Thresholds from profile summary to answer isHotCount/isColdCount queries.
+  uint64_t HotCountThreshold;
+
+  uint64_t ColdCountThreshold;
+
   // Used by SampleProfileWriter
   SampleProfileMap ProfileMap;
 
@@ -104,6 +130,9 @@
   void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
   void
   populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
+  // Runs after profile generation: computes thresholds, then reports density.
+  void postProcessProfiles();
+  void calculateAndShowDensity(const SampleProfileMap &Profiles) override;
 };
 
 using ProbeCounterMap =
@@ -245,8 +274,6 @@
   // and trimming cold profiles, running preinliner on profiles.
   void postProcessProfiles();
 
-  void computeSummaryAndThreshold();
-
   void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
                                       const RangeSample &RangeCounters);
   void populateBoundarySamplesForFunction(SampleContextFrames ContextId,
@@ -269,9 +296,9 @@
   FunctionSamples &
   getFunctionProfileForLeafProbe(SampleContextFrames ContextStack,
                                  const MCDecodedPseudoProbe *LeafProbe);
-  // Thresholds from profile summary to answer isHotCount/isColdCount queries.
-  uint64_t HotCountThreshold;
-  uint64_t ColdCountThreshold;
+
+  // Merges per-context profiles by function name before measuring density.
+  void calculateAndShowDensity(const SampleProfileMap &Profiles) override;
 
   // Underlying context table serves for sample profile writer.
   std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -9,6 +9,7 @@
 #include "ProfileGenerator.h"
 #include "ProfiledBinary.h"
 #include "llvm/ProfileData/ProfileCommon.h"
+#include <cfloat>
 #include <unordered_set>
 
 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
@@ -70,7 +71,16 @@
              "depth limit."),
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
 
-extern cl::opt<int> ProfileSummaryCutoffCold;
+static cl::opt<double> HotFunctionDensityThreshold(
+    "hot-function-density-threshold", llvm::cl::init(1000),
+    llvm::cl::desc(
+        "specify density threshold for hot functions (default: 1000)"),
+    llvm::cl::Optional);
+static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
+                                 llvm::cl::desc("show profile density details"),
+                                 llvm::cl::Optional);
+
+extern cl::opt<int> ProfileSummaryCutoffHot;
 
 using namespace llvm;
 using namespace sampleprof;
@@ -127,6 +137,55 @@
   write(std::move(WriterOrErr.get()), ProfileMap);
 }
 
+// Report on \p Density: warn when it is zero or below
+// -hot-function-density-threshold, and print the value itself when
+// -show-density is given.
+void ProfileGeneratorBase::showDensitySuggestion(double Density,
+                                                 raw_fd_ostream &OS) {
+  if (Density == 0.0)
+    OS << "The --profile-summary-cutoff-hot option may be set too low. Please "
+          "check your command.\n";
+  else if (Density < HotFunctionDensityThreshold)
+    OS << "AutoFDO is estimated to optimize better with "
+       << format("%.1f", HotFunctionDensityThreshold / Density)
+       << "x more samples. Please consider increasing sampling rate or "
+          "profiling for longer duration to get more samples.\n";
+
+  if (ShowDensity)
+    OS << "Minimum profile density for hot functions with top "
+       << format("%.2f",
+                 static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
+                     10000)
+       << "% total samples: " << format("%.1f", Density) << "\n";
+}
+
+// Return the smallest total-samples / function-size ratio among the functions
+// in \p Profiles whose total samples reach \p HotCntThreshold. Functions that
+// are unknown to the binary or have zero size are ignored; 0.0 is returned
+// when no function qualifies.
+double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
+                                              uint64_t HotCntThreshold) {
+  double Density = DBL_MAX;
+  for (const auto &I : Profiles) {
+    const FunctionSamples &FuncSamples = I.second;
+    // The profile map is not ordered by sample count, so a cold entry must be
+    // skipped rather than treated as the end of the hot functions.
+    if (FuncSamples.getTotalSamples() < HotCntThreshold)
+      continue;
+    auto *Func = Binary->getBinaryFunction(FuncSamples.getName());
+    if (!Func)
+      continue;
+    size_t FuncSize = Func->getFuncSize();
+    if (FuncSize == 0)
+      continue;
+    Density =
+        std::min(Density, static_cast<double>(FuncSamples.getTotalSamples()) /
+                              FuncSize);
+  }
+
+  return Density == DBL_MAX ? 0.0 : Density;
+}
+
 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
                                               const RangeSample &Ranges) {
 
@@ -311,6 +370,13 @@
   } else {
     generateLineNumBasedProfile();
   }
+  postProcessProfiles();
+}
+
+// Compute the summary thresholds, then report the profile density.
+void ProfileGenerator::postProcessProfiles() {
+  computeSummaryAndThreshold();
+  calculateAndShowDensity(ProfileMap);
 }
 
 void ProfileGenerator::generateLineNumBasedProfile() {
@@ -440,6 +506,13 @@
   }
 }
 
+// Measure density directly on \p Profiles and print the result via outs().
+void ProfileGenerator::calculateAndShowDensity(
+    const SampleProfileMap &Profiles) {
+  double Density = calculateDensity(Profiles, HotCountThreshold);
+  showDensitySuggestion(Density, outs());
+}
+
 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
     const SampleContextFrameVector &Context, bool WasLeafInlined) {
   auto I = ProfileMap.find(SampleContext(Context));
@@ -664,9 +737,11 @@
             HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
             CSProfMaxColdContextDepth, EnableCSPreInliner);
   }
+
+  calculateAndShowDensity(ProfileMap);
 }
 
-void CSProfileGenerator::computeSummaryAndThreshold() {
+void ProfileGeneratorBase::computeSummaryAndThreshold() {
   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
@@ -844,5 +919,19 @@
   return FunctionProile;
 }
 
+// Collapse the context-sensitive \p Profiles into one profile per function
+// name, then measure and report density on the merged view.
+void CSProfileGenerator::calculateAndShowDensity(
+    const SampleProfileMap &Profiles) {
+  sampleprof::SampleProfileMap ContextLessProfiles;
+  // Merge function samples for CS profile.
+  for (const auto &I : Profiles) {
+    ContextLessProfiles[I.second.getName()].merge(I.second);
+  }
+
+  double Density = calculateDensity(ContextLessProfiles, HotCountThreshold);
+  showDensitySuggestion(Density, outs());
+}
+
 } // end namespace sampleprof
 } // end namespace llvm
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -76,6 +76,14 @@
   StringRef FuncName;
   // End of range is an exclusive bound.
   RangesTy Ranges;
+
+  // Total size of the function: the summed length of all of its ranges.
+  size_t getFuncSize() const {
+    size_t Sum = 0;
+    for (const auto &R : Ranges)
+      Sum += R.second - R.first;
+    return Sum;
+  }
 };
 
 // Info about function range. A function can be split into multiple
@@ -402,6 +410,14 @@
     return BinaryFunctions;
   }
 
+  // Look up a function by name; returns nullptr when no entry exists.
+  BinaryFunction *getBinaryFunction(StringRef FName) {
+    auto I = BinaryFunctions.find(FName.str());
+    if (I == BinaryFunctions.end())
+      return nullptr;
+    return &I->second;
+  }
+
   uint32_t getFuncSizeForContext(SampleContext &Context) {
     return FuncSizeTracker.getFuncSizeForContext(Context);
   }