diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
--- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test
+++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
@@ -6,6 +6,9 @@
 ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-DENSITY
 
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile --profile-summary-cold-count=100
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-TRIM-COLD
+
 ; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/inline-noprobe2.perfscript --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t --populate-profile-symbol-list=1
 ; RUN: llvm-profdata show -show-prof-sym-list -sample %t | FileCheck %s --check-prefix=CHECK-SYM-LIST
 
@@ -102,6 +105,11 @@
 ;CHECK-DENSITY: AutoFDO is estimated to optimize better with 4.9x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
 ;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 0.2
 
+;CHECK-TRIM-COLD: partition_pivot_first:367:5
+;CHECK-TRIM-COLD: partition_pivot_last:225:7
+;CHECK-TRIM-COLD-NOT: quick_sort:83:25
+;CHECK-TRIM-COLD-NOT: main:52:0
+
 ; original code:
 ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
 #include <stdio.h>
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -122,6 +122,8 @@
   void
   populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
   void postProcessProfiles();
+  void trimColdProfiles(const SampleProfileMap &Profiles,
+                        uint64_t ColdCntThreshold);
   void calculateAndShowDensity(const SampleProfileMap &Profiles) override;
 };
 
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -49,6 +49,11 @@
     cl::Hidden,
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
 
+static cl::opt<bool>
+    TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
+                    cl::desc("If the total count of the profile is smaller "
+                             "than threshold, it will be trimmed."));
+
 static cl::opt<bool> CSProfMergeColdContext(
     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
     cl::desc("If the total count of context profile is smaller than "
@@ -371,9 +376,32 @@
 
 void ProfileGenerator::postProcessProfiles() {
   computeSummaryAndThreshold();
+  trimColdProfiles(ProfileMap, ColdCountThreshold);
   calculateAndShowDensity(ProfileMap);
 }
 
+// Erase from ProfileMap every profile whose total sample count is below
+// ColdCntThreshold. Does nothing unless the trim-cold-profile option is set.
+// Note: the Profiles argument is not read; both the scan and the erase act
+// on the ProfileMap member.
+void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
+                                        uint64_t ColdCntThreshold) {
+  if (!TrimColdProfile)
+    return;
+
+  // Collect the keys of the cold profiles first, so that ProfileMap is not
+  // modified while it is being iterated.
+  std::vector<SampleContext> ColdProfiles;
+  for (const auto &I : ProfileMap) {
+    if (I.second.getTotalSamples() < ColdCntThreshold)
+      ColdProfiles.emplace_back(I.first);
+  }
+
+  // Remove the cold profiles from ProfileMap.
+  for (const auto &I : ColdProfiles)
+    ProfileMap.erase(I);
+}
+
 void ProfileGenerator::generateLineNumBasedProfile() {
   assert(SampleCounters.size() == 1 &&
          "Must have one entry for profile generation.");