diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test --- a/llvm/test/tools/llvm-profgen/cs-preinline.test +++ b/llvm/test/tools/llvm-profgen/cs-preinline.test @@ -11,7 +11,7 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL ; Test cold profile trimming. Only base profiles should be dropped. -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --csprof-trim-cold-context=1 --profile-summary-hot-count=250 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --trim-cold-profile=1 --profile-summary-hot-count=250 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test --- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -6,6 +6,9 @@ ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-DENSITY +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile --profile-summary-cold-count=100 +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-TRIM-COLD + ; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/inline-noprobe2.perfscript --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t --populate-profile-symbol-list=1 ; RUN: llvm-profdata show -show-prof-sym-list -sample %t | FileCheck %s --check-prefix=CHECK-SYM-LIST @@ -102,6 +105,11 @@ ;CHECK-DENSITY: AutoFDO is estimated to optimize better with 4.9x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples. 
;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 0.2 +;CHECK-TRIM-COLD: partition_pivot_first:367:5 +;CHECK-TRIM-COLD: partition_pivot_last:225:7 +;CHECK-TRIM-COLD-NOT: quick_sort:83:25 +;CHECK-TRIM-COLD-NOT: main:52:0 + ; original code: ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out #include diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test --- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -2,8 +2,8 @@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --profile-summary-hot-count=8 ; RUN: FileCheck %s --input-file %t1 -; Test --csprof-trim-cold-context=0 -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0 +; Test --trim-cold-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD ; Test --csprof-merge-cold-context=0 @@ -11,7 +11,7 @@ ; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED ; Test --csprof-frame-depth-for-cold-context -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0 
--csprof-max-cold-context-depth=2 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csprof-max-cold-context-depth=2 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH ; CHECK: [fa]:14:4 diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -122,6 +122,8 @@ void populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); void postProcessProfiles(); + void trimColdProfiles(const SampleProfileMap &Profiles, + uint64_t ColdCntThreshold); void calculateAndShowDensity(const SampleProfileMap &Profiles) override; }; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -49,17 +49,17 @@ cl::Hidden, cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); +static cl::opt + TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore, + cl::desc("If the total count of the profile is smaller " + "than threshold, it will be trimmed.")); + static cl::opt CSProfMergeColdContext( "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, cl::desc("If the total count of context profile is smaller than " "the threshold, it will be merged into context-less base " "profile.")); -static cl::opt CSProfTrimColdContext( - "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore, - cl::desc("If the total count of the profile after all merge is done " - "is still smaller than threshold, it will be trimmed.")); - static cl::opt CSProfMaxColdContextDepth( "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, 
cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
@@ -371,9 +371,32 @@
 
 void ProfileGenerator::postProcessProfiles() {
   computeSummaryAndThreshold();
+  trimColdProfiles(ProfileMap, ColdCountThreshold);
   calculateAndShowDensity(ProfileMap);
 }
 
+// Erase from ProfileMap every entry of Profiles whose total sample count is
+// below ColdCntThreshold. Does nothing unless TrimColdProfile
+// (--trim-cold-profile) is set. The only caller visible here passes
+// ProfileMap itself as Profiles.
+void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
+                                        uint64_t ColdCntThreshold) {
+  if (!TrimColdProfile)
+    return;
+
+  // Collect the cold keys first; erasing while scanning would be unsafe when
+  // Profiles aliases ProfileMap.
+  std::vector<SampleContext> ColdProfiles;
+  for (const auto &I : Profiles) {
+    if (I.second.getTotalSamples() < ColdCntThreshold)
+      ColdProfiles.emplace_back(I.first);
+  }
+
+  // Remove the cold profiles from ProfileMap.
+  for (const auto &I : ColdProfiles)
+    ProfileMap.erase(I);
+}
+
 void ProfileGenerator::generateLineNumBasedProfile() {
   assert(SampleCounters.size() == 1 &&
          "Must have one entry for profile generation.");
@@ -725,10 +748,10 @@
   }
 
   // Trim and merge cold context profile using cold threshold above.
-  if (CSProfTrimColdContext || CSProfMergeColdContext) {
+  if (TrimColdProfile || CSProfMergeColdContext) {
     SampleContextTrimmer(ProfileMap)
         .trimAndMergeColdContextProfiles(
-            HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
+            HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
             CSProfMaxColdContextDepth, EnableCSPreInliner);
   }
 