diff --git a/llvm/test/tools/llvm-profgen/Inputs/profile-density-cs.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/profile-density-cs.raw.prof new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/profile-density-cs.raw.prof @@ -0,0 +1,154 @@ +[main] + 8 + 810-82f:15 + 834-85c:15 + 870-870:1544 + 875-8a1:11 + 875-8bf:1223 + 875-8c3:185 + 893-8bf:176 + 8a7-8c3:13 + 5 + 82f->790:15 + 870->540:1546 + 8a1->810:15 + 8bf->870:2022 + 8c3->893:276 +[partition_pivot_first] + 10 + 710-72d:238 + 740-753:1 + 740-75b:739 + 740-75f:267 + 740-761:1164 + 743-753:12 + 743-75b:2414 + 743-761:793 + 755-75b:103 + 755-75f:115 + 3 + 753->770:13 + 75b->743:3327 + 75f->740:385 +[partition_pivot_first:4.2 @ swap] + 1 + 764-76e:2904 + 1 + 76e->740:2999 +[partition_pivot_first:5 @ swap] + 2 + 770-770:619 + 77a-783:619 + 0 +[partition_pivot_last] + 15 + 650-66d:206 + 650-675:182 + 682-689:164 + 686-689:193 + 6b0-6b7:18 + 6b0-6bf:2082 + 6b0-6c8:1180 + 6b0-6ca:683 + 6b9-6bf:170 + 6b9-6c8:92 + 6b9-6ca:62 + 6d0-6d3:2230 + 6e3-6ea:712 + 6e3-6ef:1518 + 6ec-6ef:667 + 8 + 66d->686:206 + 675->682:79 + 689->6b9:359 + 6b7->68b:18 + 6bf->6d0:2307 + 6c8->6b0:1300 + 6ca->6ec:755 + 6ea->6b0:724 +[partition_pivot_last:5 @ swap] + 3 + 677-67d:292 + 6d6-6df:3621 + 6f2-700:3528 + 1 + 700->6b0:3619 +[partition_pivot_last:6 @ swap] + 2 + 68b-68b:1124 + 695-69e:1124 + 0 +[quick_sort] + 4 + 790-79c:1273 + 7a6-7a6:1273 + 7a8-7b8:941 + 7bd-7ca:791 + 4 + 7a6->650:817 + 7a6->710:489 + 7b8->790:961 + 7ca->790:805 +[quick_sort:2 @ partition_pivot_first] + 12 + 710-72d:408 + 740-753:208 + 740-75b:463 + 740-75f:262 + 740-761:496 + 743-753:386 + 743-75b:1300 + 743-761:451 + 755-75b:283 + 755-75f:144 + 774-777:619 + 787-788:619 + 4 + 753->770:619 + 75b->743:2137 + 75f->740:427 + 788->7a8:646 +[quick_sort:2 @ partition_pivot_last] + 17 + 650-66d:295 + 650-675:517 + 682-689:528 + 686-689:307 + 68f-692:1124 + 6a2-6a2:1124 + 6b0-6b7:806 + 6b0-6bf:1093 + 6b0-6c8:935 + 6b0-6ca:351 + 6b9-6bf:226 + 6b9-6c8:273 
+ 6b9-6ca:81 + 6d0-6d3:1391 + 6e3-6ea:500 + 6e3-6ef:891 + 6ec-6ef:452 + 9 + 66d->686:307 + 675->682:340 + 689->6b9:580 + 6a2->7a8:1167 + 6b7->68b:834 + 6bf->6d0:1391 + 6c8->6b0:1263 + 6ca->6ec:452 + 6ea->6b0:518 +[quick_sort:4 @ quick_sort] + 6 + 790-792:831 + 790-79c:331 + 7a6-7a6:331 + 7a8-7b8:441 + 7bd-7ca:632 + 7d7-7d7:2029 + 6 + 792->7d7:853 + 7a6->650:248 + 7a6->710:103 + 7b8->790:462 + 7ca->790:661 + 7d7->7cf:2097 diff --git a/llvm/test/tools/llvm-profgen/Inputs/profile-density.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/profile-density.raw.prof new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/profile-density.raw.prof @@ -0,0 +1,29 @@ +27 +400540-400540:10 +400650-40066d:31 +400686-400689:3 +40068b-4006a2:3 +4006b0-4006b7:3 +4006b0-4006bf:6 +4006b0-4006c8:6 +4006d0-4006ea:51 +4006d0-400700:4 +4006ec-400700:30 +400710-40072f:5 +400740-400753:3 +400740-40075b:9 +400740-40076e:14 +400743-400753:3 +400743-40075b:43 +400743-40076e:11 +400755-40075b:4 +400770-400788:6 +400790-400792:12 +400790-4007a6:12 +4007a8-4007b8:11 +4007bd-4007ca:12 +4007cf-4007d7:12 +4007d7-4007d7:12 +400870-400870:12 +400875-4008bf:10 +0 diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/profile-density.test @@ -0,0 +1,64 @@ +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --show-density -hot-function-density-threshold=1 &> %t2 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-DENSITY + +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -hot-function-density-threshold=1 &> %t4 +; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-DENSITY-CS + +;CHECK-DENSITY: AutoFDO is estimated to 
optimize better with 4.9x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples. +;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 0.2 + +;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 31.4 + +; original code: +; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out +#include <stdio.h> +#include <stdlib.h> + +void swap(int *a, int *b) { + int t = *a; + *a = *b; + *b = t; +} + +int partition_pivot_last(int* array, int low, int high) { + int pivot = array[high]; + int i = low - 1; + for (int j = low; j < high; j++) + if (array[j] < pivot) + swap(&array[++i], &array[j]); + swap(&array[i + 1], &array[high]); + return (i + 1); +} + +int partition_pivot_first(int* array, int low, int high) { + int pivot = array[low]; + int i = low + 1; + for (int j = low + 1; j <= high; j++) + if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;} + swap(&array[i - 1], &array[low]); + return i - 1; +} + +void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) { + if (low < high) { + int pi = (*partition_func)(array, low, high); + quick_sort(array, low, pi - 1, partition_func); + quick_sort(array, pi + 1, high, partition_func); + } +} + +int main() { + const int size = 200; + int sum = 0; + int *array = malloc(size * sizeof(int)); + for(int i = 0; i < 100 * 1000; i++) { + for(int j = 0; j < size; j++) + array[j] = j % 10 ? rand() % size: j; + int (*fptr)(int *, int, int) = i % 3 ? 
partition_pivot_last : partition_pivot_first; + quick_sort(array, 0, size - 1, fptr); + sum += array[i % size]; + } + printf("sum=%d\n", sum); + + return 0; +} diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -75,7 +75,23 @@ const SampleContextFrame &LeafLoc, uint64_t Count); void updateTotalSamples(); + StringRef getCalleeNameForOffset(uint64_t TargetOffset); + + void computeSummaryAndThreshold(); + + void calculateAndShowDensity(const SampleProfileMap &Profiles); + + double calculateDensity(const SampleProfileMap &Profiles, + uint64_t HotCntThreshold); + + void showDensitySuggestion(double Density); + + // Thresholds from profile summary to answer isHotCount/isColdCount queries. + uint64_t HotCountThreshold; + + uint64_t ColdCountThreshold; + // Used by SampleProfileWriter SampleProfileMap ProfileMap; @@ -104,6 +120,7 @@ void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); void populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); + void postProcessProfiles(); }; using ProbeCounterMap = @@ -245,8 +262,6 @@ // and trimming cold profiles, running preinliner on profiles. void postProcessProfiles(); - void computeSummaryAndThreshold(); - void populateBodySamplesForFunction(FunctionSamples &FunctionProfile, const RangeSample &RangeCounters); void populateBoundarySamplesForFunction(SampleContextFrames ContextId, @@ -269,9 +284,6 @@ FunctionSamples & getFunctionProfileForLeafProbe(SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe); - // Thresholds from profile summary to answer isHotCount/isColdCount queries. - uint64_t HotCountThreshold; - uint64_t ColdCountThreshold; // Underlying context table serves for sample profile writer. 
std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -9,6 +9,7 @@
 #include "ProfileGenerator.h"
 #include "ProfiledBinary.h"
 #include "llvm/ProfileData/ProfileCommon.h"
+#include <float.h>
 #include <unordered_set>
 
 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
@@ -70,7 +71,16 @@
              "depth limit."),
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
 
-extern cl::opt<int> ProfileSummaryCutoffCold;
+static cl::opt<double> HotFunctionDensityThreshold(
+    "hot-function-density-threshold", llvm::cl::init(1000),
+    llvm::cl::desc(
+        "specify density threshold for hot functions (default: 1000)"),
+    llvm::cl::Optional);
+static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
+                                 llvm::cl::desc("show profile density details"),
+                                 llvm::cl::Optional);
+
+extern cl::opt<int> ProfileSummaryCutoffHot;
 
 using namespace llvm;
 using namespace sampleprof;
@@ -127,6 +137,54 @@
   write(std::move(WriterOrErr.get()), ProfileMap);
 }
 
+// Warns when Density is 0.0 or falls below HotFunctionDensityThreshold, and
+// additionally prints the density to outs() when ShowDensity is set.
+void ProfileGeneratorBase::showDensitySuggestion(double Density) {
+  // calculateDensity() returns exactly 0.0 when no function qualified, so the
+  // floating-point equality test below is intentional.
+  if (Density == 0.0)
+    WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
+                            "set too low. Please check your command.\n";
+  else if (Density < HotFunctionDensityThreshold)
+    WithColor::warning()
+        << "AutoFDO is estimated to optimize better with "
+        << format("%.1f", HotFunctionDensityThreshold / Density)
+        << "x more samples. Please consider increasing sampling rate or "
+           "profiling for longer duration to get more samples.\n";
+
+  if (ShowDensity)
+    outs() << "Minimum profile density for hot functions with top "
+           << format("%.2f",
+                     static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
+                         10000)
+           << "% total samples: " << format("%.1f", Density) << "\n";
+}
+
+// Returns the smallest total-samples / function-size ratio among the
+// functions in Profiles whose total samples reach HotCntThreshold. Functions
+// missing from the binary or with zero size are skipped. Returns 0.0 when no
+// function qualifies.
+double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
+                                              uint64_t HotCntThreshold) {
+  double Density = DBL_MAX;
+  for (const auto &I : Profiles) {
+    const auto &FuncSamples = I.second;
+    if (FuncSamples.getTotalSamples() < HotCntThreshold)
+      continue;
+    auto *Func = Binary->getBinaryFunction(FuncSamples.getName());
+    if (!Func)
+      continue;
+    uint64_t FuncSize = Func->getFuncSize();
+    if (FuncSize == 0)
+      continue;
+    Density =
+        std::min(Density, static_cast<double>(FuncSamples.getTotalSamples()) /
+                              FuncSize);
+  }
+
+  return Density == DBL_MAX ? 0.0 : Density;
+}
+
 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
                                               const RangeSample &Ranges) {
 
@@ -311,6 +369,13 @@
   } else {
     generateLineNumBasedProfile();
   }
+  postProcessProfiles();
+}
+
+// Computes the summary thresholds, then reports the density of ProfileMap.
+void ProfileGenerator::postProcessProfiles() {
+  computeSummaryAndThreshold();
+  calculateAndShowDensity(ProfileMap);
 }
 
 void ProfileGenerator::generateLineNumBasedProfile() {
@@ -440,6 +505,13 @@
   }
 }
 
+// Computes the density of Profiles against HotCountThreshold and reports it.
+void ProfileGeneratorBase::calculateAndShowDensity(
+    const SampleProfileMap &Profiles) {
+  double Density = calculateDensity(Profiles, HotCountThreshold);
+  showDensitySuggestion(Density);
+}
+
 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
     const SampleContextFrameVector &Context, bool WasLeafInlined) {
   auto I = ProfileMap.find(SampleContext(Context));
@@ -664,9 +736,17 @@
             HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
             CSProfMaxColdContextDepth, EnableCSPreInliner);
   }
+
+  // Merge function samples of CS profile to calculate profile density.
+  sampleprof::SampleProfileMap ContextLessProfiles;
+  for (const auto &I : ProfileMap) {
+    ContextLessProfiles[I.second.getName()].merge(I.second);
+  }
+
+  calculateAndShowDensity(ContextLessProfiles);
 }
 
-void CSProfileGenerator::computeSummaryAndThreshold() {
+void ProfileGeneratorBase::computeSummaryAndThreshold() {
   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -76,6 +76,14 @@
   StringRef FuncName;
   // End of range is an exclusive bound.
   RangesTy Ranges;
+
+  // Returns the summed length (end - start) of every range in Ranges.
+  uint64_t getFuncSize() const {
+    uint64_t Sum = 0;
+    for (const auto &R : Ranges)
+      Sum += R.second - R.first;
+    return Sum;
+  }
 };
 
 // Info about function range. 
A function can be split into multiple
@@ -406,6 +414,13 @@
     return BinaryFunctions;
   }
 
+  // Looks up the BinaryFunction recorded under FName; returns nullptr when
+  // BinaryFunctions has no entry with that name.
+  BinaryFunction *getBinaryFunction(StringRef FName) {
+    auto It = BinaryFunctions.find(FName.str());
+    return It == BinaryFunctions.end() ? nullptr : &It->second;
+  }
+
   uint32_t getFuncSizeForContext(SampleContext &Context) {
     return FuncSizeTracker.getFuncSizeForContext(Context);
   }