diff --git a/llvm/test/tools/llvm-profgen/cold-profile-trimming-symbolized.test b/llvm/test/tools/llvm-profgen/cold-profile-trimming-symbolized.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/cold-profile-trimming-symbolized.test
@@ -0,0 +1,63 @@
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0
+; RUN: llvm-profgen --format=text --llvm-sample-profile=%t1 --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t2 --trim-cold-profile=1 --profile-summary-cold-count=1000
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-TRIM
+
+
+;CHECK-TRIM: partition_pivot_last:5187:7
+;CHECK-TRIM: partition_pivot_first:3010:5
+;CHECK-TRIM-NOT: quick_sort:903:25
+;CHECK-TRIM-NOT: main:820:0
+
+; original code:
+; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
+#include <stdio.h>
+#include <stdlib.h>
+
+void swap(int *a, int *b) {
+  int t = *a;
+  *a = *b;
+  *b = t;
+}
+
+int partition_pivot_last(int* array, int low, int high) {
+  int pivot = array[high];
+  int i = low - 1;
+  for (int j = low; j < high; j++)
+    if (array[j] < pivot)
+      swap(&array[++i], &array[j]);
+  swap(&array[i + 1], &array[high]);
+  return (i + 1);
+}
+
+int partition_pivot_first(int* array, int low, int high) {
+  int pivot = array[low];
+  int i = low + 1;
+  for (int j = low + 1; j <= high; j++)
+    if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
+  swap(&array[i - 1], &array[low]);
+  return i - 1;
+}
+
+void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
+  if (low < high) {
+    int pi = (*partition_func)(array, low, high);
+    quick_sort(array, low, pi - 1, partition_func);
+    quick_sort(array, pi + 1, high, partition_func);
+  }
+}
+
+int main() {
+  const int size = 200;
+  int sum = 0;
+  int *array = malloc(size * sizeof(int));
+  for(int i = 0; i < 100 * 1000; i++) {
+    for(int j = 0; j < size; j++)
+      array[j] = j % 10 ? rand() % size: j;
+    int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
+    quick_sort(array, 0, size - 1, fptr);
+    sum += array[i % size];
+  }
+  printf("sum=%d\n", sum);
+
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-profgen/cs-preinline-sample-profile.test b/llvm/test/tools/llvm-profgen/cs-preinline-sample-profile.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/cs-preinline-sample-profile.test
@@ -0,0 +1,43 @@
+; Test default llvm-profgen with preinline off
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --csspgo-preinliner=0 --gen-cs-nested-profile=0 --output=%t1
+
+; Test that llvm-profgen with the preinliner on merges non-inlinable profiles into the base profile.
+; RUN: llvm-profgen --format=text --llvm-sample-profile=%t1 --binary=%S/Inputs/inline-cs-noprobe.perfbin --csspgo-preinliner=1 --gen-cs-nested-profile=0 --sample-profile-hot-inline-threshold=3000 --sample-profile-cold-inline-threshold=45 --output=%t2
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-PREINL
+
+; Test default llvm-profgen with preinline off
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --csspgo-preinliner=0 --gen-cs-nested-profile=0 --output=%t3
+
+; Test that llvm-profgen with the preinliner on merges non-inlinable profiles into the base profile.
+; RUN: llvm-profgen --format=text --llvm-sample-profile=%t3 --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --csspgo-preinliner=1 --gen-cs-nested-profile=0 --sample-profile-hot-inline-threshold=3000 --sample-profile-cold-inline-threshold=45 --output=%t4
+; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-PREINL-PROBE
+
+; CHECK-PREINL: [foo]:309:0
+; CHECK-PREINL-NEXT: 2.1: 14
+; CHECK-PREINL-NEXT: 3: 15
+; CHECK-PREINL-NEXT: 3.1: 14 bar:14
+; CHECK-PREINL-NEXT: 3.2: 1
+; CHECK-PREINL-NEXT: 65526: 14
+; CHECK-PREINL-NEXT: !Attributes: 1
+; CHECK-PREINL-NEXT:[foo:3.1 @ bar]:84:0
+; CHECK-PREINL-NEXT: 1: 14
+; CHECK-PREINL-NEXT: !Attributes: 3
+
+
+; CHECK-PREINL-PROBE: [foo]:74:0
+; CHECK-PREINL-PROBE-NEXT: 1: 0
+; CHECK-PREINL-PROBE-NEXT: 2: 15
+; CHECK-PREINL-PROBE-NEXT: 3: 15
+; CHECK-PREINL-PROBE-NEXT: 4: 14
+; CHECK-PREINL-PROBE-NEXT: 5: 1
+; CHECK-PREINL-PROBE-NEXT: 6: 15
+; CHECK-PREINL-PROBE-NEXT: 7: 0
+; CHECK-PREINL-PROBE-NEXT: 8: 14 bar:14
+; CHECK-PREINL-PROBE-NEXT: 9: 0
+; CHECK-PREINL-PROBE-NEXT: !CFGChecksum: 563088904013236
+; CHECK-PREINL-PROBE-NEXT: !Attributes: 1
+; CHECK-PREINL-PROBE-NEXT:[foo:8 @ bar]:28:14
+; CHECK-PREINL-PROBE-NEXT: 1: 14
+; CHECK-PREINL-PROBE-NEXT: 4: 14
+; CHECK-PREINL-PROBE-NEXT: !CFGChecksum: 72617220756
+; CHECK-PREINL-PROBE-NEXT: !Attributes: 3
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -33,11 +33,21 @@
 
 public:
   ProfileGeneratorBase(ProfiledBinary *Binary,
-                       const ContextSampleCounterMap &Counters)
+                       const ContextSampleCounterMap *Counters)
       : Binary(Binary), SampleCounters(Counters){};
+  // Build a generator from an existing profile map instead of raw sample
+  // counters. Profiles is taken by non-const rvalue reference so that
+  // std::move below really moves the map; a const rvalue would be copied.
+  ProfileGeneratorBase(ProfiledBinary *Binary, SampleProfileMap &&Profiles)
+      : Binary(Binary), ProfileMap(std::move(Profiles)){};
+
   virtual ~ProfileGeneratorBase() = default;
   static std::unique_ptr<ProfileGeneratorBase>
-  create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
+  create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
+         bool ProfileIsCSFlat);
+  // Overload used when the input is an LLVM sample profile.
+  static std::unique_ptr<ProfileGeneratorBase>
+  create(ProfiledBinary *Binary, SampleProfileMap &&ProfileMap,
          bool ProfileIsCSFlat);
   virtual void generateProfile() = 0;
   void write();
@@ -113,20 +123,23 @@
 
   uint64_t ColdCountThreshold;
 
+  ProfiledBinary *Binary = nullptr;
+
   // Used by SampleProfileWriter
   SampleProfileMap ProfileMap;
 
-  ProfiledBinary *Binary = nullptr;
-
-  const ContextSampleCounterMap &SampleCounters;
+  // Null when the generator was created from an LLVM sample profile.
+  const ContextSampleCounterMap *SampleCounters = nullptr;
 };
 
 class ProfileGenerator : public ProfileGeneratorBase {
 
 public:
   ProfileGenerator(ProfiledBinary *Binary,
-                   const ContextSampleCounterMap &Counters)
+                   const ContextSampleCounterMap *Counters)
       : ProfileGeneratorBase(Binary, Counters){};
+  ProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &&Profiles)
+      : ProfileGeneratorBase(Binary, std::move(Profiles)){};
   void generateProfile() override;
 
 private:
@@ -143,9 +156,10 @@
   void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
   void
   populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
-  void populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
   void
-  populateBoundarySamplesWithProbesForAllFunctions(const BranchSample &BranchCounters);
+  populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
+  void populateBoundarySamplesWithProbesForAllFunctions(
+      const BranchSample &BranchCounters);
   void postProcessProfiles();
   void trimColdProfiles(const SampleProfileMap &Profiles,
                         uint64_t ColdCntThreshold);
@@ -154,9 +168,10 @@
 class CSProfileGenerator : public ProfileGeneratorBase {
 public:
   CSProfileGenerator(ProfiledBinary *Binary,
-                     const ContextSampleCounterMap &Counters)
+                     const ContextSampleCounterMap *Counters)
       : ProfileGeneratorBase(Binary, Counters){};
-
+  CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &&Profiles)
+      : ProfileGeneratorBase(Binary, std::move(Profiles)){};
   void generateProfile() override;
 
   // Trim the context stack at a given depth.
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -7,12 +7,14 @@
 //===----------------------------------------------------------------------===//
 #include "ProfileGenerator.h"
 #include "ErrorHandling.h"
+#include "PerfReader.h"
 #include "ProfiledBinary.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include <algorithm>
 #include <float.h>
 #include <unordered_set>
+#include <utility>
 
 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                     cl::Required,
@@ -109,7 +111,7 @@
 
 std::unique_ptr<ProfileGeneratorBase>
 ProfileGeneratorBase::create(ProfiledBinary *Binary,
-                             const ContextSampleCounterMap &SampleCounters,
+                             const ContextSampleCounterMap *SampleCounters,
                              bool ProfileIsCSFlat) {
   std::unique_ptr<ProfileGeneratorBase> Generator;
   if (ProfileIsCSFlat) {
@@ -125,6 +127,24 @@
   return Generator;
 }
 
+std::unique_ptr<ProfileGeneratorBase>
+ProfileGeneratorBase::create(ProfiledBinary *Binary,
+                             SampleProfileMap &&Profiles,
+                             bool ProfileIsCSFlat) {
+  std::unique_ptr<ProfileGeneratorBase> Generator;
+  if (ProfileIsCSFlat) {
+    if (Binary->useFSDiscriminator())
+      exitWithError("FS discriminator is not supported in CS profile.");
+    Generator.reset(new CSProfileGenerator(Binary, std::move(Profiles)));
+  } else {
+    Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
+  }
+  ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
+  FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
+
+  return Generator;
+}
+
 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
                                  SampleProfileMap &ProfileMap) {
   // Populate profile symbol list if extended binary format is used.
@@ -372,31 +392,40 @@
 
 void ProfileGeneratorBase::collectProfiledFunctions() {
   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
-  // Go through all the stacks, ranges and branches in sample counters, use the
-  // start of the range to look up the function it belongs and record the
-  // function.
-  for (const auto &CI : SampleCounters) {
-    if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
-      for (auto Addr : CtxKey->Context) {
-        if (FuncRange *FRange = Binary->findFuncRangeForOffset(
-                Binary->virtualAddrToOffset(Addr)))
+  if (SampleCounters) {
+    // Go through all the stacks, ranges and branches in sample counters, use
+    // the start of the range to look up the function it belongs and record the
+    // function.
+    for (const auto &CI : *SampleCounters) {
+      if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
+        for (auto Addr : CtxKey->Context) {
+          if (FuncRange *FRange = Binary->findFuncRangeForOffset(
+                  Binary->virtualAddrToOffset(Addr)))
+            ProfiledFunctions.insert(FRange->Func);
+        }
+      }
+
+      for (auto Item : CI.second.RangeCounter) {
+        uint64_t StartOffset = Item.first.first;
+        if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
           ProfiledFunctions.insert(FRange->Func);
       }
-    }
 
-    for (auto Item : CI.second.RangeCounter) {
-      uint64_t StartOffset = Item.first.first;
-      if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
-        ProfiledFunctions.insert(FRange->Func);
+      for (auto Item : CI.second.BranchCounter) {
+        // The branch key is a pair of offsets: source first, target second.
+        uint64_t SourceOffset = Item.first.first;
+        uint64_t TargetOffset = Item.first.second;
+        if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
+          ProfiledFunctions.insert(FRange->Func);
+        if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
+          ProfiledFunctions.insert(FRange->Func);
+      }
     }
-
-    for (auto Item : CI.second.BranchCounter) {
-      uint64_t SourceOffset = Item.first.first;
-      uint64_t TargetOffset = Item.first.first;
-      if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
-        ProfiledFunctions.insert(FRange->Func);
-      if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
-        ProfiledFunctions.insert(FRange->Func);
+  } else {
+    // This is for the case the input is a llvm sample profile.
+    for (const auto &FS : ProfileMap) {
+      if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
+        ProfiledFunctions.insert(Func);
     }
   }
 
@@ -416,11 +445,18 @@
 
 void ProfileGenerator::generateProfile() {
   collectProfiledFunctions();
-  if (Binary->usePseudoProbes()) {
-    generateProbeBasedProfile();
-  } else {
-    generateLineNumBasedProfile();
+
+  if (Binary->usePseudoProbes())
+    Binary->decodePseudoProbe();
+
+  if (SampleCounters) {
+    if (Binary->usePseudoProbes()) {
+      generateProbeBasedProfile();
+    } else {
+      generateLineNumBasedProfile();
+    }
   }
+
   postProcessProfiles();
 }
 
@@ -448,9 +484,9 @@
 }
 
 void ProfileGenerator::generateLineNumBasedProfile() {
-  assert(SampleCounters.size() == 1 &&
+  assert(SampleCounters->size() == 1 &&
          "Must have one entry for profile generation.");
-  const SampleCounter &SC = SampleCounters.begin()->second;
+  const SampleCounter &SC = SampleCounters->begin()->second;
   // Fill in function body samples
   populateBodySamplesForAllFunctions(SC.RangeCounter);
   // Fill in boundary sample counts as well as call site samples for calls
@@ -460,12 +496,11 @@
 }
 
 void ProfileGenerator::generateProbeBasedProfile() {
-  assert(SampleCounters.size() == 1 &&
+  assert(SampleCounters->size() == 1 &&
          "Must have one entry for profile generation.");
-  Binary->decodePseudoProbe();
   // Enable pseudo probe functionalities in SampleProf
   FunctionSamples::ProfileIsProbeBased = true;
-  const SampleCounter &SC = SampleCounters.begin()->second;
+  const SampleCounter &SC = SampleCounters->begin()->second;
   // Fill in function body samples
   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
   // Fill in boundary sample counts as well as call site samples for calls
@@ -687,10 +722,15 @@
 
   collectProfiledFunctions();
 
-  if (Binary->usePseudoProbes()) {
-    generateProbeBasedProfile();
-  } else {
-    generateLineNumBasedProfile();
+  if (Binary->usePseudoProbes())
+    Binary->decodePseudoProbe();
+
+  if (SampleCounters) {
+    if (Binary->usePseudoProbes()) {
+      generateProbeBasedProfile();
+    } else {
+      generateLineNumBasedProfile();
+    }
   }
 
   if (Binary->getTrackFuncContextSize())
@@ -709,7 +749,7 @@
 }
 
 void CSProfileGenerator::generateLineNumBasedProfile() {
-  for (const auto &CI : SampleCounters) {
+  for (const auto &CI : *SampleCounters) {
     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
 
     // Get or create function profile for the range
@@ -967,10 +1007,9 @@
 }
 
 void CSProfileGenerator::generateProbeBasedProfile() {
-  Binary->decodePseudoProbe();
   // Enable pseudo probe functionalities in SampleProf
   FunctionSamples::ProfileIsProbeBased = true;
-  for (const auto &CI : SampleCounters) {
+  for (const auto &CI : *SampleCounters) {
     const AddrBasedCtxKey *CtxKey =
         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
     SampleContextFrameVector ContextStack;
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
--- a/llvm/tools/llvm-profgen/llvm-profgen.cpp
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -49,6 +49,12 @@
 static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"),
                      cl::aliasopt(UnsymbolizedProfFilename));
 
+static cl::opt<std::string>
+    SampleProfFilename("llvm-sample-profile",
+                       cl::value_desc("llvm sample profile"), cl::ZeroOrMore,
+                       cl::desc("Path of the LLVM sample profile"),
+                       cl::cat(ProfGenCategory));
+
 static cl::opt<std::string> BinaryPath(
     "binary", cl::value_desc("binary"), cl::Required,
     cl::desc("Path of profiled executable binary."),
@@ -76,7 +82,9 @@
   uint16_t HasPerfScript = PerfScriptFilename.getNumOccurrences();
   uint16_t HasUnsymbolizedProfile =
       UnsymbolizedProfFilename.getNumOccurrences();
-  uint16_t S = HasPerfData + HasPerfScript + HasUnsymbolizedProfile;
+  uint16_t HasSampleProfile = SampleProfFilename.getNumOccurrences();
+  uint16_t S =
+      HasPerfData + HasPerfScript + HasUnsymbolizedProfile + HasSampleProfile;
   if (S != 1) {
     std::string Msg =
         S > 1
@@ -97,6 +105,7 @@
     CheckFileExists(HasPerfData, PerfDataFilename);
     CheckFileExists(HasPerfScript, PerfScriptFilename);
     CheckFileExists(HasUnsymbolizedProfile, UnsymbolizedProfFilename);
+    CheckFileExists(HasSampleProfile, SampleProfFilename);
   }
 
   if (!llvm::sys::fs::exists(BinaryPath)) {
@@ -146,20 +155,39 @@
   if (ShowDisassemblyOnly)
     return EXIT_SUCCESS;
 
-  PerfInputFile PerfFile = getPerfInputFile();
-  std::unique_ptr<PerfReaderBase> Reader =
-      PerfReaderBase::create(Binary.get(), PerfFile);
-  // Parse perf events and samples
-  Reader->parsePerfTraces();
-
-  if (SkipSymbolization)
-    return EXIT_SUCCESS;
-
-  std::unique_ptr<ProfileGeneratorBase> Generator =
-      ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(),
-                                   Reader->profileIsCSFlat());
-  Generator->generateProfile();
-  Generator->write();
+  if (SampleProfFilename.getNumOccurrences()) {
+    // The input is already an LLVM sample profile: load it and let the
+    // generator post-process it instead of parsing perf data.
+    LLVMContext Context;
+    auto ReaderOrErr = SampleProfileReader::create(SampleProfFilename, Context);
+    if (std::error_code EC = ReaderOrErr.getError())
+      exitWithError(EC, SampleProfFilename);
+    std::unique_ptr<sampleprof::SampleProfileReader> Reader =
+        std::move(ReaderOrErr.get());
+    if (std::error_code EC = Reader->read())
+      exitWithError(EC, SampleProfFilename);
+    std::unique_ptr<ProfileGeneratorBase> Generator =
+        ProfileGeneratorBase::create(Binary.get(),
+                                     std::move(Reader->getProfiles()),
+                                     Reader->profileIsCSFlat());
+    Generator->generateProfile();
+    Generator->write();
+  } else {
+    PerfInputFile PerfFile = getPerfInputFile();
+    std::unique_ptr<PerfReaderBase> Reader =
+        PerfReaderBase::create(Binary.get(), PerfFile);
+    // Parse perf events and samples
+    Reader->parsePerfTraces();
+
+    if (SkipSymbolization)
+      return EXIT_SUCCESS;
+
+    std::unique_ptr<ProfileGeneratorBase> Generator =
+        ProfileGeneratorBase::create(Binary.get(), &Reader->getSampleCounters(),
+                                     Reader->profileIsCSFlat());
+    Generator->generateProfile();
+    Generator->write();
+  }
 
   return EXIT_SUCCESS;
 }