diff --git a/llvm/test/tools/llvm-profgen/cold-profile-trimming-symbolized.test b/llvm/test/tools/llvm-profgen/cold-profile-trimming-symbolized.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/cold-profile-trimming-symbolized.test @@ -0,0 +1,63 @@ +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 +; RUN: llvm-profgen --format=text --symbolized-profile=%t1 --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t2 --trim-cold-profile=1 --profile-summary-cold-count=1000 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-TRIM + + +;CHECK-TRIM: partition_pivot_last:5187:7 +;CHECK-TRIM: partition_pivot_first:3010:5 +;CHECK-TRIM-NOT: quick_sort:903:25 +;CHECK-TRIM-NOT: main:820:0 + +; original code: +; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out +#include <stdio.h> +#include <stdlib.h> + +void swap(int *a, int *b) { + int t = *a; + *a = *b; + *b = t; +} + +int partition_pivot_last(int* array, int low, int high) { + int pivot = array[high]; + int i = low - 1; + for (int j = low; j < high; j++) + if (array[j] < pivot) + swap(&array[++i], &array[j]); + swap(&array[i + 1], &array[high]); + return (i + 1); +} + +int partition_pivot_first(int* array, int low, int high) { + int pivot = array[low]; + int i = low + 1; + for (int j = low + 1; j <= high; j++) + if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;} + swap(&array[i - 1], &array[low]); + return i - 1; +} + +void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) { + if (low < high) { + int pi = (*partition_func)(array, low, high); + quick_sort(array, low, pi - 1, partition_func); + quick_sort(array, pi + 1, high, partition_func); + } +} + +int main() { + const int size = 200; + int sum = 0; + int *array = malloc(size * sizeof(int)); + for(int i = 0; i < 100 * 1000; i++) { + for(int j = 0; j < size; j++) 
+ array[j] = j % 10 ? rand() % size: j; + int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first; + quick_sort(array, 0, size - 1, fptr); + sum += array[i % size]; + } + printf("sum=%d\n", sum); + + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/cs-preinline-symbolized.test b/llvm/test/tools/llvm-profgen/cs-preinline-symbolized.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/cs-preinline-symbolized.test @@ -0,0 +1,18 @@ +; Generate a symbolized profile with default llvm-profgen and the preinliner off. +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --csspgo-preinliner=0 --gen-cs-nested-profile=0 --output=%t1 + +; Test that llvm-profgen with the preinliner on merges non-inlinable profiles into the base profile. +; RUN: llvm-profgen --format=text --symbolized-profile=%t1 --binary=%S/Inputs/inline-cs-noprobe.perfbin --csspgo-preinliner=1 --gen-cs-nested-profile=0 --sample-profile-hot-inline-threshold=3000 --sample-profile-cold-inline-threshold=45 --output=%t2 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-PREINL + + +; CHECK-PREINL: [foo]:309:0 +; CHECK-PREINL-NEXT: 2.1: 14 +; CHECK-PREINL-NEXT: 3: 15 +; CHECK-PREINL-NEXT: 3.1: 14 bar:14 +; CHECK-PREINL-NEXT: 3.2: 1 +; CHECK-PREINL-NEXT: 65526: 14 +; CHECK-PREINL-NEXT: !Attributes: 1 +; CHECK-PREINL-NEXT:[foo:3.1 @ bar]:84:0 +; CHECK-PREINL-NEXT: 1: 14 +; CHECK-PREINL-NEXT: !Attributes: 3 diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -10,6 +10,7 @@ #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H #include "ErrorHandling.h" #include "ProfiledBinary.h" +#include "llvm/ProfileData/SampleProfReader.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Regex.h" @@ -64,7 +65,7 @@ PerfData = 1, // Raw linux perf.data. 
PerfScript = 2, // Perf script create by `perf script` command. UnsymbolizedProfile = 3, // Unsymbolized profile generated by llvm-profgen. - + SymbolizedProfile = 4, // Symbolized profile generated by llvm-profgen. }; // The type of perfscript content. @@ -561,13 +562,15 @@ }; virtual ~PerfReaderBase() = default; static std::unique_ptr create(ProfiledBinary *Binary, - PerfInputFile &PerfInput); + PerfInputFile &PerfInput, + LLVMContext &Context); // Entry of the reader to parse multiple perf traces virtual void parsePerfTraces() = 0; const ContextSampleCounterMap &getSampleCounters() const { return SampleCounters; } + SampleProfileMap& getProfileMap() { return ProfileMap; } bool profileIsCSFlat() { return ProfileIsCSFlat; } protected: @@ -575,6 +578,8 @@ StringRef PerfTraceFile; ContextSampleCounterMap SampleCounters; + SampleProfileMap ProfileMap; + bool ProfileIsCSFlat = false; uint64_t NumTotalSample = 0; @@ -722,6 +727,27 @@ std::unordered_set ContextStrSet; }; +// Reader for a symbolized profile previously generated by llvm-profgen. It +// fills ProfileMap from the profile file instead of collecting SampleCounters. +class SymbolizedProfileReader : public PerfReaderBase { +public: + SymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace, + LLVMContext &Context) + : PerfReaderBase(Binary, PerfTrace) { + // Bail out if no reader could be created; get() is only valid on success. + auto ReaderOrErr = SampleProfileReader::create(PerfTrace.str(), Context); + if (std::error_code EC = ReaderOrErr.getError()) + exitWithError(EC, PerfTrace); + ProfileReader = std::move(ReaderOrErr.get()); + } + void parsePerfTraces() override; + +private: + void readSymbolizedProfile(); + + std::unique_ptr<SampleProfileReader> ProfileReader; +}; + } // end namespace sampleprof } // end namespace llvm diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -332,13 +332,17 @@ } std::unique_ptr -PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput) { +PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput, LLVMContext& Context) { std::unique_ptr PerfReader; if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) { PerfReader.reset( new 
UnsymbolizedProfileReader(Binary, PerfInput.InputFile)); return PerfReader; + } else if (PerfInput.Format == PerfFormat::SymbolizedProfile) { + PerfReader.reset( + new SymbolizedProfileReader(Binary, PerfInput.InputFile, Context)); + return PerfReader; } // For perf data input, we need to convert them into perf script first. @@ -897,6 +901,20 @@ readUnsymbolizedProfile(PerfTraceFile); } +// Read the symbolized profile and take over the reader's function profiles +// and its CS-flat flag. +void SymbolizedProfileReader::readSymbolizedProfile() { + // A failed read would otherwise leave ProfileMap silently empty. + if (std::error_code EC = ProfileReader->read()) + exitWithError(EC, PerfTraceFile); + ProfileMap = std::move(ProfileReader->getProfiles()); + ProfileIsCSFlat = ProfileReader->profileIsCSFlat(); +} + +void SymbolizedProfileReader::parsePerfTraces() { + readSymbolizedProfile(); +} + void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat) { SampleCounter &Counter = SampleCounters.begin()->second; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -31,13 +31,11 @@ class ProfileGeneratorBase { public: - ProfileGeneratorBase(ProfiledBinary *Binary, - const ContextSampleCounterMap &Counters) - : Binary(Binary), SampleCounters(Counters){}; + ProfileGeneratorBase(ProfiledBinary *Binary, PerfReaderBase *Reader) + : Binary(Binary), Reader(Reader){}; virtual ~ProfileGeneratorBase() = default; static std::unique_ptr - create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters, - bool ProfileIsCSFlat); + static std::unique_ptr create(ProfiledBinary *Binary, + PerfReaderBase *Reader); virtual void generateProfile() = 0; void write(); @@ -115,15 +113,14 @@ ProfiledBinary *Binary = nullptr; - const ContextSampleCounterMap &SampleCounters; + PerfReaderBase *Reader; }; class ProfileGenerator : public ProfileGeneratorBase { public: - ProfileGenerator(ProfiledBinary *Binary, - const ContextSampleCounterMap &Counters) - : ProfileGeneratorBase(Binary, Counters){}; + ProfileGenerator(ProfiledBinary *Binary, 
PerfReaderBase *Reader) + : ProfileGeneratorBase(Binary, Reader){}; void generateProfile() override; private: @@ -140,9 +137,10 @@ void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); void populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); - void populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter); void - populateBoundarySamplesWithProbesForAllFunctions(const BranchSample &BranchCounters); + populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter); + void populateBoundarySamplesWithProbesForAllFunctions( + const BranchSample &BranchCounters); void postProcessProfiles(); void trimColdProfiles(const SampleProfileMap &Profiles, uint64_t ColdCntThreshold); @@ -150,9 +148,8 @@ class CSProfileGenerator : public ProfileGeneratorBase { public: - CSProfileGenerator(ProfiledBinary *Binary, - const ContextSampleCounterMap &Counters) - : ProfileGeneratorBase(Binary, Counters){}; + CSProfileGenerator(ProfiledBinary *Binary, PerfReaderBase *Reader) + : ProfileGeneratorBase(Binary, Reader){}; void generateProfile() override; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -8,6 +8,7 @@ #include "ProfileGenerator.h" #include "ErrorHandling.h" +#include "PerfReader.h" #include "ProfiledBinary.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/ProfileData/ProfileCommon.h" @@ -108,16 +109,14 @@ bool ProfileGeneratorBase::UseFSDiscriminator = false; std::unique_ptr -ProfileGeneratorBase::create(ProfiledBinary *Binary, - const ContextSampleCounterMap &SampleCounters, - bool ProfileIsCSFlat) { +ProfileGeneratorBase::create(ProfiledBinary *Binary, PerfReaderBase *Reader) { std::unique_ptr Generator; - if (ProfileIsCSFlat) { + if (Reader->profileIsCSFlat()) { if (Binary->useFSDiscriminator()) exitWithError("FS 
discriminator is not supported in CS profile."); - Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); + Generator.reset(new CSProfileGenerator(Binary, Reader)); } else { - Generator.reset(new ProfileGenerator(Binary, SampleCounters)); + Generator.reset(new ProfileGenerator(Binary, Reader)); } ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); @@ -382,11 +381,15 @@ } void ProfileGenerator::generateProfile() { - if (Binary->usePseudoProbes()) { - generateProbeBasedProfile(); - } else { - generateLineNumBasedProfile(); + ProfileMap = std::move(Reader->getProfileMap()); + if (ProfileMap.empty()) { + if (Binary->usePseudoProbes()) { + generateProbeBasedProfile(); + } else { + generateLineNumBasedProfile(); + } } + postProcessProfiles(); } @@ -414,9 +417,9 @@ } void ProfileGenerator::generateLineNumBasedProfile() { - assert(SampleCounters.size() == 1 && + assert(Reader->getSampleCounters().size() == 1 && "Must have one entry for profile generation."); - const SampleCounter &SC = SampleCounters.begin()->second; + const SampleCounter &SC = Reader->getSampleCounters().begin()->second; // Fill in function body samples populateBodySamplesForAllFunctions(SC.RangeCounter); // Fill in boundary sample counts as well as call site samples for calls @@ -426,11 +429,11 @@ } void ProfileGenerator::generateProbeBasedProfile() { - assert(SampleCounters.size() == 1 && + assert(Reader->getSampleCounters().size() == 1 && "Must have one entry for profile generation."); // Enable pseudo probe functionalities in SampleProf FunctionSamples::ProfileIsProbeBased = true; - const SampleCounter &SC = SampleCounters.begin()->second; + const SampleCounter &SC = Reader->getSampleCounters().begin()->second; // Fill in function body samples populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); // Fill in boundary sample counts as well as call site samples for calls @@ -442,16 +445,18 @@ void 
ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( const RangeSample &RangeCounter) { ProbeCounterMap ProbeCounter; - // preprocessRangeCounter returns disjoint ranges, so no longer to redo it inside - // extractProbesFromRange. - extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false); + // preprocessRangeCounter returns disjoint ranges, so no longer to redo it + // inside extractProbesFromRange. + extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, + false); for (const auto &PI : ProbeCounter) { const MCDecodedPseudoProbe *Probe = PI.first; uint64_t Count = PI.second; SampleContextFrameVector FrameVec; Binary->getInlineContextForProbe(Probe, FrameVec, true); - FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count); + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, Count); FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); if (Probe->isEntry()) FunctionProfile.addHeadSamples(Count); @@ -496,7 +501,8 @@ &getTopLevelFunctionProfile(FrameVec[0].FuncName); FunctionProfile->addTotalSamples(Count); if (Binary->usePseudoProbes()) { - const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName())); + const auto *FuncDesc = Binary->getFuncDescForGUID( + Function::getGUID(FunctionProfile->getName())); FunctionProfile->setFunctionHash(FuncDesc->FuncHash); } @@ -515,7 +521,8 @@ FunctionProfile = &Ret.first->second; FunctionProfile->addTotalSamples(Count); if (Binary->usePseudoProbes()) { - const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName())); + const auto *FuncDesc = Binary->getFuncDescForGUID( + Function::getGUID(FunctionProfile->getName())); FunctionProfile->setFunctionHash(FuncDesc->FuncHash); } } @@ -645,14 +652,17 @@ void CSProfileGenerator::generateProfile() { FunctionSamples::ProfileIsCSFlat = true; + ProfileMap = std::move(Reader->getProfileMap()); if 
(Binary->getTrackFuncContextSize()) computeSizeForProfiledFunctions(); - if (Binary->usePseudoProbes()) { - generateProbeBasedProfile(); - } else { - generateLineNumBasedProfile(); + if (ProfileMap.empty()) { + if (Binary->usePseudoProbes()) { + generateProbeBasedProfile(); + } else { + generateLineNumBasedProfile(); + } } postProcessProfiles(); } @@ -660,14 +670,21 @@ void CSProfileGenerator::computeSizeForProfiledFunctions() { std::unordered_set ProfiledFunctions; - // Go through all the ranges in the CS counters, use the start of the range to - // look up the function it belongs and record the function. - for (const auto &CI : SampleCounters) { - for (const auto &Item : CI.second.RangeCounter) { - // FIXME: Filter the bogus crossing function range. - uint64_t StartOffset = Item.first.first; - if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) - ProfiledFunctions.insert(FRange->Func); + if (Reader->getSampleCounters().empty()) { + for (const auto &FS : ProfileMap) { + if (auto *Func = Binary->getBinaryFunction(FS.first.getName())) + ProfiledFunctions.insert(Func); + } + } else { + // Go through all the ranges in the CS counters, use the start of the range + // to look up the function it belongs and record the function. + for (const auto &CI : Reader->getSampleCounters()) { + for (const auto &Item : CI.second.RangeCounter) { + // FIXME: Filter the bogus crossing function range. 
+ uint64_t StartOffset = Item.first.first; + if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) + ProfiledFunctions.insert(FRange->Func); + } } } @@ -679,7 +696,7 @@ } void CSProfileGenerator::generateLineNumBasedProfile() { - for (const auto &CI : SampleCounters) { + for (const auto &CI : Reader->getSampleCounters()) { const auto *CtxKey = cast(CI.first.getPtr()); // Get or create function profile for the range @@ -922,7 +939,7 @@ void CSProfileGenerator::generateProbeBasedProfile() { // Enable pseudo probe functionalities in SampleProf FunctionSamples::ProfileIsProbeBased = true; - for (const auto &CI : SampleCounters) { + for (const auto &CI : Reader->getSampleCounters()) { const auto *CtxKey = cast(CI.first.getPtr()); SampleContextFrameVector ContextStack; extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -49,6 +49,11 @@ static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"), cl::aliasopt(UnsymbolizedProfFilename)); +static cl::opt SymbolizedProfFilename( + "symbolized-profile", cl::value_desc("symbolized profile"), cl::ZeroOrMore, + llvm::cl::MiscFlags::CommaSeparated, + cl::desc("Path of the symbolized profile"), cl::cat(ProfGenCategory)); + static cl::opt BinaryPath("binary", cl::value_desc("binary"), cl::Required, cl::desc("Path of profiled executable binary."), @@ -76,7 +81,9 @@ uint16_t HasPerfScript = PerfScriptFilename.getNumOccurrences(); uint16_t HasUnsymbolizedProfile = UnsymbolizedProfFilename.getNumOccurrences(); - uint16_t S = HasPerfData + HasPerfScript + HasUnsymbolizedProfile; + uint16_t HasSymbolizedProfile = SymbolizedProfFilename.getNumOccurrences(); + uint16_t S = HasPerfData + HasPerfScript + HasUnsymbolizedProfile + + HasSymbolizedProfile; if (S != 1) { std::string Msg = S > 1 @@ -97,6 +104,7 @@ 
CheckFileExists(HasPerfData, PerfDataFilename); CheckFileExists(HasPerfScript, PerfScriptFilename); CheckFileExists(HasUnsymbolizedProfile, UnsymbolizedProfFilename); + CheckFileExists(HasSymbolizedProfile, SymbolizedProfFilename); } if (!llvm::sys::fs::exists(BinaryPath)) { @@ -124,6 +132,9 @@ } else if (UnsymbolizedProfFilename.getNumOccurrences()) { File.InputFile = UnsymbolizedProfFilename; File.Format = PerfFormat::UnsymbolizedProfile; + } else if (SymbolizedProfFilename.getNumOccurrences()) { + File.InputFile = SymbolizedProfFilename; + File.Format = PerfFormat::SymbolizedProfile; } return File; } @@ -147,8 +158,9 @@ return EXIT_SUCCESS; PerfInputFile PerfFile = getPerfInputFile(); + LLVMContext Context; std::unique_ptr Reader = - PerfReaderBase::create(Binary.get(), PerfFile); + PerfReaderBase::create(Binary.get(), PerfFile, Context); // Parse perf events and samples Reader->parsePerfTraces(); @@ -156,8 +168,7 @@ return EXIT_SUCCESS; std::unique_ptr Generator = - ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(), - Reader->profileIsCSFlat()); + ProfileGeneratorBase::create(Binary.get(), Reader.get()); Generator->generateProfile(); Generator->write();