diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe.perfbin new file mode 100755 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@650:21 +CHECK-RAW-PROFILE-NEXT: 691->669:43 + +; original code: +; clang -O3 -g -fdebug-info-for-profiling test.c -o a.out +#include + +int bar(int x, int y) { + if (x % 2) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 3) s = bar(i, s); else s += bar(s, i); + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -0,0 +1,109 @@ + +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-noprobe2.perfscript --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK + +;CHECK: partition_pivot_first:1050:5 +;CHECK-NEXT: 0: 5 +;CHECK-NEXT: 1: 5 +;CHECK-NEXT: 2: 5 +;CHECK-NEXT: 3: 5 +;CHECK-NEXT: 3.1: 83 +;CHECK-NEXT: 4: 82 +;CHECK-NEXT: 4.1: 26 +;CHECK-NEXT: 4.2: 25 +;CHECK-NEXT: 4.3: 26 +;CHECK-NEXT: 5: 6 +;CHECK-NEXT: 6: 6 +;CHECK-NEXT: 4.2: swap:100 +;CHECK-NEXT: 1: 25 +;CHECK-NEXT: 2: 25 +;CHECK-NEXT: 3: 25 +;CHECK-NEXT: 5: swap:24 +;CHECK-NEXT: 1: 6 +;CHECK-NEXT: 2: 6 +;CHECK-NEXT: 3: 6 +;CHECK-NEXT: quick_sort:414:25 +;CHECK-NEXT: 1: 24 +;CHECK-NEXT: 2: 12 partition_pivot_last:7 partition_pivot_first:5 +;CHECK-NEXT: 3: 11 quick_sort:12 +;CHECK-NEXT: 4: 12 quick_sort:12 +;CHECK-NEXT: 6: 24 +;CHECK-NEXT: partition_pivot_last:391:7 +;CHECK-NEXT: 1: 6 +;CHECK-NEXT: 2: 6 +;CHECK-NEXT: 3: 6 +;CHECK-NEXT: 3.1: 18 +;CHECK-NEXT: 3.3: 18 +;CHECK-NEXT: 4: 19 +;CHECK-NEXT: 5: 9 +;CHECK-NEXT: 6: 5 +;CHECK-NEXT: 7: 5 +;CHECK-NEXT: 5: swap:61 +;CHECK-NEXT: 1: 9 +;CHECK-NEXT: 2: 9 +;CHECK-NEXT: 3: 9 +;CHECK-NEXT: 6: swap:20 +;CHECK-NEXT: 1: 5 +;CHECK-NEXT: 2: 5 +;CHECK-NEXT: 3: 5 +;CHECK-NEXT: main:213:0 +;CHECK-NEXT: 5.1: 10 +;CHECK-NEXT: 5.3: 10 +;CHECK-NEXT: 6: 10 +;CHECK-NEXT: 6.1: 12 +;CHECK-NEXT: 6.3: 10 +;CHECK-NEXT: 8: 0 quick_sort:1 + +; original code: +; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out +#include +#include + +void swap(int *a, int *b) { + int t = *a; + *a = *b; + *b = t; +} + +int partition_pivot_last(int* array, int low, int high) { + int pivot = array[high]; + int i = low - 1; + for (int j = low; j < high; j++) + if (array[j] < pivot) + swap(&array[++i], &array[j]); + swap(&array[i + 1], &array[high]); + return (i + 1); +} + +int partition_pivot_first(int* array, int low, int high) { + int pivot = array[low]; + int i = low + 1; + for (int j = low + 1; j <= high; j++) + if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;} + swap(&array[i - 1], &array[low]); + return i - 1; +} + +void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) { + if (low < high) { + int pi = (*partition_func)(array, low, high); + quick_sort(array, low, pi - 1, partition_func); + quick_sort(array, pi + 1, high, partition_func); + } +} + +int main() { + const int size = 200; + int sum = 0; + int *array = malloc(size * sizeof(int)); + for(int i = 0; i < 100 * 1000; i++) { + for(int j = 0; j < size; j++) + array[j] = j % 10 ? rand() % size: j; + int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first; + quick_sort(array, 0, size - 1, fptr); + sum += array[i % size]; + } + printf("sum=%d\n", sum); + + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/noprobe.test b/llvm/test/tools/llvm-profgen/noinline-noprobe.test rename from llvm/test/tools/llvm-profgen/noprobe.test rename to llvm/test/tools/llvm-profgen/noinline-noprobe.test --- a/llvm/test/tools/llvm-profgen/noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-noprobe.test @@ -4,6 +4,18 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.aggperfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --skip-symbolization ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE-AGG +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK + +CHECK: foo:360:0 +CHECK: 2: 19 +CHECK: 3: 19 bar:21 +CHECK: bar:324:21 +CHECK: 0: 20 +CHECK: 1: 20 +CHECK: 2: 13 +CHECK: 4: 6 +CHECK: 5: 18 CHECK-RAW-PROFILE: 7 CHECK-RAW-PROFILE-NEXT: 5b0-5c8:7 @@ -40,7 +52,7 @@ ; original code: -; clang -O3 -g -debug-info-for-profiling test.c -o a.out +; clang -O3 -g -fdebug-info-for-profiling test.c -fno-inline -o a.out #include int bar(int x, int y) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -22,21 +22,26 @@ namespace llvm { namespace sampleprof { -class ProfileGenerator { +// This base class for profile generation of sample-based PGO. We reuse all +// structures relating to function profiles and profile writers as seen in +// /ProfileData/SampleProf.h. +class ProfileGeneratorBase { public: - ProfileGenerator(ProfiledBinary *B) : Binary(B){}; - virtual ~ProfileGenerator() = default; - static std::unique_ptr + ProfileGeneratorBase(ProfiledBinary *Binary, + const ContextSampleCounterMap &Counters) + : Binary(Binary), SampleCounters(Counters){}; + virtual ~ProfileGeneratorBase() = default; + static std::unique_ptr create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters, enum PerfScriptType SampleType); virtual void generateProfile() = 0; - // Use SampleProfileWriter to serialize profile map - virtual void write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap); void write(); protected: + // Use SampleProfileWriter to serialize profile map + void write(std::unique_ptr Writer, + SampleProfileMap &ProfileMap); /* For each region boundary point, mark if it is begin or end (or both) of the region. Boundary points are inclusive. Log the sample count as well @@ -54,23 +59,51 @@ */ void findDisjointRanges(RangeSample &DisjointRanges, const RangeSample &Ranges); + // Helper function for updating body sample for a leaf location in + // FunctionProfile + void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, + const SampleContextFrame &LeafLoc, + uint64_t Count); // Used by SampleProfileWriter SampleProfileMap ProfileMap; ProfiledBinary *Binary = nullptr; -}; -class CSProfileGenerator : public ProfileGenerator { -protected: const ContextSampleCounterMap &SampleCounters; +}; + +class ProfileGenerator : public ProfileGeneratorBase { + +public: + ProfileGenerator(ProfiledBinary *Binary, + const ContextSampleCounterMap &Counters) + : ProfileGeneratorBase(Binary, Counters){}; + void generateProfile() override; + +private: + void generateLineNumBasedProfile(); + FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName); + // Helper function to get the leaf frame's FunctionProfile by traversing the + // inline stack and meanwhile it adds the total samples for each frame's + // function profile. + FunctionSamples & + getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec, + uint64_t Count); + void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); + void + populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); +}; + +using ProbeCounterMap = + std::unordered_map; +class CSProfileGenerator : public ProfileGeneratorBase { public: CSProfileGenerator(ProfiledBinary *Binary, const ContextSampleCounterMap &Counters) - : ProfileGenerator(Binary), SampleCounters(Counters){}; + : ProfileGeneratorBase(Binary, Counters){}; -public: void generateProfile() override; // Trim the context stack at a given depth. @@ -188,7 +221,8 @@ } } -protected: +private: + void generateLineNumBasedProfile(); // Lookup or create FunctionSamples for the context FunctionSamples & getFunctionProfileForContext(const SampleContextFrameVector &Context, @@ -196,49 +230,17 @@ // Post processing for profiles before writing out, such as mermining // and trimming cold profiles, running preinliner on profiles. void postProcessProfiles(); - void computeSummaryAndThreshold(); - void write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap) override; - - // Thresholds from profile summary to answer isHotCount/isColdCount queries. - uint64_t HotCountThreshold; - uint64_t ColdCountThreshold; - // Underlying context table serves for sample profile writer. - std::unordered_set Contexts; + void computeSummaryAndThreshold(); -private: - // Helper function for updating body sample for a leaf location in - // FunctionProfile - void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, - const SampleContextFrame &LeafLoc, - uint64_t Count); - void populateFunctionBodySamples(FunctionSamples &FunctionProfile, - const RangeSample &RangeCounters); - void populateFunctionBoundarySamples(SampleContextFrames ContextId, - FunctionSamples &FunctionProfile, - const BranchSample &BranchCounters); + void populateBodySamplesForFunction(FunctionSamples &FunctionProfile, + const RangeSample &RangeCounters); + void populateBoundarySamplesForFunction(SampleContextFrames ContextId, + FunctionSamples &FunctionProfile, + const BranchSample &BranchCounters); void populateInferredFunctionSamples(); -public: - // Deduplicate adjacent repeated context sequences up to a given sequence - // length. -1 means no size limit. - static int32_t MaxCompressionSize; - static int MaxContextDepth; -}; - -using ProbeCounterMap = - std::unordered_map; - -class PseudoProbeCSProfileGenerator : public CSProfileGenerator { - -public: - PseudoProbeCSProfileGenerator(ProfiledBinary *Binary, - const ContextSampleCounterMap &Counters) - : CSProfileGenerator(Binary, Counters) {} - void generateProfile() override; - -private: + void generateProbeBasedProfile(); // Go through each address from range to extract the top frame probe by // looking up in the Address2ProbeMap void extractProbesFromRange(const RangeSample &RangeCounter, @@ -253,6 +255,18 @@ FunctionSamples & getFunctionProfileForLeafProbe(SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe); + // Thresholds from profile summary to answer isHotCount/isColdCount queries. + uint64_t HotCountThreshold; + uint64_t ColdCountThreshold; + + // Underlying context table serves for sample profile writer. + std::unordered_set Contexts; + +public: + // Deduplicate adjacent repeated context sequences up to a given sequence + // length. -1 means no size limit. + static int32_t MaxCompressionSize; + static int MaxContextDepth; }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -74,33 +74,30 @@ int CSProfileGenerator::MaxContextDepth = -1; -std::unique_ptr -ProfileGenerator::create(ProfiledBinary *Binary, - const ContextSampleCounterMap &SampleCounters, - enum PerfScriptType SampleType) { - std::unique_ptr ProfileGenerator; - if (SampleType == PERF_LBR_STACK) { - if (Binary->usePseudoProbes()) { - ProfileGenerator.reset( - new PseudoProbeCSProfileGenerator(Binary, SampleCounters)); - } else { - ProfileGenerator.reset(new CSProfileGenerator(Binary, SampleCounters)); - } +std::unique_ptr +ProfileGeneratorBase::create(ProfiledBinary *Binary, + const ContextSampleCounterMap &SampleCounters, + enum PerfScriptType SampleType) { + std::unique_ptr Generator; + if (SampleType == PERF_LBR) { + // TODO: Support probe based profile generation + Generator.reset(new ProfileGenerator(Binary, SampleCounters)); + } else if (SampleType == PERF_LBR_STACK) { + Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); } else { - // TODO: llvm_unreachable("Unsupported perfscript!"); } - return ProfileGenerator; + return Generator; } -void ProfileGenerator::write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap) { +void ProfileGeneratorBase::write(std::unique_ptr Writer, + SampleProfileMap &ProfileMap) { if (std::error_code EC = Writer->write(ProfileMap)) exitWithError(std::move(EC)); } -void ProfileGenerator::write() { +void ProfileGeneratorBase::write() { auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); if (std::error_code EC = WriterOrErr.getError()) exitWithError(EC, OutputFilename); @@ -116,8 +113,8 @@ write(std::move(WriterOrErr.get()), ProfileMap); } -void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges, - const RangeSample &Ranges) { +void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, + const RangeSample &Ranges) { /* Regions may overlap with each other. Using the boundary info, find all @@ -216,6 +213,137 @@ } } +void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( + FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, + uint64_t Count) { + // Filter out invalid negative(int type) lineOffset + if (LeafLoc.Callsite.LineOffset & 0x80000000) + return; + // Use the maximum count of samples with same line location + ErrorOr R = FunctionProfile.findSamplesAt( + LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator); + uint64_t PreviousCount = R ? R.get() : 0; + if (PreviousCount < Count) { + FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset, + LeafLoc.Callsite.Discriminator, + Count - PreviousCount); + } +} + +FunctionSamples & +ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { + SampleContext Context(FuncName); + auto Ret = ProfileMap.emplace(Context, FunctionSamples()); + if (Ret.second) { + FunctionSamples &FProfile = Ret.first->second; + FProfile.setContext(Context); + } + return Ret.first->second; +} + +void ProfileGenerator::generateProfile() { + if (Binary->usePseudoProbes()) { + // TODO: Support probe based profile generation + } else { + generateLineNumBasedProfile(); + } +} + +void ProfileGenerator::generateLineNumBasedProfile() { + assert(SampleCounters.size() == 1 && + "Must have one entry for profile generation."); + const SampleCounter &SC = SampleCounters.begin()->second; + // Fill in function body samples + populateBodySamplesForAllFunctions(SC.RangeCounter); + // Fill in boundary sample counts as well as call site samples for calls + populateBoundarySamplesForAllFunctions(SC.BranchCounter); +} + +FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( + const SampleContextFrameVector &FrameVec, uint64_t Count) { + // Get top level profile + FunctionSamples *FunctionProfile = + &getTopLevelFunctionProfile(FrameVec[0].CallerName); + FunctionProfile->addTotalSamples(Count); + + for (size_t I = 1; I < FrameVec.size(); I++) { + FunctionSamplesMap &SamplesMap = + FunctionProfile->functionSamplesAt(FrameVec[I - 1].Callsite); + auto Ret = + SamplesMap.emplace(FrameVec[I].CallerName.str(), FunctionSamples()); + if (Ret.second) { + SampleContext Context(FrameVec[I].CallerName); + Ret.first->second.setContext(Context); + } + FunctionProfile = &Ret.first->second; + FunctionProfile->addTotalSamples(Count); + } + + return *FunctionProfile; +} + +void ProfileGenerator::populateBodySamplesForAllFunctions( + const RangeSample &RangeCounter) { + RangeSample Ranges; + findDisjointRanges(Ranges, RangeCounter); + for (auto Range : Ranges) { + uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); + uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t Count = Range.second; + // Disjoint ranges have introduce zero-filled gap that + // doesn't belong to current context, filter them out. + if (Count == 0) + continue; + + InstructionPointer IP(Binary, RangeBegin, true); + // Disjoint ranges may have range in the middle of two instr, + // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range + // can be Addr1+1 to Addr2-1. We should ignore such range. + while (IP.Address <= RangeEnd) { + uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); + const SampleContextFrameVector &FrameVec = + Binary->getFrameLocationStack(Offset); + if (!FrameVec.empty()) { + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, Count); + updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), + Count); + } + // Move to next IP within the range. + IP.advance(); + } + } +} + +void ProfileGenerator::populateBoundarySamplesForAllFunctions( + const BranchSample &BranchCounters) { + for (auto Entry : BranchCounters) { + uint64_t SourceOffset = Entry.first.first; + uint64_t TargetOffset = Entry.first.second; + uint64_t Count = Entry.second; + assert(Count != 0 && "Unexpected zero weight branch"); + + // Get the callee name by branch target if it's a call branch. + StringRef CalleeName = FunctionSamples::getCanonicalFnName( + Binary->getFuncFromStartOffset(TargetOffset)); + if (CalleeName.size() == 0) + continue; + // Record called target sample and its count. + const SampleContextFrameVector &FrameVec = + Binary->getFrameLocationStack(SourceOffset); + if (!FrameVec.empty()) { + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, Count); + FunctionProfile.addCalledTargetSamples( + FrameVec.back().Callsite.LineOffset, + FrameVec.back().Callsite.Discriminator, CalleeName, Count); + } + // Add head samples for callee. + FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); + CalleeProfile.addHeadSamples(Count); + } +} + FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( const SampleContextFrameVector &Context, bool WasLeafInlined) { auto I = ProfileMap.find(SampleContext(Context)); @@ -235,6 +363,17 @@ void CSProfileGenerator::generateProfile() { FunctionSamples::ProfileIsCS = true; + if (Binary->usePseudoProbes()) { + // Enable pseudo probe functionalities in SampleProf + FunctionSamples::ProfileIsProbeBased = true; + generateProbeBasedProfile(); + } else { + generateLineNumBasedProfile(); + } + postProcessProfiles(); +} + +void CSProfileGenerator::generateLineNumBasedProfile() { for (const auto &CI : SampleCounters) { const StringBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); @@ -243,38 +382,19 @@ getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); // Fill in function body samples - populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter); + populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); // Fill in boundary sample counts as well as call site samples for calls - populateFunctionBoundarySamples(CtxKey->Context, FunctionProfile, - CI.second.BranchCounter); + populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, + CI.second.BranchCounter); } // Fill in call site value sample for inlined calls and also use context to // infer missing samples. Since we don't have call count for inlined // functions, we estimate it from inlinee's profile using the entry of the // body sample. populateInferredFunctionSamples(); - - postProcessProfiles(); -} - -void CSProfileGenerator::updateBodySamplesforFunctionProfile( - FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, - uint64_t Count) { - // Filter out invalid negative(int type) lineOffset - if (LeafLoc.Callsite.LineOffset & 0x80000000) - return; - // Use the maximum count of samples with same line location - ErrorOr R = FunctionProfile.findSamplesAt( - LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator); - uint64_t PreviousCount = R ? R.get() : 0; - if (PreviousCount < Count) { - FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset, - LeafLoc.Callsite.Discriminator, - Count - PreviousCount); - } } -void CSProfileGenerator::populateFunctionBodySamples( +void CSProfileGenerator::populateBodySamplesForFunction( FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { // Compute disjoint ranges first, so we can use MAX // for calculating count for each location. @@ -290,13 +410,9 @@ continue; InstructionPointer IP(Binary, RangeBegin, true); - // Disjoint ranges may have range in the middle of two instr, // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range // can be Addr1+1 to Addr2-1. We should ignore such range. - if (IP.Address > RangeEnd) - continue; - while (IP.Address <= RangeEnd) { uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); @@ -312,7 +428,7 @@ } } -void CSProfileGenerator::populateFunctionBoundarySamples( +void CSProfileGenerator::populateBoundarySamplesForFunction( SampleContextFrames ContextId, FunctionSamples &FunctionProfile, const BranchSample &BranchCounters) { @@ -320,6 +436,8 @@ uint64_t SourceOffset = Entry.first.first; uint64_t TargetOffset = Entry.first.second; uint64_t Count = Entry.second; + assert(Count != 0 && "Unexpected zero weight branch"); + // Get the callee name by branch target if it's a call branch StringRef CalleeName = FunctionSamples::getCanonicalFnName( Binary->getFuncFromStartOffset(TargetOffset)); @@ -341,7 +459,6 @@ CalleeCtx.back() = *LeafLoc; CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); - assert(Count != 0 && "Unexpected zero weight branch"); CalleeProfile.addHeadSamples(Count); } } @@ -434,12 +551,6 @@ (Summary->getDetailedSummary())); } -void CSProfileGenerator::write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap) { - if (std::error_code EC = Writer->write(ProfileMap)) - exitWithError(std::move(EC)); -} - // Helper function to extract context prefix string stack // Extract context stack for reusing, leaf context stack will // be added compressed while looking up function profile @@ -452,10 +563,7 @@ } } -void PseudoProbeCSProfileGenerator::generateProfile() { - // Enable pseudo probe functionalities in SampleProf - FunctionSamples::ProfileIsProbeBased = true; - FunctionSamples::ProfileIsCS = true; +void CSProfileGenerator::generateProbeBasedProfile() { for (const auto &CI : SampleCounters) { const ProbeBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); @@ -467,12 +575,10 @@ // Fill in boundary samples for a call probe populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); } - - postProcessProfiles(); } -void PseudoProbeCSProfileGenerator::extractProbesFromRange( - const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter) { +void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, + ProbeCounterMap &ProbeCounter) { RangeSample Ranges; findDisjointRanges(Ranges, RangeCounter); for (const auto &Range : Ranges) { @@ -509,7 +615,7 @@ } } -void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( +void CSProfileGenerator::populateBodySamplesWithProbes( const RangeSample &RangeCounter, SampleContextFrames ContextStack) { ProbeCounterMap ProbeCounter; // Extract the top frame probes by looking up each address among the range in @@ -568,7 +674,7 @@ } } -void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( +void CSProfileGenerator::populateBoundarySamplesWithProbes( const BranchSample &BranchCounter, SampleContextFrames ContextStack) { for (auto BI : BranchCounter) { uint64_t SourceOffset = BI.first.first; @@ -592,7 +698,7 @@ } } -FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( +FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { // Explicitly copy the context for appending the leaf context diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -232,12 +232,6 @@ /// 3. Pseudo probe related sections, used by probe-based profile /// generation. void load(); - const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const { - auto I = Offset2LocStackMap.find(Offset); - assert(I != Offset2LocStackMap.end() && - "Can't find location for offset in the binary"); - return I->second; - } public: ProfiledBinary(const StringRef Path) @@ -310,13 +304,23 @@ } StringRef getFuncFromStartOffset(uint64_t Offset) { - return FuncStartAddrMap[Offset]; + auto I = FuncStartAddrMap.find(Offset); + if (I == FuncStartAddrMap.end()) + return StringRef(); + return I->second; } uint32_t getFuncSizeForContext(SampleContext &Context) { return FuncSizeTracker.getFuncSizeForContext(Context); } + const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const { + auto I = Offset2LocStackMap.find(Offset); + assert(I != Offset2LocStackMap.end() && + "Can't find location for offset in the binary"); + return I->second; + } + Optional getInlineLeafFrameLoc(uint64_t Offset) { const auto &Stack = getFrameLocationStack(Offset); if (Stack.empty()) diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -93,14 +93,9 @@ if (SkipSymbolization) return EXIT_SUCCESS; - // TBD - if (Reader->getPerfScriptType() == PERF_LBR) { - WithColor::warning() << "Currently LBR only perf script is not supported!"; - return EXIT_SUCCESS; - } - - std::unique_ptr Generator = ProfileGenerator::create( - Binary.get(), Reader->getSampleCounters(), Reader->getPerfScriptType()); + std::unique_ptr Generator = + ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(), + Reader->getPerfScriptType()); Generator->generateProfile(); Generator->write();