diff --git a/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test @@ -0,0 +1,46 @@ +; RUN: llvm-profgen --format=text --ignore-stack-samples --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 +; RUN: FileCheck %s --input-file %t + +; CHECK: main:88:0 +; CHECK-NEXT: 1: 0 +; CHECK-NEXT: 2: foo:88 +; CHECK-NEXT: 1: 0 +; CHECK-NEXT: 2: 15 +; CHECK-NEXT: 3: 15 +; CHECK-NEXT: 4: 14 +; CHECK-NEXT: 5: 1 +; CHECK-NEXT: 6: 15 +; CHECK-NEXT: 7: 0 +; CHECK-NEXT: 9: 0 +; CHECK-NEXT: 8: bar:28 +; CHECK-NEXT: 1: 14 +; CHECK-NEXT: 4: 14 +; CHECK-NEXT: !CFGChecksum: 72617220756 +; CHECK-NEXT: !CFGChecksum: 563088904013236 +; CHECK-NEXT: !CFGChecksum: 281479271677951 + + +; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling +; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls +; -g test.c -o a.out + +#include <stdio.h> + +int bar(int x, int y) { + if (x % 3) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 91) s = bar(i, s); else s += 30; + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test @@ -0,0 +1,48 @@ +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t1 --ignore-stack-samples +; RUN: FileCheck %s --input-file %t1 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t2 
--ignore-stack-samples +; RUN: FileCheck %s --input-file %t2 + + +; CHECK: foo:75:0 +; CHECK-NEXT: 1: 0 +; CHECK-NEXT: 2: 15 +; CHECK-NEXT: 3: 15 +; CHECK-NEXT: 4: 15 +; CHECK-NEXT: 5: 0 +; CHECK-NEXT: 6: 15 +; CHECK-NEXT: 7: 0 +; CHECK-NEXT: 8: 15 bar:15 +; CHECK-NEXT: 9: 0 +; CHECK-NEXT: !CFGChecksum: 563088904013236 +; CHECK-NEXT: bar:30:15 +; CHECK-NEXT: 1: 15 +; CHECK-NEXT: 4: 15 +; CHECK-NEXT: !CFGChecksum: 72617220756 + + + +; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling +; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls +; -fno-inline-functions -g test.c -o a.out + +#include <stdio.h> + +int bar(int x, int y) { + if (x % 3) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 91) s = bar(i, s); else s += 30; + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -22,6 +22,9 @@ namespace llvm { namespace sampleprof { +using ProbeCounterMap = + std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>; + // This base class for profile generation of sample-based PGO. We reuse all // structures relating to function profiles and profile writers as seen in // /ProfileData/SampleProf.h. 
@@ -77,6 +80,13 @@ */ void findDisjointRanges(RangeSample &DisjointRanges, const RangeSample &Ranges); + + // Go through each address from range to extract the top frame probe by + // looking up in the Address2ProbeMap + void extractProbesFromRange(const RangeSample &RangeCounter, + ProbeCounterMap &ProbeCounter, + bool FindDisjointRanges = true); + // Helper function for updating body sample for a leaf location in // FunctionProfile void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, @@ -118,6 +128,7 @@ private: void generateLineNumBasedProfile(); + void generateProbeBasedProfile(); RangeSample preprocessRangeCounter(const RangeSample &RangeCounter); FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName); // Helper function to get the leaf frame's FunctionProfile by traversing the @@ -129,14 +140,14 @@ void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); void populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); + void populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter); + void + populateBoundarySamplesWithProbesForAllFunctions(const BranchSample &BranchCounters); void postProcessProfiles(); void trimColdProfiles(const SampleProfileMap &Profiles, uint64_t ColdCntThreshold); }; -using ProbeCounterMap = - std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>; - class CSProfileGenerator : public ProfileGeneratorBase { public: CSProfileGenerator(ProfiledBinary *Binary, @@ -281,10 +292,7 @@ void populateInferredFunctionSamples(); void generateProbeBasedProfile(); - // Go through each address from range to extract the top frame probe by - // looking up in the Address2ProbeMap - void extractProbesFromRange(const RangeSample &RangeCounter, - ProbeCounterMap &ProbeCounter); + // Fill in function body samples from probes void populateBodySamplesWithProbes(const RangeSample &RangeCounter, SampleContextFrames ContextStack); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp 
b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -383,9 +383,7 @@ void ProfileGenerator::generateProfile() { if (Binary->usePseudoProbes()) { - // TODO: Support probe based profile generation - exitWithError("Probe based profile generation not supported for AutoFDO, " - "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`."); + generateProbeBasedProfile(); } else { generateLineNumBasedProfile(); } @@ -427,12 +425,80 @@ updateTotalSamples(); } +void ProfileGenerator::generateProbeBasedProfile() { + assert(SampleCounters.size() == 1 && + "Must have one entry for profile generation."); + // Enable pseudo probe functionalities in SampleProf + FunctionSamples::ProfileIsProbeBased = true; + const SampleCounter &SC = SampleCounters.begin()->second; + // Fill in function body samples + populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); + // Fill in boundary sample counts as well as call site samples for calls + populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); + + updateTotalSamples(); +} + +void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( + const RangeSample &RangeCounter) { + ProbeCounterMap ProbeCounter; + // preprocessRangeCounter returns disjoint ranges, so there is no need to redo that inside + // extractProbesFromRange. 
+ extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false); + + for (const auto &PI : ProbeCounter) { + const MCDecodedPseudoProbe *Probe = PI.first; + uint64_t Count = PI.second; + SampleContextFrameVector FrameVec; + Binary->getInlineContextForProbe(Probe, FrameVec, true); + FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count); + FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); + if (Probe->isEntry()) + FunctionProfile.addHeadSamples(Count); + } +} + +void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( + const BranchSample &BranchCounters) { + for (const auto &Entry : BranchCounters) { + uint64_t SourceOffset = Entry.first.first; + uint64_t TargetOffset = Entry.first.second; + uint64_t Count = Entry.second; + assert(Count != 0 && "Unexpected zero weight branch"); + + StringRef CalleeName = getCalleeNameForOffset(TargetOffset); + if (CalleeName.size() == 0) + continue; + + uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); + const MCDecodedPseudoProbe *CallProbe = + Binary->getCallProbeForAddr(SourceAddress); + if (CallProbe == nullptr) + continue; + + // Record called target sample and its count. 
+ SampleContextFrameVector FrameVec; + Binary->getInlineContextForProbe(CallProbe, FrameVec, true); + + if (!FrameVec.empty()) { + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, 0); + FunctionProfile.addCalledTargetSamples( + FrameVec.back().Location.LineOffset, 0, CalleeName, Count); + } + } +} + FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( const SampleContextFrameVector &FrameVec, uint64_t Count) { // Get top level profile FunctionSamples *FunctionProfile = &getTopLevelFunctionProfile(FrameVec[0].FuncName); FunctionProfile->addTotalSamples(Count); + if (Binary->usePseudoProbes()) { + const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName())); + FunctionProfile->setFunctionHash(FuncDesc->FuncHash); + } for (size_t I = 1; I < FrameVec.size(); I++) { LineLocation Callsite( @@ -448,6 +514,10 @@ } FunctionProfile = &Ret.first->second; FunctionProfile->addTotalSamples(Count); + if (Binary->usePseudoProbes()) { + const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName())); + FunctionProfile->setFunctionHash(FuncDesc->FuncHash); + } } return *FunctionProfile; @@ -580,8 +650,6 @@ computeSizeForProfiledFunctions(); if (Binary->usePseudoProbes()) { - // Enable pseudo probe functionalities in SampleProf - FunctionSamples::ProfileIsProbeBased = true; generateProbeBasedProfile(); } else { generateLineNumBasedProfile(); @@ -804,43 +872,20 @@ (Summary->getDetailedSummary())); } -// Helper function to extract context prefix string stack -// Extract context stack for reusing, leaf context stack will -// be added compressed while looking up function profile -static void extractPrefixContextStack( - SampleContextFrameVector &ContextStack, - const SmallVectorImpl &Probes, - ProfiledBinary *Binary) { - for (const auto *P : Probes) { - Binary->getInlineContextForProbe(P, ContextStack, true); - } -} - -void CSProfileGenerator::generateProbeBasedProfile() { 
- for (const auto &CI : SampleCounters) { - const auto *CtxKey = cast(CI.first.getPtr()); - SampleContextFrameVector ContextStack; - extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); - // Fill in function body samples from probes, also infer caller's samples - // from callee's probe - populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); - // Fill in boundary samples for a call probe - populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); +void ProfileGeneratorBase::extractProbesFromRange( + const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, + bool FindDisjointRanges) { + const RangeSample *PRanges = &RangeCounter; + RangeSample Ranges; + if (FindDisjointRanges) { + findDisjointRanges(Ranges, RangeCounter); + PRanges = &Ranges; } -} -void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, - ProbeCounterMap &ProbeCounter) { - RangeSample Ranges; - findDisjointRanges(Ranges, RangeCounter); - for (const auto &Range : Ranges) { + for (const auto &Range : *PRanges) { uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); uint64_t Count = Range.second; - // Disjoint ranges have introduce zero-filled gap that - // doesn't belong to current context, filter them out. 
- if (Count == 0) - continue; InstructionPointer IP(Binary, RangeBegin, true); // Disjoint ranges may have range in the middle of two instr, @@ -855,8 +900,6 @@ auto It = Address2ProbesMap.find(IP.Address); if (It != Address2ProbesMap.end()) { for (const auto &Probe : It->second) { - if (!Probe.isBlock()) - continue; ProbeCounter[&Probe] += Count; } } @@ -864,6 +907,33 @@ } } +// Helper function to extract context prefix string stack +// Extract context stack for reusing, leaf context stack will +// be added compressed while looking up function profile +static void extractPrefixContextStack( + SampleContextFrameVector &ContextStack, + const SmallVectorImpl &Probes, + ProfiledBinary *Binary) { + for (const auto *P : Probes) { + Binary->getInlineContextForProbe(P, ContextStack, true); + } +} + +void CSProfileGenerator::generateProbeBasedProfile() { + // Enable pseudo probe functionalities in SampleProf + FunctionSamples::ProfileIsProbeBased = true; + for (const auto &CI : SampleCounters) { + const auto *CtxKey = cast(CI.first.getPtr()); + SampleContextFrameVector ContextStack; + extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); + // Fill in function body samples from probes, also infer caller's samples + // from callee's probe + populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); + // Fill in boundary samples for a call probe + populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); + } +} + void CSProfileGenerator::populateBodySamplesWithProbes( const RangeSample &RangeCounter, SampleContextFrames ContextStack) { ProbeCounterMap ProbeCounter; @@ -876,6 +946,10 @@ for (const auto &PI : ProbeCounter) { const MCDecodedPseudoProbe *Probe = PI.first; uint64_t Count = PI.second; + // Only block probes are counted here. Disjoint ranges also introduce + // zero-filled gaps that don't belong to the current context; filter them out. 
+ if (!Probe->isBlock() || Count == 0) + continue; FunctionSamples &FunctionProfile = getFunctionProfileForLeafProbe(ContextStack, Probe); // Record the current frame and FunctionProfile whenever samples are