diff --git a/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test
@@ -0,0 +1,46 @@
+; RUN: llvm-profgen --format=text --ignore-stack-samples --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0
+; RUN: FileCheck %s --input-file %t
+
+; CHECK: main:88:0
+; CHECK-NEXT: 1: 0
+; CHECK-NEXT: 2: foo:88
+; CHECK-NEXT: 1: 0
+; CHECK-NEXT: 2: 15
+; CHECK-NEXT: 3: 15
+; CHECK-NEXT: 4: 14
+; CHECK-NEXT: 5: 1
+; CHECK-NEXT: 6: 15
+; CHECK-NEXT: 7: 0
+; CHECK-NEXT: 9: 0
+; CHECK-NEXT: 8: bar:28
+; CHECK-NEXT: 1: 14
+; CHECK-NEXT: 4: 14
+; CHECK-NEXT: !CFGChecksum: 72617220756
+; CHECK-NEXT: !CFGChecksum: 563088904013236
+; CHECK-NEXT: !CFGChecksum: 281479271677951
+
+
+; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
+; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
+; -g test.c -o a.out
+
+#include <stdio.h>
+
+int bar(int x, int y) {
+  if (x % 3) {
+    return x - y;
+  }
+  return x + y;
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 4000 * 4000)
+    if (i % 91) s = bar(i, s); else s += 30;
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test
@@ -0,0 +1,48 @@
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t1 --ignore-stack-samples
+; RUN: FileCheck %s --input-file %t1
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t2 --ignore-stack-samples
+; RUN: FileCheck %s --input-file %t2
+
+
+; CHECK: foo:75:0
+; CHECK-NEXT: 1: 0
+; CHECK-NEXT: 2: 15
+; CHECK-NEXT: 3: 15
+; CHECK-NEXT: 4: 15
+; CHECK-NEXT: 5: 0
+; CHECK-NEXT: 6: 15
+; CHECK-NEXT: 7: 0
+; CHECK-NEXT: 8: 15 bar:15
+; CHECK-NEXT: 9: 0
+; CHECK-NEXT: !CFGChecksum: 563088904013236
+; CHECK-NEXT: bar:30:15
+; CHECK-NEXT: 1: 15
+; CHECK-NEXT: 4: 15
+; CHECK-NEXT: !CFGChecksum: 72617220756
+
+
+
+; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
+; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
+; -fno-inline-functions -g test.c -o a.out
+
+#include <stdio.h>
+
+int bar(int x, int y) {
+  if (x % 3) {
+    return x - y;
+  }
+  return x + y;
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 4000 * 4000)
+    if (i % 91) s = bar(i, s); else s += 30;
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -22,6 +22,9 @@
 namespace llvm {
 namespace sampleprof {
 
+using ProbeCounterMap =
+    std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
+
 // This base class for profile generation of sample-based PGO. We reuse all
 // structures relating to function profiles and profile writers as seen in
 // /ProfileData/SampleProf.h.
@@ -77,6 +80,12 @@
    */
   void findDisjointRanges(RangeSample &DisjointRanges,
                           const RangeSample &Ranges);
+
+  // Go through each address from range to extract the top frame probe by
+  // looking up in the Address2ProbeMap
+  void extractProbesFromRange(const RangeSample &RangeCounter,
+                              ProbeCounterMap &ProbeCounter);
+
   // Helper function for updating body sample for a leaf location in
   // FunctionProfile
   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
@@ -118,6 +127,7 @@
 
 private:
   void generateLineNumBasedProfile();
+  void generateProbeBasedProfile();
   RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
   FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName);
   // Helper function to get the leaf frame's FunctionProfile by traversing the
@@ -125,18 +135,18 @@
   // function profile.
   FunctionSamples &
   getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
-                                   uint64_t Count);
+                                   uint64_t Count, bool WithProbe = false);
   void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
   void
   populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
+  void populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
+  void
+  populateBoundarySamplesWithProbesForAllFunctions(const BranchSample &BranchCounters);
   void postProcessProfiles();
   void trimColdProfiles(const SampleProfileMap &Profiles,
                         uint64_t ColdCntThreshold);
 };
 
-using ProbeCounterMap =
-    std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
-
 class CSProfileGenerator : public ProfileGeneratorBase {
 public:
   CSProfileGenerator(ProfiledBinary *Binary,
@@ -281,10 +291,7 @@
   void populateInferredFunctionSamples();
 
   void generateProbeBasedProfile();
-  // Go through each address from range to extract the top frame probe by
-  // looking up in the Address2ProbeMap
-  void extractProbesFromRange(const RangeSample &RangeCounter,
-                              ProbeCounterMap &ProbeCounter);
+
   // Fill in function body samples from probes
   void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
                                      SampleContextFrames ContextStack);
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -382,9 +382,7 @@
 
 void ProfileGenerator::generateProfile() {
   if (Binary->usePseudoProbes()) {
-    // TODO: Support probe based profile generation
-    exitWithError("Probe based profile generation not supported for AutoFDO, "
-      "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`.");
+    generateProbeBasedProfile();
   } else {
     generateLineNumBasedProfile();
   }
@@ -426,12 +424,80 @@
   updateTotalSamples();
 }
 
+void ProfileGenerator::generateProbeBasedProfile() {
+  assert(SampleCounters.size() == 1 &&
+         "Must have one entry for profile generation.");
+  // Enable pseudo probe functionalities in SampleProf
+  FunctionSamples::ProfileIsProbeBased = true;
+  const SampleCounter &SC = SampleCounters.begin()->second;
+  // Fill in function body samples
+  populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
+  // Fill in boundary sample counts as well as call site samples for calls
+  populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
+
+  updateTotalSamples();
+}
+
+void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
+    const RangeSample &RangeCounter) {
+  ProbeCounterMap ProbeCounter;
+  extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter);
+
+  for (const auto &PI : ProbeCounter) {
+    const MCDecodedPseudoProbe *Probe = PI.first;
+    uint64_t Count = PI.second;
+    SampleContextFrameVector FrameVec;
+    Binary->getInlineContextForProbe(Probe, FrameVec, true);
+    FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count, true);
+    FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
+    if (Probe->isEntry())
+      FunctionProfile.addHeadSamples(Count);
+  }
+}
+
+void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
+    const BranchSample &BranchCounters) {
+  for (const auto &Entry : BranchCounters) {
+    uint64_t SourceOffset = Entry.first.first;
+    uint64_t TargetOffset = Entry.first.second;
+    uint64_t Count = Entry.second;
+    assert(Count != 0 && "Unexpected zero weight branch");
+
+    StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
+    if (CalleeName.empty())
+      continue;
+
+    uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
+    const MCDecodedPseudoProbe *CallProbe =
+        Binary->getCallProbeForAddr(SourceAddress);
+    if (CallProbe == nullptr)
+      continue;
+
+    // Record called target sample and its count.
+    SampleContextFrameVector FrameVec;
+    Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
+
+    if (!FrameVec.empty()) {
+      FunctionSamples &FunctionProfile =
+          getLeafProfileAndAddTotalSamples(FrameVec, 0, true);
+      FunctionProfile.addCalledTargetSamples(
+          FrameVec.back().Location.LineOffset,
+          0,
+          CalleeName, Count);
+    }
+  }
+}
+
 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
-    const SampleContextFrameVector &FrameVec, uint64_t Count) {
+    const SampleContextFrameVector &FrameVec, uint64_t Count, bool WithProbe) {
   // Get top level profile
   FunctionSamples *FunctionProfile =
       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
   FunctionProfile->addTotalSamples(Count);
+  if (WithProbe) {
+    const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
+    FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
+  }
 
   for (size_t I = 1; I < FrameVec.size(); I++) {
     LineLocation Callsite(
@@ -447,6 +513,10 @@
     }
     FunctionProfile = &Ret.first->second;
     FunctionProfile->addTotalSamples(Count);
+    if (WithProbe) {
+      const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
+      FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
+    }
   }
 
   return *FunctionProfile;
@@ -579,8 +649,6 @@
     computeSizeForProfiledFunctions();
 
   if (Binary->usePseudoProbes()) {
-    // Enable pseudo probe functionalities in SampleProf
-    FunctionSamples::ProfileIsProbeBased = true;
     generateProbeBasedProfile();
   } else {
     generateLineNumBasedProfile();
@@ -816,6 +884,8 @@
 }
 
 void CSProfileGenerator::generateProbeBasedProfile() {
+  // Enable pseudo probe functionalities in SampleProf
+  FunctionSamples::ProfileIsProbeBased = true;
   for (const auto &CI : SampleCounters) {
     const auto *CtxKey = cast(CI.first.getPtr());
     SampleContextFrameVector ContextStack;
@@ -828,7 +898,7 @@
   }
 }
 
-void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
+void ProfileGeneratorBase::extractProbesFromRange(const RangeSample &RangeCounter,
                                                 ProbeCounterMap &ProbeCounter) {
   RangeSample Ranges;
   findDisjointRanges(Ranges, RangeCounter);
@@ -836,10 +906,6 @@
     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
     uint64_t Count = Range.second;
-    // Disjoint ranges have introduce zero-filled gap that
-    // doesn't belong to current context, filter them out.
-    if (Count == 0)
-      continue;
 
     InstructionPointer IP(Binary, RangeBegin, true);
     // Disjoint ranges may have range in the middle of two instr,
@@ -854,8 +920,6 @@
       auto It = Address2ProbesMap.find(IP.Address);
       if (It != Address2ProbesMap.end()) {
         for (const auto &Probe : It->second) {
-          if (!Probe.isBlock())
-            continue;
           ProbeCounter[&Probe] += Count;
         }
       }
@@ -875,6 +939,10 @@
   for (const auto &PI : ProbeCounter) {
     const MCDecodedPseudoProbe *Probe = PI.first;
     uint64_t Count = PI.second;
+    // Disjoint ranges introduce zero-filled gaps that don't belong to the
+    // current context; skip them, along with non-block probes.
+    if (!Probe->isBlock() || Count == 0)
+      continue;
     FunctionSamples &FunctionProfile =
         getFunctionProfileForLeafProbe(ContextStack, Probe);
     // Record the current frame and FunctionProfile whenever samples are