diff --git a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test @@ -1,13 +1,32 @@ ; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER +; RUN: FileCheck %s --input-file %t -; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Range Counter: -; CHECK-UNWINDER: (800, 858): 1 -; CHECK-UNWINDER: (80e, 82b): 1 -; CHECK-UNWINDER: (80e, 858): 13 +; CHECK: [main:2 @ foo]:74:0 +; CHECK-NEXT: 2: 15 +; CHECK-NEXT: 3: 15 +; CHECK-NEXT: 4: 14 +; CHECK-NEXT: 5: 1 +; CHECK-NEXT: 6: 15 +; CHECK-NEXT: 8: 14 bar:14 +; CHECK-NEXT: !CFGChecksum: 138950591924 +; CHECK-NEXT:[main:2 @ foo:8 @ bar]:28:14 +; CHECK-NEXT: 1: 14 +; CHECK-NEXT: 2: 0 +; CHECK-NEXT: 3: 0 +; CHECK-NEXT: 4: 14 +; CHECK-NEXT: !CFGChecksum: 72617220756 -; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Branch Counter: -; CHECK-UNWINDER: (82b, 800): 1 -; CHECK-UNWINDER: (858, 80e): 15 + +; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Range Counter: +; CHECK-UNWINDER-EMPTY: +; CHECK-UNWINDER-NEXT: (800, 858): 1 +; CHECK-UNWINDER-NEXT: (80e, 82b): 1 +; CHECK-UNWINDER-NEXT: (80e, 858): 13 + +; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Branch Counter: +; CHECK-UNWINDER-EMPTY: +; CHECK-UNWINDER-NEXT: (82b, 800): 1 +; CHECK-UNWINDER-NEXT: (858, 80e): 15 ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test +++ 
b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test @@ -1,19 +1,34 @@ ; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER - - -; CHECK-UNWINDER: Binary(noinline-cs-pseudoprobe.perfbin)'s Range Counter: -; CHECK-UNWINDER: main:2 -; CHECK-UNWINDER: (79e, 7bf): 15 -; CHECK-UNWINDER: (7c4, 7cf): 15 -; CHECK-UNWINDER: main:2 @ foo:8 -; CHECK-UNWINDER: (760, 77f): 15 - -; CHECK-UNWINDER: Binary(noinline-cs-pseudoprobe.perfbin)'s Branch Counter: -; CHECK-UNWINDER: main:2 -; CHECK-UNWINDER: (7bf, 760): 15 -; CHECK-UNWINDER: (7cf, 79e): 16 -; CHECK-UNWINDER: main:2 @ foo:8 -; CHECK-UNWINDER: (77f, 7c4): 17 +; RUN: FileCheck %s --input-file %t + +; CHECK: [main:2 @ foo]:75:0 +; CHECK-NEXT: 2: 15 +; CHECK-NEXT: 3: 15 +; CHECK-NEXT: 4: 15 +; CHECK-NEXT: 6: 15 +; CHECK-NEXT: 8: 15 bar:15 +; CHECK-NEXT: !CFGChecksum: 138950591924 +; CHECK-NEXT:[main:2 @ foo:8 @ bar]:30:15 +; CHECK-NEXT: 1: 15 +; CHECK-NEXT: 2: 0 +; CHECK-NEXT: 3: 0 +; CHECK-NEXT: 4: 15 +; CHECK-NEXT: !CFGChecksum: 72617220756 + + +; CHECK-UNWINDER: Binary(noinline-cs-pseudoprobe.perfbin)'s Range Counter: +; CHECK-UNWINDER-NEXT: main:2 +; CHECK-UNWINDER-NEXT: (79e, 7bf): 15 +; CHECK-UNWINDER-NEXT: (7c4, 7cf): 15 +; CHECK-UNWINDER-NEXT: main:2 @ foo:8 +; CHECK-UNWINDER-NEXT: (760, 77f): 15 + +; CHECK-UNWINDER: Binary(noinline-cs-pseudoprobe.perfbin)'s Branch Counter: +; CHECK-UNWINDER-NEXT: main:2 +; CHECK-UNWINDER-NEXT: (7bf, 760): 15 +; CHECK-UNWINDER-NEXT: (7cf, 79e): 16 +; CHECK-UNWINDER-NEXT: main:2 @ foo:8 +; CHECK-UNWINDER-NEXT: (77f, 7c4): 17 ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -568,11 +568,7 @@ } if 
(HasHybridPerf) { - // Set up ProfileIsCS to enable context-sensitive functionalities - // in SampleProf - FunctionSamples::ProfileIsCS = true; PerfType = PERF_LBR_STACK; - } else { // TODO: Support other type of perf script PerfType = PERF_INVILID; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -25,7 +25,7 @@ ProfileGenerator(){}; virtual ~ProfileGenerator() = default; static std::unique_ptr - create(const BinarySampleCounterMap &SampleCounters, + create(const BinarySampleCounterMap &BinarySampleCounters, enum PerfScriptType SampleType); virtual void generateProfile() = 0; @@ -50,7 +50,6 @@ */ void findDisjointRanges(RangeSample &DisjointRanges, const RangeSample &Ranges); - // Used by SampleProfileWriter StringMap ProfileMap; }; @@ -65,6 +64,8 @@ public: void generateProfile() override { + // Enable context-sensitive functionalities in SampleProf + FunctionSamples::ProfileIsCS = true; for (const auto &BI : BinarySampleCounters) { ProfiledBinary *Binary = BI.first; for (const auto &CI : BI.second) { @@ -90,14 +91,16 @@ populateInferredFunctionSamples(); } +protected: + // Lookup or create FunctionSamples for the context + FunctionSamples &getFunctionProfileForContext(StringRef ContextId); + private: // Helper function for updating body sample for a leaf location in // FunctionProfile void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc, uint64_t Count); - // Lookup or create FunctionSamples for the context - FunctionSamples &getFunctionProfileForContext(StringRef ContextId); void populateFunctionBodySamples(FunctionSamples &FunctionProfile, const RangeSample &RangeCounters, ProfiledBinary *Binary); @@ -108,14 +111,38 @@ void populateInferredFunctionSamples(); }; +using ProbeCounterMap = std::unordered_map; + class PseudoProbeCSProfileGenerator : public 
CSProfileGenerator { public: PseudoProbeCSProfileGenerator(const BinarySampleCounterMap &Counters) : CSProfileGenerator(Counters) {} - void generateProfile() override { - // TODO - } + void generateProfile() override; + +private: + // Go through each address from range to extract the top frame probe by + // looking up in the Address2ProbeMap + void extractProbesFromRange(const RangeSample &RangeCounter, + ProbeCounterMap &ProbeCounter, + ProfiledBinary *Binary); + // Fill in function body samples from probes + void populateBodySamplesWithProbes(const RangeSample &RangeCounter, + StringRef PrefixContextId, + ProfiledBinary *Binary); + // Fill in boundary samples for a call probe + void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, + StringRef PrefixContextId, + ProfiledBinary *Binary); + // Helper function to get FunctionSamples for the leaf inlined context + FunctionSamples & + getFunctionProfileForLeaf(StringRef PrefixContextId, + SmallVector &LeafInlinedContext, + const PseudoProbeFuncDesc *LeafFuncDesc); + // Helper function to get FunctionSamples for the leaf probe + FunctionSamples &getFunctionProfileForLeafProbe(StringRef PrefixContextId, + const PseudoProbe *LeafProbe, + ProfiledBinary *Binary); }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -316,5 +316,198 @@ } } +// Helper function to extract context prefix +// Note that the string in ContextStrStack is in callee-caller order +// So process the string vector reversely +static std::string +extractPrefixContextId(const SmallVector &Probes, + ProfiledBinary *Binary) { + SmallVector ContextStrStack; + for (const auto *P : Probes) { + Binary->getInlineContextForProbe(P, ContextStrStack, true); + } + std::ostringstream OContextStr; + for (auto &CxtStr : ContextStrStack) { + if 
(OContextStr.str().size()) + OContextStr << " @ "; + OContextStr << CxtStr; + } + return OContextStr.str(); +} + +void PseudoProbeCSProfileGenerator::generateProfile() { + // Enable CS and pseudo probe functionalities in SampleProf + FunctionSamples::ProfileIsCS = true; + FunctionSamples::ProfileIsProbeBased = true; + for (const auto &BI : BinarySampleCounters) { + ProfiledBinary *Binary = BI.first; + for (const auto &CI : BI.second) { + const ProbeBasedCtxKey *CtxKey = + dyn_cast(CI.first.getPtr()); + // PrefixContextId is the context id string except for the leaf probe's + // context, the final ContextId will be: + // ContextId = PrefixContextId + LeafContextId; + std::string PrefixContextId = + extractPrefixContextId(CtxKey->Probes, Binary); + // Fill in function body samples from probes, also infer caller's samples + // from callee's probe + populateBodySamplesWithProbes(CI.second.RangeCounter, PrefixContextId, + Binary); + // Fill in boundary samples for a call probe + populateBoundarySamplesWithProbes(CI.second.BranchCounter, + PrefixContextId, Binary); + } + } +} + +void PseudoProbeCSProfileGenerator::extractProbesFromRange( + const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, + ProfiledBinary *Binary) { + RangeSample Ranges; + findDisjointRanges(Ranges, RangeCounter); + for (const auto Range : Ranges) { + uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); + uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t Count = Range.second; + // Disjoint ranges have introduced zero-filled gaps that + // don't belong to current context, filter them out. + if (Count == 0) + continue; + + InstructionPointer IP(Binary, RangeBegin, true); + + // Disjoint ranges may have range in the middle of two instr, + // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range + // can be Addr1+1 to Addr2-1. We should ignore such range. 
+ if (IP.Address > RangeEnd) + continue; + + while (IP.Address <= RangeEnd) { + const AddressProbesMap &Address2ProbesMap = + Binary->getAddress2ProbesMap(); + auto It = Address2ProbesMap.find(IP.Address); + if (It != Address2ProbesMap.end()) { + for (const auto &Probe : It->second) { + if (!Probe.isBlock()) + continue; + ProbeCounter[&Probe] += Count; + } + } + + IP.advance(); + } + } +} + +void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( + const RangeSample &RangeCounter, StringRef PrefixContextId, + ProfiledBinary *Binary) { + ProbeCounterMap ProbeCounter; + // Extract the top frame probes by looking up each address among the range in + // the Address2ProbeMap + extractProbesFromRange(RangeCounter, ProbeCounter, Binary); + for (auto PI : ProbeCounter) { + const PseudoProbe *Probe = PI.first; + uint64_t Count = PI.second; + FunctionSamples &FunctionProfile = + getFunctionProfileForLeafProbe(PrefixContextId, Probe, Binary); + // Drop the samples collected for a dangling probe since it's misleading. + // We still report the probe but with a special zero count. The compiler + // won't trust the zero count and will rely on the counts inference + // algorithm to get the probe a reasonable count. Note that a zero count is + // different from a missing count, where the latter really tells the + // compiler that a probe is never executed. 
+ if (Probe->isDangling()) + Count = 0; + + FunctionProfile.addBodySamples(Probe->Index, 0, Count); + FunctionProfile.addTotalSamples(Count); + if (Probe->isEntry()) { + FunctionProfile.addHeadSamples(Count); + // Look up for the caller's function profile + SmallVector LeafInlinedContext; + Binary->getInlineContextForProbe(Probe, LeafInlinedContext); + const auto *CallerDesc = Binary->getCallerDescForProbe(Probe); + if (CallerDesc != nullptr) { + StringRef CallerLoc = LeafInlinedContext.back(); + uint64_t CallerIndex = 0; + CallerLoc.split(":").second.getAsInteger(10, CallerIndex); + FunctionSamples &CallerProfile = getFunctionProfileForLeaf( + PrefixContextId, LeafInlinedContext, CallerDesc); + CallerProfile.addBodySamples(CallerIndex, 0, Count); + CallerProfile.addTotalSamples(Count); + CallerProfile.addCalledTargetSamples(CallerIndex, 0, + FunctionProfile.getName(), Count); + } + } + } +} + +void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( + const BranchSample &BranchCounter, StringRef PrefixContextId, + ProfiledBinary *Binary) { + for (auto BI : BranchCounter) { + uint64_t SourceOffset = BI.first.first; + uint64_t TargetOffset = BI.first.second; + uint64_t Count = BI.second; + StringRef CalleeName = FunctionSamples::getCanonicalFnName( + Binary->getFuncFromStartOffset(TargetOffset)); + if (CalleeName.size() == 0) + continue; + + uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); + const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress); + if (CallProbe == nullptr) + continue; + FunctionSamples &FunctionProfile = + getFunctionProfileForLeafProbe(PrefixContextId, CallProbe, Binary); + FunctionProfile.addBodySamples(CallProbe->Index, 0, Count); + FunctionProfile.addTotalSamples(Count); + FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName, + Count); + } +} + +FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeaf( + StringRef PrefixContextId, SmallVector 
&LeafInlinedContext, + const PseudoProbeFuncDesc *LeafFuncDesc) { + assert(LeafInlinedContext.size() && + "Profile context must have the leaf frame"); + std::ostringstream OContextStr; + OContextStr << PrefixContextId.str(); + + for (uint32_t I = 0; I < LeafInlinedContext.size() - 1; I++) { + if (OContextStr.str().size()) + OContextStr << " @ "; + OContextStr << LeafInlinedContext[I]; + } + // For leaf inlined context with the top frame, we should strip off the top + // frame's probe id, like: + // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" + if (OContextStr.str().size()) + OContextStr << " @ "; + StringRef LeafLoc = LeafInlinedContext.back(); + OContextStr << LeafLoc.split(":").first.str(); + + FunctionSamples &FunctionProile = + getFunctionProfileForContext(OContextStr.str()); + FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash); + return FunctionProile; +} + +FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( + StringRef PrefixContextId, const PseudoProbe *LeafProbe, + ProfiledBinary *Binary) { + SmallVector LeafInlinedContext; + Binary->getInlineContextForProbe(LeafProbe, LeafInlinedContext); + // Note that the context from probe doesn't include leaf frame, + // hence we need to retrieve and append the leaf frame. 
+ const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID); + LeafInlinedContext.emplace_back(FuncDesc->FuncName + ":" + + Twine(LeafProbe->Index).str()); + return getFunctionProfileForLeaf(PrefixContextId, LeafInlinedContext, + FuncDesc); +} + } // end namespace sampleprof } // end namespace llvm diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -244,10 +244,19 @@ void getInlineContextForProbe(const PseudoProbe *Probe, SmallVector &InlineContextStack, - bool IncludeLeaf) const { + bool IncludeLeaf = false) const { return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack, IncludeLeaf); } + const AddressProbesMap &getAddress2ProbesMap() const { + return ProbeDecoder.getAddress2ProbesMap(); + } + const PseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) { + return ProbeDecoder.getFuncDescForGUID(GUID); + } + const PseudoProbeFuncDesc *getCallerDescForProbe(const PseudoProbe *Probe) { + return ProbeDecoder.getCallerDescForProbe(Probe); + } }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/PseudoProbe.h b/llvm/tools/llvm-profgen/PseudoProbe.h --- a/llvm/tools/llvm-profgen/PseudoProbe.h +++ b/llvm/tools/llvm-profgen/PseudoProbe.h @@ -195,17 +195,26 @@ // Look up the probe of a call for the input address const PseudoProbe *getCallProbeForAddr(uint64_t Address) const; + const PseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const; + // Helper function to populate one probe's inline stack into // \p InlineContextStack. 
// Current leaf location info will be added if IncludeLeaf is true // Example: // Current probe(bar:3) inlined at foo:2 then inlined at main:1 // IncludeLeaf = true, Output: [main:1, foo:2, bar:3] - // IncludeLeaf = false, OUtput: [main:1, foo:2] + // IncludeLeaf = false, Output: [main:1, foo:2] void getInlineContextForProbe(const PseudoProbe *Probe, SmallVector &InlineContextStack, bool IncludeLeaf) const; + + const AddressProbesMap &getAddress2ProbesMap() const { + return Address2ProbesMap; + } + + const PseudoProbeFuncDesc * + getCallerDescForProbe(const PseudoProbe *Probe) const; }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/PseudoProbe.cpp b/llvm/tools/llvm-profgen/PseudoProbe.cpp --- a/llvm/tools/llvm-profgen/PseudoProbe.cpp +++ b/llvm/tools/llvm-profgen/PseudoProbe.cpp @@ -313,21 +313,33 @@ return CallProbe; } +const PseudoProbeFuncDesc * +PseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const { + auto It = GUID2FuncDescMap.find(GUID); + assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist"); + return &It->second; +} + void PseudoProbeDecoder::getInlineContextForProbe( const PseudoProbe *Probe, SmallVector &InlineContextStack, bool IncludeLeaf) const { - if (IncludeLeaf) { - // Note that the context from probe doesn't include leaf frame, - // hence we need to retrieve and prepend leaf if requested. - auto It = GUID2FuncDescMap.find(Probe->GUID); - assert(It != GUID2FuncDescMap.end()); - StringRef FuncName = It->second.FuncName; - // InlineContextStack is in callee-caller order, so push leaf in the front - InlineContextStack.emplace_back(FuncName.str() + ":" + - Twine(Probe->Index).str()); - } - Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true); + if (!IncludeLeaf) + return; + // Note that the context from probe doesn't include leaf frame, + // hence we need to retrieve and prepend leaf if requested. 
+ const auto *FuncDesc = getFuncDescForGUID(Probe->GUID); + InlineContextStack.emplace_back(FuncDesc->FuncName + ":" + + Twine(Probe->Index).str()); +} + +const PseudoProbeFuncDesc * +PseudoProbeDecoder::getCallerDescForProbe(const PseudoProbe *Probe) const { + PseudoProbeInlineTree *CallerNode = Probe->InlineTree; + if (std::get<0>(CallerNode->ISite) == 0) { + return nullptr; + } + return getFuncDescForGUID(std::get<0>(CallerNode->ISite)); } } // end namespace sampleprof