diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -55,6 +55,7 @@ #include #include #include +#include #include namespace llvm { @@ -353,6 +354,15 @@ // Decode pseudo_probe section to build address to probes map. bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); + // Decode pseudo_probe section to build address to probes map for specified + // functions only. + bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, + std::unordered_set &GuildFilter); + + bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + uint64_t &LastAddr, + std::unordered_set &GuildFilter); + // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -358,8 +358,9 @@ return true; } -bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start, - std::size_t Size) { +bool MCPseudoProbeDecoder::buildAddress2ProbeMap( + MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, + std::unordered_set &GuildFilter) { // The pseudo_probe section encodes an inline forest and each tree has a // format like: // FUNCTION BODY (one for each uninlined function present in the text @@ -390,101 +391,110 @@ // FUNCTION BODY // A FUNCTION BODY entry describing the inlined function. - Data = Start; - End = Data + Size; - - MCDecodedPseudoProbeInlineTree *Root = &DummyInlineRoot; - MCDecodedPseudoProbeInlineTree *Cur = &DummyInlineRoot; - uint64_t LastAddr = 0; uint32_t Index = 0; - // A DFS-based decoding - while (Data < End) { - if (Root == Cur) { - // Use a sequential id for top level inliner. 
- Index = Root->getChildren().size(); - } else { - // Read inline site for inlinees - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) - return false; - Index = std::move(*ErrorOrIndex); - } + if (Cur == &DummyInlineRoot) { + // Use a sequential id for top level inliner. + Index = Cur->getChildren().size(); + } else { + // Read inline site for inlinees + auto ErrorOrIndex = readUnsignedNumber(); + if (!ErrorOrIndex) + return false; + Index = std::move(*ErrorOrIndex); + } + + // Read guid + auto ErrorOrCurGuid = readUnencodedNumber(); + if (!ErrorOrCurGuid) + return false; + uint64_t Guid = std::move(*ErrorOrCurGuid); + + // Decide if top-level node should be discarded. + if (Cur == &DummyInlineRoot && !GuildFilter.empty() && + !GuildFilter.count(Guid)) + Cur = nullptr; + + // If the incoming node is null, all its children nodes should be discarded. + if (Cur) { // Switch/add to a new tree node(inlinee) Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index)); - // Read guid - auto ErrorOrCurGuid = readUnencodedNumber(); - if (!ErrorOrCurGuid) - return false; - Cur->Guid = std::move(*ErrorOrCurGuid); - // Read number of probes in the current node. - auto ErrorOrNodeCount = readUnsignedNumber(); - if (!ErrorOrNodeCount) + Cur->Guid = Guid; + } + + // Read number of probes in the current node. 
+ auto ErrorOrNodeCount = readUnsignedNumber(); + if (!ErrorOrNodeCount) + return false; + uint32_t NodeCount = std::move(*ErrorOrNodeCount); + // Read number of direct inlinees + auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); + if (!ErrorOrCurChildrenToProcess) + return false; + // Read all probes in this node + for (std::size_t I = 0; I < NodeCount; I++) { + // Read index + auto ErrorOrIndex = readUnsignedNumber(); + if (!ErrorOrIndex) return false; - uint32_t NodeCount = std::move(*ErrorOrNodeCount); - // Read number of direct inlinees - auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); - if (!ErrorOrCurChildrenToProcess) + uint32_t Index = std::move(*ErrorOrIndex); + // Read type | flag. + auto ErrorOrValue = readUnencodedNumber(); + if (!ErrorOrValue) return false; - Cur->ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); - // Read all probes in this node - for (std::size_t I = 0; I < NodeCount; I++) { - // Read index - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) + uint8_t Value = std::move(*ErrorOrValue); + uint8_t Kind = Value & 0xf; + uint8_t Attr = (Value & 0x70) >> 4; + // Read address + uint64_t Addr = 0; + if (Value & 0x80) { + auto ErrorOrOffset = readSignedNumber(); + if (!ErrorOrOffset) return false; - uint32_t Index = std::move(*ErrorOrIndex); - // Read type | flag. 
- auto ErrorOrValue = readUnencodedNumber(); - if (!ErrorOrValue) + int64_t Offset = std::move(*ErrorOrOffset); + Addr = LastAddr + Offset; + } else { + auto ErrorOrAddr = readUnencodedNumber(); + if (!ErrorOrAddr) return false; - uint8_t Value = std::move(*ErrorOrValue); - uint8_t Kind = Value & 0xf; - uint8_t Attr = (Value & 0x70) >> 4; - // Read address - uint64_t Addr = 0; - if (Value & 0x80) { - auto ErrorOrOffset = readSignedNumber(); - if (!ErrorOrOffset) - return false; - int64_t Offset = std::move(*ErrorOrOffset); - Addr = LastAddr + Offset; - } else { - auto ErrorOrAddr = readUnencodedNumber(); - if (!ErrorOrAddr) - return false; - Addr = std::move(*ErrorOrAddr); - } + Addr = std::move(*ErrorOrAddr); + } + + if (Cur) { // Populate Address2ProbesMap auto &Probes = Address2ProbesMap[Addr]; Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, Cur); Cur->addProbes(&Probes.back()); - LastAddr = Addr; } + LastAddr = Addr; + } - // Look for the parent for the next node by subtracting the current - // node count from tree counts along the parent chain. The first node - // in the chain that has a non-zero tree count is the target. 
-    while (Cur != Root) {
-      if (Cur->ChildrenToProcess == 0) {
-        Cur = static_cast(Cur->Parent);
-        if (Cur != Root) {
-          assert(Cur->ChildrenToProcess > 0 &&
-                 "Should have some unprocessed nodes");
-          Cur->ChildrenToProcess -= 1;
-        }
-      } else {
-        break;
-      }
-    }
+  uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
+  for (uint32_t I = 0; I < ChildrenToProcess; I++) {
+    // Propagate a decode failure from a nested inlinee instead of dropping it.
+    if (!buildAddress2ProbeMap(Cur, LastAddr, GuildFilter))
+      return false;
   }
+  return true;
+}
+
+// Decode the whole pseudo_probe section in [Start, Start + Size). Top-level
+// functions whose GUID is not in GuildFilter are skipped; an empty filter keeps
+// every function. Returns false as soon as any tree fails to decode.
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
+    const uint8_t *Start, std::size_t Size,
+    std::unordered_set &GuildFilter) {
+  Data = Start;
+  End = Data + Size;
+  uint64_t LastAddr = 0;
+  while (Data < End) {
+    // Bail out on error: a failed read may not advance Data, in which case
+    // retrying would never terminate.
+    if (!buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter))
+      return false;
+  }
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
-  assert(Cur == Root &&
-         " Cur should point to root when the forest is fully built up");
   return true;
 }

+// Decode every function: an empty filter disables filtering.
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
+                                                 std::size_t Size) {
+  std::unordered_set GuildFilter;
+  return buildAddress2ProbeMap(Start, Size, GuildFilter);
+}
+
 void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
   OS << "Pseudo Probe Desc:\n";
   // Make the output deterministic
diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
--- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
@@ -24,14 +24,14 @@
 ; CHECK-NEXT: 4: 15
 ; CHECK-NEXT: !CFGChecksum: 72617220756

-; CHECK-UNWINDER: [main:2]
+; CHECK-UNWINDER: [0x7f4]
 ; CHECK-UNWINDER-NEXT: 2
 ; CHECK-UNWINDER-NEXT: 79e-7bf:15
 ; CHECK-UNWINDER-NEXT: 7c4-7cf:15
 ; CHECK-UNWINDER-NEXT: 2
 ; CHECK-UNWINDER-NEXT: 7bf->760:15
 ; CHECK-UNWINDER-NEXT: 7cf->79e:16
-; CHECK-UNWINDER-NEXT: [main:2 @ foo:8]
+; CHECK-UNWINDER-NEXT: [0x7f4 @ 0x7bf]
 ; CHECK-UNWINDER-NEXT: 1
 ; CHECK-UNWINDER-NEXT: 760-77f:15
 ; CHECK-UNWINDER-NEXT: 1
diff --git 
a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test @@ -123,7 +123,7 @@ ; CHECK: 6: 1 fa:1 ; CHECK: !CFGChecksum: 563022570642068 -; CHECK-UNWINDER: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5] +; CHECK-UNWINDER: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab] ; CHECK-UNWINDER-NEXT: 3 ; CHECK-UNWINDER-NEXT: 7a0-7a7:1 ; CHECK-UNWINDER-NEXT: 7a0-7ab:3 @@ -132,33 +132,33 @@ ; CHECK-UNWINDER-NEXT: 7a7->7b2:1 ; CHECK-UNWINDER-NEXT: 7ab->7a0:4 ; CHECK-UNWINDER-NEXT: 7b5->7c0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5] ; CHECK-UNWINDER-NEXT: 1 ; CHECK-UNWINDER-NEXT: 7c0-7d4:1 ; CHECK-UNWINDER-NEXT: 1 ; CHECK-UNWINDER-NEXT: 7d4->7c0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7c0-7cd:1 ; CHECK-UNWINDER-NEXT: 7db-7e0:1 ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7cd->7db:1 ; CHECK-UNWINDER-NEXT: 7e0->7a0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7a0-7a7:1 ; CHECK-UNWINDER-NEXT: 7b2-7b5:1 ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7a7->7b2:1 ; CHECK-UNWINDER-NEXT: 7b5->7c0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0 @ 0x7b5] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7c0-7cd:2 ; CHECK-UNWINDER-NEXT: 7db-7e0:1 ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7cd->7db:2 ; CHECK-UNWINDER-NEXT: 7e0->7a0:1 -; 
CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0 @ 0x7b5 @ 0x7e0] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7a0-7a7:1 ; CHECK-UNWINDER-NEXT: 7b2-7b5:1 diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -333,7 +333,7 @@ }; // Utilities for LLVM-style RTTI - enum ContextKind { CK_StringBased, CK_ProbeBased }; + enum ContextKind { CK_StringBased, CK_AddrBased }; const ContextKind Kind; ContextKind getKind() const { return Kind; } ContextKey(ContextKind K) : Kind(K){}; @@ -359,34 +359,23 @@ } }; -// Probe based context key as the intermediate key of context -// String based context key will introduce redundant string handling -// since the callee context is inferred from the context string which -// need to be splitted by '@' to get the last location frame, so we -// can just use probe instead and generate the string in the end. 
-struct ProbeBasedCtxKey : public ContextKey {
-  SmallVector Probes;
+// Address-based context key: a call stack stored as raw frame addresses.
+struct AddrBasedCtxKey : public ContextKey {
+  SmallVector Context;

-  ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
+  bool WasLeafInlined;
+  AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
   static bool classof(const ContextKey *K) {
-    return K->getKind() == CK_ProbeBased;
+    return K->getKind() == CK_AddrBased;
   }

   bool isEqual(const ContextKey *K) const override {
-    const ProbeBasedCtxKey *O = dyn_cast(K);
-    assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
-    return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
-                      O->Probes.end());
+    const AddrBasedCtxKey *Other = dyn_cast(K);
+    return Other && Context == Other->Context; // other key kinds never match
   }

   void genHashCode() override {
-    for (const auto *P : Probes) {
-      HashCode = hash_combine(HashCode, P);
-    }
-    if (HashCode == 0) {
-      // Avoid zero value of HashCode when it's an empty list
-      HashCode = 1;
-    }
+    HashCode = hash_combine_range(Context.begin(), Context.end());
   }
 };

@@ -433,22 +422,14 @@
   std::shared_ptr getContextKey();
 };

-struct ProbeStack {
-  SmallVector Stack;
+struct AddressStack {
+  SmallVector Stack;
   ProfiledBinary *Binary;
-  ProbeStack(ProfiledBinary *B) : Binary(B) {}
+  AddressStack(ProfiledBinary *B) : Binary(B) {}
   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
     assert(!Cur->isExternalFrame() &&
            "External frame's not expected for context stack.");
-    const MCDecodedPseudoProbe *CallProbe =
-        Binary->getCallProbeForAddr(Cur->Address);
-    // We may not find a probe for a merged or external callsite.
-    // Callsite merging may cause the loss of original probe IDs.
-    // Cutting off the context from here since the inliner will
-    // not know how to consume a context with unknown callsites.
- if (!CallProbe) - return false; - Stack.push_back(CallProbe); + Stack.push_back(Cur->Address); return true; } @@ -456,18 +437,7 @@ if (!Stack.empty()) Stack.pop_back(); } - // Use pseudo probe based context key to get the sample counter - // A context stands for a call path from 'main' to an uninlined - // callee with all inline frames recovered on that path. The probes - // belonging to that call path is the probes either originated from - // the callee or from any functions inlined into the callee. Since - // pseudo probes are organized in a tri-tree style after decoded, - // the tree path from the tri-tree root (which is the uninlined - // callee) to the probe node forms an inline context. - // Here we use a list of probe(pointer) as the context key to speed up - // aggregation and the final context string will be generate in - // ProfileGenerator - std::shared_ptr getContextKey(); + std::shared_ptr getContextKey(); }; /* diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -179,17 +179,12 @@ return KeyStr; } -std::shared_ptr ProbeStack::getContextKey() { - std::shared_ptr ProbeBasedKey = - std::make_shared(); - for (auto CallProbe : Stack) { - ProbeBasedKey->Probes.emplace_back(CallProbe); - } - CSProfileGenerator::compressRecursionContext( - ProbeBasedKey->Probes); - CSProfileGenerator::trimContext( - ProbeBasedKey->Probes); - return ProbeBasedKey; +std::shared_ptr AddressStack::getContextKey() { + std::shared_ptr KeyStr = std::make_shared(); + KeyStr->Context = Stack; + CSProfileGenerator::compressRecursionContext(KeyStr->Context); + CSProfileGenerator::trimContext(KeyStr->Context); + return KeyStr; } template @@ -252,8 +247,8 @@ void VirtualUnwinder::collectSamplesFromFrameTrie( UnwindState::ProfiledFrame *Cur) { if (Binary->usePseudoProbes()) { - ProbeStack Stack(Binary); - collectSamplesFromFrameTrie(Cur, Stack); + 
AddressStack Stack(Binary); + collectSamplesFromFrameTrie(Cur, Stack); } else { FrameStack Stack(Binary); collectSamplesFromFrameTrie(Cur, Stack); @@ -461,14 +456,17 @@ const ProfiledBinary *Binary) { if (const auto *CtxKey = dyn_cast(K)) { return SampleContext::getContextString(CtxKey->Context); - } else if (const auto *CtxKey = dyn_cast(K)) { - SampleContextFrameVector ContextStack; - for (const auto *Probe : CtxKey->Probes) { - Binary->getInlineContextForProbe(Probe, ContextStack, true); + } else if (const auto *CtxKey = dyn_cast(K)) { + std::ostringstream OContextStr; + for (uint32_t I = 0; I < CtxKey->Context.size(); I++) { + if (OContextStr.str().size()) + OContextStr << " @ "; + OContextStr << "0x" + << to_hexString( + Binary->virtualAddrToOffset(CtxKey->Context[I]), + false); } - // Probe context key at this point does not have leaf probe, so do not - // include the leaf inline location. - return SampleContext::getContextString(ContextStack, true); + return OContextStr.str(); } else { llvm_unreachable("unexpected key type"); } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -106,6 +106,8 @@ void showDensitySuggestion(double Density); + void collectProfiledFunctions(); + // Thresholds from profile summary to answer isHotCount/isColdCount queries. 
+  uint64_t HotCountThreshold;
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -5,12 +5,12 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
 #include "ProfileGenerator.h"
 #include "ErrorHandling.h"
 #include "ProfiledBinary.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/ProfileData/ProfileCommon.h"
+#include
 #include
 #include

@@ -370,6 +370,39 @@
   }
 }

+void ProfileGeneratorBase::collectProfiledFunctions() {
+  std::unordered_set ProfiledFunctions;
+  // Record every function that has samples: look up the function containing
+  // each frame address of an address-based context, the start of each sampled
+  // range, and the source and target of each sampled branch.
+  for (const auto &CI : SampleCounters) {
+    if (const auto *CtxKey = dyn_cast(CI.first.getPtr())) {
+      for (auto Addr : CtxKey->Context) {
+        if (FuncRange *FRange = Binary->findFuncRangeForOffset(
+                Binary->virtualAddrToOffset(Addr)))
+          ProfiledFunctions.insert(FRange->Func);
+      }
+    }
+
+    for (const auto &Item : CI.second.RangeCounter) {
+      uint64_t StartOffset = Item.first.first;
+      if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
+        ProfiledFunctions.insert(FRange->Func);
+    }
+
+    for (const auto &Item : CI.second.BranchCounter) {
+      uint64_t SourceOffset = Item.first.first;
+      uint64_t TargetOffset = Item.first.second; // key is (source, target)
+      if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
+        ProfiledFunctions.insert(FRange->Func);
+      if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
+        ProfiledFunctions.insert(FRange->Func);
+    }
+  }
+
+  Binary->setProfiledFunctions(ProfiledFunctions);
+}
+
 FunctionSamples &
 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
   SampleContext Context(FuncName);
@@ -382,6 +415,7 @@
 }

 void 
ProfileGenerator::generateProfile() { + collectProfiledFunctions(); if (Binary->usePseudoProbes()) { generateProbeBasedProfile(); } else { @@ -428,6 +462,7 @@ void ProfileGenerator::generateProbeBasedProfile() { assert(SampleCounters.size() == 1 && "Must have one entry for profile generation."); + Binary->decodePseudoProbe(); // Enable pseudo probe functionalities in SampleProf FunctionSamples::ProfileIsProbeBased = true; const SampleCounter &SC = SampleCounters.begin()->second; @@ -442,16 +477,18 @@ void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( const RangeSample &RangeCounter) { ProbeCounterMap ProbeCounter; - // preprocessRangeCounter returns disjoint ranges, so no longer to redo it inside - // extractProbesFromRange. - extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false); + // preprocessRangeCounter returns disjoint ranges, so no longer to redo it + // inside extractProbesFromRange. + extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, + false); for (const auto &PI : ProbeCounter) { const MCDecodedPseudoProbe *Probe = PI.first; uint64_t Count = PI.second; SampleContextFrameVector FrameVec; Binary->getInlineContextForProbe(Probe, FrameVec, true); - FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count); + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, Count); FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); if (Probe->isEntry()) FunctionProfile.addHeadSamples(Count); @@ -496,7 +533,8 @@ &getTopLevelFunctionProfile(FrameVec[0].FuncName); FunctionProfile->addTotalSamples(Count); if (Binary->usePseudoProbes()) { - const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName())); + const auto *FuncDesc = Binary->getFuncDescForGUID( + Function::getGUID(FunctionProfile->getName())); FunctionProfile->setFunctionHash(FuncDesc->FuncHash); } @@ -515,7 +553,8 @@ FunctionProfile = 
&Ret.first->second; FunctionProfile->addTotalSamples(Count); if (Binary->usePseudoProbes()) { - const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName())); + const auto *FuncDesc = Binary->getFuncDescForGUID( + Function::getGUID(FunctionProfile->getName())); FunctionProfile->setFunctionHash(FuncDesc->FuncHash); } } @@ -646,32 +685,23 @@ void CSProfileGenerator::generateProfile() { FunctionSamples::ProfileIsCSFlat = true; - if (Binary->getTrackFuncContextSize()) - computeSizeForProfiledFunctions(); + collectProfiledFunctions(); if (Binary->usePseudoProbes()) { generateProbeBasedProfile(); } else { generateLineNumBasedProfile(); } + + if (Binary->getTrackFuncContextSize()) + computeSizeForProfiledFunctions(); + postProcessProfiles(); } void CSProfileGenerator::computeSizeForProfiledFunctions() { std::unordered_set ProfiledFunctions; - - // Go through all the ranges in the CS counters, use the start of the range to - // look up the function it belongs and record the function. - for (const auto &CI : SampleCounters) { - for (const auto &Item : CI.second.RangeCounter) { - // FIXME: Filter the bogus crossing function range. - uint64_t StartOffset = Item.first.first; - if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) - ProfiledFunctions.insert(FRange->Func); - } - } - - for (auto *Func : ProfiledFunctions) + for (auto *Func : Binary->getProfiledFunctions()) Binary->computeInlinedContextSizeForFunc(Func); // Flush the symbolizer to save memory. 
@@ -907,25 +937,48 @@
   }
 }

-// Helper function to extract context prefix string stack
-// Extract context stack for reusing, leaf context stack will
-// be added compressed while looking up function profile
-static void extractPrefixContextStack(
-    SampleContextFrameVector &ContextStack,
-    const SmallVectorImpl &Probes,
-    ProfiledBinary *Binary) {
+// Build the caller context prefix for a stack of call addresses. Addresses are
+// resolved to call probes starting from the last one; the walk stops at the
+// first address that has no probe, so only the trailing run of resolvable
+// frames contributes to ContextStack.
+static void
+extractPrefixContextStack(SampleContextFrameVector &ContextStack,
+                          const SmallVectorImpl &Addresses,
+                          ProfiledBinary *Binary) {
+  SmallVector Probes;
+  for (auto Addr : reverse(Addresses)) {
+    const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr);
+    // These could be the cases when a probe is not found at a callsite. Cutting
+    // off the context from here since the inliner will not know how to consume
+    // a context with unknown callsites.
+    // 1. for functions that are not sampled when
+    // --decode-probe-for-profiled-functions-only is on.
+    // 2. for a merged callsite. Callsite merging may cause the loss of original
+    // probe IDs.
+    // 3. for an external callsite.
+    if (!CallProbe)
+      break;
+    Probes.push_back(CallProbe);
+  }
+
+  std::reverse(Probes.begin(), Probes.end());
+
+  // Extract context stack for reusing, leaf context stack will be added
+  // compressed while looking up function profile.
   for (const auto *P : Probes) {
     Binary->getInlineContextForProbe(P, ContextStack, true);
   }
 }

 void CSProfileGenerator::generateProbeBasedProfile() {
+  Binary->decodePseudoProbe();
   // Enable pseudo probe functionalities in SampleProf
   FunctionSamples::ProfileIsProbeBased = true;
   for (const auto &CI : SampleCounters) {
-    const auto *CtxKey = cast(CI.first.getPtr());
+    const AddrBasedCtxKey *CtxKey =
+        cast(CI.first.getPtr()); // asserts the kind; never yields null
     SampleContextFrameVector ContextStack;
-    extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
+    extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
     // Fill in function body samples from probes, also infer caller's samples
     // from callee's probe
     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -218,6 +218,9 @@
   // A map of mapping function name to BinaryFunction info.
   std::unordered_map BinaryFunctions;

+  // The set of binary functions that have samples.
+  std::unordered_set ProfiledFunctions;
+
   // An ordered map of mapping function's start offset to function range
   // relevant info. Currently to determine if the offset of ELF is the start of
   // a real function, we leverage the function range info from DWARF.
@@ -278,6 +281,8 @@ template void setPreferredTextSegmentAddresses(const ELFFile &Obj, StringRef FileName); + void checkPseudoProbe(const ELFObjectFileBase *Obj); + void decodePseudoProbe(const ELFObjectFileBase *Obj); void @@ -331,6 +336,9 @@ setupSymbolizer(); load(); } + + void decodePseudoProbe(); + uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const { return VirtualAddress - BaseAddress; } @@ -453,6 +461,14 @@ return BinaryFunctions; } + std::unordered_set &getProfiledFunctions() { + return ProfiledFunctions; + } + + void setProfiledFunctions(std::unordered_set &Funcs) { + ProfiledFunctions = Funcs; + } + BinaryFunction *getBinaryFunction(StringRef FName) { auto I = BinaryFunctions.find(FName.str()); if (I == BinaryFunctions.end()) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -156,7 +156,8 @@ for (const auto &ChildNode : ProbeNode.getChildren()) { InlineSite Location = ChildNode.first; ProbeContext.back().second = std::get<1>(Location); - trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), + ProbeContext); } ProbeContext.pop_back(); @@ -208,8 +209,10 @@ // Find the preferred load address for text sections. setPreferredTextSegmentAddresses(Obj); - // Decode pseudo probe related section - decodePseudoProbe(Obj); + checkPseudoProbe(Obj); + + if (ShowDisassemblyOnly) + decodePseudoProbe(Obj); // Load debug info of subprograms from DWARF section. 
// If path of debug info binary is specified, use the debug info from it, @@ -287,7 +290,8 @@ } template -void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj, StringRef FileName) { +void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj, + StringRef FileName) { const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName); // FIXME: This should be the page size of the system running profiling. // However such info isn't available at post-processing time, assuming @@ -311,7 +315,8 @@ exitWithError("no executable segment found", FileName); } -void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) { +void ProfiledBinary::setPreferredTextSegmentAddresses( + const ELFObjectFileBase *Obj) { if (const auto *ELFObj = dyn_cast(Obj)) setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); else if (const auto *ELFObj = dyn_cast(Obj)) @@ -324,10 +329,38 @@ llvm_unreachable("invalid ELF object format"); } -void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { +void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) { if (UseDwarfCorrelation) return; + bool HasProbeDescSection = false; + bool HasPseudoProbeSection = false; + + StringRef FileName = Obj->getFileName(); + for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); + SI != SE; ++SI) { + const SectionRef &Section = *SI; + StringRef SectionName = unwrapOrError(Section.getName(), FileName); + if (SectionName == ".pseudo_probe_desc") { + HasProbeDescSection = true; + } else if (SectionName == ".pseudo_probe") { + HasPseudoProbeSection = true; + } + } + + // set UsePseudoProbes flag, used for PerfReader + UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection; +} + +void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { + if (!UsePseudoProbes) + return; + + std::unordered_set ProfiledGuids; + if (!ShowDisassemblyOnly) + for (auto *F : ProfiledFunctions) + 
ProfiledGuids.insert(Function::getGUID(F->FuncName));

   StringRef FileName = Obj->getFileName();
   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
        SI != SE; ++SI) {
@@ -339,21 +372,20 @@
       if (!ProbeDecoder.buildGUID2FuncDescMap(
               reinterpret_cast(Contents.data()),
               Contents.size()))
-        exitWithError("Pseudo Probe decoder fail in .pseudo_probe_desc section");
+        exitWithError(
+            "Pseudo Probe decoder fail in .pseudo_probe_desc section");
     } else if (SectionName == ".pseudo_probe") {
       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
       if (!ProbeDecoder.buildAddress2ProbeMap(
               reinterpret_cast(Contents.data()),
-              Contents.size()))
+              Contents.size(), ProfiledGuids))
         exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
-      // set UsePseudoProbes flag, used for PerfReader
-      UsePseudoProbes = true;
     }
   }

   // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
   // is available
-  if (UsePseudoProbes && TrackFuncContextSize) {
+  if (TrackFuncContextSize) {
     for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
       auto *Frame = Child.second.get();
       StringRef FuncName =
@@ -366,6 +398,18 @@
     ProbeDecoder.printGUID2FuncDescMap(outs());
 }

+// Re-open the binary at Path and decode its pseudo probe sections. The file is
+// read again at this point, so verify it is still an ELF object before use
+// rather than passing a null pointer to the decoder.
+void ProfiledBinary::decodePseudoProbe() {
+  OwningBinary OBinary = unwrapOrError(createBinary(Path), Path);
+  Binary &ExeBinary = *OBinary.getBinary();
+  auto *Obj = dyn_cast(&ExeBinary);
+  if (!Obj)
+    exitWithError("not a valid ELF image", Path);
+  decodePseudoProbe(Obj);
+}
+
 void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) {
   // Note that the start offset of each ELF section can be a non-function
   // symbol, we need to binary search for the start of a real function range.