diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -599,10 +599,19 @@ errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; return; } + + MCPseudoProbeDecoder::Uint64Set GuidFilter; + MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; + for (const BinaryFunction *F : BC->getAllBinaryFunctions()) { + for (const MCSymbol *Sym : F->getSymbols()) { + FuncStartAddrs[Function::getGUID(NameResolver::restore(Sym->getName()))] = + F->getAddress(); + } + } Contents = PseudoProbeSection->getContents(); if (!BC->ProbeDecoder.buildAddress2ProbeMap( - reinterpret_cast(Contents.data()), - Contents.size())) { + reinterpret_cast(Contents.data()), Contents.size(), + GuidFilter, FuncStartAddrs)) { BC->ProbeDecoder.getAddress2ProbesMap().clear(); errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; return; @@ -3426,6 +3435,8 @@ // Address of the first probe is absolute. // Other probes' address are represented by delta auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { + assert(!isSentinelProbe(CurProbe->getAttributes()) && + "Sentinel probes should not be emitted"); EmitULEB128IntValue(CurProbe->getIndex()); uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); uint8_t Flag = @@ -3530,9 +3541,17 @@ reinterpret_cast(DescContents.data()), DescContents.size()); StringRef ProbeContents = PseudoProbeSection->getOutputContents(); + MCPseudoProbeDecoder::Uint64Set GuidFilter; + MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; + for (const BinaryFunction *F : BC->getAllBinaryFunctions()) { + const uint64_t Addr = + F->isEmitted() ? F->getOutputAddress() : F->getAddress(); + FuncStartAddrs[Function::getGUID( + NameResolver::restore(F->getOneName()))] = Addr; + } DummyDecoder.buildAddress2ProbeMap( reinterpret_cast(ProbeContents.data()), - ProbeContents.size()); + ProbeContents.size(), GuidFilter, FuncStartAddrs); DummyDecoder.printProbesForAllAddresses(outs()); } } diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -24,8 +24,15 @@ constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc"; +enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; + enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; +enum class PseudoProbeAttributes { + Reserved = 0x1, + Sentinel = 0x2, // A place holder for split function entry address. +}; + // The saturated distrution factor representing 100% for block probes. constexpr static uint64_t PseudoProbeFullDistributionFactor = std::numeric_limits::max(); @@ -80,6 +87,10 @@ float Factor; }; +static inline bool isSentinelProbe(uint32_t Flags) { + return Flags & (uint32_t)PseudoProbeAttributes::Sentinel; +} + Optional extractProbe(const Instruction &Inst); void setProbeDistributionFactor(Instruction &Inst, float Factor); diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -362,7 +362,7 @@ MCSection *getKCFITrapSection(const MCSection &TextSec) const; - MCSection *getPseudoProbeSection(const MCSection *TextSec) const; + MCSection *getPseudoProbeSection(const MCSection &TextSec) const; MCSection *getPseudoProbeDescSection(StringRef FuncName) const; diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -15,7 +15,9 @@ // // FUNCTION BODY (one for each outlined function present in the text section) // GUID (uint64) -// GUID of the function +// GUID of the function's source name which may be different from the +// actual binary linkage name. This GUID will be used to decode and +// generate a profile against the source function name. // NPROBES (ULEB128) // Number of probes originating from this function. // NUM_INLINED_FUNCTIONS (ULEB128) @@ -29,7 +31,9 @@ // ATTRIBUTE (uint3) // 1 - reserved // ADDRESS_TYPE (uint1) -// 0 - code address, 1 - address delta +// 0 - code address for regular probes (for downwards compatibility) +// - GUID of linkage name for sentinel probes +// 1 - address delta // CODE_ADDRESS (uint64 or ULEB128) // code address or address delta, depending on ADDRESS_TYPE // INLINED FUNCTION RECORDS @@ -39,11 +43,15 @@ // ID of the callsite probe (ULEB128) // FUNCTION BODY // A FUNCTION BODY entry describing the inlined function. +// +// TODO: retire the ADDRESS_TYPE encoding for code addresses once compatibility +// is no longer an issue. //===----------------------------------------------------------------------===// #ifndef LLVM_MC_MCPSEUDOPROBE_H #define LLVM_MC_MCPSEUDOPROBE_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/PseudoProbe.h" @@ -276,23 +284,20 @@ /// Instances of this class represent the pseudo probes inserted into a compile /// unit. -class MCPseudoProbeSection { +class MCPseudoProbeSections { public: - void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe, + void addPseudoProbe(MCSymbol *FuncSym, const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) { - MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack); + MCProbeDivisions[FuncSym].addPseudoProbe(Probe, InlineStack); } // TODO: Sort by getOrdinal to ensure a determinstic section order - using MCProbeDivisionMap = std::map; + using MCProbeDivisionMap = std::map; private: - // A collection of MCPseudoProbe for each text section. The MCPseudoProbes - // are grouped by GUID of the functions where they are from and will be - // encoded by groups. In the comdat scenario where a text section really only - // contains the code of a function solely, the probes associated with a comdat - // function are still grouped by GUIDs due to inlining that can bring probes - // from different functions into one function. + // A collection of MCPseudoProbe for each function. The MCPseudoProbes are + // grouped by GUIDs due to inlining that can bring probes from different + // functions into one function. MCProbeDivisionMap MCProbeDivisions; public: @@ -304,18 +309,18 @@ }; class MCPseudoProbeTable { - // A collection of MCPseudoProbe in the current module grouped by text - // sections. MCPseudoProbes will be encoded into a corresponding + // A collection of MCPseudoProbe in the current module grouped by + // functions. MCPseudoProbes will be encoded into a corresponding // .pseudoprobe section. With functions emitted as separate comdats, // a text section really only contains the code of a function solely, and the // probes associated with the text section will be emitted into a standalone // .pseudoprobe section that shares the same comdat group with the function. - MCPseudoProbeSection MCProbeSections; + MCPseudoProbeSections MCProbeSections; public: static void emit(MCObjectStreamer *MCOS); - MCPseudoProbeSection &getProbeSections() { return MCProbeSections; } + MCPseudoProbeSections &getProbeSections() { return MCProbeSections; } #ifndef NDEBUG static int DdgPrintIndent; @@ -341,6 +346,9 @@ /// Points to the end of the buffer. const uint8_t *End = nullptr; + /// Whether encoding is based on a starting probe with absolute code address. + bool EncodingIsAddrBased = false; + // Decoding helper function template ErrorOr readUnencodedNumber(); template ErrorOr readUnsignedNumber(); @@ -348,20 +356,21 @@ ErrorOr readString(uint32_t Size); public: + using Uint64Set = DenseSet; + using Uint64Map = DenseMap; + // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); - // Decode pseudo_probe section to build address to probes map. - bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); - // Decode pseudo_probe section to build address to probes map for specifed // functions only. bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, - std::unordered_set &GuildFilter); + const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs); bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, - uint64_t &LastAddr, - std::unordered_set &GuildFilter); + uint64_t &LastAddr, const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs); // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -1105,7 +1105,8 @@ /// Emit the a pseudo probe into the current section. virtual void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, - const MCPseudoProbeInlineStack &InlineStack); + const MCPseudoProbeInlineStack &InlineStack, + MCSymbol *FnSym); /// Set the bundle alignment mode from now on in the section. /// The argument is the power of 2 to which the alignment is set. The diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -41,8 +41,6 @@ pair_hash>; using FuncProbeFactorMap = StringMap; -enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; - class PseudoProbeDescriptor { uint64_t FunctionGUID; uint64_t FunctionHash; diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -48,5 +48,6 @@ } SmallVector InlineStack(llvm::reverse(ReversedInlineStack)); - Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack); + Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack, + Asm->CurrentFnSym); } diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -378,7 +378,7 @@ void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, - const MCPseudoProbeInlineStack &InlineStack) override; + const MCPseudoProbeInlineStack &InlineStack, MCSymbol *FnSym) override; void emitBundleAlignMode(unsigned AlignPow2) override; void emitBundleLock(bool AlignToEnd) override; @@ -2338,13 +2338,16 @@ void MCAsmStreamer::emitPseudoProbe( uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, - const MCPseudoProbeInlineStack &InlineStack) { + const MCPseudoProbeInlineStack &InlineStack, MCSymbol *FnSym) { OS << "\t.pseudoprobe\t" << Guid << " " << Index << " " << Type << " " << Attr; // Emit inline stack like // @ GUIDmain:3 @ GUIDCaller:1 @ GUIDDirectCaller:11 for (const auto &Site : InlineStack) OS << " @ " << std::get<0>(Site) << ":" << std::get<1>(Site); + + OS << " " << FnSym->getName(); + EmitEOL(); } diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -1160,11 +1160,11 @@ } MCSection * -MCObjectFileInfo::getPseudoProbeSection(const MCSection *TextSec) const { +MCObjectFileInfo::getPseudoProbeSection(const MCSection &TextSec) const { if (Ctx->getObjectFileType() == MCContext::IsELF) { - const auto *ElfSec = static_cast(TextSec); + const auto &ElfSec = static_cast(TextSec); // Create a separate section for probes that comes with a comdat function. - if (const MCSymbol *Group = ElfSec->getGroup()) { + if (const MCSymbol *Group = ElfSec.getGroup()) { auto *S = static_cast(PseudoProbeSection); auto Flags = S->getFlags() | ELF::SHF_GROUP; return Ctx->getELFSection(S->getName(), S->getType(), Flags, diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -5904,10 +5904,16 @@ InlineStack.push_back(Site); } + // Parse function entry name + StringRef FnName; + if (parseIdentifier(FnName)) + return Error(getLexer().getLoc(), "unexpected token in '.pseudoprobe' directive"); + MCSymbol *FnSym = getContext().lookupSymbol(FnName); + if (parseEOL()) return true; - getStreamer().emitPseudoProbe(Guid, Index, Type, Attr, InlineStack); + getStreamer().emitPseudoProbe(Guid, Index, Type, Attr, InlineStack, FnSym); return false; } diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -14,12 +14,17 @@ #include "llvm/MC/MCFragment.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" +#include +#include #include #include #include +#include #define DEBUG_TYPE "mcpseudoprobe" @@ -43,6 +48,10 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const { + bool IsSentinel = isSentinelProbe(getAttributes()); + assert((LastProbe || IsSentinel) && + "Last probe should not be null for non-sentinel probes"); + // Emit Index MCOS->emitULEB128IntValue(Index); // Emit Type and the flag: @@ -53,10 +62,11 @@ assert(Attributes <= 0x7 && "Probe attributes too big to encode, exceeding 7"); uint8_t PackedType = Type | (Attributes << 4); - uint8_t Flag = LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; + uint8_t Flag = + !IsSentinel ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; MCOS->emitInt8(Flag | PackedType); - if (LastProbe) { + if (!IsSentinel) { // Emit the delta between the address label and LastProbe. const MCExpr *AddrDelta = buildSymbolDiff(MCOS, Label, LastProbe->getLabel()); @@ -67,9 +77,8 @@ MCOS->insert(new MCPseudoProbeAddrFragment(AddrDelta)); } } else { - // Emit label as a symbolic code address. - MCOS->emitSymbolValue( - Label, MCOS->getContext().getAsmInfo()->getCodePointerSize()); + // Emit the GUID of the split function that the sentinel probe represents. + MCOS->emitInt64(Guid); } LLVM_DEBUG({ @@ -81,7 +90,7 @@ void MCPseudoProbeInlineTree::addPseudoProbe( const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) { // The function should not be called on the root. - assert(isRoot() && "Should not be called on root"); + assert(isRoot() && "Should only be called on root"); // When it comes here, the input look like: // Probe: GUID of C, ... @@ -128,43 +137,57 @@ dbgs() << "Group [\n"; MCPseudoProbeTable::DdgPrintIndent += 2; }); + assert(!isRoot() && "Root should be handled seperately"); + // Emit probes grouped by GUID. - if (Guid != 0) { - LLVM_DEBUG({ - dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); - dbgs() << "GUID: " << Guid << "\n"; - }); - // Emit Guid - MCOS->emitInt64(Guid); - // Emit number of probes in this node - MCOS->emitULEB128IntValue(Probes.size()); - // Emit number of direct inlinees - MCOS->emitULEB128IntValue(Children.size()); - // Emit probes in this group - for (const auto &Probe : Probes) { - Probe.emit(MCOS, LastProbe); - LastProbe = &Probe; - } - } else { - assert(Probes.empty() && "Root should not have probes"); + LLVM_DEBUG({ + dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); + dbgs() << "GUID: " << Guid << "\n"; + }); + // Emit Guid + MCOS->emitInt64(Guid); + // Emit number of probes in this node, including a sentinel probe for + // top-level functions if needed. + bool NeedSentinel = false; + if (Parent->isRoot()) { + assert(isSentinelProbe(LastProbe->getAttributes()) && + "Starting probe of a top-level function should be a sentinel probe"); + // The main body of a split function doesn't need a sentinel probe. + if (LastProbe->getGuid() != Guid) + NeedSentinel = true; } - // Emit sorted descendant - // InlineSite is unique for each pair, - // so there will be no ordering of Inlinee based on MCPseudoProbeInlineTree* - std::map Inlinees; - for (auto &Child : Children) - Inlinees[Child.first] = Child.second.get(); + MCOS->emitULEB128IntValue(Probes.size() + NeedSentinel); + // Emit number of direct inlinees + MCOS->emitULEB128IntValue(Children.size()); + // Emit sentinel probe for top-level functions + if (NeedSentinel) + LastProbe->emit(MCOS, nullptr); + + // Emit probes in this group + for (const auto &Probe : Probes) { + Probe.emit(MCOS, LastProbe); + LastProbe = &Probe; + } + + // Emit sorted descendant. InlineSite is unique for each pair, so there will + // be no ordering of Inlinee based on MCPseudoProbeInlineTree* + using InlineeType = std::pair; + auto Comparer = [](const InlineeType &A, const InlineeType &B) { + return A.first < B.first; + }; + std::vector Inlinees; + for (const auto &Child : Children) + Inlinees.emplace_back(Child.first, Child.second.get()); + std::sort(Inlinees.begin(), Inlinees.end(), Comparer); for (const auto &Inlinee : Inlinees) { - if (Guid) { - // Emit probe index - MCOS->emitULEB128IntValue(std::get<1>(Inlinee.first)); - LLVM_DEBUG({ - dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); - dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; - }); - } + // Emit probe index + MCOS->emitULEB128IntValue(std::get<1>(Inlinee.first)); + LLVM_DEBUG({ + dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); + dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; + }); // Emit the group Inlinee.second->emit(MCOS, LastProbe); } @@ -176,17 +199,37 @@ }); } -void MCPseudoProbeSection::emit(MCObjectStreamer *MCOS) { +void MCPseudoProbeSections::emit(MCObjectStreamer *MCOS) { MCContext &Ctx = MCOS->getContext(); - for (auto &ProbeSec : MCProbeDivisions) { - const MCPseudoProbe *LastProbe = nullptr; - if (auto *S = - Ctx.getObjectFileInfo()->getPseudoProbeSection(ProbeSec.first)) { + const auto *FuncSym = ProbeSec.first; + const auto &Root = ProbeSec.second; + if (auto *S = Ctx.getObjectFileInfo()->getPseudoProbeSection( + FuncSym->getSection())) { // Switch to the .pseudoprobe section or a comdat group. MCOS->switchSection(S); // Emit probes grouped by GUID. - ProbeSec.second.emit(MCOS, LastProbe); + // Emit sorted descendant. InlineSite is unique for each pair, so there + // will be no ordering of Inlinee based on MCPseudoProbeInlineTree* + using InlineeType = std::pair; + auto Comparer = [](const InlineeType &A, const InlineeType &B) { + return A.first < B.first; + }; + std::vector Inlinees; + for (const auto &Child : Root.getChildren()) + Inlinees.emplace_back(Child.first, Child.second.get()); + std::sort(Inlinees.begin(), Inlinees.end(), Comparer); + + for (const auto &Inlinee : Inlinees) { + // Emit the group guarded by a sentinel probe. + MCPseudoProbe SentinelProbe(const_cast(FuncSym), + MD5Hash(FuncSym->getName()), + (uint32_t)PseudoProbeReservedId::Invalid, + (uint32_t)PseudoProbeType::Block, + (uint32_t)PseudoProbeAttributes::Sentinel); + const MCPseudoProbe *Probe = &SentinelProbe; + Inlinee.second->emit(MCOS, Probe); + } } } } @@ -360,39 +403,13 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, - std::unordered_set &GuildFilter) { + const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { // The pseudo_probe section encodes an inline forest and each tree has a - // format like: - // FUNCTION BODY (one for each uninlined function present in the text - // section) - // GUID (uint64) - // GUID of the function - // NPROBES (ULEB128) - // Number of probes originating from this function. - // NUM_INLINED_FUNCTIONS (ULEB128) - // Number of callees inlined into this function, aka number of - // first-level inlinees - // PROBE RECORDS - // A list of NPROBES entries. Each entry contains: - // INDEX (ULEB128) - // TYPE (uint4) - // 0 - block probe, 1 - indirect call, 2 - direct call - // ATTRIBUTE (uint3) - // 1 - tail call, 2 - dangling - // ADDRESS_TYPE (uint1) - // 0 - code address, 1 - address delta - // CODE_ADDRESS (uint64 or ULEB128) - // code address or address delta, depending on Flag - // INLINED FUNCTION RECORDS - // A list of NUM_INLINED_FUNCTIONS entries describing each of the - // inlined callees. Each record contains: - // INLINE SITE - // Index of the callsite probe (ULEB128) - // FUNCTION BODY - // A FUNCTION BODY entry describing the inlined function. + // format defined in MCPseudoProbe.h uint32_t Index = 0; - if (Cur == &DummyInlineRoot) { + bool IsTopLevelFunc = Cur == &DummyInlineRoot; + if (IsTopLevelFunc) { // Use a sequential id for top level inliner. Index = Cur->getChildren().size(); } else { @@ -410,8 +427,7 @@ uint64_t Guid = std::move(*ErrorOrCurGuid); // Decide if top-level node should be disgarded. - if (Cur == &DummyInlineRoot && !GuildFilter.empty() && - !GuildFilter.count(Guid)) + if (IsTopLevelFunc && !GuidFilter.empty() && !GuidFilter.count(Guid)) Cur = nullptr; // If the incoming node is null, all its children nodes should be disgarded. @@ -419,6 +435,10 @@ // Switch/add to a new tree node(inlinee) Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index)); Cur->Guid = Guid; + if (IsTopLevelFunc && !EncodingIsAddrBased) { + if (auto V = FuncStartAddrs.lookup(Guid)) + LastAddr = V; + } } // Read number of probes in the current node. @@ -457,9 +477,21 @@ if (!ErrorOrAddr) return false; Addr = std::move(*ErrorOrAddr); + if (isSentinelProbe(Attr)) { + // For sentinel probe, the addr field actually stores the GUID of the + // split function. Convert it to the real address. + if (auto V = FuncStartAddrs.lookup(Addr)) + Addr = V; + } else { + // For now we assume all probe encoding should be either based on + // leading probe address or function start address. + // The scheme is for downwards compatibility. + // TODO: retire this scheme once compatibility is no longer an issue. + EncodingIsAddrBased = true; + } } - if (Cur) { + if (Cur && !isSentinelProbe(Attr)) { // Populate Address2ProbesMap auto &Probes = Address2ProbesMap[Addr]; Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, @@ -471,30 +503,25 @@ uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); for (uint32_t I = 0; I < ChildrenToProcess; I++) { - buildAddress2ProbeMap(Cur, LastAddr, GuildFilter); + buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs); } return true; } bool MCPseudoProbeDecoder::buildAddress2ProbeMap( - const uint8_t *Start, std::size_t Size, - std::unordered_set &GuildFilter) { + const uint8_t *Start, std::size_t Size, const Uint64Set &GuidFilter, + const Uint64Map &FuncStartAddrs) { Data = Start; End = Data + Size; uint64_t LastAddr = 0; while (Data < End) - buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter); + buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter, + FuncStartAddrs); assert(Data == End && "Have unprocessed data in pseudo_probe section"); return true; } -bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start, - std::size_t Size) { - std::unordered_set GuildFilter; - return buildAddress2ProbeMap(Start, Size, GuildFilter); -} - void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) { OS << "Pseudo Probe Desc:\n"; // Make the output deterministic diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1102,7 +1102,8 @@ void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, - const MCPseudoProbeInlineStack &InlineStack) { + const MCPseudoProbeInlineStack &InlineStack, + MCSymbol *FnSym) { auto &Context = getContext(); // Create a symbol at in the current section for use in the probe. @@ -1116,7 +1117,7 @@ // Add the probe entry to this section's entries. Context.getMCPseudoProbeTable().getProbeSections().addPseudoProbe( - getCurrentSectionOnly(), Probe, InlineStack); + FnSym, Probe, InlineStack); } void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll @@ -10,17 +10,18 @@ ; RUN: llvm-mc -filetype=obj <%t1 -o %t4 ; RUN: llvm-objdump --section-headers %t4 | FileCheck %s --check-prefix=CHECK-OBJ + define dso_local void @foo2() !dbg !7 { ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]] -; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0 foo2 ret void, !dbg !10 } define dso_local void @foo() #0 !dbg !11 { ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]] ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]] -; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 -; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2 +; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 foo +; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2 foo call void @foo2(), !dbg !12 ret void, !dbg !13 } @@ -29,9 +30,9 @@ ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]] ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]] ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]] -; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0 -; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2 -; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2 +; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0 entry +; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2 entry +; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2 entry call void @foo(), !dbg !18 ret i32 0, !dbg !19 } @@ -71,6 +72,7 @@ ; CHECK-OBJ: .pseudo_probe_desc ; CHECK-OBJ: .pseudo_probe +; CHECK-OBJ-NOT: .rela.pseudo_probe !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll @@ -18,15 +18,15 @@ %cmp = icmp eq i32 %x, 0 ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 -; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0 foo br i1 %cmp, label %bb1, label %bb2 bb1: ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 -; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0 -; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0 foo +; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 foo store i32 6, ptr @a, align 4 br label %bb3 @@ -34,8 +34,8 @@ ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 -; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0 -; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0 foo +; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 foo store i32 8, ptr @a, align 4 br label %bb3 @@ -44,22 +44,22 @@ ret void, !dbg !12 } -declare void @bar(i32 %x) +declare void @bar(i32 %x) define internal void @foo2(ptr %f) !dbg !4 { entry: ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0 -; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 foo2 ; Check pseudo_probe metadata attached to the indirect call instruction. ; CHECK-IL: call void %f(i32 1), !dbg ![[#PROBE0:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 2, 1, 0 -; CHECK-ASM: .pseudoprobe [[#GUID2]] 2 1 0 +; CHECK-ASM: .pseudoprobe [[#GUID2]] 2 1 0 foo2 call void %f(i32 1), !dbg !13 ; Check pseudo_probe metadata attached to the direct call instruction. ; CHECK-IL: call void @bar(i32 1), !dbg ![[#PROBE1:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 3, 2, 0 -; CHECK-ASM: .pseudoprobe [[#GUID2]] 3 2 0 +; CHECK-ASM: .pseudoprobe [[#GUID2]] 3 2 0 foo2 call void @bar(i32 1) ret void } @@ -92,7 +92,8 @@ ; CHECK-ASM-NEXT: .ascii "foo2" ; CHECK-OBJ-COUNT-2: .pseudo_probe_desc -; CHECK-OBJ-COUNT-2: .pseudo_probe +; CHECK-OBJ: .pseudo_probe +; CHECK-OBJ-NOT: .rela.pseudo_probe !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!9, !10} diff --git a/llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin b/llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ ProfiledFunctions; for (auto *Func : Binary->getProfiledFunctions()) Binary->computeInlinedContextSizeForFunc(Func); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -11,6 +11,7 @@ #include "CallContext.h" #include "ErrorHandling.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -166,8 +167,8 @@ using ProbeFrameStack = SmallVector>; void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder, - MCDecodedPseudoProbeInlineTree &ProbeNode, - ProbeFrameStack &Context); + MCDecodedPseudoProbeInlineTree &ProbeNode, + ProbeFrameStack &Context); void dump() { RootContext.dumpTree(); } @@ -218,8 +219,14 @@ // A list of binary functions that have samples. std::unordered_set ProfiledFunctions; + // GUID to Elf symbol start address map + DenseMap SymbolStartAddrs; + + // Start address to Elf symbol GUID map + std::unordered_multimap StartAddrToSymMap; + // An ordered map of mapping function's start address to function range - // relevant info. Currently to determine if the address of ELF is the start of + // relevant info. Currently to determine if the offset of ELF is the start of // a real function, we leverage the function range info from DWARF. std::map StartAddrToFuncRangeMap; @@ -278,7 +285,8 @@ void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O); template - void setPreferredTextSegmentAddresses(const ELFFile &Obj, StringRef FileName); + void setPreferredTextSegmentAddresses(const ELFFile &Obj, + StringRef FileName); void checkPseudoProbe(const ELFObjectFileBase *Obj); @@ -298,6 +306,9 @@ // Load debug info from DWARF unit. void loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit); + // Create elf symbol to its start address mapping. + void populateElfSymbolAddressList(const ELFObjectFileBase *O); + // A function may be spilt into multiple non-continuous address ranges. We use // this to set whether start address of a function is the real entry of the // function and also set false to the non-function label. @@ -348,7 +359,9 @@ return Address - BaseAddress + getPreferredBaseAddress(); } // Return the preferred load address for the first executable segment. - uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; } + uint64_t getPreferredBaseAddress() const { + return PreferredTextSegmentAddresses[0]; + } // Return the preferred load address for the first loadable segment. uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; } // Return the file offset for the first executable segment. diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -204,11 +204,6 @@ // Find the preferred load address for text sections. setPreferredTextSegmentAddresses(Obj); - checkPseudoProbe(Obj); - - if (ShowDisassemblyOnly) - decodePseudoProbe(Obj); - // Load debug info of subprograms from DWARF section. // If path of debug info binary is specified, use the debug info from it, // otherwise use the debug info from the executable binary. @@ -220,6 +215,17 @@ loadSymbolsFromDWARF(*cast(&ExeBinary)); } + DisassembleFunctionSet.insert(DisassembleFunctions.begin(), + DisassembleFunctions.end()); + + checkPseudoProbe(Obj); + + if (UsePseudoProbes) + populateElfSymbolAddressList(Obj); + + if (ShowDisassemblyOnly) + decodePseudoProbe(Obj); + // Disassemble the text sections. disassemble(Obj); @@ -352,10 +358,31 @@ if (!UsePseudoProbes) return; - std::unordered_set ProfiledGuids; - if (!ShowDisassemblyOnly) - for (auto *F : ProfiledFunctions) - ProfiledGuids.insert(Function::getGUID(F->FuncName)); + MCPseudoProbeDecoder::Uint64Set GuidFilter; + MCPseudoProbeDecoder::Uint64Map FuncStartAddresses; + if (ShowDisassemblyOnly) { + if (DisassembleFunctionSet.empty()) { + FuncStartAddresses = SymbolStartAddrs; + } else { + for (auto &F : DisassembleFunctionSet) { + auto GUID = Function::getGUID(F.first()); + if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) { + FuncStartAddresses[GUID] = StartAddr; + FuncRange &Range = StartAddrToFuncRangeMap[StartAddr]; + GuidFilter.insert(Function::getGUID(Range.getFuncName())); + } + } + } + } else { + for (auto *F : ProfiledFunctions) { + GuidFilter.insert(Function::getGUID(F->FuncName)); + for (auto &Range : F->Ranges) { + auto GUIDs = StartAddrToSymMap.equal_range(Range.first); + for (auto I = GUIDs.first; I != GUIDs.second; ++I) + FuncStartAddresses[I->second] = I->first; + } + } + } StringRef FileName = Obj->getFileName(); for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); @@ -374,7 +401,7 @@ StringRef Contents = unwrapOrError(Section.getContents(), FileName); if (!ProbeDecoder.buildAddress2ProbeMap( reinterpret_cast(Contents.data()), - Contents.size(), ProfiledGuids)) + Contents.size(), GuidFilter, FuncStartAddresses)) exitWithError("Pseudo Probe decoder fail in .pseudo_probe section"); } } @@ -578,8 +605,6 @@ for (std::pair &SecSyms : AllSymbols) stable_sort(SecSyms.second); - DisassembleFunctionSet.insert(DisassembleFunctions.begin(), - DisassembleFunctions.end()); assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) && "Functions to disassemble should be only specified together with " "--show-disassembly-only"); @@ -653,6 +678,20 @@ } } +void ProfiledBinary::populateElfSymbolAddressList( + const ELFObjectFileBase *Obj) { + // Create a mapping from virtual address to symbol GUID and the other way + // around. + StringRef FileName = Obj->getFileName(); + for (const SymbolRef &Symbol : Obj->symbols()) { + const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); + const StringRef Name = unwrapOrError(Symbol.getName(), FileName); + uint64_t GUID = Function::getGUID(Name); + SymbolStartAddrs[GUID] = Addr; + StartAddrToSymMap.emplace(Addr, GUID); + } +} + void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { for (const auto &DieInfo : CompilationUnit.dies()) { llvm::DWARFDie Die(&CompilationUnit, &DieInfo);