Index: lld/ELF/CMakeLists.txt =================================================================== --- lld/ELF/CMakeLists.txt +++ lld/ELF/CMakeLists.txt @@ -18,6 +18,7 @@ Arch/SPARCV9.cpp Arch/X86.cpp Arch/X86_64.cpp + CallGraphSort.cpp Driver.cpp DriverUtils.cpp EhFrame.cpp Index: lld/ELF/CallGraphSort.h =================================================================== --- /dev/null +++ lld/ELF/CallGraphSort.h @@ -0,0 +1,24 @@ +//===- CallGraphSort.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_CALL_GRAPH_SORT_H +#define LLD_ELF_CALL_GRAPH_SORT_H + +#include "llvm/ADT/DenseMap.h" + +namespace lld { +namespace elf { +class InputSectionBase; + +llvm::DenseMap +computeCallGraphProfileOrder(); +} +} + +#endif Index: lld/ELF/CallGraphSort.cpp =================================================================== --- /dev/null +++ lld/ELF/CallGraphSort.cpp @@ -0,0 +1,353 @@ +//===- CallGraphSort.cpp --------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file This file implements Call-Chain Clustering from: +/// Optimizing Function Placement for Large-Scale Data-Center Applications +/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf +/// +/// The goal of this algorithm is to improve runtime performance of the final +/// executable by arranging code sections such that page table and i-cache +/// misses are minimized. +/// +/// Definitions: +/// * Cluster +/// * An ordered list of input sections which are laid out as a unit. 
At the +/// beginning of the algorithm each input section has its own cluster and +/// the weight of the cluster is the sum of the weight of all incoming +/// edges. +/// * Call-Chain Clustering (C³) Heuristic +/// * Defines when and how clusters are combined. Pick the highest weight edge +/// from cluster _u_ to _v_ then move the sections in _v_ and append them to +/// _u_ unless the combined size would be larger than the page size. +/// * Density +/// * The weight of the cluster divided by the size of the cluster. This is a +/// proxy for the amount of execution time spent per byte of the cluster. +/// +/// It does so given a call graph profile by the following: +/// * Build a call graph from the profile +/// * While there are unresolved edges +/// * Find the edge with the highest weight +/// * Check if merging the two clusters would create a cluster larger than the +/// target page size +/// * If not, contract that edge putting the callee after the caller +/// * Sort remaining clusters by density +/// +//===----------------------------------------------------------------------===// + +#include "CallGraphSort.h" +#include "SymbolTable.h" +#include "Target.h" + +#include "llvm/Support/MathExtras.h" + +#include +#include +#include + +using namespace llvm; +using namespace lld; +using namespace lld::elf; + +namespace { +class CallGraphSort { + using NodeIndex = std::ptrdiff_t; + using EdgeIndex = std::ptrdiff_t; + + struct Node { + Node() = default; + Node(const InputSectionBase *IS); + std::vector Sections; + std::vector IncidentEdges; + int64_t Size = 0; + uint64_t Weight = 0; + }; + + struct Edge { + NodeIndex From; + NodeIndex To; + mutable uint64_t Weight; + bool operator==(const Edge Other) const; + bool operator<(const Edge Other) const; + void kill(); + bool isDead() const; + }; + + struct EdgeDenseMapInfo { + static Edge getEmptyKey() { + return {DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey(), 0}; + } + static Edge getTombstoneKey() { + return 
{DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey(), 0}; + } + static unsigned getHashValue(const Edge &Val) { + return hash_combine(DenseMapInfo::getHashValue(Val.From), + DenseMapInfo::getHashValue(Val.To)); + } + static bool isEqual(const Edge &LHS, const Edge &RHS) { return LHS == RHS; } + }; + + std::vector Nodes; + std::vector Edges; + struct EdgePriorityCmp { + std::vector &Edges; + bool operator()(EdgeIndex A, EdgeIndex B) const { + return Edges[A].Weight < Edges[B].Weight; + } + }; + + typedef std::multiset priority; + typedef std::unordered_map priority_lookup; + + priority WorkQueue{EdgePriorityCmp{Edges}}; + priority_lookup WorkLookup; + + void contractEdge(EdgeIndex CEI); + void generateClusters(); + +public: + CallGraphSort(DenseMap, + uint64_t> &Profile); + + DenseMap run(); +}; +} // end anonymous namespace + +CallGraphSort::Node::Node(const InputSectionBase *IS) { + Sections.push_back(IS); + Size = IS->getSize(); +} + +bool CallGraphSort::Edge::operator==(const Edge Other) const { + return From == Other.From && To == Other.To; +} + +bool CallGraphSort::Edge::operator<(const Edge Other) const { + if (From != Other.From) + return From < Other.From; + return To < Other.To; +} + +void CallGraphSort::Edge::kill() { + From = 0; + To = 0; +} + +bool CallGraphSort::Edge::isDead() const { return From == 0 && To == 0; } + +// Take the edge list in Config->CallGraphProfile, resolve symbol names to +// Symbols, and generate a graph between InputSections with the provided +// weights. +CallGraphSort::CallGraphSort( + DenseMap, uint64_t> + &Profile) { + DenseMap SecToNode; + DenseMap EdgeMap; + + auto GetOrCreateNode = [&](const InputSectionBase *IS) -> NodeIndex { + auto Res = SecToNode.insert(std::make_pair(IS, Nodes.size())); + if (Res.second) + Nodes.emplace_back(IS); + return Res.first->second; + }; + + // Create the graph. 
+ for (const auto &C : Profile) { + const Symbol *FromSym = C.first.first; + const Symbol *ToSym = C.first.second; + uint64_t Weight = C.second; + + if (Weight == 0) + continue; + + // Get the input section for a given symbol. + auto *FromDR = dyn_cast_or_null(FromSym); + auto *ToDR = dyn_cast_or_null(ToSym); + if (!FromDR || !ToDR) + continue; + + auto *FromSB = dyn_cast_or_null(FromDR->Section); + auto *ToSB = dyn_cast_or_null(ToDR->Section); + if (!FromSB || !ToSB || FromSB->getSize() == 0 || ToSB->getSize() == 0) + continue; + + NodeIndex From = GetOrCreateNode(FromSB); + NodeIndex To = GetOrCreateNode(ToSB); + Edge E{From, To, Weight}; + + // Add or increment an edge + auto Res = EdgeMap.insert(std::make_pair(E, Edges.size())); + EdgeIndex EI = Res.first->second; + if (Res.second) { + Edges.push_back(E); + Nodes[From].IncidentEdges.push_back(EI); + Nodes[To].IncidentEdges.push_back(EI); + } else + Edges[EI].Weight = SaturatingAdd(Edges[EI].Weight, Weight); + + Nodes[To].Weight = SaturatingAdd(Nodes[To].Weight, Weight); + } +} + +/// Remove edge \p CEI from the graph while simultaneously merging its two +/// incident vertices u and v. This merges any duplicate edges between u and v +/// by accumulating their weights. +void CallGraphSort::contractEdge(EdgeIndex CEI) { + // Make a copy of the edge as the original will be marked killed while being + // used. + Edge CE = Edges[CEI]; + if (CE.From == CE.To) { + auto I = WorkLookup.find(CEI); + if (I != WorkLookup.end()) { + WorkQueue.erase(I->second); + WorkLookup.erase(I); + } else + assert(std::find(WorkQueue.begin(), WorkQueue.end(), CEI) == + WorkQueue.end()); + Edges[CEI].kill(); + return; + } + std::vector &FE = Nodes[CE.From].IncidentEdges; + + // Remove the self edge from From. + FE.erase(std::remove(FE.begin(), FE.end(), CEI)); + std::vector &TE = Nodes[CE.To].IncidentEdges; + + // Update all edges incident with To to reference From instead. Then if they + // aren't self edges add them to From. 
+ for (EdgeIndex EI : TE) { + Edge &E = Edges[EI]; + if (E.From == CE.To) + E.From = CE.From; + if (E.To == CE.To) + E.To = CE.From; + if (E.To == E.From) { + E.kill(); + continue; + } + FE.push_back(EI); + } + + // Free memory. + std::vector().swap(TE); + + if (FE.empty()) + return; + + // Sort edges so they can be merged. The stability of this sort doesn't matter + // as equal edges will be merged in an order independent manner. + std::sort(FE.begin(), FE.end(), + [&](EdgeIndex AI, EdgeIndex BI) { return Edges[AI] < Edges[BI]; }); + + // std::unique, but also merge equal values. + auto First = FE.begin(); + auto Last = FE.end(); + auto Result = First; + while (++First != Last) { + if (Edges[*Result] == Edges[*First]) { + // Remove first and result. + auto F = WorkLookup.find(*First); + if (F != WorkLookup.end()) { + WorkQueue.erase(F->second); + WorkLookup.erase(F); + } else + assert(std::find(WorkQueue.begin(), WorkQueue.end(), *First) == + WorkQueue.end()); + auto R = WorkLookup.find(*Result); + if (R == WorkLookup.end()) { + assert(std::find(WorkQueue.begin(), WorkQueue.end(), *Result) == + WorkQueue.end()); + R = WorkLookup.insert(std::make_pair(*Result, WorkQueue.end())).first; + } else + WorkQueue.erase(R->second); + + Edges[*Result].Weight = + SaturatingAdd(Edges[*Result].Weight, Edges[*First].Weight); + Edges[*First].kill(); + + // Reinsert result. + R->second = WorkQueue.insert(*Result); + } else if (++Result != First) + *Result = *First; + } + FE.erase(++Result, FE.end()); +} + +// Group InputSections into clusters using the Call-Chain Clustering heuristic +// then sort the clusters by density. +void CallGraphSort::generateClusters() { + for (size_t I = 0; I < Edges.size(); ++I) { + WorkLookup[I] = WorkQueue.insert(I); + } + + // Collapse the graph. 
+ while (!WorkQueue.empty()) { + priority::const_iterator I = --WorkQueue.end(); + EdgeIndex MaxI = *I; + const Edge MaxE = Edges[MaxI]; + WorkLookup.erase(*I); + WorkQueue.erase(I); + if (MaxE.isDead()) + continue; + // Merge the Nodes. + Node &From = Nodes[MaxE.From]; + Node &To = Nodes[MaxE.To]; + if (From.Size + To.Size > Target->PageSize) + continue; + contractEdge(MaxI); + From.Sections.insert(From.Sections.end(), To.Sections.begin(), + To.Sections.end()); + From.Size += To.Size; + From.Weight = SaturatingAdd(From.Weight, To.Weight); + To.Sections.clear(); + To.Size = 0; + To.Weight = 0; + } + + // Remove empty or dead nodes. + Nodes.erase(std::remove_if(Nodes.begin(), Nodes.end(), + [](const Node &N) { + return N.Size == 0 || N.Sections.empty(); + }), + Nodes.end()); + + // Sort by density. Invalidates all NodeIndexes. + std::sort(Nodes.begin(), Nodes.end(), [](const Node &A, const Node &B) { + return (APFloat(APFloat::IEEEdouble(), A.Weight) / + APFloat(APFloat::IEEEdouble(), A.Size)) + .compare(APFloat(APFloat::IEEEdouble(), B.Weight) / + APFloat(APFloat::IEEEdouble(), B.Size)) == + APFloat::cmpLessThan; + }); +} + +DenseMap CallGraphSort::run() { + generateClusters(); + + // Generate order. + llvm::DenseMap OrderMap; + ssize_t CurOrder = 1; + + for (const Node &N : Nodes) + for (const InputSectionBase *IS : N.Sections) + OrderMap[IS] = CurOrder++; + + return OrderMap; +} + +// Sort sections by the profile data provided by -call-graph-profile-file +// +// This first builds a call graph based on the profile data then iteratively +// merges the hottest call edges as long as it would not create a cluster larger +// than the page size. All clusters are then sorted by a density metric to +// further improve locality. 
+DenseMap elf::computeCallGraphProfileOrder() { + return CallGraphSort(Config->CallGraphProfile).run(); +} Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -10,6 +10,7 @@ #ifndef LLD_ELF_CONFIG_H #define LLD_ELF_CONFIG_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -24,6 +25,7 @@ namespace elf { class InputFile; +class Symbol; enum ELFKind { ELFNoneKind, @@ -91,6 +93,7 @@ llvm::StringRef SoName; llvm::StringRef Sysroot; llvm::StringRef ThinLTOCacheDir; + llvm::StringRef CallGraphProfileFile; std::string Rpath; std::vector VersionDefinitions; std::vector Argv; @@ -103,6 +106,8 @@ std::vector VersionScriptGlobals; std::vector VersionScriptLocals; std::vector BuildIdVector; + llvm::DenseMap, uint64_t> + CallGraphProfile; bool AllowMultipleDefinition; bool AndroidPackDynRelocs = false; bool ARMHasBlx = false; @@ -111,6 +116,7 @@ bool AsNeeded = false; bool Bsymbolic; bool BsymbolicFunctions; + bool CallGraphProfileSort = true; bool CompressDebugSections; bool DefineCommon; bool Demangle = true; Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -565,6 +565,42 @@ return {BuildIdKind::None, {}}; } +// This reads a list of call edges with weights one line at a time from a file +// with the following format for each line: +// +// ^[.*]+ [.*]+ [.*]+$ +// +// It interprets the first value as an unsigned 64 bit weight, the second as +// the symbol the call is from, and the third as the symbol the call is to. 
+// +// Example: +// +// 5000 c a +// 4000 c b +// 18446744073709551615 e d +// +template +void readCallGraphProfile(MemoryBufferRef MB) { + for (StringRef L : args::getLines(MB)) { + SmallVector Fields; + L.split(Fields, ' '); + if (Fields.size() != 3) { + error("parse error: " + MB.getBufferIdentifier() + ": " + L); + return; + } + uint64_t Count; + if (!to_integer(Fields[0], Count)) { + error("parse error: " + MB.getBufferIdentifier() + ": " + L); + return; + } + StringRef From = Fields[1]; + StringRef To = Fields[2]; + Config->CallGraphProfile[std::make_pair(Symtab->addUndefined(From), + Symtab->addUndefined(To))] = + Count; + } +} + static bool getCompressDebugSections(opt::InputArgList &Args) { StringRef S = Args.getLastArgValue(OPT_compress_debug_sections, "none"); if (S == "none") @@ -590,6 +626,8 @@ Config->AuxiliaryList = args::getStrings(Args, OPT_auxiliary); Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic); Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions); + Config->CallGraphProfileSort = Args.hasFlag( + OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true); Config->Chroot = Args.getLastArgValue(OPT_chroot); Config->CompressDebugSections = getCompressDebugSections(Args); Config->DefineCommon = Args.hasFlag(OPT_define_common, OPT_no_define_common, @@ -743,6 +781,9 @@ if (Optional Buffer = readFile(Arg->getValue())) Config->SymbolOrderingFile = args::getLines(*Buffer); + if (auto *Arg = Args.getLastArg(OPT_call_graph_profile_file)) + Config->CallGraphProfileFile = Arg->getValue(); + // If --retain-symbol-file is used, we'll keep only the symbols listed in // the file and discard all others. 
if (auto *Arg = Args.getLastArg(OPT_retain_symbols_file)) { @@ -1015,6 +1056,11 @@ Config->HasDynSymTab = !SharedFiles.empty() || Config->Pic || Config->ExportDynamic; + if (!Config->CallGraphProfileFile.empty()) + if (Optional Buffer = + readFile(Config->CallGraphProfileFile)) + readCallGraphProfile(*Buffer); + // Some symbols (such as __ehdr_start) are defined lazily only when there // are undefined symbols for them, so we add these to trigger that logic. for (StringRef Sym : Script->ReferencedSymbols) Index: lld/ELF/InputFiles.h =================================================================== --- lld/ELF/InputFiles.h +++ lld/ELF/InputFiles.h @@ -154,6 +154,7 @@ typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::CGProfile Elf_CGProfile; StringRef getShtGroupSignature(ArrayRef Sections, const Elf_Shdr &Sec); @@ -201,6 +202,7 @@ initializeSections(llvm::DenseSet &ComdatGroups); void initializeSymbols(); void initializeDwarf(); + void parseCGProfile(); InputSectionBase *getRelocTarget(const Elf_Shdr &Sec); InputSectionBase *createInputSection(const Elf_Shdr &Sec); StringRef getSectionName(const Elf_Shdr &Sec); @@ -218,6 +220,8 @@ std::unique_ptr DwarfLine; llvm::DenseMap> VariableLoc; llvm::once_flag InitDwarfLine; + + ArrayRef CGProfile; }; // LazyObjFile is analogous to ArchiveFile in the sense that Index: lld/ELF/InputFiles.cpp =================================================================== --- lld/ELF/InputFiles.cpp +++ lld/ELF/InputFiles.cpp @@ -177,6 +177,15 @@ return ""; } +template +void lld::elf::ObjFile::parseCGProfile() { + for (const Elf_CGProfile &CGPE : CGProfile) { + uint64_t &C = Config->CallGraphProfile[std::make_pair( + &getSymbol(CGPE.cgp_from), &getSymbol(CGPE.cgp_to))]; + C = std::max(C, (uint64_t)CGPE.cgp_weight); + } +} + // Returns "", "foo.a(bar.o)" or "baz.o". 
std::string lld::toString(const InputFile *F) { if (!F) @@ -242,6 +251,7 @@ // Read section and symbol tables. initializeSections(ComdatGroups); initializeSymbols(); + parseCGProfile(); } // Sections with SHT_GROUP and comdat bits define comdat section groups. @@ -588,6 +598,13 @@ if (Name == ".eh_frame" && !Config->Relocatable) return make(this, &Sec, Name); + // Profile data. + if (Name == ".note.llvm.cgprofile") { + CGProfile = check( + this->getObj().template getSectionContentsAsArray(&Sec)); + return &InputSection::Discarded; + } + if (shouldMerge(Sec)) return make(this, &Sec, Name); return make(this, &Sec, Name); Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -51,6 +51,12 @@ def as_needed: F<"as-needed">, HelpText<"Only set DT_NEEDED for shared libraries if used">; +def call_graph_profile_file: S<"call-graph-profile-file">, + HelpText<"Layout sections to optimize the given callgraph">; + +def call_graph_profile_sort: F<"call-graph-profile-sort">, + HelpText<"Sort sections by call graph profile information">; + // -chroot doesn't have a help text because it is an internal option. 
def chroot: S<"chroot">; @@ -163,6 +169,9 @@ def no_as_needed: F<"no-as-needed">, HelpText<"Always DT_NEEDED for shared libraries">; +def no_call_graph_profile_sort: F<"no-call-graph-profile-sort">, + HelpText<"Don't sort sections by call graph profile information">; + def no_color_diagnostics: F<"no-color-diagnostics">, HelpText<"Do not use colors in diagnostics">; Index: lld/ELF/Writer.cpp =================================================================== --- lld/ELF/Writer.cpp +++ lld/ELF/Writer.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Writer.h" +#include "CallGraphSort.h" #include "Config.h" #include "Filesystem.h" #include "LinkerScript.h" @@ -1019,6 +1020,15 @@ template void Writer::sortInputSections() { assert(!Script->HasSectionsCommand); + // Sort .text by the call graph profile, if enabled and a profile was given. + if (Config->CallGraphProfileSort && !Config->CallGraphProfile.empty()) { + DenseMap OrderMap = + computeCallGraphProfileOrder(); + + if (OutputSection *Sec = findSection(".text")) + Sec->sort([&](InputSectionBase *S) { return OrderMap.lookup(S); }); + } + // Sort input sections by priority using the list provided // by --symbol-ordering-file. 
DenseMap Order = buildSectionOrder(); Index: lld/test/ELF/Inputs/cgprofile.txt =================================================================== --- /dev/null +++ lld/test/ELF/Inputs/cgprofile.txt @@ -0,0 +1,7 @@ +5000 c a +4000 c b +0 d e +18446744073709551615 e d +18446744073709551611 f d +18446744073709551612 f e +6000 c h Index: lld/test/ELF/cgprofile-object.s =================================================================== --- /dev/null +++ lld/test/ELF/cgprofile-object.s @@ -0,0 +1,50 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: ld.lld %t -o %t2 +# RUN: llvm-readobj -symbols %t2 | FileCheck %s +# RUN: ld.lld %t -o %t2 -no-call-graph-profile-sort +# RUN: llvm-readobj -symbols %t2 | FileCheck %s --check-prefix=NOSORT + + .section .text.hot._Z4fooav,"ax",@progbits + .globl _Z4fooav +_Z4fooav: + retq + + .section .text.hot._Z4foobv,"ax",@progbits + .globl _Z4foobv +_Z4foobv: + retq + + .section .text.hot._Z3foov,"ax",@progbits + .globl _Z3foov +_Z3foov: + retq + + .section .text.hot._start,"ax",@progbits + .globl _start +_start: + retq + + + .cg_profile _start, _Z3foov, 1 + .cg_profile _Z4fooav, _Z4foobv, 1 + .cg_profile _Z3foov, _Z4fooav, 1 + +# CHECK: Name: _Z3foov +# CHECK-NEXT: Value: 0x201001 +# CHECK: Name: _Z4fooav +# CHECK-NEXT: Value: 0x201002 +# CHECK: Name: _Z4foobv +# CHECK-NEXT: Value: 0x201003 +# CHECK: Name: _start +# CHECK-NEXT: Value: 0x201000 + +# NOSORT: Name: _Z3foov +# NOSORT-NEXT: Value: 0x201002 +# NOSORT: Name: _Z4fooav +# NOSORT-NEXT: Value: 0x201000 +# NOSORT: Name: _Z4foobv +# NOSORT-NEXT: Value: 0x201001 +# NOSORT: Name: _start +# NOSORT-NEXT: Value: 0x201003 Index: lld/test/ELF/cgprofile.s =================================================================== --- /dev/null +++ lld/test/ELF/cgprofile.s @@ -0,0 +1,128 @@ +# REQUIRES: x86 +# +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1 +# RUN: ld.lld %t1 -e a -o %t -call-graph-profile-file 
%p/Inputs/cgprofile.txt +# RUN: llvm-readobj -symbols %t | FileCheck %s + + .section .text.a,"ax",@progbits + .global a +a: + .zero 20 + + .section .text.b,"ax",@progbits + .global b +b: + .zero 1 + + .section .text.c,"ax",@progbits + .global c +c: + .zero 4095 + + .section .text.d,"ax",@progbits + .global d +d: + .zero 51 + + .section .text.e,"ax",@progbits + .global e +e: + .zero 42 + + .section .text.f,"ax",@progbits + .global f +f: + .zero 42 + + .section .text.g,"ax",@progbits + .global g +g: + .zero 34 + + .section .text.h,"ax",@progbits + .global h +h: + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: (0) +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Local (0x0) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined (0x0) +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: a +# CHECK-NEXT: Value: 0x202022 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: b +# CHECK-NEXT: Value: 0x202021 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: c +# CHECK-NEXT: Value: 0x201022 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: d +# CHECK-NEXT: Value: 0x20208A +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: e +# CHECK-NEXT: Value: 0x202060 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# 
CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: f +# CHECK-NEXT: Value: 0x202036 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: g +# CHECK-NEXT: Value: 0x201000 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: h +# CHECK-NEXT: Value: 0x201022 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT:] Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -84,6 +84,7 @@ void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); void initializeCFGPrinterLegacyPassPass(PassRegistry&); +void initializeCFGProfilePassPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); void initializeCFGViewerLegacyPassPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); Index: llvm/include/llvm/LinkAllPasses.h =================================================================== --- llvm/include/llvm/LinkAllPasses.h +++ llvm/include/llvm/LinkAllPasses.h @@ -76,6 +76,7 @@ (void) llvm::createCallGraphDOTPrinterPass(); (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); + (void) llvm::createCFGProfilePass(); (void) llvm::createCFLAndersAAWrapperPass(); (void) llvm::createCFLSteensAAWrapperPass(); (void) llvm::createStructurizeCFGPass(); Index: llvm/include/llvm/MC/MCAssembler.h =================================================================== 
--- llvm/include/llvm/MC/MCAssembler.h +++ llvm/include/llvm/MC/MCAssembler.h @@ -393,6 +393,13 @@ const MCLOHContainer &getLOHContainer() const { return const_cast(this)->getLOHContainer(); } + + struct CGProfileEntry { + const MCSymbol *From; + const MCSymbol *To; + uint64_t Count; + }; + std::vector CGProfile; /// @} /// \name Backend Data Access /// @{ Index: llvm/include/llvm/MC/MCELFStreamer.h =================================================================== --- llvm/include/llvm/MC/MCELFStreamer.h +++ llvm/include/llvm/MC/MCELFStreamer.h @@ -66,6 +66,9 @@ void EmitValueToAlignment(unsigned, int64_t, unsigned, unsigned) override; + void emitCGProfileEntry(const MCSymbol *From, const MCSymbol *To, + uint64_t Count) override; + void FinishImpl() override; void EmitBundleAlignMode(unsigned AlignPow2) override; Index: llvm/include/llvm/MC/MCStreamer.h =================================================================== --- llvm/include/llvm/MC/MCStreamer.h +++ llvm/include/llvm/MC/MCStreamer.h @@ -848,6 +848,9 @@ SMLoc Loc = SMLoc()); virtual void EmitWinEHHandlerData(SMLoc Loc = SMLoc()); + virtual void emitCGProfileEntry(const MCSymbol *From, const MCSymbol *To, + uint64_t Count); + /// Get the .pdata section used for the given section. Typically the given /// section is either the main .text section or some other COMDAT .text /// section, but it may be any section containing code. 
Index: llvm/include/llvm/Object/ELFTypes.h =================================================================== --- llvm/include/llvm/Object/ELFTypes.h +++ llvm/include/llvm/Object/ELFTypes.h @@ -40,6 +40,7 @@ template struct Elf_Hash_Impl; template struct Elf_GnuHash_Impl; template struct Elf_Chdr_Impl; +template struct Elf_CGProfile_Impl; template struct ELFType { private: @@ -66,6 +67,7 @@ using Hash = Elf_Hash_Impl>; using GnuHash = Elf_GnuHash_Impl>; using Chdr = Elf_Chdr_Impl>; + using CGProfile = Elf_CGProfile_Impl>; using DynRange = ArrayRef; using ShdrRange = ArrayRef; using SymRange = ArrayRef; @@ -590,6 +592,14 @@ Elf_Xword ch_addralign; }; +template +struct Elf_CGProfile_Impl { + LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) + Elf_Word cgp_from; + Elf_Word cgp_to; + Elf_Xword cgp_weight; +}; + // MIPS .reginfo section template struct Elf_Mips_RegInfo; Index: llvm/include/llvm/Transforms/Instrumentation.h =================================================================== --- llvm/include/llvm/Transforms/Instrumentation.h +++ llvm/include/llvm/Transforms/Instrumentation.h @@ -180,6 +180,8 @@ ModulePass *createSanitizerCoverageModulePass( const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()); +ModulePass *createCFGProfilePass(); + /// \brief Calculate what to divide by to scale counts. 
/// /// Given the maximum count, calculate a divisor that will scale all the Index: llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -98,16 +98,60 @@ StringRef Section; GetObjCImageInfo(M, Version, Flags, Section); - if (Section.empty()) - return; + if (!Section.empty()) { + auto &C = getContext(); + auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + Streamer.SwitchSection(S); + Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(Version, 4); + Streamer.EmitIntValue(Flags, 4); + Streamer.AddBlankLine(); + } - auto &C = getContext(); - auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - Streamer.SwitchSection(S); - Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); - Streamer.EmitIntValue(Version, 4); - Streamer.EmitIntValue(Flags, 4); - Streamer.AddBlankLine(); + SmallVector ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + + MDNode *CFGProfile = nullptr; + + for (const auto &MFE : ModuleFlags) { + StringRef Key = MFE.Key->getString(); + if (Key == "CFG Profile") { + CFGProfile = cast(MFE.Val); + break; + } + } + + if (!CFGProfile) + return; + /*MCSectionELF *Sec = + getContext().getELFSection(".note.llvm.callgraph", ELF::SHT_NOTE, 0); + Streamer.SwitchSection(Sec); + SmallString<256> Out; + for (const auto &Edge : CFGProfile->operands()) { + raw_svector_ostream O(Out); + MDNode *E = cast(Edge); + O << cast(E->getOperand(0))->getString() << " " + << cast(E->getOperand(1))->getString() << " " + << cast(E->getOperand(2)) + ->getValue() + ->getUniqueInteger() + .getZExtValue() + << "\n"; + Streamer.EmitBytes(O.str()); + Out.clear(); + }*/ + for (const auto &Edge : CFGProfile->operands()) { + MDNode *E = cast(Edge); + const MCSymbol *From = Streamer.getContext().getOrCreateSymbol( + 
cast(E->getOperand(0))->getString()); + const MCSymbol *To = Streamer.getContext().getOrCreateSymbol( + cast(E->getOperand(1))->getString()); + uint64_t Count = cast(E->getOperand(2)) + ->getValue() + ->getUniqueInteger() + .getZExtValue(); + Streamer.emitCGProfileEntry(From, To, Count); + } } MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( Index: llvm/lib/MC/ELFObjectWriter.cpp =================================================================== --- llvm/lib/MC/ELFObjectWriter.cpp +++ llvm/lib/MC/ELFObjectWriter.cpp @@ -1299,6 +1299,13 @@ } } + MCSectionELF *CGProfileSection = nullptr; + if (!Asm.CGProfile.empty()) { + CGProfileSection = + Ctx.getELFSection(".note.llvm.cgprofile", ELF::SHT_NOTE, 0, 16, ""); + SectionIndexMap[CGProfileSection] = addToSectionTable(CGProfileSection); + } + for (MCSectionELF *Group : Groups) { align(Group->getAlignment()); @@ -1333,6 +1340,17 @@ SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd); } + if (CGProfileSection) { + uint64_t SecStart = getStream().tell(); + for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) { + write32(CGPE.From->getIndex()); + write32(CGPE.To->getIndex()); + write64(CGPE.Count); + } + uint64_t SecEnd = getStream().tell(); + SectionOffsets[CGProfileSection] = std::make_pair(SecStart, SecEnd); + } + { uint64_t SecStart = getStream().tell(); const MCSectionELF *Sec = createStringTable(Ctx); Index: llvm/lib/MC/MCAsmStreamer.cpp =================================================================== --- llvm/lib/MC/MCAsmStreamer.cpp +++ llvm/lib/MC/MCAsmStreamer.cpp @@ -291,6 +291,9 @@ SMLoc Loc) override; void EmitWinEHHandlerData(SMLoc Loc) override; + void emitCGProfileEntry(const MCSymbol *From, const MCSymbol *To, + uint64_t Count) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool PrintSchedInfo) override; @@ -1561,6 +1564,16 @@ EmitEOL(); } +void MCAsmStreamer::emitCGProfileEntry(const MCSymbol *From, const MCSymbol *To, + uint64_t Count) 
{ + OS << "\t.cg_profile "; + From->print(OS, MAI); + OS << ", "; + To->print(OS, MAI); + OS << ", " << Count; + EmitEOL(); +} + void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &STI, bool PrintSchedInfo) { Index: llvm/lib/MC/MCELFStreamer.cpp =================================================================== --- llvm/lib/MC/MCELFStreamer.cpp +++ llvm/lib/MC/MCELFStreamer.cpp @@ -365,6 +365,11 @@ ValueSize, MaxBytesToEmit); } +void MCELFStreamer::emitCGProfileEntry(const MCSymbol *From, const MCSymbol *To, + uint64_t Count) { + getAssembler().CGProfile.push_back({From, To, Count}); +} + void MCELFStreamer::EmitIdent(StringRef IdentString) { MCSection *Comment = getAssembler().getContext().getELFSection( ".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, ""); Index: llvm/lib/MC/MCParser/ELFAsmParser.cpp =================================================================== --- llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -85,6 +85,7 @@ addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSubsection>(".subsection"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveCGProfile>(".cg_profile"); } // FIXME: Part of this logic is duplicated in the MCELFStreamer. 
What is @@ -149,6 +150,7 @@ bool ParseDirectiveWeakref(StringRef, SMLoc); bool ParseDirectiveSymbolAttribute(StringRef, SMLoc); bool ParseDirectiveSubsection(StringRef, SMLoc); + bool ParseDirectiveCGProfile(StringRef, SMLoc); private: bool ParseSectionName(StringRef &SectionName); @@ -838,6 +840,40 @@ return false; } +/// ParseDirectiveCGProfile +/// ::= .cg_profile identifier, identifier, +bool ELFAsmParser::ParseDirectiveCGProfile(StringRef, SMLoc) { + StringRef From; + if (getParser().parseIdentifier(From)) + return TokError("expected identifier in directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("expected a comma"); + Lex(); + + StringRef To; + if (getParser().parseIdentifier(To)) + return TokError("expected identifier in directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("expected a comma"); + Lex(); + + int64_t Count; + if (getParser().parseIntToken( + Count, "expected integer count in '.cg_profile' directive")) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + MCSymbol *FromSym = getContext().getOrCreateSymbol(From); + MCSymbol *ToSym = getContext().getOrCreateSymbol(To); + + getStreamer().emitCGProfileEntry(FromSym, ToSym, Count); + return false; +} + namespace llvm { MCAsmParserExtension *createELFAsmParser() { Index: llvm/lib/MC/MCStreamer.cpp =================================================================== --- llvm/lib/MC/MCStreamer.cpp +++ llvm/lib/MC/MCStreamer.cpp @@ -639,6 +639,10 @@ getContext().reportError(Loc, "Chained unwind areas can't have handlers!"); } +void MCStreamer::emitCGProfileEntry(const MCSymbol *From, const MCSymbol *To, + uint64_t Count) { +} + static MCSection *getWinCFISection(MCContext &Context, unsigned *NextWinCFIID, MCSection *MainCFISec, const MCSection *TextSec) { Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- 
llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -672,6 +672,8 @@ MPM.add(createConstantMergePass()); // Merge dup global constants } + MPM.add(createCFGProfilePass()); + if (MergeFunctions) MPM.add(createMergeFunctionsPass()); Index: llvm/lib/Transforms/Instrumentation/CFGProfile.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Instrumentation/CFGProfile.cpp @@ -0,0 +1,103 @@ +//===-- CFGProfile.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Instrumentation.h" + +#include + +using namespace llvm; + +class CFGProfilePass : public ModulePass { +public: + static char ID; + + CFGProfilePass() : ModulePass(ID) { + initializeCFGProfilePassPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "CFGProfilePass"; } + +private: + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } +}; + +bool CFGProfilePass::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + llvm::DenseMap, uint64_t> Counts; + + for (auto &F : M) { + if (F.isDeclaration()) + continue; + getAnalysis(F).getBPI(); + auto &BFI = getAnalysis(F).getBFI(); + for (auto &BB : F) { + Optional BBCount = BFI.getBlockProfileCount(&BB); + if (!BBCount) + continue; + for (auto &I : BB) { + auto *CI = dyn_cast(&I); + if (!CI) + continue; + Function 
*CalledF = CI->getCalledFunction(); + if (!CalledF || CalledF->isIntrinsic()) + continue; + + uint64_t &Count = + Counts[std::make_pair(F.getName(), CalledF->getName())]; + Count = SaturatingAdd(Count, *BBCount); + } + } + } + + if (Counts.empty()) + return false; + + LLVMContext &Context = M.getContext(); + MDBuilder MDB(Context); + std::vector Nodes; + + for (auto E : Counts) { + SmallVector Vals; + Vals.push_back(MDB.createString(E.first.first)); + Vals.push_back(MDB.createString(E.first.second)); + Vals.push_back(MDB.createConstant( + ConstantInt::get(Type::getInt64Ty(Context), E.second))); + Nodes.push_back(MDNode::get(Context, Vals)); + } + + M.addModuleFlag(Module::Append, "CFG Profile", MDNode::get(Context, Nodes)); + + return true; +} + +char CFGProfilePass::ID = 0; +INITIALIZE_PASS_BEGIN(CFGProfilePass, "cfg-profile", + "Generate profile information from the CFG.", false, false) + INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) + INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) + INITIALIZE_PASS_END(CFGProfilePass, "cfg-profile", + "Generate profile information from the CFG.", false, false) + +ModulePass *llvm::createCFGProfilePass() { + return new CFGProfilePass(); +} Index: llvm/lib/Transforms/Instrumentation/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp BoundsChecking.cpp + CFGProfile.cpp DataFlowSanitizer.cpp GCOVProfiling.cpp MemorySanitizer.cpp Index: llvm/lib/Transforms/Instrumentation/Instrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -60,6 +60,7 @@ initializeAddressSanitizerModulePass(Registry); initializeBoundsCheckingLegacyPassPass(Registry); 
initializeGCOVProfilerLegacyPassPass(Registry); + initializeCFGProfilePassPass(Registry); initializePGOInstrumentationGenLegacyPassPass(Registry); initializePGOInstrumentationUseLegacyPassPass(Registry); initializePGOIndirectCallPromotionLegacyPassPass(Registry); Index: llvm/tools/llvm-readobj/ELFDumper.cpp =================================================================== --- llvm/tools/llvm-readobj/ELFDumper.cpp +++ llvm/tools/llvm-readobj/ELFDumper.cpp @@ -97,6 +97,7 @@ using Elf_Vernaux = typename ELFO::Elf_Vernaux; \ using Elf_Verdef = typename ELFO::Elf_Verdef; \ using Elf_Verdaux = typename ELFO::Elf_Verdaux; \ + using Elf_CGProfile = typename ELFT::CGProfile; \ using uintX_t = typename ELFO::uintX_t; namespace { @@ -161,6 +162,8 @@ void printHashHistogram() override; + void printCGProfile() override; + void printNotes() override; private: @@ -205,6 +208,7 @@ const Elf_Hash *HashTable = nullptr; const Elf_GnuHash *GnuHashTable = nullptr; const Elf_Shdr *DotSymtabSec = nullptr; + const Elf_Shdr *DotCGProfileSec = nullptr; StringRef DynSymtabName; ArrayRef ShndxTable; @@ -249,9 +253,11 @@ Elf_Rela_Range dyn_relas() const; std::string getFullSymbolName(const Elf_Sym *Symbol, StringRef StrTable, bool IsDynamic) const; + StringRef getStaticSymbolName(uint32_t Index) const; void printSymbolsHelper(bool IsDynamic) const; const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; } + const Elf_Shdr *getDotCGProfileSec() const { return DotCGProfileSec; } ArrayRef getShndxTable() const { return ShndxTable; } StringRef getDynamicStringTable() const { return DynamicStringTable; } const DynRegionInfo &getDynRelRegion() const { return DynRelRegion; } @@ -309,6 +315,7 @@ bool IsDynamic) = 0; virtual void printProgramHeaders(const ELFFile *Obj) = 0; virtual void printHashHistogram(const ELFFile *Obj) = 0; + virtual void printCGProfile(const ELFFile *Obj) = 0; virtual void printNotes(const ELFFile *Obj) = 0; const ELFDumper *dumper() const { return Dumper; } @@ -336,6 
+343,7 @@ size_t Offset) override; void printProgramHeaders(const ELFO *Obj) override; void printHashHistogram(const ELFFile *Obj) override; + void printCGProfile(const ELFFile *Obj) override; void printNotes(const ELFFile *Obj) override; private: @@ -394,6 +402,7 @@ void printDynamicRelocations(const ELFO *Obj) override; void printProgramHeaders(const ELFO *Obj) override; void printHashHistogram(const ELFFile *Obj) override; + void printCGProfile(const ELFFile *Obj) override; void printNotes(const ELFFile *Obj) override; private: @@ -735,6 +744,16 @@ } template +StringRef ELFDumper::getStaticSymbolName(uint32_t Index) const { + StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec)); + Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec)); + if (Index >= Syms.size()) + reportError("Invalid symbol index"); + const Elf_Sym *Sym = &Syms[Index]; + return unwrapOrError(Sym->getName(StrTable)); +} + +template static void getSectionNameIndex(const ELFFile &Obj, const typename ELFT::Sym *Symbol, const typename ELFT::Sym *FirstSym, @@ -1342,6 +1361,12 @@ reportError("Multiple SHT_GNU_verneed"); dot_gnu_version_r_sec = &Sec; break; + case ELF::SHT_NOTE: + if (unwrapOrError(Obj->getSectionName(&Sec)) != ".note.llvm.cgprofile") + break; + if (DotCGProfileSec != nullptr) + reportError("Multiple .note.llvm.cgprofile"); + DotCGProfileSec = &Sec; } } @@ -1486,6 +1511,10 @@ ELFDumperStyle->printHashHistogram(Obj); } +template void ELFDumper::printCGProfile() { + ELFDumperStyle->printCGProfile(Obj); +} + template void ELFDumper::printNotes() { ELFDumperStyle->printNotes(Obj); } @@ -3388,6 +3417,11 @@ } } +template +void GNUStyle::printCGProfile(const ELFFile *Obj) { + OS<< "GNUStyle::printCGProfile not implemented\n"; +} + static std::string getGNUNoteTypeName(const uint32_t NT) { static const struct { uint32_t ID; @@ -3988,6 +4022,22 @@ W.startLine() << "Hash Histogram not implemented!\n"; } + + +template +void LLVMStyle::printCGProfile(const 
ELFFile *Obj) { + ListScope L(W, "CGProfile"); + if (!this->dumper()->getDotCGProfileSec()) + return; + auto CGProfile = unwrapOrError(Obj->template getSectionContentsAsArray(this->dumper()->getDotCGProfileSec())); + for (const Elf_CGProfile &CGPE : CGProfile) { + DictScope D(W, "CGProfileEntry"); + W.printNumber("From", this->dumper()->getStaticSymbolName(CGPE.cgp_from), CGPE.cgp_from); + W.printNumber("To", this->dumper()->getStaticSymbolName(CGPE.cgp_to), CGPE.cgp_to); + W.printNumber("Weight", CGPE.cgp_weight); + } +} + template void LLVMStyle::printNotes(const ELFFile *Obj) { W.startLine() << "printNotes not implemented!\n"; Index: llvm/tools/llvm-readobj/ObjDumper.h =================================================================== --- llvm/tools/llvm-readobj/ObjDumper.h +++ llvm/tools/llvm-readobj/ObjDumper.h @@ -47,6 +47,7 @@ virtual void printVersionInfo() {} virtual void printGroupSections() {} virtual void printHashHistogram() {} + virtual void printCGProfile() {} virtual void printNotes() {} // Only implemented for ARM ELF at this time. Index: llvm/tools/llvm-readobj/llvm-readobj.cpp =================================================================== --- llvm/tools/llvm-readobj/llvm-readobj.cpp +++ llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -284,6 +284,8 @@ cl::alias HashHistogramShort("I", cl::desc("Alias for -elf-hash-histogram"), cl::aliasopt(HashHistogram)); + cl::opt CGProfile("elf-cg-profile", cl::desc("Display callgraph profile section")); + cl::opt Output("elf-output-style", cl::desc("Specify ELF dump style"), cl::values(clEnumVal(LLVM, "LLVM default style"), @@ -439,6 +441,8 @@ Dumper->printGroupSections(); if (opts::HashHistogram) Dumper->printHashHistogram(); + if (opts::CGProfile) + Dumper->printCGProfile(); if (opts::Notes) Dumper->printNotes(); }