Index: ELF/Config.h =================================================================== --- ELF/Config.h +++ ELF/Config.h @@ -10,6 +10,7 @@ #ifndef LLD_ELF_CONFIG_H #define LLD_ELF_CONFIG_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -108,6 +109,8 @@ std::vector VersionScriptLocals; std::vector BuildIdVector; llvm::MapVector RenamedSymbols; + llvm::DenseMap, uint64_t> + CGProfile; bool AllowMultipleDefinition; bool AsNeeded = false; bool Bsymbolic; Index: ELF/Driver.cpp =================================================================== --- ELF/Driver.cpp +++ ELF/Driver.cpp @@ -600,6 +600,23 @@ return Ret; } +static void readCallGraph(MemoryBufferRef MB) { + DenseMap, uint64_t> Ret; + std::vector Lines = getLines(MB); + for (StringRef L : Lines) { + SmallVector Fields; + L.split(Fields, ' '); + if (Fields.size() != 3) + fatal("parse error"); + uint64_t Count; + if (!to_integer(Fields[0], Count)) + fatal("parse error"); + StringRef From = Fields[1]; + StringRef To = Fields[2]; + Config->CGProfile[std::make_pair(From, To)] = Count; + } +} + static bool getCompressDebugSections(opt::InputArgList &Args) { StringRef S = Args.getLastArgValue(OPT_compress_debug_sections, "none"); if (S == "none") @@ -727,6 +744,10 @@ if (Optional Buffer = readFile(Arg->getValue())) Config->SymbolOrderingFile = getLines(*Buffer); + if (auto *Arg = Args.getLastArg(OPT_callgraph_ordering_file)) + if (Optional Buffer = readFile(Arg->getValue())) + readCallGraph(*Buffer); + // If --retain-symbol-file is used, we'll keep only the symbols listed in // the file and discard all others. if (auto *Arg = Args.getLastArg(OPT_retain_symbols_file)) { Index: ELF/Options.td =================================================================== --- ELF/Options.td +++ ELF/Options.td @@ -52,6 +52,9 @@ def as_needed: F<"as-needed">, HelpText<"Only set DT_NEEDED for shared libraries if used">; +def callgraph_ordering_file: S<"callgraph-ordering-file">, + HelpText<"Layout sections to optimize the given callgraph">; + // -chroot doesn't have a help text because it is an internal option. def chroot: S<"chroot">; Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -20,11 +20,13 @@ #include "SyntheticSections.h" #include "Target.h" #include "Threads.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace llvm; using namespace llvm::ELF; @@ -879,6 +881,152 @@ Sec->sort([&](InputSectionBase *S) { return SectionOrder.lookup(S); }); } +// Sort sections by the profile data provided by -callgraph-ordering-file +// +// This algorithm is based on Call-Chain Clustering from: +// Optimizing Function Placement for Large-Scale Data-Center Applications +// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf +// +// This first builds a call graph based on the profile data then iteratively +// merges the hottest call edges as long as it would not create a cluster larger +// than the page size. All clusters are then sorted by a density metric to +// further improve locality. +template static void sortByCGProfile() { + using NodeIndex = std::ptrdiff_t; + + struct Node { + Node() {} + Node(const InputSectionBase *IS) { + Sections.push_back(IS); + Size = IS->getSize(); + } + std::vector Sections; + int64_t Size = 0; + uint64_t Weight = 0; + }; + + struct Edge { + NodeIndex From; + NodeIndex To; + mutable uint64_t Weight; + bool operator==(const Edge Other) const { + return From == Other.From && To == Other.To; + } + }; + + struct EdgeHash { + std::size_t operator()(const Edge E) const { + return llvm::hash_combine(E.From, E.To); + }; + }; + + std::vector Nodes; + std::unordered_set Edges; + + auto InsertOrIncrementEdge = [](std::unordered_set &Edges, + const Edge E) { + if (E.From == E.To) + return; + auto Res = Edges.insert(E); + if (!Res.second) + Res.first->Weight = SaturatingAdd(Res.first->Weight, E.Weight); + }; + + { + llvm::DenseMap SecToNode; + + auto GetOrCreateNode = + [&Nodes, &SecToNode](const InputSectionBase *IS) -> NodeIndex { + auto Res = SecToNode.insert(std::make_pair(IS, Nodes.size())); + if (Res.second) + Nodes.emplace_back(IS); + return Res.first->second; + }; + + // Create the graph. + for (const auto &C : Config->CGProfile) { + if (C.second == 0) + continue; + DefinedRegular *FromDR = + dyn_cast_or_null(Symtab->find(C.first.first)); + DefinedRegular *ToDR = + dyn_cast_or_null(Symtab->find(C.first.second)); + if (!FromDR || !ToDR) + continue; + auto FromSB = dyn_cast_or_null(FromDR->Section); + auto ToSB = dyn_cast_or_null(ToDR->Section); + if (!FromSB || !ToSB) + continue; + NodeIndex From = GetOrCreateNode(FromSB); + NodeIndex To = GetOrCreateNode(ToSB); + InsertOrIncrementEdge(Edges, {From, To, C.second}); + Nodes[To].Weight = SaturatingAdd(Nodes[To].Weight, C.second); + } + } + + // Collapse the graph. + while (!Edges.empty()) { + // Find the largest edge + // FIXME: non deterministic order for equal edges. + // FIXME: n^2 + auto Max = std::max_element( + Edges.begin(), Edges.end(), + [](const Edge A, const Edge B) { return A.Weight < B.Weight; }); + const Edge MaxE = *Max; + Edges.erase(Max); + // Merge the Nodes. + Node &From = Nodes[MaxE.From]; + Node &To = Nodes[MaxE.To]; + if (From.Size + To.Size > Target->PageSize) + continue; + From.Sections.insert(From.Sections.end(), To.Sections.begin(), + To.Sections.end()); + To.Sections.clear(); + From.Size += To.Size; + From.Weight = SaturatingAdd(From.Weight, To.Weight); + // Collect all edges from or to the removed node and update them for the new + // node. + std::vector OldEdges; + // FIXME: n^2 + for (auto EI = Edges.begin(), EE = Edges.end(); EI != EE;) { + if (EI->From == MaxE.To || EI->To == MaxE.To) { + OldEdges.push_back(*EI); + EI = Edges.erase(EI); + } else + ++EI; + } + for (const Edge E : OldEdges) { + InsertOrIncrementEdge(Edges, + {E.From == MaxE.To ? MaxE.From : E.From, + E.To == MaxE.To ? MaxE.From : E.To, E.Weight}); + } + } + + // Sort by density. + std::sort(Nodes.begin(), Nodes.end(), [](const Node &A, const Node &B) { + return double(A.Weight) / double(A.Size) < + double(B.Weight) / double(B.Size); + }); + + // Generate order. + llvm::DenseMap OrderMap; + ssize_t CurOrder = 0; + + for (const Node &N : Nodes) { + if (N.Sections.empty()) + continue; + for (const InputSectionBase *IS : N.Sections) + OrderMap[IS] = CurOrder++; + } + for (BaseCommand *Base : Script->Opt.Commands) { + auto *OS = dyn_cast(Base); + if (!OS || OS->Name != ".text") + continue; + OS->sort([&](InputSectionBase *IS) { return OrderMap.lookup(IS); }); + break; + } +} + template void Writer::forEachRelSec(std::function Fn) { for (InputSectionBase *IS : InputSections) { @@ -911,6 +1059,7 @@ Old.end()); Script->fabricateDefaultCommands(); + sortByCGProfile(); sortBySymbolsOrder(); sortInitFini(findSection(".init_array")); sortInitFini(findSection(".fini_array")); Index: test/ELF/Inputs/cgprofile.txt =================================================================== --- /dev/null +++ test/ELF/Inputs/cgprofile.txt @@ -0,0 +1,6 @@ +5000 c a +4000 c b +0 d e +18446744073709551615 e d +18446744073709551611 f d +18446744073709551612 f e Index: test/ELF/cgprofile.s =================================================================== --- /dev/null +++ test/ELF/cgprofile.s @@ -0,0 +1,99 @@ +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1 +# RUN: ld.lld %t1 -e a -o %t -callgraph-ordering-file %p/Inputs/cgprofile.txt +# RUN: llvm-readobj -symbols %t | FileCheck %s + + .section .text.a,"ax",@progbits + .global a +a: + .zero 20 + + .section .text.b,"ax",@progbits + .global b +b: + .zero 1 + + .section .text.c,"ax",@progbits + .global c +c: + .zero 4095 + + .section .text.d,"ax",@progbits + .global d +d: + .zero 51 + + .section .text.e,"ax",@progbits + .global e +e: + .zero 42 + + .section .text.f,"ax",@progbits + .global f +f: + .zero 42 + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: (0) +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Local (0x0) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined (0x0) +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: a +# CHECK-NEXT: Value: 0x202000 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: b +# CHECK-NEXT: Value: 0x201FFF +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: c +# CHECK-NEXT: Value: 0x201000 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: d +# CHECK-NEXT: Value: 0x202068 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: e +# CHECK-NEXT: Value: 0x20203E +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: f +# CHECK-NEXT: Value: 0x202014 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT:]