diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -9,6 +9,7 @@ #ifndef LLD_COFF_CONFIG_H #define LLD_COFF_CONFIG_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/COFF.h" @@ -29,6 +30,7 @@ class StringChunk; class Symbol; class InputFile; +class SectionChunk; // Short aliases. static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; @@ -200,6 +202,14 @@ // Used for /lto-obj-path: llvm::StringRef ltoObjPath; + // Used for /call-graph-ordering-file: + llvm::MapVector, + uint64_t> + callGraphProfile; + + // Used for /print-symbol-order: + StringRef printSymbolOrder; + uint64_t align = 4096; uint64_t imageBase = -1; uint64_t fileAlign = 512; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -924,6 +924,46 @@ } } +static void parseCallGraphFile(StringRef path) { + std::unique_ptr mb = CHECK( + MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); + + // Build a map from symbol name to section + DenseMap map; + for (ObjFile *file : ObjFile::instances) + for (Symbol *sym : file->getSymbols()) + if (sym) + map[sym->getName()] = sym; + + auto findSection = [&](StringRef name) -> SectionChunk * { + Symbol *sym = map.lookup(name); + if (!sym) { + if (config->warnMissingOrderSymbol) + warn(path + ": no such symbol: " + name); + return nullptr; + } + + if (DefinedCOFF *dr = dyn_cast_or_null(sym)) + return dyn_cast_or_null(dr->getChunk()); + return nullptr; + }; + + for (StringRef line : args::getLines(*mb)) { + SmallVector fields; + line.split(fields, ' '); + uint64_t count; + + if (fields.size() != 3 || !to_integer(fields[2], count)) { + error(path + ": parse error"); + return; + } + + if (SectionChunk *from = findSection(fields[0])) + if (SectionChunk *to = findSection(fields[1])) + config->callGraphProfile[std::make_pair(from, to)] += count; + } +} + static void markAddrsig(Symbol *s) { if (auto *d = dyn_cast_or_null(s)) if (SectionChunk *c = dyn_cast_or_null(d->getChunk())) @@ -2020,12 +2060,23 @@ if (config->manifest == Configuration::SideBySide) createSideBySideManifest(); + if (args.hasArg(OPT_order) && args.hasArg(OPT_call_graph_ordering_file)) + error("/order and /call-graph-order-file may not be used together"); + // Handle /order. We want to do this at this moment because we // need a complete list of comdat sections to warn on nonexistent // functions. if (auto *arg = args.getLastArg(OPT_order)) parseOrderFile(arg->getValue()); + // Handle /call-graph-ordering-file + if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) + parseCallGraphFile(arg->getValue()); + + // Handle /print-symbol-order + if (auto *arg = args.getLastArg(OPT_print_symbol_order)) + config->printSymbolOrder = arg->getValue(); + // Identify unreferenced COMDAT sections. if (config->doGC) markLive(symtab->getChunks()); diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -234,6 +234,13 @@ def threads : P<"threads", "Number of threads. '1' disables multi-threading. By " "default all available hardware threads are used">; +def call_graph_ordering_file: P< + "call-graph-ordering-file", + "Layout sections to optimize the given callgraph">; +def print_symbol_order: P< + "print-symbol-order", + "Print a symbol order specified by /call-graph-ordering-file into the " + "specified file">; // Flags for debugging def lldmap : F<"lldmap">; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -15,6 +15,7 @@ #include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" +#include "lld/Common/CallGraphSort.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" @@ -229,6 +230,7 @@ void setSectionPermissions(); void writeSections(); void writeBuildId(); + void sortSections(); void sortExceptionTable(); void sortCRTSectionChunks(std::vector &chunks); void addSyntheticIdata(); @@ -798,6 +800,27 @@ name.startswith(".xdata$") || name.startswith(".eh_frame$"); } +void Writer::sortSections() { + if (!config->callGraphProfile.empty()) { + DenseMap order = + computeCallGraphProfileOrder( + config->callGraphProfile, config->printSymbolOrder, + [](const SectionChunk *sc, Symbol *sym) { + if (auto *d = dyn_cast_or_null(sym)) + if (d->isCOMDAT && !d->getCOFFSymbol().isSection()) + return sc == d->getChunk(); + return false; + }); + for (auto I : order) { + if (DefinedRegular *sym = I.first->sym) + config->order[sym->getName()] = I.second; + } + } + if (!config->order.empty()) + for (auto it : partialSections) + sortBySectionOrder(it.second->chunks); +} + // Create output section objects and add them to OutputSections. void Writer::createSections() { // First, create the builtin sections. @@ -861,10 +884,7 @@ if (hasIdata) addSyntheticIdata(); - // Process an /order option. - if (!config->order.empty()) - for (auto it : partialSections) - sortBySectionOrder(it.second->chunks); + sortSections(); if (hasIdata) locateImportTables(); diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -29,6 +29,7 @@ add_lld_library(lldCommon Args.cpp + CallGraphSort.cpp DWARF.cpp ErrorHandler.cpp Filesystem.cpp diff --git a/lld/ELF/CallGraphSort.cpp b/lld/Common/CallGraphSort.cpp rename from lld/ELF/CallGraphSort.cpp rename to lld/Common/CallGraphSort.cpp --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/Common/CallGraphSort.cpp @@ -40,70 +40,29 @@ /// //===----------------------------------------------------------------------===// -#include "CallGraphSort.h" -#include "OutputSections.h" -#include "SymbolTable.h" -#include "Symbols.h" - +#include "lld/Common/CallGraphSort.h" +#include "../COFF/Chunks.h" +#include "../COFF/Symbols.h" +#include "../ELF/InputSection.h" +#include "../ELF/Symbols.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/FileSystem.h" + +#include #include -using namespace llvm; using namespace lld; -using namespace lld::elf; - -namespace { -struct Edge { - int from; - uint64_t weight; -}; - -struct Cluster { - Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} - - double getDensity() const { - if (size == 0) - return 0; - return double(weight) / double(size); - } - - int next; - int prev; - size_t size = 0; - uint64_t weight = 0; - uint64_t initialWeight = 0; - Edge bestPred = {-1, 0}; -}; - -class CallGraphSort { -public: - CallGraphSort(); - - DenseMap run(); - -private: - std::vector clusters; - std::vector sections; -}; - -// Maximum amount the combined cluster density can be worse than the original -// cluster to consider merging. -constexpr int MAX_DENSITY_DEGRADATION = 8; - -// Maximum cluster size in bytes. -constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024; -} // end anonymous namespace - -using SectionPair = - std::pair; // Take the edge list in Config->CallGraphProfile, resolve symbol names to // Symbols, and generate a graph between InputSections with the provided // weights. -CallGraphSort::CallGraphSort() { - MapVector &profile = config->callGraphProfile; - DenseMap secToCluster; +template +CallGraphSort::CallGraphSort( + MapVector, uint64_t> &profile) { + DenseMap secToCluster; - auto getOrCreateNode = [&](const InputSectionBase *isec) -> int { + auto getOrCreateNode = [&](const T *isec) -> int { auto res = secToCluster.try_emplace(isec, clusters.size()); if (res.second) { sections.push_back(isec); @@ -113,9 +72,9 @@ }; // Create the graph. - for (std::pair &c : profile) { - const auto *fromSB = cast(c.first.first->repl); - const auto *toSB = cast(c.first.second->repl); + for (std::pair, uint64_t> &c : profile) { + const auto *fromSB = cast(c.first.first->repl); + const auto *toSB = cast(c.first.second->repl); uint64_t weight = c.second; // Ignore edges between input sections belonging to different output @@ -178,7 +137,10 @@ // Group InputSections into clusters using the Call-Chain Clustering heuristic // then sort the clusters by density. -DenseMap CallGraphSort::run() { +template +DenseMap +CallGraphSort::run(StringRef printSymbolOrder, + function_ref cond) { std::vector sorted(clusters.size()); std::vector leaders(clusters.size()); @@ -221,8 +183,8 @@ return clusters[a].getDensity() > clusters[b].getDensity(); }); - DenseMap orderMap; - int curOrder = 1; + DenseMap orderMap; + int curOrder = INT_MIN; for (int leader : sorted) for (int i = leader;;) { orderMap[sections[i]] = curOrder++; @@ -231,25 +193,24 @@ break; } - if (!config->printSymbolOrder.empty()) { + if (!printSymbolOrder.empty()) { std::error_code ec; - raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); + raw_fd_ostream os(printSymbolOrder, ec, sys::fs::OF_None); if (ec) { - error("cannot open " + config->printSymbolOrder + ": " + ec.message()); + error("cannot open " + printSymbolOrder + ": " + ec.message()); return orderMap; } - // Print the symbols ordered by C3, in the order of increasing curOrder // Instead of sorting all the orderMap, just repeat the loops above. for (int leader : sorted) for (int i = leader;;) { + const T *sc = sections[i]; + // Search all the symbols in the file of the section // and find out a Defined symbol with name that is within the section. - for (Symbol *sym : sections[i]->file->getSymbols()) - if (!sym->isSection()) // Filter out section-type symbols here. - if (auto *d = dyn_cast(sym)) - if (sections[i] == d->section) - os << sym->getName() << "\n"; + for (Symbol *sym : sc->file->getSymbols()) + if (cond(sc, sym)) + os << sym->getName() << "\n"; i = clusters[i].next; if (i == leader) break; @@ -259,11 +220,5 @@ return orderMap; } -// Sort sections by the profile data provided by -callgraph-profile-file -// -// This first builds a call graph based on the profile data then merges sections -// according to the C³ heuristic. All clusters are then sorted by a density -// metric to further improve locality. -DenseMap elf::computeCallGraphProfileOrder() { - return CallGraphSort().run(); -} +template class CallGraphSort; +template class CallGraphSort; diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -23,7 +23,6 @@ Arch/X86.cpp Arch/X86_64.cpp ARMErrataFix.cpp - CallGraphSort.cpp DWARF.cpp Driver.cpp DriverUtils.cpp diff --git a/lld/ELF/CallGraphSort.h b/lld/ELF/CallGraphSort.h deleted file mode 100644 --- a/lld/ELF/CallGraphSort.h +++ /dev/null @@ -1,22 +0,0 @@ -//===- CallGraphSort.h ------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_ELF_CALL_GRAPH_SORT_H -#define LLD_ELF_CALL_GRAPH_SORT_H - -#include "llvm/ADT/DenseMap.h" - -namespace lld { -namespace elf { -class InputSectionBase; - -llvm::DenseMap computeCallGraphProfileOrder(); -} // namespace elf -} // namespace lld - -#endif diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -9,7 +9,6 @@ #include "Writer.h" #include "AArch64ErrataFix.h" #include "ARMErrataFix.h" -#include "CallGraphSort.h" #include "Config.h" #include "LinkerScript.h" #include "MapFile.h" @@ -19,6 +18,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "lld/Common/CallGraphSort.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" @@ -1305,7 +1305,14 @@ DenseMap sectionOrder; // Use the rarely used option -call-graph-ordering-file to sort sections. if (!config->callGraphProfile.empty()) - return computeCallGraphProfileOrder(); + return computeCallGraphProfileOrder( + config->callGraphProfile, config->printSymbolOrder, + [](const InputSectionBase *sc, Symbol *sym) { + if (!sym->isSection()) // Filter out section-type symbols here. + if (auto *d = dyn_cast(sym)) + return sc == d->section; + return false; + }); if (config->symbolOrderingFile.empty()) return sectionOrder; diff --git a/lld/include/lld/Common/CallGraphSort.h b/lld/include/lld/Common/CallGraphSort.h new file mode 100644 --- /dev/null +++ b/lld/include/lld/Common/CallGraphSort.h @@ -0,0 +1,79 @@ +//===- CallGraphSort.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CALL_GRAPH_SORT_H +#define LLD_CALL_GRAPH_SORT_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" + +using namespace llvm; + +namespace { +struct Edge { + int from; + uint64_t weight; +}; + +struct Cluster { + Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} + + double getDensity() const { + if (size == 0) + return 0; + return double(weight) / double(size); + } + + int next; + int prev; + size_t size = 0; + uint64_t weight = 0; + uint64_t initialWeight = 0; + Edge bestPred = {-1, 0}; +}; + +template using SectionPair = std::pair; + +// Maximum amount the combined cluster density can be worse than the original +// cluster to consider merging. +constexpr int MAX_DENSITY_DEGRADATION = 8; + +// Maximum cluster size in bytes. +constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024; +} // end anonymous namespace + +template class CallGraphSort { +public: + CallGraphSort(MapVector, uint64_t> &profile); + + DenseMap run(StringRef printSymbolOrder, + function_ref cond); + +private: + std::vector clusters; + std::vector sections; +}; + +namespace lld { + +// Sort sections by the profile data provided by /callgraph-profile-file +// +// This first builds a call graph based on the profile data then merges sections +// according to the C³ heuristic. All clusters are then sorted by a density +// metric to further improve locality. +template +DenseMap +computeCallGraphProfileOrder(MapVector, uint64_t> &profile, + StringRef printSymbolOrder, + function_ref cond) { + return CallGraphSort(profile).run(printSymbolOrder, cond); +} + +} // namespace lld + +#endif diff --git a/lld/test/COFF/cgprofile-bad-clusters.s b/lld/test/COFF/cgprofile-bad-clusters.s new file mode 100644 --- /dev/null +++ b/lld/test/COFF/cgprofile-bad-clusters.s @@ -0,0 +1,61 @@ +# REQUIRES: x86 +# This test checks that CallGraphSort ignores edges that would form "bad" +# clusters. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t +# RUN: echo "A C 1" > %t.call_graph +# RUN: echo "E B 4" >> %t.call_graph +# RUN: echo "C D 2" >> %t.call_graph +# RUN: echo "B D 1" >> %t.call_graph +# RUN: echo "F G 6" >> %t.call_graph +# RUN: echo "G H 5" >> %t.call_graph +# RUN: echo "H I 4" >> %t.call_graph +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s + + .section .text,"ax",one_only,A + .globl A +A: + retq + + .section .text,"ax",one_only,D +D: + .fill 1000, 1, 0 + + .section .text,"ax",one_only,E +E: + retq + + .section .text,"ax",one_only,C +C: + retq + + .section .text,"ax",one_only,B +B: + .fill 1000, 1, 0 + + .section .text,"ax",one_only,F +F: + .fill (1024 * 1024) - 1, 1, 0 + + .section .text,"ax",one_only,G +G: + retq + + .section .text,"ax",one_only,H +H: + retq + + .section .text,"ax",one_only,I +I: + .fill 13, 1, 0 + +# CHECK: 140001000 t H +# CHECK: 140001001 t I +# CHECK: 14000100e T A +# CHECK: 14000100f t C +# CHECK: 140001010 t E +# CHECK: 140001011 t B +# CHECK: 1400013f9 t D +# CHECK: 1400017e1 t F +# CHECK: 1401017e0 t G diff --git a/lld/test/COFF/cgprofile-icf.s b/lld/test/COFF/cgprofile-icf.s new file mode 100644 --- /dev/null +++ b/lld/test/COFF/cgprofile-icf.s @@ -0,0 +1,45 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t + +# RUN: echo "A B 100" > %t.call_graph +# RUN: echo "A C 40" >> %t.call_graph +# RUN: echo "C D 61" >> %t.call_graph +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /debug:symtab /opt:icf +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s --check-prefix=NOICF + + .section .text,"x",one_only,D + .globl D +D: + mov $60, %rax + retq + + .section .text,"x",one_only,C + .globl C +C: + mov $60, %rax + retq + + .section .text,"x",one_only,B + .globl B +B: + mov $2, %rax + retq + + .section .text,"x",one_only,A + .globl A +A: + mov $42, %rax + retq + +# CHECK: 140001000 T A +# CHECK: 140001008 T C +# CHECK: 140001008 T D +# CHECK: 140001010 T B + +# NOICF: 140001000 T A +# NOICF: 140001008 T B +# NOICF: 140001010 T C +# NOICF: 140001018 T D diff --git a/lld/test/COFF/cgprofile-print.s b/lld/test/COFF/cgprofile-print.s new file mode 100644 --- /dev/null +++ b/lld/test/COFF/cgprofile-print.s @@ -0,0 +1,35 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t +# RUN: echo "A B 5" > %t.call_graph +# RUN: echo "B C 50" >> %t.call_graph +# RUN: echo "C D 40" >> %t.call_graph +# RUN: echo "D B 10" >> %t.call_graph +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /print-symbol-order:%t3 +# RUN: FileCheck %s --input-file %t3 + +# CHECK: B +# CHECK-NEXT: C +# CHECK-NEXT: D +# CHECK-NEXT: A + +.section .text, "x", one_only, A +.globl A +A: + nop + +.section .text, "x", one_only, B +.globl B +B: + nop + +.section .text, "x", one_only, C +.globl C +C: + nop + +.section .text, "x", one_only, D +.globl D +D: + nop + diff --git a/lld/test/COFF/cgprofile-txt.s b/lld/test/COFF/cgprofile-txt.s new file mode 100644 --- /dev/null +++ b/lld/test/COFF/cgprofile-txt.s @@ -0,0 +1,43 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t +# RUN: lld-link /subsystem:console /entry:A %t /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s --check-prefix=NOSORT + +# RUN: echo "A B 5" > %t.call_graph +# RUN: echo "B C 50" >> %t.call_graph +# RUN: echo "C D 40" >> %t.call_graph +# RUN: echo "D B 10" >> %t.call_graph +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s + +# NOSORT: 140001000 T A +# NOSORT: 140001001 T B +# NOSORT: 140001002 T C +# NOSORT: 140001003 T D + +# CHECK: 140001000 T B +# CHECK: 140001001 T C +# CHECK: 140001002 T D +# CHECK: 140001003 T A + +.section .text, "x", one_only, A +.globl A +A: + nop + +.section .text, "x", one_only, B +.globl B +B: + nop + +.section .text, "x", one_only, C +.globl C +C: + nop + +.section .text, "x", one_only, D +.globl D +D: + nop +