diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -72,6 +72,7 @@ Option Passes Support + TransformUtils TargetParser LINK_LIBS diff --git a/lld/ELF/CallGraphSort.h b/lld/ELF/CallGraphSort.h --- a/lld/ELF/CallGraphSort.h +++ b/lld/ELF/CallGraphSort.h @@ -14,6 +14,8 @@ namespace lld::elf { class InputSectionBase; +llvm::DenseMap applyCDSort(); + llvm::DenseMap computeCallGraphProfileOrder(); } // namespace lld::elf diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -45,6 +45,7 @@ #include "InputSection.h" #include "Symbols.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Transforms/Utils/CodeLayout.h" #include @@ -260,11 +261,87 @@ return orderMap; } +using CallT = std::pair; + +// Fill in necessary data for Cache-Directed Sort. +void buildCallGraph(std::vector &FuncSizes, + std::vector &FuncCounts, + std::vector> &CallCounts, + std::vector &CallOffsets, + std::vector &Sections) { + MapVector &Profile = config->callGraphProfile; + DenseMap SecToTargetId; + + auto getOrCreateNode = [&](const InputSectionBase *InSec) -> size_t { + auto Res = SecToTargetId.try_emplace(InSec, Sections.size()); + if (Res.second) { + // Isec does not appear before in the graph. + Sections.push_back(InSec); + assert(InSec->getSize() > 0 && "found a function with zero size"); + FuncSizes.push_back(InSec->getSize()); + FuncCounts.push_back(0); + } + return Res.first->second; + }; + + // Create the graph + for (std::pair &C : Profile) { + const InputSectionBase *FromSB = cast(C.first.first); + const InputSectionBase *ToSB = cast(C.first.second); + // Ignore edges between input sections belonging to different sections. + if (FromSB->getOutputSection() != ToSB->getOutputSection()) + continue; + + uint64_t Weight = C.second; + // Ignore edges with zero weight. + if (Weight == 0) + continue; + + size_t From = getOrCreateNode(FromSB); + size_t To = getOrCreateNode(ToSB); + + // Ignore self-edges (recursive calls). + if (From == To) + continue; + + auto It = std::make_pair(From, To); + CallCounts.push_back(std::make_pair(It, Weight)); + CallOffsets.push_back((FuncSizes[From] + 1) / 2); + FuncCounts[To] += Weight; + } +} + +// Sort sections by the profile data using the Cache-Directed Sort algorithm. +// The placement is done by optimizing the locality by co-locating frequently +// executed code sections together. +DenseMap elf::applyCDSort() { + // Creating a call graph and necessary data. + std::vector FuncSizes; + std::vector FuncCounts; + std::vector> CallCounts; + std::vector CallOffsets; + std::vector Sections; + buildCallGraph(FuncSizes, FuncCounts, CallCounts, CallOffsets, Sections); + + // Run the layout algorithm. + std::vector SortedSections = + applyCDSLayout(FuncSizes, FuncCounts, CallCounts, CallOffsets); + + // Create the final order. + DenseMap OrderMap; + int CurOrder = 1; + for (uint64_t SecIdx : SortedSections) + OrderMap[Sections[SecIdx]] = CurOrder++; + + return OrderMap; +} + // Sort sections by the profile data provided by --callgraph-profile-file. // // This first builds a call graph based on the profile data then merges sections -// according to the C³ heuristic. All clusters are then sorted by a density -// metric to further improve locality. +// according either to the C³ or Cache-Directed-Sort ordering algorithm. DenseMap elf::computeCallGraphProfileOrder() { + if (config->useCDSort) + return applyCDSort(); return CallGraphSort().run(); } diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -289,6 +289,7 @@ bool undefinedVersion; bool unique; bool useAndroidRelrTags = false; + bool useCDSort; bool warnBackrefs; llvm::SmallVector warnBackrefsExclude; bool warnCommon; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1360,6 +1360,7 @@ config->unique = args.hasArg(OPT_unique); config->useAndroidRelrTags = args.hasFlag( OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false); + config->useCDSort = args.hasFlag(OPT_use_cdsort, OPT_no_use_cdsort, true); config->warnBackrefs = args.hasFlag(OPT_warn_backrefs, OPT_no_warn_backrefs, false); config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -337,6 +337,10 @@ "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*", "Use SHT_RELR / DT_RELR* tags (default)">; +defm use_cdsort: B<"use-cdsort", + "Use cache-directed-sort to reorder functions in the binary (default)", + "Do not use cache-directed-sort to reorder functions in the binary">; + def pic_veneer: F<"pic-veneer">, HelpText<"Always generate position independent thunks (veneers)">; diff --git a/lld/test/ELF/cgprofile-bad-clusters.s b/lld/test/ELF/cgprofile-bad-clusters.s --- a/lld/test/ELF/cgprofile-bad-clusters.s +++ b/lld/test/ELF/cgprofile-bad-clusters.s @@ -10,7 +10,7 @@ # RUN: echo "F G 6" >> %t.call_graph # RUN: echo "G H 5" >> %t.call_graph # RUN: echo "H I 4" >> %t.call_graph -# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 +# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --no-use-cdsort -o %t2 # RUN: llvm-readobj --symbols %t2 | FileCheck %s .section .text.A,"ax",@progbits diff --git a/lld/test/ELF/cgprofile-icf.s b/lld/test/ELF/cgprofile-icf.s --- a/lld/test/ELF/cgprofile-icf.s +++ b/lld/test/ELF/cgprofile-icf.s @@ -5,9 +5,9 @@ # RUN: echo "A B 100" > %t.call_graph # RUN: echo "A C 40" >> %t.call_graph # RUN: echo "C D 61" >> %t.call_graph -# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t.out -icf=all +# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --no-use-cdsort -o %t.out -icf=all # RUN: llvm-readobj --symbols %t.out | FileCheck %s -# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2.out +# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --no-use-cdsort -o %t2.out # RUN: llvm-readobj --symbols %t2.out | FileCheck %s --check-prefix=NOICF .section .text.D,"ax",@progbits diff --git a/lld/test/ELF/cgprofile-rela.test b/lld/test/ELF/cgprofile-rela.test --- a/lld/test/ELF/cgprofile-rela.test +++ b/lld/test/ELF/cgprofile-rela.test @@ -3,7 +3,7 @@ # REQUIRES: x86 # RUN: yaml2obj %s -o %t.o -# RUN: ld.lld %t.o -o %t +# RUN: ld.lld --no-use-cdsort %t.o -o %t # RUN: llvm-nm --no-sort %t | FileCheck %s # RUN: ld.lld --no-call-graph-profile-sort %t.o -o %t # RUN: llvm-nm --no-sort %t | FileCheck %s --check-prefix=NO-CG diff --git a/lld/test/ELF/cgprofile-txt.s b/lld/test/ELF/cgprofile-txt.s --- a/lld/test/ELF/cgprofile-txt.s +++ b/lld/test/ELF/cgprofile-txt.s @@ -24,8 +24,10 @@ # RUN: echo "TooManyPreds8 TooManyPreds 10" >> %t.call_graph # RUN: echo "TooManyPreds9 TooManyPreds 10" >> %t.call_graph # RUN: echo "TooManyPreds10 TooManyPreds 11" >> %t.call_graph -# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 +# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --no-use-cdsort -o %t2 # RUN: llvm-readobj --symbols %t2 | FileCheck %s +# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --use-cdsort -o %t2 +# RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefix=CDSORT .section .text.D,"ax",@progbits D: @@ -159,6 +161,31 @@ # CHECK: Name: _init2 # CHECK-NEXT: Value: 0x201141 +# CDSORT: Name: D +# CDSORT-NEXT: Value: 0x201123 +# CDSORT: Name: TooManyPreds +# CDSORT-NEXT: Value: 0x20112F +# CDSORT: Name: TooManyPreds10 +# CDSORT-NEXT: Value: 0x20112E +# CDSORT: Name: C +# CDSORT-NEXT: Value: 0x201122 +# CDSORT: Name: B +# CDSORT-NEXT: Value: 0x201121 +# CDSORT: Name: A +# CDSORT-NEXT: Value: 0x201120 +# CDSORT: Name: TS +# CDSORT-NEXT: Value: 0x20113D +# CDSORT: Name: PP +# CDSORT-NEXT: Value: 0x20113C +# CDSORT: Name: QC +# CDSORT-NEXT: Value: 0x20113E +# CDSORT: Name: GB +# CDSORT-NEXT: Value: 0x20113F +# CDSORT: Name: _init +# CDSORT-NEXT: Value: 0x201140 +# CDSORT: Name: _init2 +# CDSORT-NEXT: Value: 0x201141 + # NOSORT: Name: D # NOSORT-NEXT: Value: 0x201120 # NOSORT: Name: TooManyPreds diff --git a/lld/test/ELF/cgprofile-txt2.s b/lld/test/ELF/cgprofile-txt2.s --- a/lld/test/ELF/cgprofile-txt2.s +++ b/lld/test/ELF/cgprofile-txt2.s @@ -5,17 +5,28 @@ # RUN: echo "B C 50" >> %t.call_graph # RUN: echo "C D 40" >> %t.call_graph # RUN: echo "D B 10" >> %t.call_graph +# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --no-use-cdsort -o %t2 +# RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefix=CHECKC3 # RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 -# RUN: llvm-readobj --symbols %t2 | FileCheck %s +# RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefix=CHECKCDS -# CHECK: Name: A -# CHECK-NEXT: Value: 0x201123 -# CHECK: Name: B -# CHECK-NEXT: Value: 0x201120 -# CHECK: Name: C -# CHECK-NEXT: Value: 0x201121 -# CHECK: Name: D -# CHECK-NEXT: Value: 0x201122 +# CHECKC3: Name: A +# CHECKC3-NEXT: Value: 0x201123 +# CHECKC3: Name: B +# CHECKC3-NEXT: Value: 0x201120 +# CHECKC3: Name: C +# CHECKC3-NEXT: Value: 0x201121 +# CHECKC3: Name: D +# CHECKC3-NEXT: Value: 0x201122 + +# CHECKCDS: Name: A +# CHECKCDS-NEXT: Value: 0x201120 +# CHECKCDS: Name: B +# CHECKCDS-NEXT: Value: 0x201121 +# CHECKCDS: Name: C +# CHECKCDS-NEXT: Value: 0x201122 +# CHECKCDS: Name: D +# CHECKCDS-NEXT: Value: 0x201123 .section .text.A,"ax",@progbits .globl A