Index: ELF/CMakeLists.txt =================================================================== --- ELF/CMakeLists.txt +++ ELF/CMakeLists.txt @@ -6,6 +6,7 @@ Driver.cpp DriverUtils.cpp Error.cpp + ICF.cpp InputFiles.cpp InputSection.cpp LinkerScript.cpp Index: ELF/Config.h =================================================================== --- ELF/Config.h +++ ELF/Config.h @@ -63,6 +63,7 @@ bool ExportDynamic; bool GcSections; bool GnuHash = false; + bool ICF; bool Mips64EL = false; bool NoInhibitExec; bool NoUndefined; Index: ELF/Driver.cpp =================================================================== --- ELF/Driver.cpp +++ ELF/Driver.cpp @@ -10,6 +10,7 @@ #include "Driver.h" #include "Config.h" #include "Error.h" +#include "ICF.h" #include "InputFiles.h" #include "LinkerScript.h" #include "SymbolTable.h" @@ -216,6 +217,7 @@ Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags); Config->ExportDynamic = Args.hasArg(OPT_export_dynamic); Config->GcSections = Args.hasArg(OPT_gc_sections); + Config->ICF = Args.hasArg(OPT_icf); Config->NoInhibitExec = Args.hasArg(OPT_noinhibit_exec); Config->NoUndefined = Args.hasArg(OPT_no_undefined); Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections); @@ -370,5 +372,7 @@ Symtab.scanShlibUndefined(); if (Config->GcSections) markLive(&Symtab); + if (Config->ICF) + doIcf(&Symtab); writeResult(&Symtab); } Index: ELF/ICF.h =================================================================== --- /dev/null +++ ELF/ICF.h @@ -0,0 +1,22 @@ +//===- ICF.h --------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ICF_H +#define LLD_ELF_ICF_H + +namespace lld { +namespace elf2 { + +template class SymbolTable; + +template void doIcf(SymbolTable *); +} +} + +#endif Index: ELF/ICF.cpp =================================================================== --- /dev/null +++ ELF/ICF.cpp @@ -0,0 +1,382 @@ +//===- ICF.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Identical Code Folding is a feature to merge sections not by name (which +// is regular comdat handling) but by contents. If two non-writable sections +// have the same data, relocations, attributes, etc., then the two +// are considered identical and merged by the linker. This optimization +// makes outputs smaller. +// +// ICF is theoretically a problem of reducing graphs by merging as many +// identical subgraphs as possible if we consider sections as vertices and +// relocations as edges. It may sound simple, but it is a bit more +// complicated than you might think. The order of processing sections +// matters because merging two sections can make other sections, whose +// relocations now point to the same section, mergeable. Graphs may contain +// cycles. We need a sophisticated algorithm to do this properly and +// efficiently. +// +// What we do in this file is this. We split sections into groups. Sections +// in the same group are considered identical. +// +// We begin by optimistically putting all sections into a single equivalence +// class. Then we apply a series of checks that split this initial +// equivalence class into more and more refined equivalence classes based on +// the properties by which a section can be distinguished. +// +// We begin by checking that the section contents and flags are the +// same. This only needs to be done once since these properties don't depend +// on the current equivalence class assignment. +// +// Then we split the equivalence classes based on checking that their +// relocations are the same, where relocation targets are compared by their +// equivalence class, not the concrete section. This may need to be done +// multiple times because as the equivalence classes are refined, two +// sections that had a relocation target in the same equivalence class may +// now target different equivalence classes, and hence these two sections +// must be put in different equivalence classes (whereas in the previous +// iteration they were not since the relocation target was the same.) +// +// Our algorithm is smart enough to merge the following mutually-recursive +// functions. +// +// void foo() { bar(); } +// void bar() { foo(); } +// +// This algorithm is so-called "optimistic" algorithm described in +// http://research.google.com/pubs/pub36912.html. (Note that what GNU +// gold implemented is different from the optimistic algorithm.) +// +//===----------------------------------------------------------------------===// + +#include "ICF.h" +#include "Config.h" +#include "OutputSections.h" +#include "SymbolTable.h" + +#include "llvm/ADT/Hashing.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" + +using namespace lld; +using namespace lld::elf2; +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; + +namespace lld { +namespace elf2 { +template class ICF { + typedef typename ELFFile::Elf_Shdr Elf_Shdr; + typedef typename ELFFile::Elf_Sym Elf_Sym; + typedef typename ELFFile::uintX_t uintX_t; + typedef Elf_Rel_Impl Elf_Rel; + + using Comparator = std::function *, + const InputSection *)>; + +public: + void run(SymbolTable *Symtab); + +private: + uint64_t NextId = 1; + + static void setLive(SymbolTable *S); + static uint64_t relSize(InputSection *S); + static uint64_t getHash(InputSection *S); + static bool isEligible(InputSectionBase *Sec); + static std::vector *> getSections(SymbolTable *S); + static SymbolBody *getSymbol(const InputSection *Sec, + const Elf_Rel *Rel); + + void segregate(InputSection **Begin, InputSection **End, + Comparator Eq); + + void forEachGroup(std::vector *> &V, Comparator Eq); + + template + static bool relocationEq(iterator_range RA, + iterator_range RB); + + template + static bool variableEq(const InputSection *A, + const InputSection *B, + iterator_range RA, + iterator_range RB); + + static bool equalsConstant(const InputSection *A, + const InputSection *B); + + static bool equalsVariable(const InputSection *A, + const InputSection *B); +}; +} +} + +// Returns a hash seed for relocation sections for S. +template uint64_t ICF::relSize(InputSection *S) { + uint64_t Ret = 0; + for (const Elf_Shdr *H : S->RelocSections) + Ret += H->sh_size; + return Ret; +} + +// Returns a hash value for S. Note that the information about +// relocation targets is not included in the hash value. +template uint64_t ICF::getHash(InputSection *S) { + uint64_t Flags = S->getSectionHdr()->sh_flags; + uint64_t H = hash_combine(Flags, S->getSize()); + if (S->RelocSections.empty()) + return H; + return hash_combine(H, relSize(S)); +} + +// Returns true if Sec is subject of ICF. +template bool ICF::isEligible(InputSectionBase *Sec) { + if (!Sec || Sec == InputSection::Discarded || !Sec->Live) + return false; + auto *S = dyn_cast>(Sec); + if (!S) + return false; + + // .init and .fini contains instructions that must be executed to + // initialize and finalize the process. They cannot and should not + // be merged. + StringRef Name = S->getSectionName(); + if (Name == ".init" || Name == ".fini") + return false; + + const Elf_Shdr &H = *S->getSectionHdr(); + return (H.sh_flags & SHF_ALLOC) && (~H.sh_flags & SHF_WRITE); +} + +template +std::vector *> +ICF::getSections(SymbolTable *Symtab) { + std::vector *> V; + for (const std::unique_ptr> &F : Symtab->getObjectFiles()) + for (InputSectionBase *S : F->getSections()) + if (isEligible(S)) + V.push_back(cast>(S)); + return V; +} + +template +SymbolBody *ICF::getSymbol(const InputSection *Sec, + const Elf_Rel *Rel) { + uint32_t SymIdx = Rel->getSymbol(Config->Mips64EL); + return Sec->File->getSymbolBody(SymIdx); +} + +// All sections between Begin and End must have the same group ID before +// you call this function. This function compare sections between Begin +// and End using Eq and assign new group IDs for new groups. +template +void ICF::segregate(InputSection **Begin, InputSection **End, + Comparator Eq) { + // This loop rearranges [Begin, End) so that all sections that are + // equal in terms of Eq are contiguous. The algorithm is quadratic in + // the worst case, but that is not an issue in practice because the + // number of distinct sections in [Begin, End) is usually very small. + InputSection **I = Begin; + for (;;) { + InputSection *Head = *I; + auto Bound = std::stable_partition( + I + 1, End, [&](InputSection *S) { return Eq(Head, S); }); + if (Bound == End) + return; + uint64_t Id = NextId++; + for (; I != Bound; ++I) + (*I)->GroupId = Id; + } +} + +template +void ICF::forEachGroup(std::vector *> &V, + Comparator Eq) { + for (auto I = V.begin(), E = V.end(); I != E;) { + InputSection *Head = *I; + auto Bound = std::find_if(I + 1, E, [&](InputSection *S) { + return S->GroupId != Head->GroupId; + }); + segregate(&*I, &*Bound, Eq); + I = Bound; + } +} + +// Compare two lists of relocations. +template +template +bool ICF::relocationEq(iterator_range RelsA, + iterator_range RelsB) { + const RelTy *IA = RelsA.begin(); + const RelTy *EA = RelsA.end(); + const RelTy *IB = RelsB.begin(); + const RelTy *EB = RelsB.end(); + if (EA - IA != EB - IB) + return false; + for (; IA != EA; ++IA, ++IB) + if (IA->r_offset != IB->r_offset || + IA->getType(Config->Mips64EL) != IB->getType(Config->Mips64EL) || + getAddend(*IA) != getAddend(*IB)) + return false; + return true; +} + +// Compare "non-moving" part of two InputSections, namely everything +// except relocation targets. +template +bool ICF::equalsConstant(const InputSection *A, + const InputSection *B) { + if (A->RelocSections.size() != B->RelocSections.size()) + return false; + + for (size_t I = 0, E = A->RelocSections.size(); I != E; ++I) { + const Elf_Shdr *RA = A->RelocSections[I]; + const Elf_Shdr *RB = B->RelocSections[I]; + ELFFile &FileA = A->File->getObj(); + ELFFile &FileB = B->File->getObj(); + if (RA->sh_type == SHT_RELA) { + if (!relocationEq(FileA.relas(RA), FileB.relas(RB))) + return false; + } else { + if (!relocationEq(FileA.rels(RA), FileB.rels(RB))) + return false; + } + } + + return A->getSectionHdr()->sh_flags == B->getSectionHdr()->sh_flags && + A->getSize() == B->getSize() && + A->getSectionData() == B->getSectionData(); +} + +template +template +bool ICF::variableEq(const InputSection *A, + const InputSection *B, + iterator_range RelsA, + iterator_range RelsB) { + const RelTy *IA = RelsA.begin(); + const RelTy *EA = RelsA.end(); + const RelTy *IB = RelsB.begin(); + for (; IA != EA; ++IA, ++IB) { + // If both IA and BA are pointing to the same local symbol, + // this "if" condition must be true. + if (A->File == B->File && + IA->getSymbol(Config->Mips64EL) == IB->getSymbol(Config->Mips64EL)) + continue; + + // Otherwise, IA and BA must be pointing to the global symbols. + SymbolBody *SA = getSymbol(A, (const Elf_Rel *)IA); + SymbolBody *SB = getSymbol(B, (const Elf_Rel *)IB); + if (!SA || !SB) + return false; + + // The global symbols should be simply the same. + if (SA->repl() == SB->repl()) + continue; + + // Or, the symbols should be pointing to the same section + // in terms of the group ID. + auto *DA = dyn_cast>(SA->repl()); + auto *DB = dyn_cast>(SB->repl()); + if (!DA || !DB) + return false; + if (DA->Sym.st_value != DB->Sym.st_value) + return false; + InputSection *X = dyn_cast>(DA->Section); + InputSection *Y = dyn_cast>(DB->Section); + if (X && Y && X->GroupId && X->GroupId == Y->GroupId) + continue; + return false; + } + return true; +} + +// Compare "moving" part of two InputSections, namely relocation targets. +template +bool ICF::equalsVariable(const InputSection *A, + const InputSection *B) { + for (size_t I = 0, E = A->RelocSections.size(); I != E; ++I) { + const Elf_Shdr *RA = A->RelocSections[I]; + const Elf_Shdr *RB = B->RelocSections[I]; + ELFFile &FileA = A->File->getObj(); + ELFFile &FileB = B->File->getObj(); + if (RA->sh_type == SHT_RELA) { + if (!variableEq(A, B, FileA.relas(RA), FileB.relas(RB))) + return false; + } else { + if (!variableEq(A, B, FileA.rels(RA), FileB.rels(RB))) + return false; + } + } + return true; +} + +// The main function of ICF. +template void ICF::run(SymbolTable *Symtab) { + // Initially, we use hash values as section group IDs. Therefore, + // if two sections have the same ID, they are likely (but not + // guaranteed) to have the same static contents in terms of ICF. + std::vector *> V = getSections(Symtab); + for (InputSection *S : V) + // Set MSB on to avoid collisions with serial group IDs + S->GroupId = getHash(S) | (uint64_t(1) << 63); + + // From now on, sections in V are ordered so that sections in + // the same group are consecutive in the vector. + std::stable_sort(V.begin(), V.end(), + [](InputSection *A, InputSection *B) { + return A->GroupId < B->GroupId; + }); + + // Compare static contents and assign unique IDs for each static content. + forEachGroup(V, equalsConstant); + + // Split groups by comparing relocations until we get a convergence. + int Cnt = 1; + for (;;) { + ++Cnt; + uint64_t Id = NextId; + forEachGroup(V, equalsVariable); + if (Id == NextId) + break; + } + if (Config->Verbose) + llvm::outs() << "ICF needed " << Cnt << " iterations.\n"; + + // Merge sections in the same group. + for (auto I = V.begin(), E = V.end(); I != E;) { + InputSection *Head = *I++; + auto Bound = std::find_if(I, E, [&](InputSection *S) { + return Head->GroupId != S->GroupId; + }); + if (I == Bound) + continue; + if (Config->Verbose) + llvm::outs() << "Selected " << Head->getSectionName() << "\n"; + while (I != Bound) { + InputSection *S = *I++; + if (Config->Verbose) + llvm::outs() << " Removed " << S->getSectionName() << "\n"; + Head->replace(S); + } + } +} + +// ICF entry point function. +template void elf2::doIcf(SymbolTable *Symtab) { + ICF().run(Symtab); +} + +template void elf2::doIcf(SymbolTable *); +template void elf2::doIcf(SymbolTable *); +template void elf2::doIcf(SymbolTable *); +template void elf2::doIcf(SymbolTable *); Index: ELF/InputFiles.cpp =================================================================== --- ELF/InputFiles.cpp +++ ELF/InputFiles.cpp @@ -288,7 +288,10 @@ return nullptr; if (Index >= Sections.size() || !Sections[Index]) fatal("Invalid section index"); - return Sections[Index]; + InputSectionBase *S = Sections[Index]; + if (S == InputSectionBase::Discarded) + return S; + return S->Repl; } template Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -18,6 +18,7 @@ namespace lld { namespace elf2 { +template class ICF; template class ObjectFile; template class OutputSection; template class OutputSectionBase; @@ -45,7 +46,14 @@ uint32_t Align = 1; // Used for garbage collection. - bool Live = false; + bool Live; + + // This pointer points to the "real" instance of this instnace. + // Usually Repl == this. However, if ICF merges two sections, + // Repl pointer of one section points to another section. So, + // if you need to get a pointer to this instnace, do not use + // this but instead this->Repl. + InputSectionBase *Repl; // Returns the size of this section (even if this is a common or BSS.) size_t getSize() const { return Header->sh_size; } @@ -136,6 +144,7 @@ // This corresponds to a non SHF_MERGE section of an input file. template class InputSection : public InputSectionBase { + friend ICF; typedef InputSectionBase Base; typedef typename llvm::object::ELFFile::Elf_Shdr Elf_Shdr; typedef typename llvm::object::ELFFile::Elf_Rela Elf_Rela; @@ -158,6 +167,12 @@ uint64_t OutSecOff = 0; static bool classof(const InputSectionBase *S); + + // Called by ICF to merge two input sections. + void replace(InputSection *Other); + + // Used by ICF. + uint64_t GroupId = 0; }; // MIPS .reginfo section provides information on the registers used by the code Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -25,7 +25,7 @@ InputSectionBase::InputSectionBase(ObjectFile *File, const Elf_Shdr *Header, Kind SectionKind) - : Header(Header), File(File), SectionKind(SectionKind) { + : Header(Header), File(File), SectionKind(SectionKind), Repl(this) { // The garbage collector sets sections' Live bits. // If GC is disabled, all sections are considered live by default. Live = !Config->GcSections; @@ -81,7 +81,7 @@ uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL); if (SymbolBody *B = File->getSymbolBody(SymIndex)) if (auto *D = dyn_cast>(B->repl())) - return D->Section; + return D->Section->Repl; // Local symbol if (const Elf_Sym *Sym = File->getLocalSymbol(SymIndex)) if (InputSectionBase *Sec = File->getSection(*Sym)) @@ -270,6 +270,13 @@ } template +void InputSection::replace(InputSection *Other) { + this->Align = std::max(this->Align, Other->Align); + Other->Repl = this->Repl; + Other->Live = false; +} + +template SplitInputSection::SplitInputSection( ObjectFile *File, const Elf_Shdr *Header, typename InputSectionBase::Kind SectionKind) Index: ELF/Options.td =================================================================== --- ELF/Options.td +++ ELF/Options.td @@ -57,6 +57,9 @@ def hash_style : Separate<["--", "-"], "hash-style">, HelpText<"Specify hash style (sysv, gnu or both)">; +def icf : Flag<["--"], "icf=all">, + HelpText<"Enable Identical Code Folding.">; + def gc_sections : Flag<["--"], "gc-sections">, HelpText<"Enable garbage collection of unused sections">; Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -734,6 +734,7 @@ template void OutputSection::addSection(InputSectionBase *C) { + assert(C->Live); auto *S = cast>(C); Sections.push_back(S); S->OutSec = this; Index: ELF/Symbols.h =================================================================== --- ELF/Symbols.h +++ ELF/Symbols.h @@ -221,16 +221,27 @@ DefinedRegular(StringRef N, const Elf_Sym &Sym, InputSectionBase *Section) : DefinedElf(SymbolBody::DefinedRegularKind, N, Sym), - Section(Section) {} + Section(Section ? Section->Repl : NullInputSection) {} static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::DefinedRegularKind; } - // If this is null, the symbol is absolute. - InputSectionBase *Section; + // The input section this symbol belongs to. Notice that this is + // a reference to a pointer. We are using two levels of indirections + // because of ICF. If ICF decides two sections need to be merged, it + // manipulates this Section pointers so that they point to the same + // section. This is a bit tricky, so be careful to not be confused. + // If this is null, the symbol is an absolute symbol. + InputSectionBase *&Section; + +private: + static InputSectionBase *NullInputSection; }; +template +InputSectionBase *DefinedRegular::NullInputSection; + // DefinedSynthetic is a class to represent linker-generated ELF symbols. // The difference from the regular symbol is that DefinedSynthetic symbols // don't belong to any input files or sections. Thus, its constructor Index: ELF/Symbols.cpp =================================================================== --- ELF/Symbols.cpp +++ ELF/Symbols.cpp @@ -42,6 +42,7 @@ // This is an absolute symbol. if (!SC) return D->Sym.st_value; + assert(SC->Live); // Symbol offsets for AMDGPU need to be the offset in bytes of the symbol // from the beginning of the section. Index: test/ELF/Inputs/icf2.s =================================================================== --- /dev/null +++ test/ELF/Inputs/icf2.s @@ -0,0 +1,5 @@ +.globl f1, f2 +.section .text.f2, "ax" +f2: + mov $60, %rdi + call f1 Index: test/ELF/icf1.s =================================================================== --- /dev/null +++ test/ELF/icf1.s @@ -0,0 +1,23 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: ld.lld %t -o %t2 --icf=all --verbose | FileCheck %s + +# CHECK: Selected .text.f1 +# CHECK: Removed .text.f2 + +.globl _start, f1, f2 +_start: + ret + +.section .text.f1, "ax" +f1: + mov $60, %rax + mov $42, %rdi + syscall + +.section .text.f2, "ax" +f2: + mov $60, %rax + mov $42, %rdi + syscall Index: test/ELF/icf2.s =================================================================== --- /dev/null +++ test/ELF/icf2.s @@ -0,0 +1,17 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/icf2.s -o %t2 +# RUN: ld.lld %t1 %t2 -o %t --icf=all --verbose | FileCheck %s + +# CHECK: Selected .text.f1 +# CHECK: Removed .text.f2 + +.globl _start, f1, f2 +_start: + ret + +.section .text.f1, "ax" +f1: + mov $60, %rdi + call f2 Index: test/ELF/icf3.s =================================================================== --- /dev/null +++ test/ELF/icf3.s @@ -0,0 +1,19 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/icf2.s -o %t2 +# RUN: ld.lld %t1 %t2 -o %t --icf=all --verbose | FileCheck %s + +# CHECK-NOT: Selected .text.f1 +# CHECK-NOT: Selected .text.f2 + +.globl _start, f1, f2 +_start: + ret + +# This section is not mergeable because the content is different from f2. +.section .text.f1, "ax" +f1: + mov $60, %rdi + call f2 + mov $0, %rax Index: test/ELF/icf4.s =================================================================== --- /dev/null +++ test/ELF/icf4.s @@ -0,0 +1,19 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: ld.lld %t -o %t --icf=all --verbose | FileCheck %s + +# CHECK-NOT: Selected .text.f1 +# CHECK-NOT: Selected .text.f2 + +.globl _start, f1, f2 +_start: + ret + +.section .text.f1, "ax" +f1: + mov $1, %rax + +.section .text.f2, "ax" +f2: + mov $0, %rax Index: test/ELF/icf5.s =================================================================== --- /dev/null +++ test/ELF/icf5.s @@ -0,0 +1,19 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: ld.lld %t -o %t --icf=all --verbose | FileCheck %s + +# CHECK-NOT: Selected .text.f1 +# CHECK-NOT: Selected .text.f2 + +.globl _start, f1, f2 +_start: + ret + +.section .text.f1, "ax" +f1: + mov $0, %rax + +.section .text.f2, "awx" +f2: + mov $0, %rax Index: test/ELF/icf6.s =================================================================== --- /dev/null +++ test/ELF/icf6.s @@ -0,0 +1,23 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: ld.lld %t -o %t2 --icf=all --verbose | FileCheck %s + +# CHECK-NOT: Selected .text.f1 +# CHECK-NOT: Selected .text.f2 + +.globl _start, f1, f2 +_start: + ret + +.section .init, "ax" +f1: + mov $60, %rax + mov $42, %rdi + syscall + +.section .fini, "ax" +f2: + mov $60, %rax + mov $42, %rdi + syscall