Index: ELF/Config.h =================================================================== --- ELF/Config.h +++ ELF/Config.h @@ -10,6 +10,7 @@ #ifndef LLD_ELF_CONFIG_H #define LLD_ELF_CONFIG_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ELF.h" @@ -29,6 +30,17 @@ ELF64BEKind }; +struct InputSectionDescription { + std::vector Names; + llvm::StringRef InputFile; + std::vector ExcludeFiles; +}; + +struct OutputSectionDescription { + llvm::StringRef Name; + std::vector InputSections; +}; + struct Configuration { SymbolBody *EntrySym = nullptr; InputFile *FirstElf = nullptr; @@ -41,6 +53,7 @@ llvm::StringRef SoName; llvm::StringRef Sysroot; std::string RPath; + llvm::MapVector OutputSections; std::vector SearchPaths; std::vector Undefined; bool AllowMultipleDefinition; Index: ELF/LinkerScript.cpp =================================================================== --- ELF/LinkerScript.cpp +++ ELF/LinkerScript.cpp @@ -36,8 +36,11 @@ static std::vector tokenize(StringRef S); static StringRef skipSpace(StringRef S); StringRef next(); + StringRef peek(); bool atEOF() { return Tokens.size() == Pos; } void expect(StringRef Expect); + template + void mapBraces(StringRef OpenBrace, StringRef CloseBrace, Function F); void addFile(StringRef Path); @@ -46,10 +49,13 @@ void readExtern(); void readGroup(); void readInclude(); + void readInputSectionDescription(OutputSectionDescription &OutSec); void readOutput(); void readOutputArch(); void readOutputFormat(); + void readOutputSectionDescription(); void readSearchDir(); + void readSections(); StringSaver Saver; std::vector Tokens; @@ -78,6 +84,8 @@ readOutputFormat(); } else if (Tok == "SEARCH_DIR") { readSearchDir(); + } else if (Tok == "SECTIONS") { + readSections(); } else { error("unknown directive: " + Tok); } @@ -133,17 +141,32 @@ } StringRef LinkerScript::next() { - if (Pos == Tokens.size()) + if (atEOF()) error("unexpected EOF"); return Tokens[Pos++]; } +StringRef LinkerScript::peek() { + if (atEOF()) + error("unexpected EOF"); + return Tokens[Pos]; +} + void LinkerScript::expect(StringRef Expect) { StringRef Tok = next(); if (Tok != Expect) error(Expect + " expected, but got " + Tok); } +template +void LinkerScript::mapBraces(StringRef OpenBrace, StringRef CloseBrace, + Function F) { + expect(OpenBrace); + while (peek() != CloseBrace) + F(); + expect(CloseBrace); +} + void LinkerScript::addFile(StringRef S) { if (sys::path::is_absolute(S)) { Driver->addFile(S); @@ -218,6 +241,22 @@ Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); } +void LinkerScript::readInputSectionDescription( + OutputSectionDescription &OutSec) { + OutSec.InputSections.emplace_back(); + InputSectionDescription &InSec = OutSec.InputSections.back(); + + InSec.InputFile = next(); + mapBraces("(", ")", [this, &InSec]() { + StringRef Tok = next(); + if (Tok == "EXCLUDE_FILE") + mapBraces("(", ")", + [this, &InSec]() { InSec.ExcludeFiles.push_back(next()); }); + else + InSec.Names.push_back(Tok); + }); +} + void LinkerScript::readOutput() { // -o takes predecence over OUTPUT(). expect("("); @@ -249,12 +288,26 @@ expect(")"); } +void LinkerScript::readOutputSectionDescription() { + StringRef Name = next(); + OutputSectionDescription &OutSec = Config->OutputSections[Name]; + OutSec.Name = Name; + + expect(":"); + mapBraces("{", "}", + [this, &OutSec]() { readInputSectionDescription(OutSec); }); +} + void LinkerScript::readSearchDir() { expect("("); Config->SearchPaths.push_back(next()); expect(")"); } +void LinkerScript::readSections() { + mapBraces("{", "}", [this]() { readOutputSectionDescription(); }); +} + // Entry point. The other functions or classes are private to this file. void lld::elf2::readLinkerScript(BumpPtrAllocator *A, MemoryBufferRef MB) { LinkerScript(A, MB.getBuffer()).run(); Index: ELF/OutputSections.h =================================================================== --- ELF/OutputSections.h +++ ELF/OutputSections.h @@ -95,6 +95,8 @@ } uint32_t getType() { return Header.sh_type; } + enum Kind { OutputKind, MergeOutputKind, OtherKind }; + virtual Kind kind() const { return OtherKind; } virtual void finalize() {} virtual void writeTo(uint8_t *Buf) = 0; @@ -220,8 +222,14 @@ typedef typename llvm::object::ELFFile::Elf_Rela Elf_Rela; typedef typename llvm::object::ELFFile::uintX_t uintX_t; OutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags); + typename OutputSectionBase::Kind kind() const override { + return OutputSectionBase::OutputKind; + } void addSection(InputSection *C); void writeTo(uint8_t *Buf) override; + static bool classof(const OutputSectionBase *OS) { + return OS->kind() == OutputSectionBase::OutputKind; + } private: std::vector *> Sections; @@ -235,10 +243,16 @@ public: MergeOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags); + typename OutputSectionBase::Kind kind() const override { + return OutputSectionBase::MergeOutputKind; + } void addSection(MergeInputSection *S); void writeTo(uint8_t *Buf) override; unsigned getOffset(StringRef Val); void finalize() override; + static bool classof(const OutputSectionBase *OS) { + return OS->kind() == OutputSectionBase::MergeOutputKind; + } private: llvm::StringTableBuilder Builder{llvm::StringTableBuilder::RAW}; Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -14,6 +14,7 @@ #include "Target.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/StringSaver.h" @@ -37,8 +38,16 @@ typedef typename ELFFile::Elf_Sym_Range Elf_Sym_Range; typedef typename ELFFile::Elf_Rela Elf_Rela; Writer(SymbolTable &S) : Symtab(S) {} + virtual ~Writer() {} void run(); +protected: + virtual bool compareOutputSections(OutputSectionBase *A, + OutputSectionBase *B) const; + virtual bool discardInputSection(InputSectionBase *IS) const; + virtual bool hasBoundingSymbols(OutputSectionBase *OS) const; + virtual StringRef getOutputSectionName(StringRef S) const; + private: void copyLocalSymbols(); void createSections(); @@ -79,6 +88,23 @@ uintX_t FileSize; uintX_t SectionHeaderOff; }; + +template class LinkerScriptWriter : public Writer { +public: + LinkerScriptWriter(SymbolTable &S) : Writer(S) { + parseSectionDescriptions(); + } + +private: + void parseSectionDescriptions(); + bool compareOutputSections(OutputSectionBase *A, + OutputSectionBase *B) const override; + bool discardInputSection(InputSectionBase *IS) const override; + bool hasBoundingSymbols(OutputSectionBase *OS) const override; + StringRef getOutputSectionName(StringRef S) const override; + + llvm::StringMap InputToOutputSection; +}; } // anonymous namespace template void lld::elf2::writeResult(SymbolTable *Symtab) { @@ -124,7 +150,10 @@ DynamicSection Dynamic(*Symtab); Out::Dynamic = &Dynamic; - Writer(*Symtab).run(); + if (Config->OutputSections.empty()) + Writer(*Symtab).run(); + else + LinkerScriptWriter(*Symtab).run(); } // The main function of the writer. @@ -306,8 +335,8 @@ // Output section ordering is determined by this function. template -static bool compareOutputSections(OutputSectionBase *A, - OutputSectionBase *B) { +bool Writer::compareOutputSections(OutputSectionBase *A, + OutputSectionBase *B) const { typedef typename ELFFile::uintX_t uintX_t; uintX_t AFlags = A->getFlags(); @@ -395,7 +424,13 @@ Out::Bss->setSize(Off); } -static StringRef getOutputName(StringRef S) { +template +static bool isRegularSection(OutputSectionBase *OS) { + return isa>(OS) || isa>(OS); +} + +template +StringRef Writer::getOutputSectionName(StringRef S) const { if (S.startswith(".text.")) return ".text"; if (S.startswith(".rodata.")) @@ -407,6 +442,16 @@ return S; } +template +bool Writer::discardInputSection(InputSectionBase *IS) const { + return !IS || !IS->isLive() || IS == &InputSection::Discarded; +} + +template +bool Writer::hasBoundingSymbols(OutputSectionBase *OS) const { + return true; +} + // Create output section objects and add them to OutputSections. template void Writer::createSections() { // .interp needs to be on the first page in the output file. @@ -419,11 +464,9 @@ Map[{Out::Bss->getName(), Out::Bss->getType(), Out::Bss->getFlags(), 0}] = Out::Bss; - std::vector *> RegularSections; - for (const std::unique_ptr> &F : Symtab.getObjectFiles()) { for (InputSectionBase *C : F->getSections()) { - if (!C || !C->isLive() || C == &InputSection::Discarded) + if (discardInputSection(C)) continue; const Elf_Shdr *H = C->getSectionHdr(); uintX_t OutFlags = H->sh_flags & ~SHF_GROUP; @@ -432,7 +475,7 @@ // mapping from input to output. auto *IS = dyn_cast>(C); uintX_t EntSize = IS ? 0 : H->sh_entsize; - SectionKey Key{getOutputName(C->getSectionName()), + SectionKey Key{getOutputSectionName(C->getSectionName()), H->sh_type, OutFlags, EntSize}; OutputSectionBase *&Sec = Map[Key]; if (!Sec) { @@ -443,13 +486,13 @@ Sec = new (MSecAlloc.Allocate()) MergeOutputSection(Key.Name, Key.Type, Key.Flags); OutputSections.push_back(Sec); - RegularSections.push_back(Sec); } - if (IS) - static_cast *>(Sec)->addSection(IS); + if (auto *OS = dyn_cast>(Sec)) + OS->addSection(IS); + else if (auto *MOS = dyn_cast>(Sec)) + MOS->addSection(cast>(C)); else - static_cast *>(Sec) - ->addSection(cast>(C)); + llvm_unreachable("Wrong output section kind"); } } @@ -462,6 +505,8 @@ auto AddStartEnd = [&](StringRef Start, StringRef End, OutputSectionBase *OS) { + if (!hasBoundingSymbols(OS)) + return; if (OS) { Symtab.addSyntheticSym(Start, *OS, 0); Symtab.addSyntheticSym(End, *OS, OS->getSize()); @@ -478,8 +523,9 @@ AddStartEnd("__fini_array_start", "__fini_array_end", Out::Dynamic->FiniArraySec); - for (OutputSectionBase *Sec : RegularSections) - addStartStopSymbols(Sec); + for (OutputSectionBase *Sec : OutputSections) + if (isRegularSection(Sec) && hasBoundingSymbols(Sec)) + addStartStopSymbols(Sec); // __tls_get_addr is defined by the dynamic linker for dynamic ELFs. For // static linking the linker is required to optimize away any references to @@ -490,12 +536,14 @@ // Scan relocations. This must be done after every symbol is declared so that // we can correctly decide if a dynamic relocation is needed. - for (const std::unique_ptr> &F : Symtab.getObjectFiles()) - for (InputSectionBase *B : F->getSections()) - if (auto *S = dyn_cast_or_null>(B)) - if (S != &InputSection::Discarded) - if (S->isLive()) - scanRelocs(*S); + for (const std::unique_ptr> &F : Symtab.getObjectFiles()) { + for (InputSectionBase *C : F->getSections()) { + if (discardInputSection(C)) + continue; + if (auto *S = dyn_cast>(C)) + scanRelocs(*S); + } + } std::vector *> CommonSymbols; for (auto &P : Symtab.getSymbols()) { @@ -543,8 +591,11 @@ if (!Out::Plt->empty()) OutputSections.push_back(Out::Plt); - std::stable_sort(OutputSections.begin(), OutputSections.end(), - compareOutputSections); + std::stable_sort( + OutputSections.begin(), OutputSections.end(), + [this](OutputSectionBase *A, OutputSectionBase *B) -> bool { + return compareOutputSections(A, B); + }); for (unsigned I = 0, N = OutputSections.size(); I < N; ++I) OutputSections[I]->SectionIndex = I + 1; @@ -814,6 +865,51 @@ PH->p_align = From->getAlign(); } +template +void LinkerScriptWriter::parseSectionDescriptions() { + for (auto OutSec : Config->OutputSections) + for (auto InSec : OutSec.second.InputSections) + for (auto Name : InSec.Names) + InputToOutputSection[Name] = OutSec.second.Name; +} + +template +bool LinkerScriptWriter::compareOutputSections( + OutputSectionBase *A, OutputSectionBase *B) const { + if (!isRegularSection(A) || !isRegularSection(B)) + return Writer::compareOutputSections(A, B); + + auto ItA = Config->OutputSections.find(A->getName()); + auto ItB = Config->OutputSections.find(B->getName()); + auto ItEnd = std::end(Config->OutputSections); + if (ItA == ItEnd || ItB == ItEnd) + return Writer::compareOutputSections(A, B); + + return std::distance(ItA, ItB) > 0; +} + +template +bool LinkerScriptWriter::discardInputSection( + InputSectionBase *IS) const { + return Writer::discardInputSection(IS) || + InputToOutputSection.lookup(IS->getSectionName()) == "/DISCARD/"; +} + +template +bool LinkerScriptWriter::hasBoundingSymbols( + OutputSectionBase *OS) const { + return Writer::hasBoundingSymbols(OS) && OS && + Config->OutputSections.count(OS->getName()) == 0; +} + +template +StringRef LinkerScriptWriter::getOutputSectionName(StringRef S) const { + auto It = InputToOutputSection.find(S); + if (It != std::end(InputToOutputSection)) + return It->second; + return Writer::getOutputSectionName(S); +} + template void lld::elf2::writeResult(SymbolTable *Symtab); template void lld::elf2::writeResult(SymbolTable *Symtab); template void lld::elf2::writeResult(SymbolTable *Symtab); Index: test/elf2/linkerscript-sections.s =================================================================== --- /dev/null +++ test/elf2/linkerscript-sections.s @@ -0,0 +1,140 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t + +# Empty SECTIONS command. +# RUN: echo "SECTIONS {}" > %t.script +# RUN: ld.lld2 -o %t1 --script %t.script %t +# RUN: llvm-objdump -section-headers %t1 | \ +# RUN: FileCheck -check-prefix=SEC-DEFAULT %s + +# SECTIONS command with the same order as default. +# RUN: echo "SECTIONS { \ +# RUN: .text : { *(.text) } \ +# RUN: .data : { *(.data) } }" \ > %t.script +# RUN: ld.lld2 -o %t2 --script %t.script %t +# RUN: llvm-objdump -section-headers %t2 | \ +# RUN: FileCheck -check-prefix=SEC-DEFAULT %s + +# Idx Name Size +# SEC-DEFAULT: 1 .text 0000000e {{[0-9a-f]*}} TEXT DATA +# SEC-DEFAULT: 2 .data 00000020 {{[0-9a-f]*}} DATA +# SEC-DEFAULT: 3 other 00000003 {{[0-9a-f]*}} DATA +# SEC-DEFAULT: 4 .bss 00000002 {{[0-9a-f]*}} BSS +# SEC-DEFAULT: 5 .shstrtab 00000002 {{[0-9a-f]*}} +# SEC-DEFAULT: 6 .symtab 00000030 {{[0-9a-f]*}} +# SEC-DEFAULT: 7 .shstrtab 00000032 {{[0-9a-f]*}} +# SEC-DEFAULT: 8 .strtab 00000008 {{[0-9a-f]*}} + +# Sections are put in order specified in linker script. +# .shstrtab affects only regular section from the input file. +# RUN: echo "SECTIONS { \ +# RUN: .bss : { *(.bss) } \ +# RUN: other : { *(other) } \ +# RUN: .shstrtab : { *(.shstrtab) } \ +# RUN: .data : { *(.data) } \ +# RUN: .text : { *(.text) } }" \ > %t.script +# RUN: ld.lld2 -o %t3 --script %t.script %t +# RUN: llvm-objdump -section-headers %t3 | \ +# RUN: FileCheck -check-prefix=SEC-ORDER %s + +# Idx Name Size +# SEC-ORDER: 1 .bss 00000002 {{[0-9a-f]*}} BSS +# SEC-ORDER: 2 other 00000003 {{[0-9a-f]*}} DATA +# SEC-ORDER: 3 .shstrtab 00000002 {{[0-9a-f]*}} +# SEC-ORDER: 4 .data 00000020 {{[0-9a-f]*}} DATA +# SEC-ORDER: 5 .text 0000000e {{[0-9a-f]*}} TEXT DATA +# SEC-ORDER: 6 .symtab 00000030 {{[0-9a-f]*}} +# SEC-ORDER: 7 .shstrtab 00000032 {{[0-9a-f]*}} +# SEC-ORDER: 8 .strtab 00000008 {{[0-9a-f]*}} + +# .text and .data have swapped names but proper sizes and types. +# RUN: echo "SECTIONS { \ +# RUN: .data : { *(.text) } \ +# RUN: .text : { *(.data) } }" \ > %t.script +# RUN: ld.lld2 -o %t4 --script %t.script %t +# RUN: llvm-objdump -section-headers %t4 | \ +# RUN: FileCheck -check-prefix=SEC-SWAP-NAMES %s + +# Idx Name Size +# SEC-SWAP-NAMES: 1 .data 0000000e {{[0-9a-f]*}} TEXT DATA +# SEC-SWAP-NAMES: 2 .text 00000020 {{[0-9a-f]*}} DATA +# SEC-SWAP-NAMES: 3 other 00000003 {{[0-9a-f]*}} DATA +# SEC-SWAP-NAMES: 4 .bss 00000002 {{[0-9a-f]*}} BSS +# SEC-SWAP-NAMES: 5 .shstrtab 00000002 {{[0-9a-f]*}} +# SEC-SWAP-NAMES: 6 .symtab 00000030 {{[0-9a-f]*}} +# SEC-SWAP-NAMES: 7 .shstrtab 00000032 {{[0-9a-f]*}} +# SEC-SWAP-NAMES: 8 .strtab 00000008 {{[0-9a-f]*}} + +# .shstrtab from the input object file is discarded. +# RUN: echo "SECTIONS { \ +# RUN: /DISCARD/ : { *(.shstrtab) } }" \ > %t.script +# RUN: ld.lld2 -o %t5 --script %t.script %t +# RUN: llvm-objdump -section-headers %t5 | \ +# RUN: FileCheck -check-prefix=SEC-DISCARD %s + +# Idx Name Size +# SEC-DISCARD: 1 .text 0000000e {{[0-9a-f]*}} TEXT DATA +# SEC-DISCARD: 2 .data 00000020 {{[0-9a-f]*}} DATA +# SEC-DISCARD: 3 other 00000003 {{[0-9a-f]*}} DATA +# SEC-DISCARD: 4 .bss 00000002 {{[0-9a-f]*}} BSS +# SEC-DISCARD: 5 .symtab 00000030 {{[0-9a-f]*}} +# SEC-DISCARD: 6 .shstrtab 00000032 {{[0-9a-f]*}} +# SEC-DISCARD: 7 .strtab 00000008 {{[0-9a-f]*}} + +# Multiple SECTIONS command specifying additional input section descriptions +# for the same output section description - input sections are merged into +# one output section. +# RUN: echo "SECTIONS { \ +# RUN: .text : { *(.text) } \ +# RUN: .data : { *(.data) } } \ +# RUN: SECTIONS { \ +# RUN: .data : { *(other) } }" \ > %t.script +# RUN: ld.lld2 -o %t6 --script %t.script %t +# RUN: llvm-objdump -section-headers %t6 | \ +# RUN: FileCheck -check-prefix=SEC-MULTI %s + +# Idx Name Size +# SEC-MULTI: 1 .text 0000000e {{[0-9a-f]*}} TEXT DATA +# SEC-MULTI: 2 .data 00000023 {{[0-9a-f]*}} DATA +# SEC-MULTI: 3 .bss 00000002 {{[0-9a-f]*}} BSS +# SEC-MULTI: 4 .shstrtab 00000002 {{[0-9a-f]*}} +# SEC-MULTI: 5 .symtab 00000030 {{[0-9a-f]*}} +# SEC-MULTI: 6 .shstrtab 0000002c {{[0-9a-f]*}} +# SEC-MULTI: 7 .strtab 00000008 {{[0-9a-f]*}} + +# No bounding symbols added for custom sections with valid C-identifier +# names if section is present in the SECTIONS command. +# RUN: echo "SECTIONS { \ +# RUN: other : { *(other) } }" \ > %t.script +# RUN: ld.lld2 -o %t7 --script %t.script %t -u __start_other -u __stop_other +# RUN: llvm-objdump -t %t7 | \ +# RUN: FileCheck -check-prefix=SEC-NO-BOUND-SYM %s + +# SEC-NO-BOUND-SYM: 0000000000000000 *UND* 00000000 {{.*}} __start_other +# SEC-NO-BOUND-SYM: 0000000000000000 *UND* 00000000 {{.*}} __stop_other + +# Bounding symbols are added for custom sections with valid C-identifier +# names if section is not present in the SECTIONS command. +# RUN: echo "SECTIONS { \ +# RUN: .data : { *(.data) } }" \ > %t.script +# RUN: ld.lld2 -o %t8 --script %t.script %t -u __start_other -u __stop_other +# RUN: llvm-objdump -t %t8 | \ +# RUN: FileCheck -check-prefix=SEC-HAS-BOUND-SYM %s + +# SEC-HAS-BOUND-SYM: {{[0-9a-f]*}} other 00000000 {{.*}} __start_other +# SEC-HAS-BOUND-SYM: {{[0-9a-f]*}} other 00000000 {{.*}} __stop_other + +.globl _start; +_start: + mov $60, %rax + mov $42, %rdi + +.section .data,"aw" +.quad 10, 10, 20, 20 +.section other,"aw" +.short 10 +.byte 20 +.section .shstrtab,"" +.short 20 +.section .bss,"",@nobits +.short 0