Index: ELF/LinkerScript.h =================================================================== --- ELF/LinkerScript.h +++ ELF/LinkerScript.h @@ -14,6 +14,7 @@ #include "Writer.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/MemoryBuffer.h" @@ -26,6 +27,7 @@ class ScriptParser; class SymbolBody; template class InputSectionBase; +template class InputSection; template class OutputSectionBase; template class OutputSectionFactory; class InputSectionData; @@ -70,7 +72,6 @@ bool Provide = false; bool Hidden = false; bool IsAbsolute; - InputSectionData *GoesAfter = nullptr; }; // Linker scripts allow additional constraints to be put on ouput sections. @@ -107,6 +108,7 @@ SortKind SortInner = SortNone; llvm::Regex ExcludedFileRe; llvm::Regex SectionRe; + std::vector Sections; }; struct AssertCommand : BaseCommand { @@ -173,6 +175,7 @@ ArrayRef getFiller(StringRef Name); Expr getLma(StringRef Name); bool shouldKeep(InputSectionBase *S); + void assignOffsets(OutputSectionCommand *Cmd); void assignAddresses(); int compareSections(StringRef A, StringRef B); bool hasPhdrsCommands(); @@ -185,8 +188,7 @@ std::vector *> *OutputSections; private: - std::vector *> - getInputSections(const InputSectionDescription *); + void getInputSections(InputSectionDescription *, ConstraintKind Constraint); void discard(ArrayRef *> V); @@ -201,6 +203,14 @@ size_t getPhdrIndex(StringRef PhdrName); uintX_t Dot; + OutputSectionBase *CurOutSec = nullptr; + uintX_t ThreadBssOffset = 0; + void switchTo(OutputSectionBase *Sec); + void flush(); + void output(InputSection *Sec); + void process(BaseCommand &Base); + llvm::DenseSet *> AlreadyOutputOS; + llvm::DenseSet AlreadyOutputIS; }; // Variable template is a C++14 feature, so we can't template Index: ELF/LinkerScript.cpp =================================================================== --- ELF/LinkerScript.cpp +++ ELF/LinkerScript.cpp @@ -136,31 +136,46 @@ } template -static bool matchConstraints(ArrayRef *> Sections, +static bool matchConstraints(ArrayRef Sections, ConstraintKind Kind) { if (Kind == ConstraintKind::NoConstraint) return true; - return llvm::all_of(Sections, [=](InputSectionBase *Sec) { + return llvm::all_of(Sections, [=](InputSectionData *Sec2) { + auto *Sec = static_cast *>(Sec2); return checkConstraint(Sec->getSectionHdr()->sh_flags, Kind); }); } // Returns input sections filtered by given glob patterns. template -std::vector *> -LinkerScript::getInputSections(const InputSectionDescription *I) { +void LinkerScript::getInputSections(InputSectionDescription *I, + ConstraintKind Constraint) { const Regex &Re = I->SectionRe; - std::vector *> Ret; + std::vector &V = I->Sections; for (ObjectFile *F : Symtab::X->getObjectFiles()) if (fileMatches(I, sys::path::filename(F->getName()))) for (InputSectionBase *S : F->getSections()) if (!isDiscarded(S) && !S->OutSec && const_cast(Re).match(S->Name)) - Ret.push_back(S); + V.push_back(S); if (const_cast(Re).match("COMMON")) - Ret.push_back(CommonInputSection::X); - return Ret; + V.push_back(CommonInputSection::X); + + if (!matchConstraints(V, Constraint)) + V.clear(); + + if (I->SortInner) + std::stable_sort(V.begin(), V.end(), getComparator(I->SortInner)); + if (I->SortOuter) + std::stable_sort(V.begin(), V.end(), getComparator(I->SortOuter)); + + // We do not add duplicate input sections, so mark them with a dummy output + // section for now. + for (InputSectionData *S : V) { + auto *S2 = static_cast *>(S); + S2->OutSec = (OutputSectionBase *)-1; + } } template @@ -175,30 +190,18 @@ std::vector *> LinkerScript::createInputSectionList(OutputSectionCommand &OutCmd) { std::vector *> Ret; - DenseSet *> SectionIndex; for (const std::unique_ptr &Base : OutCmd.Commands) { if (auto *OutCmd = dyn_cast(Base.get())) { if (shouldDefine(OutCmd)) addSymbol(OutCmd); - OutCmd->GoesAfter = Ret.empty() ? nullptr : Ret.back(); continue; } auto *Cmd = cast(Base.get()); - std::vector *> V = getInputSections(Cmd); - if (!matchConstraints(V, OutCmd.Constraint)) - continue; - if (Cmd->SortInner) - std::stable_sort(V.begin(), V.end(), getComparator(Cmd->SortInner)); - if (Cmd->SortOuter) - std::stable_sort(V.begin(), V.end(), getComparator(Cmd->SortOuter)); - - // Add all input sections corresponding to rule 'Cmd' to - // resulting vector. We do not add duplicate input sections. - for (InputSectionBase *S : V) - if (SectionIndex.insert(S).second) - Ret.push_back(S); + getInputSections(Cmd, OutCmd.Constraint); + for (InputSectionData *S : Cmd->Sections) + Ret.push_back(static_cast *>(S)); } return Ret; } @@ -314,84 +317,75 @@ Body->Value = Cmd->Expression(Sec->getVA() + Off); } -// Linker script may define start and end symbols for special section types, -// like .got, .eh_frame_hdr, .eh_frame and others. Those sections are not a list -// of regular input input sections, therefore our way of defining symbols for -// regular sections will not work. The approach we use for special section types -// is not perfect - it handles only start and end symbols. -template -void addStartEndSymbols(OutputSectionCommand *Cmd, - OutputSectionBase *Sec) { - bool Start = true; - BaseCommand *PrevCmd = nullptr; - - for (std::unique_ptr &Base : Cmd->Commands) { - if (auto *AssignCmd = dyn_cast(Base.get())) { - assignSectionSymbol(AssignCmd, Sec, Start ? 0 : Sec->getSize()); - } else { - if (!Start && isa(PrevCmd)) - error("section '" + Sec->getName() + - "' supports only start and end symbols"); - Start = false; - } - PrevCmd = Base.get(); +template void LinkerScript::output(InputSection *S) { + if (!AlreadyOutputIS.insert(S).second) + return; + AlreadyOutputIS.insert(S); + bool IsTbss = + (CurOutSec->getFlags() & SHF_TLS) && CurOutSec->getType() == SHT_NOBITS; + + uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; + Pos = alignTo(Pos, S->Alignment); + S->OutSecOff = Pos - CurOutSec->getVA(); + Pos += S->getSize(); + + // Update section size inside for-loop, so that SIZEOF + // works correctly in the case below: + // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } + CurOutSec->setSize(Pos - CurOutSec->getVA()); + + if (!IsTbss) + Dot = Pos; +} + +template void LinkerScript::flush() { + if (auto *OutSec = dyn_cast_or_null>(CurOutSec)) { + for (InputSection *I : OutSec->Sections) + output(I); + AlreadyOutputOS.insert(CurOutSec); } } template -void assignOffsets(OutputSectionCommand *Cmd, OutputSectionBase *Sec) { - auto *OutSec = dyn_cast>(Sec); - if (!OutSec) { - Sec->assignOffsets(); - // This section is not regular output section. However linker script may - // have defined start/end symbols for it. This case is handled below. - addStartEndSymbols(Cmd, Sec); +void LinkerScript::switchTo(OutputSectionBase *Sec) { + if (CurOutSec == Sec) + return; + if (AlreadyOutputOS.count(Sec)) return; - } - typedef typename ELFT::uint uintX_t; - uintX_t Off = 0; - auto ItCmd = Cmd->Commands.begin(); - - // Assigns values to all symbols following the given - // input section 'D' in output section 'Sec'. When symbols - // are in the beginning of output section the value of 'D' - // is nullptr. - auto AssignSuccessors = [&](InputSectionData *D) { - for (; ItCmd != Cmd->Commands.end(); ++ItCmd) { - auto *AssignCmd = dyn_cast(ItCmd->get()); - if (!AssignCmd) - continue; - if (D != AssignCmd->GoesAfter) - break; - if (AssignCmd->Name == ".") { - // Update to location counter means update to section size. - Off = AssignCmd->Expression(Sec->getVA() + Off) - Sec->getVA(); - Sec->setSize(Off); - continue; - } - assignSectionSymbol(AssignCmd, Sec, Off); - } - }; + flush(); + CurOutSec = Sec; - AssignSuccessors(nullptr); - for (InputSection *I : OutSec->Sections) { - Off = alignTo(Off, I->Alignment); - I->OutSecOff = Off; - Off += I->getSize(); - // Update section size inside for-loop, so that SIZEOF - // works correctly in the case below: - // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } - Sec->setSize(Off); - // Add symbols following current input section. - AssignSuccessors(I); + Dot = alignTo(Dot, CurOutSec->getAlignment()); + CurOutSec->setVA(Dot); +} + +template void LinkerScript::process(BaseCommand &Base) { + if (auto *AssignCmd = dyn_cast(&Base)) { + if (AssignCmd->Name == ".") { + // Update to location counter means update to section size. + Dot = AssignCmd->Expression(Dot); + CurOutSec->setSize(Dot - CurOutSec->getVA()); + return; + } + assignSectionSymbol(AssignCmd, CurOutSec, Dot - CurOutSec->getVA()); + return; + } + auto &ICmd = cast(Base); + for (InputSectionData *ID : ICmd.Sections) { + auto *IB = static_cast *>(ID); + switchTo(IB->OutSec); + if (auto *I = dyn_cast>(IB)) + output(I); + else if (AlreadyOutputOS.insert(CurOutSec).second) + Dot += CurOutSec->getSize(); } } template static std::vector *> findSections(OutputSectionCommand &Cmd, - ArrayRef *> Sections) { + const std::vector *> &Sections) { std::vector *> Ret; for (OutputSectionBase *Sec : Sections) if (Sec->getName() == Cmd.Name && @@ -400,6 +394,34 @@ return Ret; } +template +void LinkerScript::assignOffsets(OutputSectionCommand *Cmd) { + std::vector *> Sections = + findSections(*Cmd, *OutputSections); + if (Sections.empty()) + return; + switchTo(Sections[0]); + + // Find the last section output location. We will output orphan sections + // there so that end symbols point to the correct location. + auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), + [](const std::unique_ptr &Cmd) { + return !isa(*Cmd); + }) + .base(); + for (auto I = Cmd->Commands.begin(); I != E; ++I) + process(**I); + flush(); + for (OutputSectionBase *Base : Sections) { + if (!AlreadyOutputOS.insert(Base).second) + continue; + switchTo(Base); + Dot += CurOutSec->getSize(); + } + for (auto I = E, E = Cmd->Commands.end(); I != E; ++I) + process(**I); +} + template void LinkerScript::assignAddresses() { // Orphan sections are sections present in the input files which // are not explicitly placed into the output file by the linker script. @@ -415,7 +437,6 @@ // Assign addresses as instructed by linker script SECTIONS sub-commands. Dot = getHeaderSize(); uintX_t MinVA = std::numeric_limits::max(); - uintX_t ThreadBssOffset = 0; for (const std::unique_ptr &Base : Opt.Commands) { if (auto *Cmd = dyn_cast(Base.get())) { @@ -433,34 +454,18 @@ } auto *Cmd = cast(Base.get()); - for (OutputSectionBase *Sec : - findSections(*Cmd, *OutputSections)) { - - if (Cmd->AddrExpr) - Dot = Cmd->AddrExpr(Dot); - - if ((Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS) { - uintX_t TVA = Dot + ThreadBssOffset; - TVA = alignTo(TVA, Sec->getAlignment()); - Sec->setVA(TVA); - assignOffsets(Cmd, Sec); - ThreadBssOffset = TVA - Dot + Sec->getSize(); - continue; - } - if (!(Sec->getFlags() & SHF_ALLOC)) { - assignOffsets(Cmd, Sec); - continue; - } + if (Cmd->AddrExpr) + Dot = Cmd->AddrExpr(Dot); - Dot = alignTo(Dot, Sec->getAlignment()); - Sec->setVA(Dot); - assignOffsets(Cmd, Sec); - MinVA = std::min(MinVA, Dot); - Dot += Sec->getSize(); - } + MinVA = std::min(MinVA, Dot); + assignOffsets(Cmd); } + for (OutputSectionBase *Sec : *OutputSections) + if (!(Sec->getFlags() & SHF_ALLOC)) + Sec->setVA(0); + // ELF and Program headers need to be right before the first section in // memory. Set their addresses accordingly. MinVA = alignDown(MinVA - Out::ElfHeader->getSize() - Index: test/ELF/linkerscript/merge-sections.s =================================================================== --- test/ELF/linkerscript/merge-sections.s +++ test/ELF/linkerscript/merge-sections.s @@ -1,7 +1,7 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t -# RUN: echo "SECTIONS {.foo : {*(.foo.*)} }" > %t.script +# RUN: echo "SECTIONS {.foo : { begin = .; *(.foo.*) end = .;} }" > %t.script # RUN: ld.lld -o %t1 --script %t.script %t -shared # RUN: llvm-readobj -s -t %t1 | FileCheck %s @@ -52,6 +52,13 @@ # CHECK-NEXT: AddressAlignment: 2 # CHECK-NEXT: EntrySize: 2 + +# CHECK: Name: begin +# CHECK-NEXT: Value: 0x1C8 + +# CHECK: Name: end +# CHECK-NEXT: Value: 0x1D0 + .section .foo.1a,"aMS",@progbits,1 .asciz "foo" Index: test/ELF/linkerscript/symbols-synthetic.s =================================================================== --- test/ELF/linkerscript/symbols-synthetic.s +++ test/ELF/linkerscript/symbols-synthetic.s @@ -29,19 +29,6 @@ # RUN: ld.lld -o %t1 --eh-frame-hdr --script %t.script %t # RUN: llvm-objdump -t %t1 | FileCheck --check-prefix=SIMPLE %s -# The script below contains symbols in the middle of .eh_frame_hdr section. -# We don't support this. -# RUN: echo "SECTIONS { \ -# RUN: .eh_frame_hdr : { \ -# RUN: PROVIDE_HIDDEN(_begin_sec = .); \ -# RUN: __eh_frame_hdr_start = .; \ -# RUN: *(.eh_frame_hdr) \ -# RUN: PROVIDE_HIDDEN(_end_sec_abs = ABSOLUTE(.)); \ -# RUN: *(.eh_frame_hdr) } \ -# RUN: PROVIDE_HIDDEN(_end_sec = .); \ -# RUN: }" > %t.script -# RUN: not ld.lld -o %t1 --eh-frame-hdr --script %t.script %t 2>&1 | FileCheck --check-prefix=ERROR %s - # Check that the following script is processed without errors # RUN: echo "SECTIONS { \ # RUN: .eh_frame_hdr : { \ @@ -69,7 +56,6 @@ # SIMPLE-NEXT: 0000000000001010 *ABS* 00000000 __eh_frame_hdr_start2 # SIMPLE-NEXT: 0000000000001018 .eh_frame_hdr 00000000 __eh_frame_hdr_end # SIMPLE-NEXT: 0000000000001020 *ABS* 00000000 __eh_frame_hdr_end2 -# ERROR: section '.eh_frame_hdr' supports only start and end symbols .global _start _start: