Index: lld/trunk/ELF/InputSection.h =================================================================== --- lld/trunk/ELF/InputSection.h +++ lld/trunk/ELF/InputSection.h @@ -56,7 +56,7 @@ InputSectionBase *Repl; // Returns the size of this section (even if this is a common or BSS.) - size_t getSize() const { return Header->sh_size; } + size_t getSize() const; static InputSectionBase *Discarded; @@ -167,6 +167,17 @@ InputSectionBase *getRelocatedSection(); + // Register thunk related to the symbol. When the section is written + // to a mmap'ed file, target is requested to write an actual thunk code. + // Now thunks is supported for MIPS target only. + void addThunk(SymbolBody &Body); + + // The offset of synthetic thunk code from beginning of this section. + uint64_t getThunkOff() const; + + // Size of chunk with thunks code. + uint64_t getThunksSize() const; + private: template void copyRelocations(uint8_t *Buf, llvm::iterator_range Rels); @@ -176,6 +187,8 @@ // Used by ICF. uint64_t GroupId = 0; + + llvm::TinyPtrVector Thunks; }; // MIPS .reginfo section provides information on the registers used by the code Index: lld/trunk/ELF/InputSection.cpp =================================================================== --- lld/trunk/ELF/InputSection.cpp +++ lld/trunk/ELF/InputSection.cpp @@ -38,6 +38,13 @@ Align = std::max(Header->sh_addralign, 1); } +template size_t InputSectionBase::getSize() const { + if (auto *D = dyn_cast>(this)) + if (D->getThunksSize() > 0) + return D->getThunkOff() + D->getThunksSize(); + return Header->sh_size; +} + template StringRef InputSectionBase::getSectionName() const { return check(File->getObj().getSectionName(this->Header)); } @@ -105,6 +112,19 @@ return Sections[this->Header->sh_info]; } +template void InputSection::addThunk(SymbolBody &Body) { + Body.ThunkIndex = Thunks.size(); + Thunks.push_back(&Body); +} + +template uint64_t InputSection::getThunkOff() const { + return this->Header->sh_size; +} + +template uint64_t 
InputSection::getThunksSize() const { + return Thunks.size() * Target->ThunkSize; +} + // This is used for -r. We can't use memcpy to copy relocations because we need // to update symbol table offset and section index for each relocation. So we // copy relocations one by one. @@ -293,6 +313,9 @@ // If that's the case, we leave the field alone rather than filling it // with a possibly incorrect value. continue; + } else if (Target->needsThunk(Type, *this->getFile(), Body)) { + // Get address of a thunk code related to the symbol. + SymVA = Body.getThunkVA(); } else if (Config->EMachine == EM_MIPS) { SymVA = adjustMipsSymVA(Type, *File, Body, AddrLoc, SymVA); } else if (!Target->needsCopyRel(Type, Body) && @@ -333,6 +356,19 @@ else this->relocate(Buf, BufEnd, EObj.rels(RelSec)); } + + // The section might have a data/code generated by the linker and need + // to be written after the section. Usually these are thunks - small pieces + // of code used to jump between "incompatible" functions like PIC and non-PIC + // or if the jump target is too far and its address does not fit to the short + // jump instruction. + if (!Thunks.empty()) { + Buf += OutSecOff + getThunkOff(); + for (const SymbolBody *S : Thunks) { + Target->writeThunk(Buf, S->getVA()); + Buf += Target->ThunkSize; + } + } } template Index: lld/trunk/ELF/OutputSections.h =================================================================== --- lld/trunk/ELF/OutputSections.h +++ lld/trunk/ELF/OutputSections.h @@ -87,6 +87,7 @@ // Typically the first section of each PT_LOAD segment has this flag. 
bool PageAlign = false; + virtual void assignOffsets() {} virtual void finalize() {} virtual void writeTo(uint8_t *Buf) {} virtual ~OutputSectionBase() = default; @@ -271,10 +272,10 @@ void sortInitFini(); void sortCtorsDtors(); void writeTo(uint8_t *Buf) override; + void assignOffsets() override; void finalize() override; private: - void reassignOffsets(); std::vector *> Sections; }; Index: lld/trunk/ELF/OutputSections.cpp =================================================================== --- lld/trunk/ELF/OutputSections.cpp +++ lld/trunk/ELF/OutputSections.cpp @@ -821,12 +821,6 @@ Sections.push_back(S); S->OutSec = this; this->updateAlign(S->Align); - - uintX_t Off = this->Header.sh_size; - Off = alignTo(Off, S->Align); - S->OutSecOff = Off; - Off += S->getSize(); - this->Header.sh_size = Off; } // If an input string is in the form of "foo.N" where N is a number, @@ -843,8 +837,8 @@ } // This function is called after we sort input sections -// to update their offsets. -template void OutputSection::reassignOffsets() { +// and scan relocations to setup sections' offsets. +template void OutputSection::assignOffsets() { uintX_t Off = 0; for (InputSection *S : Sections) { Off = alignTo(Off, S->Align); @@ -872,7 +866,6 @@ Sections.clear(); for (Pair &P : V) Sections.push_back(P.second); - reassignOffsets(); } // Returns true if S matches /Filename.?\.o$/. @@ -933,7 +926,6 @@ // Read the comment above. 
template void OutputSection::sortCtorsDtors() { std::stable_sort(Sections.begin(), Sections.end(), compCtors); - reassignOffsets(); } static void fill(uint8_t *Buf, size_t Size, ArrayRef A) { Index: lld/trunk/ELF/Symbols.h =================================================================== --- lld/trunk/ELF/Symbols.h +++ lld/trunk/ELF/Symbols.h @@ -85,9 +85,11 @@ uint32_t GotIndex = -1; uint32_t GotPltIndex = -1; uint32_t PltIndex = -1; + uint32_t ThunkIndex = -1; bool hasGlobalDynIndex() { return GlobalDynIndex != uint32_t(-1); } bool isInGot() const { return GotIndex != -1U; } bool isInPlt() const { return PltIndex != -1U; } + bool hasThunk() const { return ThunkIndex != -1U; } void setUsedInRegularObj() { IsUsedInRegularObj = true; } @@ -97,6 +99,7 @@ template typename ELFT::uint getGotVA() const; template typename ELFT::uint getGotPltVA() const; template typename ELFT::uint getPltVA() const; + template typename ELFT::uint getThunkVA() const; template typename ELFT::uint getSize() const; // A SymbolBody has a backreference to a Symbol. Originally they are @@ -249,6 +252,10 @@ return S->kind() == SymbolBody::DefinedSyntheticKind; } + // Special value designates that the symbol 'points' + // to the end of the section. 
+ static const uintX_t SectionEnd = uintX_t(-1); + uintX_t Value; const OutputSectionBase &Section; }; Index: lld/trunk/ELF/Symbols.cpp =================================================================== --- lld/trunk/ELF/Symbols.cpp +++ lld/trunk/ELF/Symbols.cpp @@ -37,6 +37,8 @@ switch (Body.kind()) { case SymbolBody::DefinedSyntheticKind: { auto &D = cast>(Body); + if (D.Value == DefinedSynthetic::SectionEnd) + return D.Section.getVA() + D.Section.getSize(); return D.Section.getVA() + D.Value; } case SymbolBody::DefinedRegularKind: { @@ -133,6 +135,13 @@ PltIndex * Target->PltEntrySize; } +template typename ELFT::uint SymbolBody::getThunkVA() const { + auto *D = cast>(this); + auto *S = cast>(D->Section); + return S->OutSec->getVA() + S->OutSecOff + S->getThunkOff() + + ThunkIndex * Target->ThunkSize; +} + template typename ELFT::uint SymbolBody::getSize() const { if (auto *B = dyn_cast>(this)) return B->Sym.st_size; @@ -298,6 +307,11 @@ template uint64_t SymbolBody::template getSize() const; template uint64_t SymbolBody::template getSize() const; +template uint32_t SymbolBody::template getThunkVA() const; +template uint32_t SymbolBody::template getThunkVA() const; +template uint64_t SymbolBody::template getThunkVA() const; +template uint64_t SymbolBody::template getThunkVA() const; + template int SymbolBody::compare(SymbolBody *Other); template int SymbolBody::compare(SymbolBody *Other); template int SymbolBody::compare(SymbolBody *Other); Index: lld/trunk/ELF/Target.h =================================================================== --- lld/trunk/ELF/Target.h +++ lld/trunk/ELF/Target.h @@ -17,6 +17,7 @@ namespace lld { namespace elf { +class InputFile; class SymbolBody; class TargetInfo { @@ -62,6 +63,11 @@ enum PltNeed { Plt_No, Plt_Explicit, Plt_Implicit }; PltNeed needsPlt(uint32_t Type, const SymbolBody &S) const; + virtual bool needsThunk(uint32_t Type, const InputFile &File, + const SymbolBody &S) const; + + virtual void writeThunk(uint8_t *Buf, 
uint64_t S) const {} + virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA) const = 0; virtual bool isGotRelative(uint32_t Type) const; @@ -94,6 +100,7 @@ unsigned PltZeroSize = 0; unsigned GotHeaderEntriesNum = 0; unsigned GotPltHeaderEntriesNum = 3; + uint32_t ThunkSize = 0; bool UseLazyBinding = false; private: Index: lld/trunk/ELF/Target.cpp =================================================================== --- lld/trunk/ELF/Target.cpp +++ lld/trunk/ELF/Target.cpp @@ -17,6 +17,7 @@ #include "Target.h" #include "Error.h" +#include "InputFiles.h" #include "OutputSections.h" #include "Symbols.h" @@ -198,9 +199,12 @@ void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; void writeGotHeader(uint8_t *Buf) const override; + void writeThunk(uint8_t *Buf, uint64_t S) const override; bool needsCopyRelImpl(uint32_t Type) const override; bool needsGot(uint32_t Type, const SymbolBody &S) const override; bool needsPltImpl(uint32_t Type) const override; + bool needsThunk(uint32_t Type, const InputFile &File, + const SymbolBody &S) const override; void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA) const override; bool isHintRel(uint32_t Type) const override; @@ -331,6 +335,11 @@ return Plt_No; } +bool TargetInfo::needsThunk(uint32_t Type, const InputFile &File, + const SymbolBody &S) const { + return false; +} + bool TargetInfo::isTlsInitialExecRel(uint32_t Type) const { return false; } bool TargetInfo::pointsToLocalDynamicGotEntry(uint32_t Type) const { @@ -1581,6 +1590,7 @@ PageSize = 65536; PltEntrySize = 16; PltZeroSize = 32; + ThunkSize = 16; UseLazyBinding = true; CopyRel = R_MIPS_COPY; PltRel = R_MIPS_JUMP_SLOT; @@ -1695,6 +1705,20 @@ } template +void MipsTargetInfo::writeThunk(uint8_t *Buf, uint64_t S) const { + // Write MIPS LA25 thunk code to call PIC function from the non-PIC one. 
+ // See MipsTargetInfo::needsThunk for details. + const endianness E = ELFT::TargetEndianness; + write32(Buf, 0x3c190000); // lui $25, %hi(func) + write32(Buf + 4, 0x08000000); // j func + write32(Buf + 8, 0x27390000); // addiu $25, $25, %lo(func) + write32(Buf + 12, 0x00000000); // nop + writeMipsHi16(Buf, S); + write32(Buf + 4, 0x08000000 | (S >> 2)); + writeMipsLo16(Buf + 8, S); +} + +template bool MipsTargetInfo::needsCopyRelImpl(uint32_t Type) const { return !isRelRelative(Type); } @@ -1715,6 +1739,31 @@ } template +bool MipsTargetInfo::needsThunk(uint32_t Type, const InputFile &File, + const SymbolBody &S) const { + // Any MIPS PIC code function is invoked with its address in register $t9. + // So if we have a branch instruction from non-PIC code to the PIC one + // we cannot make the jump directly and need to create a small stub + // to save the target function address. + // See page 3-38 ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Type != R_MIPS_26) + return false; + auto *F = dyn_cast>(&File); + if (!F) + return false; + // If current file has PIC code, LA25 stub is not required. + if (F->getObj().getHeader()->e_flags & EF_MIPS_PIC) + return false; + auto *D = dyn_cast>(&S); + if (!D || !D->Section) + return false; + // LA25 is required if target file has PIC code + // or target symbol is a PIC symbol. 
+ return (D->Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC) || + (D->Sym.st_other & STO_MIPS_MIPS16) == STO_MIPS_PIC; +} + +template uint64_t MipsTargetInfo::getImplicitAddend(uint8_t *Buf, uint32_t Type) const { const endianness E = ELFT::TargetEndianness; Index: lld/trunk/ELF/Writer.cpp =================================================================== --- lld/trunk/ELF/Writer.cpp +++ lld/trunk/ELF/Writer.cpp @@ -85,6 +85,9 @@ bool isOutputDynamic() const { return !Symtab.getSharedFiles().empty() || Config->Pic; } + template + void scanRelocsForThunks(const elf::ObjectFile &File, + iterator_range Rels); void ensureBss(); void addCommonSymbols(std::vector &Syms); @@ -298,6 +301,25 @@ return 0; } +// Some targets might require creation of thunks for relocations. Now we +// support only MIPS which requires LA25 thunk to call PIC code from non-PIC +// one. Scan relocations to find each one requires thunk. +template +template +void Writer::scanRelocsForThunks(const elf::ObjectFile &File, + iterator_range Rels) { + for (const RelTy &RI : Rels) { + uint32_t Type = RI.getType(Config->Mips64EL); + uint32_t SymIndex = RI.getSymbol(Config->Mips64EL); + SymbolBody &Body = File.getSymbolBody(SymIndex).repl(); + if (Body.hasThunk() || !Target->needsThunk(Type, File, Body)) + continue; + auto *D = cast>(&Body); + auto *S = cast>(D->Section); + S->addThunk(Body); + } +} + // The reason we have to do this early scan is as follows // * To mmap the output file, we need to know the size // * For that, we need to know how many dynamic relocs we will have. @@ -479,6 +501,10 @@ Out::RelaDyn->addReloc( {Target->RelativeRel, &C, RI.r_offset, true, &Body, Addend}); } + + // Scan relocations for necessary thunks. 
+ if (Config->EMachine == EM_MIPS) + scanRelocsForThunks(File, Rels); } template void Writer::scanRelocs(InputSection &C) { @@ -1042,6 +1068,9 @@ } } + for (OutputSectionBase *Sec : getSections()) + Sec->assignOffsets(); + // Now that we have defined all possible symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. std::vector CommonSymbols; @@ -1167,7 +1196,8 @@ OutputSectionBase *OS) { if (OS) { Symtab.addSynthetic(Start, *OS, 0, STV_DEFAULT); - Symtab.addSynthetic(End, *OS, OS->getSize(), STV_DEFAULT); + Symtab.addSynthetic(End, *OS, DefinedSynthetic::SectionEnd, + STV_DEFAULT); } else { Symtab.addIgnored(Start); Symtab.addIgnored(End); @@ -1200,7 +1230,8 @@ Symtab.addSynthetic(Start, *Sec, 0, STV_DEFAULT); if (SymbolBody *B = Symtab.find(Stop)) if (B->isUndefined()) - Symtab.addSynthetic(Stop, *Sec, Sec->getSize(), STV_DEFAULT); + Symtab.addSynthetic(Stop, *Sec, DefinedSynthetic::SectionEnd, + STV_DEFAULT); } template static bool needsPtLoad(OutputSectionBase *Sec) { Index: lld/trunk/test/ELF/Inputs/mips-pic.s =================================================================== --- lld/trunk/test/ELF/Inputs/mips-pic.s +++ lld/trunk/test/ELF/Inputs/mips-pic.s @@ -0,0 +1,19 @@ + .option pic2 + + .section .text.1,"ax",@progbits + .align 4 + .globl foo1a + .type foo1a, @function +foo1a: + nop + .globl foo1b + .type foo1b, @function +foo1b: + nop + + .section .text.2,"ax",@progbits + .align 4 + .globl foo2 + .type foo2, @function +foo2: + nop Index: lld/trunk/test/ELF/mips-npic-call-pic.s =================================================================== --- lld/trunk/test/ELF/mips-npic-call-pic.s +++ lld/trunk/test/ELF/mips-npic-call-pic.s @@ -0,0 +1,58 @@ +# Check LA25 stubs creation. This stub code is necessary when +# non-PIC code calls PIC function. 
+ +# RUN: llvm-mc -filetype=obj -triple=mips-unknown-linux \ +# RUN: %p/Inputs/mips-pic.s -o %t-pic.o +# RUN: llvm-mc -filetype=obj -triple=mips-unknown-linux %s -o %t-npic.o +# RUN: ld.lld %t-npic.o %t-pic.o -o %t.exe +# RUN: llvm-objdump -d %t.exe | FileCheck %s + +# REQUIRES: mips + +# CHECK: Disassembly of section .text: +# CHECK-NEXT: __start: +# CHECK-NEXT: 20000: 0c 00 80 0a jal 131112 +# ^-- 0x20028 .pic.foo1a +# CHECK-NEXT: 20004: 00 00 00 00 nop +# CHECK-NEXT: 20008: 0c 00 80 15 jal 131156 +# ^-- 0x20054 .pic.foo2 +# CHECK-NEXT: 2000c: 00 00 00 00 nop +# CHECK-NEXT: 20010: 0c 00 80 0e jal 131128 +# ^-- 0x20038 .pic.foo1b +# CHECK-NEXT: 20014: 00 00 00 00 nop +# CHECK-NEXT: 20018: 0c 00 80 15 jal 131156 +# ^-- 0x20054 .pic.foo2 +# CHECK-NEXT: 2001c: 00 00 00 00 nop +# +# CHECK: foo1a: +# CHECK-NEXT: 20020: 00 00 00 00 nop +# +# CHECK: foo1b: +# CHECK-NEXT: 20024: 00 00 00 00 nop +# +# CHECK-NEXT: 20028: 3c 19 00 02 lui $25, 2 +# CHECK-NEXT: 2002c: 08 00 80 08 j 131104 +# CHECK-NEXT: 20030: 27 39 00 20 addiu $25, $25, 32 +# CHECK-NEXT: 20034: 00 00 00 00 nop +# CHECK-NEXT: 20038: 3c 19 00 02 lui $25, 2 +# CHECK-NEXT: 2003c: 08 00 80 09 j 131108 +# CHECK-NEXT: 20040: 27 39 00 24 addiu $25, $25, 36 +# CHECK-NEXT: 20044: 00 00 00 00 nop +# CHECK-NEXT: 20048: 00 00 00 00 nop +# CHECK-NEXT: 2004c: 00 00 00 00 nop +# +# CHECK: foo2: +# CHECK-NEXT: 20050: 00 00 00 00 nop +# +# CHECK-NEXT: 20054: 3c 19 00 02 lui $25, 2 +# CHECK-NEXT: 20058: 08 00 80 14 j 131152 +# CHECK-NEXT: 2005c: 27 39 00 50 addiu $25, $25, 80 +# CHECK-NEXT: 20060: 00 00 00 00 nop + + .text + .globl __start +__start: + jal foo1a + jal foo2 + jal foo1b + jal foo2