Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -208,6 +208,19 @@ continue; } + // TLS descriptor lazy relocations are specific. They + // use two words in the .got.plt. A single relocation is to be used to + // compute the value of the two words of the TLS descriptor. + // Overall design can be found in + // "Thread-Local Storage Descriptors for IA32 and AMD64/EM64T" + // http://www.fsfla.org/~lxoliva/writeups/TLS/RFC-TLSDESC-x86.txt + if (Body->isTls() && Target->isTlsDescReloc(Type, *Body)) { + Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, + Out::GotPlt->getEntryAddr(*Body) + + getAddend(RI)); + continue; + } + uintX_t SymVA = getSymVA(*Body); if (Target->relocNeedsPlt(Type, *Body)) { SymVA = Out::Plt->getEntryAddr(*Body); Index: ELF/OutputSections.h =================================================================== --- ELF/OutputSections.h +++ ELF/OutputSections.h @@ -125,6 +125,7 @@ void addEntry(SymbolBody *Sym); bool addDynTlsEntry(SymbolBody *Sym); bool addCurrentModuleTlsIndex(); + void addTlsDescEntry(); bool empty() const { return Entries.empty(); } uintX_t getEntryAddr(const SymbolBody &B) const; uintX_t getGlobalDynAddr(const SymbolBody &B) const; @@ -141,10 +142,13 @@ unsigned getMipsLocalEntriesNum() const; uint32_t getLocalTlsIndexVA() { return Base::getVA() + LocalTlsIndexOff; } + uintX_t getTlsDescEntryVA() const { return Base::getVA() + TlsDescEntryOff; } + bool hasTlsDescEntry() const { return TlsDescEntryOff != (uintX_t)-1; } private: std::vector Entries; uint32_t LocalTlsIndexOff = -1; + uintX_t TlsDescEntryOff = -1; }; template @@ -156,6 +160,7 @@ void finalize() override; void writeTo(uint8_t *Buf) override; void addEntry(SymbolBody *Sym); + void addTlsDescEntry(SymbolBody *Sym); bool empty() const; uintX_t getEntryAddr(const SymbolBody &B) const; @@ -172,8 +177,9 @@ void finalize() override; void writeTo(uint8_t *Buf) override; void
addEntry(SymbolBody *Sym); - bool empty() const { return Entries.empty(); } + bool empty() const; uintX_t getEntryAddr(const SymbolBody &B) const; + uintX_t getTlsDescEntryVA() const; private: std::vector> Entries; Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -45,6 +45,12 @@ Entries.push_back(Sym); } +template void GotPltSection::addTlsDescEntry(SymbolBody *Sym) { + Sym->GotPltIndex = Target->getGotPltHeaderEntriesNum() + Entries.size(); + Entries.push_back(nullptr); + Entries.push_back(nullptr); +} + template bool GotPltSection::empty() const { return Entries.empty(); } @@ -64,7 +70,8 @@ Target->writeGotPltHeaderEntries(Buf); Buf += Target->getGotPltHeaderEntriesNum() * sizeof(uintX_t); for (const SymbolBody *B : Entries) { - Target->writeGotPltEntry(Buf, Out::Plt->getEntryAddr(*B)); + if (B) + Target->writeGotPltEntry(Buf, Out::Plt->getEntryAddr(*B)); Buf += sizeof(uintX_t); } } @@ -101,6 +108,13 @@ return true; } +template void GotSection::addTlsDescEntry() { + if (TlsDescEntryOff != uintX_t(-1)) + return; + Entries.push_back(nullptr); + TlsDescEntryOff = (Entries.size() - 1) * sizeof(uintX_t); +} + template typename GotSection::uintX_t GotSection::getEntryAddr(const SymbolBody &B) const { @@ -174,6 +188,14 @@ Target->writePltEntry(Buf + Off, GotVA, GotE, Plt, E->PltIndex, RelOff); Off += Target->getPltEntrySize(); } + // If module uses TLS descriptor relocations, it requires .got entry + // and a special .plt entry as well. This plt entry is used for TLS descriptor + // resolver calls, and also dynamic entry DT_TLSDESC_PLT should be created to + // hold its address.
+ if (Out::Got->hasTlsDescEntry()) + Target->writePltTlsDescEntry(Buf + Off, this->getVA() + Off, + Out::Got->getTlsDescEntryVA(), + Out::GotPlt->getVA()); } template void PltSection::addEntry(SymbolBody *Sym) { @@ -184,6 +206,10 @@ Entries.push_back(std::make_pair(Sym, RelOff)); } +template bool PltSection::empty() const { + return Entries.empty() && !Out::Got->hasTlsDescEntry(); +} + template typename PltSection::uintX_t PltSection::getEntryAddr(const SymbolBody &B) const { @@ -191,9 +217,17 @@ B.PltIndex * Target->getPltEntrySize(); } +template +typename PltSection::uintX_t PltSection::getTlsDescEntryVA() const { + return this->getVA() + Target->getPltZeroEntrySize() + + Entries.size() * Target->getPltEntrySize(); +} + template void PltSection::finalize() { this->Header.sh_size = Target->getPltZeroEntrySize() + Entries.size() * Target->getPltEntrySize(); + if (Out::Got->hasTlsDescEntry()) + this->Header.sh_size += Target->getPltTlsDescEntrySize(); } template @@ -216,6 +250,16 @@ return true; } + // All TLS descriptor relocations produce a single dynamic one which is + // R_*_TLSDESC usually. Because of dynamic nature, .got.plt is used + // to store double word for them. + if (Body && Target->isTlsDescReloc(Type, *Body)) { + P->setSymbolAndType(Body->DynamicSymbolTableIndex, + Target->getTlsDescDynReloc(), Config->Mips64EL); + P->r_offset = Out::GotPlt->getEntryAddr(*Body); + return true; + } + if (!Body || !Target->isTlsGlobalDynamicReloc(Type)) return false; @@ -686,6 +730,11 @@ WriteVal(DT_PLTREL, Out::RelaPlt->isRela() ?
DT_RELA : DT_REL); } + if (Out::Got->hasTlsDescEntry()) { + WritePtr(DT_TLSDESC_GOT, Out::Got->getTlsDescEntryVA()); + WritePtr(DT_TLSDESC_PLT, Out::Plt->getTlsDescEntryVA()); + } + WritePtr(DT_SYMTAB, Out::DynSymTab->getVA()); WritePtr(DT_SYMENT, sizeof(Elf_Sym)); WritePtr(DT_STRTAB, Out::DynStrTab->getVA()); Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -36,12 +36,17 @@ } unsigned getTlsModuleIndexReloc() const { return TlsModuleIndexReloc; } unsigned getTlsOffsetReloc() const { return TlsOffsetReloc; } + unsigned getTlsDescDynReloc() const { return TlsDescDynReloc; } unsigned getPltZeroEntrySize() const { return PltZeroEntrySize; } unsigned getPltEntrySize() const { return PltEntrySize; } + unsigned getPltTlsDescEntrySize() const { return PltTlsDescEntrySize; } bool supportsLazyRelocations() const { return LazyRelocations; } unsigned getGotHeaderEntriesNum() const { return GotHeaderEntriesNum; } unsigned getGotPltHeaderEntriesNum() const { return GotPltHeaderEntriesNum; } virtual unsigned getDynReloc(unsigned Type) const { return Type; } + virtual bool isTlsDescReloc(unsigned Type, const SymbolBody &S) const { + return false; + } virtual bool isTlsDynReloc(unsigned Type, const SymbolBody &S) const { return false; } @@ -56,6 +61,9 @@ virtual void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const = 0; + virtual void writePltTlsDescEntry(uint8_t *Buf, uint64_t PltEntryAddr, + uint64_t GotTlsDescEntryAddr, + uint64_t GotPltVA) const; // Returns true if a relocation is just a hint for linker to make for example // some code optimization. 
Such relocations should not be handled as a regular @@ -106,8 +114,10 @@ unsigned TlsGlobalDynamicReloc = 0; unsigned TlsModuleIndexReloc; unsigned TlsOffsetReloc; + unsigned TlsDescDynReloc; unsigned PltEntrySize = 8; unsigned PltZeroEntrySize = 0; + unsigned PltTlsDescEntrySize = 0; unsigned GotHeaderEntriesNum = 0; unsigned GotPltHeaderEntriesNum = 3; bool LazyRelocations = false; Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -198,7 +198,11 @@ void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; + void writePltTlsDescEntry(uint8_t *Buf, uint64_t PltEntryAddr, + uint64_t GotTlsDescEntryAddr, + uint64_t GotPltVA) const override; unsigned getTlsGotReloc(unsigned Type = -1) const override; + bool isTlsDescReloc(unsigned Type, const SymbolBody &S) const override; bool isTlsDynReloc(unsigned Type, const SymbolBody &S) const override; bool needsCopyRel(uint32_t Type, const SymbolBody &S) const override; bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const override; @@ -302,6 +306,10 @@ void TargetInfo::writeGotPltHeaderEntries(uint8_t *Buf) const {} +void TargetInfo::writePltTlsDescEntry(uint8_t *Buf, uint64_t PltEntryAddr, + uint64_t GotTlsDescEntryAddr, + uint64_t GotPltVA) const {} + X86TargetInfo::X86TargetInfo() { CopyReloc = R_386_COPY; PCRelReloc = R_386_PC32; @@ -1185,10 +1193,12 @@ IRelativeReloc = R_AARCH64_IRELATIVE; GotReloc = R_AARCH64_GLOB_DAT; PltReloc = R_AARCH64_JUMP_SLOT; + TlsDescDynReloc = R_AARCH64_TLSDESC; TlsGotReloc = R_AARCH64_TLS_TPREL64; LazyRelocations = true; PltEntrySize = 16; PltZeroEntrySize = 32; + PltTlsDescEntrySize = 32; } unsigned AArch64TargetInfo::getDynReloc(unsigned Type) const { @@ -1245,6 +1255,31 @@ GotEntryAddr); } +void AArch64TargetInfo::writePltTlsDescEntry(uint8_t *Buf, + uint64_t PltEntryAddr, + uint64_t GotTlsDescEntryAddr, + uint64_t 
GotPltVA) const { + const uint8_t Inst[] = { + 0xe2, 0x0f, 0xbf, 0xa9, // stp x2, x3, [sp, #-16]! + 0x02, 0x00, 0x00, 0x90, // adrp x2, Page(DT_TLSDESC_GOT) + 0x03, 0x00, 0x00, 0x90, // adrp x3, Page(&.got.plt[0]) + 0x42, 0x00, 0x40, 0xf9, // ldr x2, [x2, #0] + 0x63, 0x00, 0x00, 0x91, // add x3, x3, 0 + 0x40, 0x00, 0x1f, 0xd6, // br x2 + 0x1f, 0x20, 0x03, 0xd5, // nop + 0x1f, 0x20, 0x03, 0xd5 // nop + }; + memcpy(Buf, Inst, sizeof(Inst)); + relocateOne(Buf + 4, Buf + 8, R_AARCH64_ADR_PREL_PG_HI21, PltEntryAddr + 4, + GotTlsDescEntryAddr); + relocateOne(Buf + 8, Buf + 12, R_AARCH64_ADR_PREL_PG_HI21, PltEntryAddr + 8, + GotPltVA); + relocateOne(Buf + 12, Buf + 16, R_AARCH64_LDST64_ABS_LO12_NC, + PltEntryAddr + 12, GotTlsDescEntryAddr); + relocateOne(Buf + 16, Buf + 20, R_AARCH64_ADD_ABS_LO12_NC, PltEntryAddr + 16, + GotPltVA); +} + unsigned AArch64TargetInfo::getTlsGotReloc(unsigned Type) const { if (Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 || Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) @@ -1252,6 +1287,19 @@ return TlsGotReloc; } +bool AArch64TargetInfo::isTlsDescReloc(unsigned Type, + const SymbolBody &S) const { + switch (Type) { + case R_AARCH64_TLSDESC_ADR_PAGE21: + case R_AARCH64_TLSDESC_LD64_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case R_AARCH64_TLSDESC_CALL: + return true; + default: + return false; + } +} + bool AArch64TargetInfo::isTlsDynReloc(unsigned Type, const SymbolBody &S) const { return Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 || @@ -1358,7 +1406,8 @@ break; } case R_AARCH64_ADR_PREL_PG_HI21: - case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: { + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + case R_AARCH64_TLSDESC_ADR_PAGE21: { uint64_t X = getAArch64Page(SA) - getAArch64Page(P); checkInt<33>(X, Type); updateAArch64Adr(Loc, (X >> 12) & 0x1FFFFF); // X[32:12] @@ -1379,6 +1428,7 @@ } case R_AARCH64_LD64_GOT_LO12_NC: case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + case R_AARCH64_TLSDESC_LD64_LO12_NC: checkAlignment<8>(SA, Type); or32le(Loc, (SA & 
0xFF8) << 7); break; @@ -1389,6 +1439,7 @@ or32le(Loc, (SA & 0x0FFC) << 9); break; case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: or32le(Loc, (SA & 0xFFF) << 10); break; case R_AARCH64_LDST32_ABS_LO12_NC: @@ -1414,6 +1465,11 @@ or32le(Loc, (X & 0xFFFC) << 3); break; } + case R_AARCH64_TLSDESC_CALL: + // For relaxation only. Must be used to identify a + // BLR instruction which performs an indirect call + // to the TLS descriptor function for S + A. + break; default: error("unrecognized reloc " + Twine(Type)); } Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -247,8 +247,30 @@ continue; } - if (Body && Body->isTls() && !Target->isTlsDynReloc(Type, *Body)) - continue; + if (Body && Body->isTls()) { + // If module uses TLS descriptor relocations, + // then special entries are created for module: + // 1) .got entry to be filled in by the dynamic loader with the + // address of the internal function to be used for lazy relocation of TLS + // descriptors. + // 2) Special .plt entry that + // pushes onto the stack the module's link map address, located in the + // GOT portion reserved for the dynamic loader to use, and then jumps to + // the lazy relocation function, using the address stored in the + // TLSDESC_GOT entry. + // For each Body itself two words are allocated in .got.plt instead of + // usual .got, because these relocations are lazy ones. 
+ if (Target->isTlsDescReloc(Type, *Body)) { + Out::Got->addTlsDescEntry(); + if (Body->isInGotPlt()) + continue; + Out::GotPlt->addTlsDescEntry(Body); + Out::RelaPlt->addReloc({&C, &RI}); + continue; + } + if (!Target->isTlsDynReloc(Type, *Body)) + continue; + } if (Target->relocNeedsDynRelative(Type)) { RelType *Rel = new (Alloc) RelType; Index: test/ELF/aarch64-tls-desc.s =================================================================== --- test/ELF/aarch64-tls-desc.s +++ test/ELF/aarch64-tls-desc.s @@ -0,0 +1,155 @@ +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %tmain.o +# RUN: ld.lld %tmain.o -shared -o %tout +# RUN: llvm-objdump -d %tout | FileCheck %s +# RUN: llvm-readobj -s -r -dynamic-table %tout | FileCheck -check-prefix=READOBJ %s +# REQUIRES: aarch64 + +#READOBJ: Section { +#READOBJ: Index: +#READOBJ: Name: .plt +#READOBJ-NEXT: Type: SHT_PROGBITS +#READOBJ-NEXT: Flags [ +#READOBJ-NEXT: SHF_ALLOC +#READOBJ-NEXT: SHF_EXECINSTR +#READOBJ-NEXT: ] +#READOBJ-NEXT: Address: 0x1020 +#READOBJ-NEXT: Offset: +#READOBJ-NEXT: Size: 64 +#READOBJ-NEXT: Link: +#READOBJ-NEXT: Info: +#READOBJ-NEXT: AddressAlignment: +#READOBJ-NEXT: EntrySize: +#READOBJ-NEXT: } +#READOBJ: Section { +#READOBJ: Index: +#READOBJ: Name: .got +#READOBJ-NEXT: Type: SHT_PROGBITS +#READOBJ-NEXT: Flags [ +#READOBJ-NEXT: SHF_ALLOC +#READOBJ-NEXT: SHF_WRITE +#READOBJ-NEXT: ] +#READOBJ-NEXT: Address: 0x20A8 +#READOBJ-NEXT: Offset: +#READOBJ-NEXT: Size: 8 +#READOBJ-NEXT: Link: +#READOBJ-NEXT: Info: +#READOBJ-NEXT: AddressAlignment: +#READOBJ-NEXT: EntrySize: +#READOBJ-NEXT: } +#READOBJ: Section { +#READOBJ: Index: +#READOBJ: Name: .got.plt +#READOBJ-NEXT: Type: SHT_PROGBITS +#READOBJ-NEXT: Flags [ +#READOBJ-NEXT: SHF_ALLOC +#READOBJ-NEXT: SHF_WRITE +#READOBJ-NEXT: ] +#READOBJ-NEXT: Address: 0x3000 +#READOBJ-NEXT: Offset: 0x3000 +#READOBJ-NEXT: Size: 56 +#READOBJ-NEXT: Link: 0 +#READOBJ-NEXT: Info: 0 +#READOBJ-NEXT: AddressAlignment: 8 +#READOBJ-NEXT: EntrySize: 0 
+#READOBJ-NEXT: } +#READOBJ: Relocations [ +#READOBJ-NEXT: Section ({{.*}}) .rela.plt { +# 0x3018 = .got.plt + 0x18 (reserved 3 entries) +# 0x3028 = 0x3018 + double entry size (16) +#READOBJ-NEXT: 0x3018 R_AARCH64_TLSDESC foo 0x0 +#READOBJ-NEXT: 0x3028 R_AARCH64_TLSDESC bar 0x0 +#READOBJ-NEXT: } +#READOBJ-NEXT:] +# Todo: unknowns should be replaced with TLSDESC_PLT and TLSDESC_GOT +# once llvm-readobj is updated to support it. +#READOBJ: DynamicSection [ +#READOBJ-NEXT: Tag Type Name/Value +#READOBJ-NEXT: 0x0000000000000017 JMPREL 0x298 +#READOBJ-NEXT: 0x0000000000000002 PLTRELSZ 48 (bytes) +#READOBJ-NEXT: 0x0000000000000003 PLTGOT 0x3000 +#READOBJ-NEXT: 0x0000000000000014 PLTREL RELA +# 0x20A8 = Location of GOT entry used by TLS descriptor resolver PLT entry +#READOBJ-NEXT: 0x000000006FFFFEF7 unknown 0x20A8 +# 0x1040 = Location of PLT entry for TLS descriptor resolver calls. +#READOBJ-NEXT: 0x000000006FFFFEF6 unknown 0x1040 +#READOBJ-NEXT: 0x0000000000000006 SYMTAB 0x200 +#READOBJ-NEXT: 0x000000000000000B SYMENT 24 (bytes) +#READOBJ-NEXT: 0x0000000000000005 STRTAB 0x288 +#READOBJ-NEXT: 0x000000000000000A STRSZ 16 (bytes) +#READOBJ-NEXT: 0x0000000000000004 HASH 0x260 +#READOBJ-NEXT: ] + +#CHECK: Disassembly of section .text: +#CHECK-NEXT: _start: +# Page(.got.plt[N]) - Page(0x1000) = Page(0x3018) - 0x1000 = +# 0x3000 - 0x1000 = 0x2000 = 8192 +# 0x18 = 24 +#CHECK-NEXT: 1000: 00 00 00 d0 adrp x0, #8192 +#CHECK-NEXT: 1004: 02 0c 40 f9 ldr x2, [x0, #24] +#CHECK-NEXT: 1008: 00 60 00 91 add x0, x0, #24 +#CHECK-NEXT: 100c: 40 00 3f d6 blr x2 +# Page(.got.plt[N]) - Page(0x1000) = Page(0x3028) - 0x1000 = +# 0x3000 - 0x1000 = 0x2000 = 8192 +# 0x28 = 40 +#CHECK-NEXT: 1010: 00 00 00 d0 adrp x0, #8192 +#CHECK-NEXT: 1014: 02 14 40 f9 ldr x2, [x0, #40] +#CHECK-NEXT: 1018: 00 a0 00 91 add x0, x0, #40 +#CHECK-NEXT: 101c: 40 00 3f d6 blr x2 +#CHECK-NEXT: Disassembly of section .plt: +#CHECK-NEXT: .plt: +#CHECK-NEXT: 1020: f0 7b bf a9 stp x16, x30, [sp, #-16]! 
+#CHECK-NEXT: 1024: 10 00 00 d0 adrp x16, #8192 +#CHECK-NEXT: 1028: 11 0a 40 f9 ldr x17, [x16, #16] +#CHECK-NEXT: 102c: 10 42 00 91 add x16, x16, #16 +#CHECK-NEXT: 1030: 20 02 1f d6 br x17 +#CHECK-NEXT: 1034: 1f 20 03 d5 nop +#CHECK-NEXT: 1038: 1f 20 03 d5 nop +#CHECK-NEXT: 103c: 1f 20 03 d5 nop +# Page(.got[N]) - Page(P) = Page(0x20A8) - Page(0x1044) = +# 0x2000 - 0x1000 = 4096 +# Page(.got.plt) - Page(P) = Page(0x3000) - Page(0x1048) = +# 0x3000 - 0x1000 = 8192 +# 0xA8 = 168 +# 0x0 = 0 +#CHECK-NEXT: 1040: e2 0f bf a9 stp x2, x3, [sp, #-16]! +#CHECK-NEXT: 1044: 02 00 00 b0 adrp x2, #4096 +#CHECK-NEXT: 1048: 03 00 00 d0 adrp x3, #8192 +#CHECK-NEXT: 104c: 42 54 40 f9 ldr x2, [x2, #168] +#CHECK-NEXT: 1050: 63 00 00 91 add x3, x3, #0 +#CHECK-NEXT: 1054: 40 00 1f d6 br x2 +#CHECK-NEXT: 1058: 1f 20 03 d5 nop +#CHECK-NEXT: 105c: 1f 20 03 d5 nop + +.text + .global foo + .section .tdata,"awT",%progbits + .align 2 + .type foo, %object + .size foo, 4 +foo: + .word 5 + .text + +.text + .global bar + .section .tdata,"awT",%progbits + .align 2 + .type bar, %object + .size bar, 4 +bar: + .word 5 + .text + +.globl _start +_start: + adrp x0, :tlsdesc:foo + ldr x2, [x0, #:tlsdesc_lo12:foo] + add x0, x0, :tlsdesc_lo12:foo + .tlsdesccall foo + blr x2 + + adrp x0, :tlsdesc:bar + ldr x2, [x0, #:tlsdesc_lo12:bar] + add x0, x0, :tlsdesc_lo12:bar + .tlsdesccall bar + blr x2