Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -169,11 +169,14 @@ } if (Target->isTlsOptimized(Type, &Body)) { + uintX_t SymVA = Target->relocNeedsGot(Type, Body) + ? Out::Got->getEntryAddr(Body) + : getSymVA(Body); // By optimizing TLS relocations, it is sometimes needed to skip // relocations that immediately follow TLS relocations. This function // knows how many slots we need to skip. - I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, - getSymVA(Body)); + I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, SymVA, + Body); continue; } Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -218,6 +218,12 @@ } if (Body && Target->isTlsGlobalDynamicReloc(Type)) { + if (Target->isTlsOptimized(Type, Body)) { + P->setSymbolAndType(Body->getDynamicSymbolTableIndex(), + Target->getTlsGotReloc(), Config->Mips64EL); + P->r_offset = Out::Got->getEntryAddr(*Body); + return true; + } P->setSymbolAndType(Body->getDynamicSymbolTableIndex(), Target->getTlsModuleIndexReloc(), Config->Mips64EL); P->r_offset = Out::Got->getGlobalDynAddr(*Body); Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -59,10 +59,10 @@ virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA, uint8_t *PairedLoc = nullptr) const = 0; - virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const; + virtual uint32_t isTlsOptimized(unsigned Type, const SymbolBody *S) const; virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, - uint32_t Type, uint64_t P, - uint64_t SA) const; + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody &S) const; virtual ~TargetInfo(); protected: Index: ELF/Target.cpp 
=================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -108,15 +108,18 @@ void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA, uint8_t *PairedLoc = nullptr) const override; bool isRelRelative(uint32_t Type) const override; - bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override; + uint32_t isTlsOptimized(unsigned Type, const SymbolBody *S) const override; unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, - uint64_t P, uint64_t SA) const override; + uint64_t P, uint64_t SA, + const SymbolBody &S) const override; private: void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; + void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; }; @@ -195,8 +198,8 @@ TargetInfo::~TargetInfo() {} -bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const { - return false; +uint32_t TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const { + return 0; } uint64_t TargetInfo::getVAStart() const { return Config->Shared ? 
0 : VAStart; } @@ -210,8 +213,8 @@ bool TargetInfo::isRelRelative(uint32_t Type) const { return true; } unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, - uint32_t Type, uint64_t P, - uint64_t SA) const { + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody &S) const { return 0; } @@ -416,14 +419,15 @@ } bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { + if (Type == R_X86_64_TLSGD) + return Target->isTlsOptimized(Type, &S) == 1; if (Type == R_X86_64_GOTTPOFF) return !isTlsOptimized(Type, &S); - return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL || - relocNeedsPlt(Type, S); + return Type == R_X86_64_GOTPCREL || relocNeedsPlt(Type, S); } bool X86_64TargetInfo::isTlsDynReloc(unsigned Type) const { - return Type == R_X86_64_GOTTPOFF; + return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_TLSGD; } unsigned X86_64TargetInfo::getPltRefReloc(unsigned Type) const { @@ -487,13 +491,18 @@ } } -bool X86_64TargetInfo::isTlsOptimized(unsigned Type, - const SymbolBody *S) const { +// Returns optimization level. 0 - not optimized, +// 1 - partial, other values - full. +uint32_t X86_64TargetInfo::isTlsOptimized(unsigned Type, + const SymbolBody *S) const { if (Config->Shared || (S && !S->isTLS())) - return false; - return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 || - (Type == R_X86_64_TLSGD && !canBePreempted(S, true)) || - (Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true)); + return 0; + if (Type == R_X86_64_TLSGD) + return canBePreempted(S, true) ? 
1 : 2; + if (Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 || + (Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true))) + return 2; + return 0; } // "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5 @@ -538,6 +547,28 @@ relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA); } +// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5 +// x86-64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows +// how +// GD can be optimized to IE: +// .byte 0x66 +// leaq x@tlsgd(%rip), %rdi +// .word 0x6666 +// rex64 +// call __tls_get_addr@plt +// Is converted to: +// mov %fs:0x0,%rax +// addq x@gottpoff(%rip),%rax +void X86_64TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const { + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@gottpoff(%rip),%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF64, P + 12, SA); +} + // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to // R_X86_64_TPOFF32 so that R_X86_64_TPOFF32 so that it does not use GOT. // This function does that. Read "ELF Handling For Thread-Local Storage, @@ -580,7 +611,8 @@ // This function returns a number of relocations that need to be skipped. 
unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, - uint64_t SA) const { + uint64_t SA, + const SymbolBody &S) const { switch (Type) { case R_X86_64_DTPOFF32: relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA); @@ -588,10 +620,14 @@ case R_X86_64_GOTTPOFF: relocateTlsIeToLe(Loc, BufEnd, P, SA); return 0; - case R_X86_64_TLSGD: - relocateTlsGdToLe(Loc, BufEnd, P, SA); + case R_X86_64_TLSGD: { + if (isTlsOptimized(Type, &S) == 1) + relocateTlsGdToIe(Loc, BufEnd, P, SA); + else + relocateTlsGdToLe(Loc, BufEnd, P, SA); // The next relocation should be against __tls_get_addr, so skip it return 1; + } case R_X86_64_TLSLD: relocateTlsLdToLe(Loc, BufEnd, P, SA); // The next relocation should be against __tls_get_addr, so skip it Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -219,14 +219,14 @@ Body = Body->repl(); if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) { - if (Target->isTlsOptimized(Type, Body)) - continue; - if (Out::Got->addDynTlsEntry(Body)) { + uint32_t Opt = Target->isTlsOptimized(Type, Body); + if (!Opt && Out::Got->addDynTlsEntry(Body)) { Out::RelaDyn->addReloc({&C, &RI}); Out::RelaDyn->addReloc({nullptr, nullptr}); Body->setUsedInDynamicReloc(); } - continue; + if (Opt != 1) + continue; } if (Body && Body->isTLS() && !Target->isTlsDynReloc(Type)) Index: test/ELF/Inputs/tls-opt-gdie.s =================================================================== --- test/ELF/Inputs/tls-opt-gdie.s +++ test/ELF/Inputs/tls-opt-gdie.s @@ -0,0 +1,20 @@ +.type tlsshared0,@object +.section .tbss,"awT",@nobits +.globl tlsshared0 +.align 4 +tlsshared0: + .long 0 + .size tlsshared0, 4 + +.type tlsshared1,@object +.globl tlsshared1 +.align 4 +tlsshared1: + .long 0 + .size tlsshared1, 4 + +.text +.globl __tls_get_addr +.align 16, 0x90 +.type __tls_get_addr,@function +__tls_get_addr: Index: test/ELF/tls-opt-gdie.s 
=================================================================== --- test/ELF/tls-opt-gdie.s +++ test/ELF/tls-opt-gdie.s @@ -0,0 +1,55 @@ +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/tls-opt-gdie.s -o %tso.o +// RUN: ld.lld -shared %tso.o -o %t.so +// RUN: ld.lld %t.o %t.so -o %t1 +// RUN: llvm-readobj -s -r %t1 | FileCheck --check-prefix=RELOC %s +// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s + +//RELOC: Section { +//RELOC: Index: 9 +//RELOC-NEXT: Name: .got +//RELOC-NEXT: Type: SHT_PROGBITS +//RELOC-NEXT: Flags [ +//RELOC-NEXT: SHF_ALLOC +//RELOC-NEXT: SHF_WRITE +//RELOC-NEXT: ] +//RELOC-NEXT: Address: 0x120E0 +//RELOC-NEXT: Offset: 0x20E0 +//RELOC-NEXT: Size: 16 +//RELOC-NEXT: Link: 0 +//RELOC-NEXT: Info: 0 +//RELOC-NEXT: AddressAlignment: 8 +//RELOC-NEXT: EntrySize: 0 +//RELOC-NEXT: } +//RELOC: Relocations [ +//RELOC-NEXT: Section (4) .rela.dyn { +//RELOC-NEXT: 0x120E0 R_X86_64_TPOFF64 tlsshared0 0x0 +//RELOC-NEXT: 0x120E8 R_X86_64_TPOFF64 tlsshared1 0x0 +//RELOC-NEXT: } +//RELOC-NEXT: Section (5) .rela.plt { +//RELOC-NEXT: 0x12108 R_X86_64_JUMP_SLOT __tls_get_addr 0x0 +//RELOC-NEXT: } +//RELOC-NEXT: ] + +//0x11009 + (4304 + 7) = 0x120E0 +//0x11019 + (4296 + 7) = 0x120E8 +// DISASM: Disassembly of section .text: +// DISASM-NEXT: _start: +// DISASM-NEXT: 11000: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 11009: 48 03 05 d0 10 00 00 addq 4304(%rip), %rax +// DISASM-NEXT: 11010: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 11019: 48 03 05 c8 10 00 00 addq 4296(%rip), %rax + +.section .text +.globl _start +_start: + .byte 0x66 + leaq tlsshared0@tlsgd(%rip),%rdi + .word 0x6666 + rex64 + call __tls_get_addr@plt + .byte 0x66 + leaq tlsshared1@tlsgd(%rip),%rdi + .word 0x6666 + rex64 + call __tls_get_addr@plt