Index: lld/trunk/ELF/InputSection.cpp =================================================================== --- lld/trunk/ELF/InputSection.cpp +++ lld/trunk/ELF/InputSection.cpp @@ -169,11 +169,14 @@ } if (Target->isTlsOptimized(Type, &Body)) { + uintX_t SymVA = Target->relocNeedsGot(Type, Body) + ? Out::Got->getEntryAddr(Body) + : getSymVA(Body); // By optimizing TLS relocations, it is sometimes needed to skip // relocations that immediately follow TLS relocations. This function // knows how many slots we need to skip. - I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, - getSymVA(Body)); + I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, SymVA, + Body); continue; } Index: lld/trunk/ELF/OutputSections.cpp =================================================================== --- lld/trunk/ELF/OutputSections.cpp +++ lld/trunk/ELF/OutputSections.cpp @@ -217,16 +217,23 @@ return true; } - if (Body && Target->isTlsGlobalDynamicReloc(Type)) { + if (!Body || !Target->isTlsGlobalDynamicReloc(Type)) + return false; + + if (Target->isTlsOptimized(Type, Body)) { P->setSymbolAndType(Body->getDynamicSymbolTableIndex(), - Target->getTlsModuleIndexReloc(), Config->Mips64EL); - P->r_offset = Out::Got->getGlobalDynAddr(*Body); - N->setSymbolAndType(Body->getDynamicSymbolTableIndex(), - Target->getTlsOffsetReloc(), Config->Mips64EL); - N->r_offset = Out::Got->getGlobalDynAddr(*Body) + sizeof(uintX_t); + Target->getTlsGotReloc(), Config->Mips64EL); + P->r_offset = Out::Got->getEntryAddr(*Body); return true; } - return false; + + P->setSymbolAndType(Body->getDynamicSymbolTableIndex(), + Target->getTlsModuleIndexReloc(), Config->Mips64EL); + P->r_offset = Out::Got->getGlobalDynAddr(*Body); + N->setSymbolAndType(Body->getDynamicSymbolTableIndex(), + Target->getTlsOffsetReloc(), Config->Mips64EL); + N->r_offset = Out::Got->getGlobalDynAddr(*Body) + sizeof(uintX_t); + return true; } template void RelocationSection::writeTo(uint8_t *Buf) { Index: 
lld/trunk/ELF/Target.h =================================================================== --- lld/trunk/ELF/Target.h +++ lld/trunk/ELF/Target.h @@ -61,8 +61,8 @@ uint8_t *PairedLoc = nullptr) const = 0; virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const; virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, - uint32_t Type, uint64_t P, - uint64_t SA) const; + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody &S) const; virtual ~TargetInfo(); protected: Index: lld/trunk/ELF/Target.cpp =================================================================== --- lld/trunk/ELF/Target.cpp +++ lld/trunk/ELF/Target.cpp @@ -110,13 +110,16 @@ bool isRelRelative(uint32_t Type) const override; bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override; unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, - uint64_t P, uint64_t SA) const override; + uint64_t P, uint64_t SA, + const SymbolBody &S) const override; private: void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; + void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; }; @@ -211,8 +214,8 @@ bool TargetInfo::isRelRelative(uint32_t Type) const { return true; } unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, - uint32_t Type, uint64_t P, - uint64_t SA) const { + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody &S) const { return 0; } @@ -417,14 +420,15 @@ } bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { + if (Type == R_X86_64_TLSGD) + return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true); if (Type == R_X86_64_GOTTPOFF) return !isTlsOptimized(Type, &S); - return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL || - 
relocNeedsPlt(Type, S); + return Type == R_X86_64_GOTPCREL || relocNeedsPlt(Type, S); } bool X86_64TargetInfo::isTlsDynReloc(unsigned Type) const { - return Type == R_X86_64_GOTTPOFF; + return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_TLSGD; } unsigned X86_64TargetInfo::getPltRefReloc(unsigned Type) const { @@ -492,8 +496,8 @@ const SymbolBody *S) const { if (Config->Shared || (S && !S->isTLS())) return false; - return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 || - (Type == R_X86_64_TLSGD && !canBePreempted(S, true)) || + return Type == R_X86_64_TLSGD || Type == R_X86_64_TLSLD || + Type == R_X86_64_DTPOFF32 || (Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true)); } @@ -539,6 +543,27 @@ relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA); } +// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5 +// x86-64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows +// how GD can be optimized to IE: +// .byte 0x66 +// leaq x@tlsgd(%rip), %rdi +// .word 0x6666 +// rex64 +// call __tls_get_addr@plt +// Is converted to: +// mov %fs:0x0,%rax +// addq x@gottpoff(%rip),%rax +void X86_64TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const { + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@gottpoff(%rip),%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF64, P + 12, SA); +} + // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to // R_X86_64_TPOFF32 so that R_X86_64_TPOFF32 so that it does not use GOT. // This function does that. Read "ELF Handling For Thread-Local Storage, @@ -581,7 +606,8 @@ // This function returns a number of relocations that need to be skipped.
unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, - uint64_t SA) const { + uint64_t SA, + const SymbolBody &S) const { switch (Type) { case R_X86_64_DTPOFF32: relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA); @@ -589,10 +615,14 @@ case R_X86_64_GOTTPOFF: relocateTlsIeToLe(Loc, BufEnd, P, SA); return 0; - case R_X86_64_TLSGD: - relocateTlsGdToLe(Loc, BufEnd, P, SA); + case R_X86_64_TLSGD: { + if (canBePreempted(&S, true)) + relocateTlsGdToIe(Loc, BufEnd, P, SA); + else + relocateTlsGdToLe(Loc, BufEnd, P, SA); // The next relocation should be against __tls_get_addr, so skip it return 1; + } case R_X86_64_TLSLD: relocateTlsLdToLe(Loc, BufEnd, P, SA); // The next relocation should be against __tls_get_addr, so skip it Index: lld/trunk/ELF/Writer.cpp =================================================================== --- lld/trunk/ELF/Writer.cpp +++ lld/trunk/ELF/Writer.cpp @@ -219,14 +219,15 @@ Body = Body->repl(); if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) { - if (Target->isTlsOptimized(Type, Body)) - continue; - if (Out::Got->addDynTlsEntry(Body)) { + bool Opt = Target->isTlsOptimized(Type, Body); + if (!Opt && Out::Got->addDynTlsEntry(Body)) { Out::RelaDyn->addReloc({&C, &RI}); Out::RelaDyn->addReloc({nullptr, nullptr}); Body->setUsedInDynamicReloc(); + continue; } - continue; + if (!canBePreempted(Body, true)) + continue; } if (Body && Body->isTLS() && !Target->isTlsDynReloc(Type)) Index: lld/trunk/test/ELF/Inputs/tls-opt-gdie.s =================================================================== --- lld/trunk/test/ELF/Inputs/tls-opt-gdie.s +++ lld/trunk/test/ELF/Inputs/tls-opt-gdie.s @@ -0,0 +1,20 @@ +.type tlsshared0,@object +.section .tbss,"awT",@nobits +.globl tlsshared0 +.align 4 +tlsshared0: + .long 0 + .size tlsshared0, 4 + +.type tlsshared1,@object +.globl tlsshared1 +.align 4 +tlsshared1: + .long 0 + .size tlsshared1, 4 + +.text +.globl __tls_get_addr +.align 16, 0x90 +.type 
__tls_get_addr,@function +__tls_get_addr: Index: lld/trunk/test/ELF/tls-opt-gdie.s =================================================================== --- lld/trunk/test/ELF/tls-opt-gdie.s +++ lld/trunk/test/ELF/tls-opt-gdie.s @@ -0,0 +1,55 @@ +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/tls-opt-gdie.s -o %tso.o +// RUN: ld.lld -shared %tso.o -o %t.so +// RUN: ld.lld %t.o %t.so -o %t1 +// RUN: llvm-readobj -s -r %t1 | FileCheck --check-prefix=RELOC %s +// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s + +//RELOC: Section { +//RELOC: Index: 9 +//RELOC-NEXT: Name: .got +//RELOC-NEXT: Type: SHT_PROGBITS +//RELOC-NEXT: Flags [ +//RELOC-NEXT: SHF_ALLOC +//RELOC-NEXT: SHF_WRITE +//RELOC-NEXT: ] +//RELOC-NEXT: Address: 0x120E0 +//RELOC-NEXT: Offset: 0x20E0 +//RELOC-NEXT: Size: 16 +//RELOC-NEXT: Link: 0 +//RELOC-NEXT: Info: 0 +//RELOC-NEXT: AddressAlignment: 8 +//RELOC-NEXT: EntrySize: 0 +//RELOC-NEXT: } +//RELOC: Relocations [ +//RELOC-NEXT: Section (4) .rela.dyn { +//RELOC-NEXT: 0x120E0 R_X86_64_TPOFF64 tlsshared0 0x0 +//RELOC-NEXT: 0x120E8 R_X86_64_TPOFF64 tlsshared1 0x0 +//RELOC-NEXT: } +//RELOC-NEXT: Section (5) .rela.plt { +//RELOC-NEXT: 0x12108 R_X86_64_JUMP_SLOT __tls_get_addr 0x0 +//RELOC-NEXT: } +//RELOC-NEXT: ] + +//0x11009 + (4304 + 7) = 0x120E0 +//0x11019 + (4296 + 7) = 0x120E8 +// DISASM: Disassembly of section .text: +// DISASM-NEXT: _start: +// DISASM-NEXT: 11000: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 11009: 48 03 05 d0 10 00 00 addq 4304(%rip), %rax +// DISASM-NEXT: 11010: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 11019: 48 03 05 c8 10 00 00 addq 4296(%rip), %rax + +.section .text +.globl _start +_start: + .byte 0x66 + leaq tlsshared0@tlsgd(%rip),%rdi + .word 0x6666 + rex64 + call __tls_get_addr@plt + .byte 0x66 + leaq tlsshared1@tlsgd(%rip),%rdi + .word 0x6666 + rex64 + call __tls_get_addr@plt