Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -139,11 +139,14 @@ } if (Target->isTlsOptimized(Type, &Body)) { + uintX_t SymVA = Target->relocNeedsGot(Type, Body) + ? Out::Got->getEntryAddr(Body) + : getSymVA(Body); // By optimizing TLS relocations, it is sometimes needed to skip // relocations that immediately follow TLS relocations. This function // knows how many slots we need to skip. - I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, - getSymVA(Body)); + I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, SymVA, + Body); continue; } Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -223,6 +223,12 @@ } if (Body && Target->isTlsGlobalDynamicReloc(Type)) { + if (Target->isTlsOptimized(Type, Body)) { + P->setSymbolAndType(Body->getDynamicSymbolTableIndex(), + Target->getTlsGotReloc(), Config->Mips64EL); + P->r_offset = Out::Got->getEntryAddr(*Body); + continue; + } P->setSymbolAndType(Body->getDynamicSymbolTableIndex(), Target->getTlsModuleIndexReloc(), Config->Mips64EL); P->r_offset = Out::Got->getEntryAddr(*Body); Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -59,10 +59,11 @@ virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0; virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA) const = 0; - virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const; + virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S, + bool *Partial = nullptr) const; virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, - uint32_t Type, uint64_t P, - uint64_t SA) const; + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody &S) const; virtual ~TargetInfo(); protected: Index: 
ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -80,15 +80,19 @@ void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA) const override; bool isRelRelative(uint32_t Type) const override; - bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override; + bool isTlsOptimized(unsigned Type, const SymbolBody *S, + bool *Partial = nullptr) const override; unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, - uint64_t P, uint64_t SA) const override; + uint64_t P, uint64_t SA, + const SymbolBody &S) const override; private: void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; + void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, uint64_t SA) const; }; @@ -169,7 +173,8 @@ TargetInfo::~TargetInfo() {} -bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const { +bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S, + bool *Partial) const { return false; } @@ -186,8 +191,8 @@ bool TargetInfo::isRelRelative(uint32_t Type) const { return true; } unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, - uint32_t Type, uint64_t P, - uint64_t SA) const { + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody &S) const { return 0; } @@ -374,6 +379,10 @@ } bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { + bool PartialOpt = false; + if (S.isTLS() && Target->isTlsGlobalDynamicReloc(Type)) + return Target->isTlsOptimized(Type, &S, &PartialOpt) && PartialOpt; + if (Type == R_X86_64_GOTTPOFF) return !isTlsOptimized(Type, &S); return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL || @@ -381,7 +390,7 @@ } bool 
X86_64TargetInfo::isTlsDynReloc(unsigned Type) const { - return Type == R_X86_64_GOTTPOFF; + return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_TLSGD; } unsigned X86_64TargetInfo::getPltRefReloc(unsigned Type) const { @@ -445,12 +454,14 @@ } } -bool X86_64TargetInfo::isTlsOptimized(unsigned Type, - const SymbolBody *S) const { +bool X86_64TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S, + bool *Partial) const { if (Config->Shared || (S && !S->isTLS())) return false; + if (Partial && Type == R_X86_64_TLSGD && canBePreempted(S, true)) + *Partial = true; return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 || - (Type == R_X86_64_TLSGD && !canBePreempted(S, true)) || + Type == R_X86_64_TLSGD || (Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true)); } @@ -496,6 +507,28 @@ relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA); } +// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5 +// x86-64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows +// how +// GD can be optimized to IE: +// .byte 0x66 +// leaq x@tlsgd(%rip), %rdi +// .word 0x6666 +// rex64 +// call __tls_get_addr@plt +// Is converted to: +// mov %fs:0x0,%rax +// addq x@gottpoff(%rip),%rax +void X86_64TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const { + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@gottpoff(%rip),%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF64, P + 12, SA); +} + // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to // R_X86_64_TPOFF32 so that R_X86_64_TPOFF32 so that it does not use GOT. // This function does that. Read "ELF Handling For Thread-Local Storage, @@ -538,7 +571,8 @@ // This function returns a number of relocations that need to be skipped. 
unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, - uint64_t SA) const { + uint64_t SA, + const SymbolBody &S) const { switch (Type) { case R_X86_64_GOTTPOFF: relocateTlsIeToLe(Loc, BufEnd, P, SA); @@ -547,10 +581,15 @@ relocateTlsLdToLe(Loc, BufEnd, P, SA); // The next relocation should be against __tls_get_addr, so skip it return 1; - case R_X86_64_TLSGD: - relocateTlsGdToLe(Loc, BufEnd, P, SA); + case R_X86_64_TLSGD: { + bool PartialOpt = false; + if (isTlsOptimized(Type, &S, &PartialOpt) && PartialOpt) + relocateTlsGdToIe(Loc, BufEnd, P, SA); + else + relocateTlsGdToLe(Loc, BufEnd, P, SA); // The next relocation should be against __tls_get_addr, so skip it return 1; + } case R_X86_64_DTPOFF32: relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA); return 0; Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -222,15 +222,20 @@ Body = Body->repl(); if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) { - if (Target->isTlsOptimized(Type, Body)) + bool PartialOpt = false; + bool Optimized = Target->isTlsOptimized(Type, Body, &PartialOpt); + if (Optimized && !PartialOpt) continue; - if (Body->isInGot()) + + if (!Optimized) { + if (Body->isInGot()) + continue; + Out::Got->addDynTlsEntry(Body); + Out::RelaDyn->addReloc({&C, &RI}); + Out::RelaDyn->addReloc({nullptr, nullptr}); + Body->setUsedInDynamicReloc(); continue; - Out::Got->addDynTlsEntry(Body); - Out::RelaDyn->addReloc({&C, &RI}); - Out::RelaDyn->addReloc({nullptr, nullptr}); - Body->setUsedInDynamicReloc(); - continue; + } } if ((Body && Body->isTLS()) && !Target->isTlsDynReloc(Type)) Index: test/ELF/Inputs/tls-opt-gdie.s =================================================================== --- test/ELF/Inputs/tls-opt-gdie.s +++ test/ELF/Inputs/tls-opt-gdie.s @@ -0,0 +1,20 @@ +.type tlsshared0,@object +.section .tbss,"awT",@nobits +.globl tlsshared0 +.align 4 
+tlsshared0: + .long 0 + .size tlsshared0, 4 + +.type tlsshared1,@object +.globl tlsshared1 +.align 4 +tlsshared1: + .long 0 + .size tlsshared1, 4 + +.text +.globl __tls_get_addr +.align 16, 0x90 +.type __tls_get_addr,@function +__tls_get_addr: Index: test/ELF/tls-opt-gdie.s =================================================================== --- test/ELF/tls-opt-gdie.s +++ test/ELF/tls-opt-gdie.s @@ -0,0 +1,55 @@ +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/tls-opt-gdie.s -o %tso.o +// RUN: ld.lld -shared %tso.o -o %t.so +// RUN: ld.lld %t.o %t.so -o %t1 +// RUN: llvm-readobj -s -r %t1 | FileCheck --check-prefix=RELOC %s +// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s + +//RELOC: Section { +//RELOC: Index: 9 +//RELOC-NEXT: Name: .got +//RELOC-NEXT: Type: SHT_PROGBITS +//RELOC-NEXT: Flags [ +//RELOC-NEXT: SHF_ALLOC +//RELOC-NEXT: SHF_WRITE +//RELOC-NEXT: ] +//RELOC-NEXT: Address: 0x120E0 +//RELOC-NEXT: Offset: 0x20E0 +//RELOC-NEXT: Size: 16 +//RELOC-NEXT: Link: 0 +//RELOC-NEXT: Info: 0 +//RELOC-NEXT: AddressAlignment: 8 +//RELOC-NEXT: EntrySize: 0 +//RELOC-NEXT: } +//RELOC: Relocations [ +//RELOC-NEXT: Section (4) .rela.dyn { +//RELOC-NEXT: 0x120E0 R_X86_64_TPOFF64 tlsshared0 0x0 +//RELOC-NEXT: 0x120E8 R_X86_64_TPOFF64 tlsshared1 0x0 +//RELOC-NEXT: } +//RELOC-NEXT: Section (5) .rela.plt { +//RELOC-NEXT: 0x12108 R_X86_64_JUMP_SLOT __tls_get_addr 0x0 +//RELOC-NEXT: } +//RELOC-NEXT: ] + +//0x11009 + (4304 + 7) = 0x120E0 +//0x11019 + (4296 + 7) = 0x120E8 +// DISASM: Disassembly of section .text: +// DISASM-NEXT: _start: +// DISASM-NEXT: 11000: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 11009: 48 03 05 d0 10 00 00 addq 4304(%rip), %rax +// DISASM-NEXT: 11010: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 11019: 48 03 05 c8 10 00 00 addq 4296(%rip), %rax + +.section .text +.globl _start +_start: + .byte 0x66 + leaq 
tlsshared0@tlsgd(%rip),%rdi + .word 0x6666 + rex64 + call __tls_get_addr@plt + .byte 0x66 + leaq tlsshared1@tlsgd(%rip),%rdi + .word 0x6666 + rex64 + call __tls_get_addr@plt