Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -88,6 +88,20 @@ bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA, uint8_t *PairedLoc = nullptr) const override; + bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override; + unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, + const SymbolBody &S) const override; + +private: + void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; + void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const; }; class X86_64TargetInfo final : public TargetInfo { @@ -260,7 +274,7 @@ if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 || Type == R_386_TLS_GOTIE) return Config->Shared; - return false; + return Type == R_386_TLS_GD; } void X86TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, @@ -311,8 +325,11 @@ } bool X86TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { - return Type == R_386_TLS_GOTIE || Type == R_386_GOT32 || - relocNeedsPlt(Type, S); + if (S.isTLS() && Type == R_386_TLS_GD) + return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true); + if (Type == R_386_TLS_GOTIE) + return !isTlsOptimized(Type, &S); + return Type == R_386_GOT32 || relocNeedsPlt(Type, S); } bool X86TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const { @@ -358,6 +375,122 @@ } } +bool X86TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const { + if (Config->Shared || (S && !S->isTLS())) + return false; + return Type == R_386_TLS_LDO_32 || Type == R_386_TLS_LDM || + Type == R_386_TLS_GD || + 
(Type == R_386_TLS_GOTIE && !canBePreempted(S, true)); +} + +unsigned X86TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, + uint32_t Type, uint64_t P, + uint64_t SA, + const SymbolBody &S) const { + switch (Type) { + case R_386_TLS_GD: { + if (canBePreempted(&S, true)) + relocateTlsGdToIe(Loc, BufEnd, P, SA); + else + relocateTlsGdToLe(Loc, BufEnd, P, SA); + // The next relocation should be against __tls_get_addr, so skip it + return 1; + } + case R_386_TLS_GOTIE: + relocateTlsIeToLe(Loc, BufEnd, P, SA); + return 0; + case R_386_TLS_LDM: + relocateTlsLdToLe(Loc, BufEnd, P, SA); + // The next relocation should be against __tls_get_addr, so skip it + return 1; + case R_386_TLS_LDO_32: + relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA); + return 0; + } + llvm_unreachable("Unknown TLS optimization"); +} + +// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.1 +// IA-32 Linker Optimizations, http://www.akkadia.org/drepper/tls.pdf) shows +// how GD can be optimized to IE: +// leal x@tlsgd(, %ebx, 1), %eax +// call ___tls_get_addr@plt +// Is converted to: +// movl %gs:0, %eax +// addl x@gotntpoff(%ebx), %eax +void X86TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax + 0x03, 0x83, 0x00, 0x00, 0x00, 0x00 // addl 0(%ebx), %eax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); + relocateOne(Loc + 5, BufEnd, R_386_32, P, + SA - Out::Got->getVA() - + Out::Got->getNumEntries() * 4); +} + +// GD can be optimized to LE: +// leal x@tlsgd(, %ebx, 1), %eax +// call ___tls_get_addr@plt +// Can be converted to: +// movl %gs:0,%eax +// addl $x@ntpoff,%eax +// But gold emits subl $foo@tpoff,%eax instead of addl. +// These instructions are equivalent in behavior. +// This method generates subl to be consistent with gold. 
+void X86TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax + 0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 // subl $0, %eax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); + relocateOne(Loc + 5, BufEnd, R_386_32, P, + Out::TlsPhdr->p_memsz - SA); +} + +// LD can be optimized to LE: +// leal foo(%reg), %eax +// call ___tls_get_addr +// Is converted to: +// movl %gs:0,%eax +// nop +// leal 0(%esi,1),%esi +void X86TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax + 0x90, // nop + 0x8d, 0x74, 0x26, 0x00 // leal 0(%esi,1),%esi + }; + memcpy(Loc - 2, Inst, sizeof(Inst)); +} + +// In some conditions, R_386_TLS_GOTIE relocation can be optimized to +// R_386_TLS_LE so that it does not use GOT. +// This function does that. Read "ELF Handling For Thread-Local Storage, +// 5.1 IA-32 Linker Optimizations" (http://www.akkadia.org/drepper/tls.pdf) +// by Ulrich Drepper for details. +void X86TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + // Ulrich's document section 6.2 says that @gotntpoff can be + // used with MOVL or ADDL instructions. + // "MOVL foo@GOTNTPOFF(%EBX), %REG" is transformed to "MOVL $foo, %REG". + // "ADDL foo@GOTNTPOFF(%EBX), %REG" is transformed to "LEAL foo(%REG), %REG" + // Note: gold converts to ADDL instead of LEAL. + uint8_t *Inst = Loc - 2; + uint8_t *RegSlot = Loc - 1; + uint8_t Reg = (Loc[-1] >> 3) & 7; + bool IsMov = *Inst == 0x8b; + *Inst = IsMov ? 
0xc7 : 0x8d; + if (IsMov) + *RegSlot = 0xc0 | ((*RegSlot >> 3) & 7); + else + *RegSlot = 0x80 | Reg | (Reg << 3); + relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA); +} + X86_64TargetInfo::X86_64TargetInfo() { CopyReloc = R_X86_64_COPY; PCRelReloc = R_X86_64_PC32; Index: test/ELF/Inputs/tls-opt-gdiele-i686.s =================================================================== --- test/ELF/Inputs/tls-opt-gdiele-i686.s +++ test/ELF/Inputs/tls-opt-gdiele-i686.s @@ -0,0 +1,20 @@ +.type tlsshared0,@object +.section .tbss,"awT",@nobits +.globl tlsshared0 +.align 4 +tlsshared0: + .long 0 + .size tlsshared0, 4 + +.type tlsshared1,@object +.globl tlsshared1 +.align 4 +tlsshared1: + .long 0 + .size tlsshared1, 4 + +.text + .globl __tls_get_addr + .align 16, 0x90 + .type __tls_get_addr,@function +__tls_get_addr: \ No newline at end of file Index: test/ELF/tls-opt-gdiele-i686.s =================================================================== --- test/ELF/tls-opt-gdiele-i686.s +++ test/ELF/tls-opt-gdiele-i686.s @@ -0,0 +1,59 @@ +// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %p/Inputs/tls-opt-gdiele-i686.s -o %tso.o +// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o +// RUN: ld.lld -shared %tso.o -o %tso +// RUN: ld.lld %t.o %tso -o %tout +// RUN: llvm-readobj -r %tout | FileCheck --check-prefix=NORELOC %s +// RUN: llvm-objdump -d %tout | FileCheck --check-prefix=DISASM %s + +// NORELOC: Relocations [ +// NORELOC-NEXT: Section ({{.*}}) .rel.dyn { +// NORELOC-NEXT: 0x12050 R_386_TLS_TPOFF tlsshared0 0x0 +// NORELOC-NEXT: 0x12054 R_386_TLS_TPOFF tlsshared1 0x0 +// NORELOC-NEXT: } +// NORELOC-NEXT: ] + +// DISASM: Disassembly of section .text: +// DISASM-NEXT: _start: +// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax +// DISASM-NEXT: 11006: 03 83 f8 ff ff ff addl -8(%ebx), %eax +// DISASM-NEXT: 1100c: 65 a1 00 00 00 00 movl %gs:0, %eax +// DISASM-NEXT: 11012: 03 83 fc ff ff ff addl -4(%ebx), %eax +// DISASM-NEXT: 11018: 65 a1 00 00 00 00 movl %gs:0, %eax 
+// DISASM-NEXT: 1101e: 81 e8 08 00 00 00 subl $8, %eax +// DISASM-NEXT: 11024: 65 a1 00 00 00 00 movl %gs:0, %eax +// DISASM-NEXT: 1102a: 81 e8 04 00 00 00 subl $4, %eax + +.type tlsexe1,@object +.section .tbss,"awT",@nobits +.globl tlsexe1 +.align 4 +tlsexe1: + .long 0 + .size tlsexe1, 4 + +.type tlsexe2,@object +.section .tbss,"awT",@nobits +.globl tlsexe2 +.align 4 +tlsexe2: + .long 0 + .size tlsexe2, 4 + +.section .text +.globl ___tls_get_addr +.type ___tls_get_addr,@function +___tls_get_addr: + +.section .text +.globl _start +_start: +//GD->IE +leal tlsshared0@tlsgd(,%ebx,1),%eax +call ___tls_get_addr@plt +leal tlsshared1@tlsgd(,%ebx,1),%eax +call ___tls_get_addr@plt +//GD->LE +leal tlsexe1@tlsgd(,%ebx,1),%eax +call ___tls_get_addr@plt +leal tlsexe2@tlsgd(,%ebx,1),%eax +call ___tls_get_addr@plt Index: test/ELF/tls-opt-i686.s =================================================================== --- test/ELF/tls-opt-i686.s +++ test/ELF/tls-opt-i686.s @@ -0,0 +1,69 @@ +// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o +// RUN: ld.lld %t.o -o %t1 +// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=NORELOC %s +// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s + +// NORELOC: Relocations [ +// NORELOC-NEXT: ] + +// DISASM: Disassembly of section .text: +// DISASM-NEXT: _start: +// LD -> LE: +// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax +// DISASM-NEXT: 11006: 90 nop +// DISASM-NEXT: 11007: 8d 74 26 00 leal (%esi), %esi +// DISASM-NEXT: 1100b: 8d 90 f8 ff ff ff leal -8(%eax), %edx +// DISASM-NEXT: 11011: 65 a1 00 00 00 00 movl %gs:0, %eax +// DISASM-NEXT: 11017: 90 nop +// DISASM-NEXT: 11018: 8d 74 26 00 leal (%esi), %esi +// DISASM-NEXT: 1101c: 8d 90 fc ff ff ff leal -4(%eax), %edx +// IE -> LE: +// 4294967288 == 0xFFFFFFF8 +// 4294967292 == 0xFFFFFFFC +// DISASM-NEXT: 11022: 65 a1 00 00 00 00 movl %gs:0, %eax +// DISASM-NEXT: 11028: c7 c0 f8 ff ff ff movl $4294967288, %eax +// DISASM-NEXT: 1102e: 65 a1 00 00 00 00 movl 
%gs:0, %eax +// DISASM-NEXT: 11034: c7 c0 fc ff ff ff movl $4294967292, %eax +// DISASM-NEXT: 1103a: 65 a1 00 00 00 00 movl %gs:0, %eax +// DISASM-NEXT: 11040: 8d 80 f8 ff ff ff leal -8(%eax), %eax +// DISASM-NEXT: 11046: 65 a1 00 00 00 00 movl %gs:0, %eax +// DISASM-NEXT: 1104c: 8d 80 fc ff ff ff leal -4(%eax), %eax +.type tls0,@object +.section .tbss,"awT",@nobits +.globl tls0 +.align 4 +tls0: + .long 0 + .size tls0, 4 + +.type tls1,@object +.globl tls1 +.align 4 +tls1: + .long 0 + .size tls1, 4 + +.section .text +.globl ___tls_get_addr +.type ___tls_get_addr,@function +___tls_get_addr: + +.section .text +.globl _start +_start: +//LD -> LE: +leal tls0@tlsldm(%ebx),%eax +call ___tls_get_addr@plt +leal tls0@dtpoff(%eax),%edx +leal tls1@tlsldm(%ebx),%eax +call ___tls_get_addr@plt +leal tls1@dtpoff(%eax),%edx +//IE -> LE: +movl %gs:0,%eax +movl tls0@gotntpoff(%ebx),%eax +movl %gs:0,%eax +movl tls1@gotntpoff(%ebx),%eax +movl %gs:0,%eax +addl tls0@gotntpoff(%ebx),%eax +movl %gs:0,%eax +addl tls1@gotntpoff(%ebx),%eax