Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -22,8 +22,8 @@ class TargetInfo { public: uint64_t getVAStart() const; - bool isTlsLocalDynamicRel(unsigned Type) const; - bool isTlsGlobalDynamicRel(unsigned Type) const; + virtual bool isTlsLocalDynamicRel(unsigned Type) const; + virtual bool isTlsGlobalDynamicRel(unsigned Type) const; virtual unsigned getDynRel(unsigned Type) const { return Type; } virtual bool isTlsDynRel(unsigned Type, const SymbolBody &S) const; virtual unsigned getTlsGotRel(unsigned Type) const { return TlsGotRel; } @@ -82,8 +82,6 @@ unsigned RelativeRel; unsigned IRelativeRel; unsigned TlsGotRel = 0; - unsigned TlsLocalDynamicRel = 0; - unsigned TlsGlobalDynamicRel = 0; unsigned TlsModuleIndexRel; unsigned TlsOffsetRel; unsigned PltEntrySize = 8; Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -83,6 +83,8 @@ void writeGotPltHeader(uint8_t *Buf) const override; unsigned getDynRel(unsigned Type) const override; unsigned getTlsGotRel(unsigned Type) const override; + bool isTlsLocalDynamicRel(unsigned Type) const override; + bool isTlsGlobalDynamicRel(unsigned Type) const override; bool isTlsDynRel(unsigned Type, const SymbolBody &S) const override; void writeGotPlt(uint8_t *Buf, uint64_t Plt) const override; void writePltZero(uint8_t *Buf) const override; @@ -115,6 +117,8 @@ public: X86_64TargetInfo(); unsigned getTlsGotRel(unsigned Type) const override; + bool isTlsLocalDynamicRel(unsigned Type) const override; + bool isTlsGlobalDynamicRel(unsigned Type) const override; bool isTlsDynRel(unsigned Type, const SymbolBody &S) const override; void writeGotPltHeader(uint8_t *Buf) const override; void writeGotPlt(uint8_t *Buf, uint64_t Plt) const override; @@ -170,6 +174,7 @@ public: AArch64TargetInfo(); unsigned getDynRel(unsigned Type) const override; + bool isTlsGlobalDynamicRel(unsigned 
Type) const override; void writeGotPlt(uint8_t *Buf, uint64_t Plt) const override; void writePltZero(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, @@ -182,6 +187,17 @@ void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA, uint64_t ZA = 0, uint8_t *PairedLoc = nullptr) const override; + unsigned relaxTls(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, + uint64_t SA, const SymbolBody *S) const override; + bool canRelaxTls(unsigned Type, const SymbolBody *S) const override; + +private: + void relocateTlsGdToLe(unsigned Type, uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const; + void relocateTlsIeToLe(unsigned Type, uint8_t *Loc, uint8_t *BufEnd, + uint64_t P, uint64_t SA) const; + + static const uint64_t TcbSize = 16; }; class AMDGPUTargetInfo final : public TargetInfo { @@ -251,14 +267,6 @@ return false; } -bool TargetInfo::isTlsLocalDynamicRel(unsigned Type) const { - return Type == TlsLocalDynamicRel; -} - -bool TargetInfo::isTlsGlobalDynamicRel(unsigned Type) const { - return Type == TlsGlobalDynamicRel; -} - bool TargetInfo::isTlsDynRel(unsigned Type, const SymbolBody &S) const { return false; } @@ -272,6 +280,14 @@ bool TargetInfo::needsPlt(uint32_t Type, SymbolBody &S) const { return false; } +bool TargetInfo::isTlsLocalDynamicRel(unsigned Type) const { + return false; +} + +bool TargetInfo::isTlsGlobalDynamicRel(unsigned Type) const { + return false; +} + unsigned TargetInfo::relaxTls(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, uint64_t SA, const SymbolBody *S) const { @@ -285,8 +301,6 @@ IRelativeRel = R_386_IRELATIVE; RelativeRel = R_386_RELATIVE; TlsGotRel = R_386_TLS_TPOFF; - TlsGlobalDynamicRel = R_386_TLS_GD; - TlsLocalDynamicRel = R_386_TLS_LDM; TlsModuleIndexRel = R_386_TLS_DTPMOD32; TlsOffsetRel = R_386_TLS_DTPOFF32; UseLazyBinding = true; @@ -318,6 +332,14 @@ return TlsGotRel; } +bool 
X86TargetInfo::isTlsGlobalDynamicRel(unsigned Type) const { + return Type == R_386_TLS_GD; +} + +bool X86TargetInfo::isTlsLocalDynamicRel(unsigned Type) const { + return Type == R_386_TLS_LDM; +} + bool X86TargetInfo::isTlsDynRel(unsigned Type, const SymbolBody &S) const { if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 || Type == R_386_TLS_GOTIE) @@ -593,8 +615,6 @@ RelativeRel = R_X86_64_RELATIVE; IRelativeRel = R_X86_64_IRELATIVE; TlsGotRel = R_X86_64_TPOFF64; - TlsLocalDynamicRel = R_X86_64_TLSLD; - TlsGlobalDynamicRel = R_X86_64_TLSGD; TlsModuleIndexRel = R_X86_64_DTPMOD64; TlsOffsetRel = R_X86_64_DTPOFF64; UseLazyBinding = true; @@ -662,6 +682,14 @@ return R_X86_64_PC32; } +bool X86_64TargetInfo::isTlsGlobalDynamicRel(unsigned Type) const { + return Type == R_X86_64_TLSGD; +} + +bool X86_64TargetInfo::isTlsLocalDynamicRel(unsigned Type) const { + return Type == R_X86_64_TLSLD; +} + bool X86_64TargetInfo::isTlsDynRel(unsigned Type, const SymbolBody &S) const { return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_TLSGD; } @@ -1153,11 +1181,20 @@ GotRel = R_AARCH64_GLOB_DAT; PltRel = R_AARCH64_JUMP_SLOT; TlsGotRel = R_AARCH64_TLS_TPREL64; + TlsModuleIndexRel = R_AARCH64_TLS_DTPMOD64; + TlsOffsetRel = R_AARCH64_TLS_DTPREL64; UseLazyBinding = true; PltEntrySize = 16; PltZeroSize = 32; } +bool AArch64TargetInfo::isTlsGlobalDynamicRel(unsigned Type) const { + return Type == R_AARCH64_TLSDESC_ADR_PAGE21 || + Type == R_AARCH64_TLSDESC_LD64_LO12_NC || + Type == R_AARCH64_TLSDESC_ADD_LO12_NC || + Type == R_AARCH64_TLSDESC_CALL; +} + unsigned AArch64TargetInfo::getDynRel(unsigned Type) const { if (Type == R_AARCH64_ABS32 || Type == R_AARCH64_ABS64) return Type; @@ -1220,7 +1257,11 @@ } bool AArch64TargetInfo::isTlsDynRel(unsigned Type, const SymbolBody &S) const { - return Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 || + return Type == R_AARCH64_TLSDESC_ADR_PAGE21 || + Type == R_AARCH64_TLSDESC_LD64_LO12_NC || + Type == R_AARCH64_TLSDESC_ADD_LO12_NC || + Type == 
R_AARCH64_TLSDESC_CALL || + Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 || Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; } @@ -1273,13 +1314,17 @@ } } -static void updateAArch64Adr(uint8_t *L, uint64_t Imm) { +static void updateAArch64Addr(uint8_t *L, uint64_t Imm) { uint32_t ImmLo = (Imm & 0x3) << 29; uint32_t ImmHi = ((Imm & 0x1FFFFC) >> 2) << 5; uint64_t Mask = (0x3 << 29) | (0x7FFFF << 5); write32le(L, (read32le(L) & ~Mask) | ImmLo | ImmHi); } +static inline void updateAArch64Add(uint8_t *L, uint64_t Imm) { + or32le(L, (Imm & 0xFFF) << 10); +} + // Page(Expr) is the page address of the expression Expr, defined // as (Expr & ~0xFFF). (This applies even if the machine page size // supported by the platform has a different value.) @@ -1312,20 +1357,20 @@ case R_AARCH64_ADR_GOT_PAGE: { uint64_t X = getAArch64Page(SA) - getAArch64Page(P); checkInt<33>(X, Type); - updateAArch64Adr(Loc, (X >> 12) & 0x1FFFFF); // X[32:12] + updateAArch64Addr(Loc, (X >> 12) & 0x1FFFFF); // X[32:12] break; } case R_AARCH64_ADR_PREL_LO21: { uint64_t X = SA - P; checkInt<21>(X, Type); - updateAArch64Adr(Loc, X & 0x1FFFFF); + updateAArch64Addr(Loc, X & 0x1FFFFF); break; } case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: { uint64_t X = getAArch64Page(SA) - getAArch64Page(P); checkInt<33>(X, Type); - updateAArch64Adr(Loc, (X >> 12) & 0x1FFFFF); // X[32:12] + updateAArch64Addr(Loc, (X >> 12) & 0x1FFFFF); // X[32:12] break; } case R_AARCH64_CALL26: @@ -1378,11 +1423,131 @@ or32le(Loc, (X & 0xFFFC) << 3); break; } + case R_AARCH64_TLSLE_ADD_TPREL_HI12: { + uint64_t V = llvm::alignTo(TcbSize, Out::TlsPhdr->p_align) + SA; + checkInt<24>(V, Type); + updateAArch64Add(Loc, (V & 0xFFF000) >> 12); + break; + } + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: { + uint64_t V = llvm::alignTo(TcbSize, Out::TlsPhdr->p_align) + SA; + updateAArch64Add(Loc, (V & 0xFFF)); + break; + } default: fatal("unrecognized reloc " + Twine(Type)); } } +bool AArch64TargetInfo::canRelaxTls(unsigned Type, 
const SymbolBody *S) const { + if (Config->Shared || (S && !S->isTls())) + return false; + + // Global-Dynamic relocs can be relaxed to Initial-Exec if the target is + // an executable. And if the target is local it can also be fully relaxed to + // Local-Exec. + if (isTlsGlobalDynamicRel(Type)) + return !canBePreempted(S, true); + + // Initial-Exec relocs can be relaxed to Local-Exec if the target is a local + // symbol. + if (Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 || + Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) + return !canBePreempted(S, true); + + return false; +} + +unsigned AArch64TargetInfo::relaxTls(uint8_t *Loc, uint8_t *BufEnd, + uint32_t Type, uint64_t P, uint64_t SA, + const SymbolBody *S) const { + switch (Type) { + case R_AARCH64_TLSDESC_ADR_PAGE21: + case R_AARCH64_TLSDESC_LD64_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case R_AARCH64_TLSDESC_CALL: + if (canBePreempted(S, true)) { + llvm_unreachable("Unsupported TLS optimization"); + } else { + uint64_t X = S ? S->getVA() : SA; + relocateTlsGdToLe(Type, Loc, BufEnd, P, X); + } + return 0; + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + relocateTlsIeToLe(Type, Loc, BufEnd, P, S->getVA()); + return 0; + } + llvm_unreachable("Unknown TLS optimization"); +} + +// Global-Dynamic relocations can be relaxed to Local-Exec if the binary is an +// executable and the target is final (can not be preempted). 
+void AArch64TargetInfo::relocateTlsGdToLe(unsigned Type, uint8_t *Loc, + uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + // TLSDESC Global-Dynamic relocations are in the form: + // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] + // ldr x1, [x0, #:tlsdesc_lo12:v] [R_AARCH64_TLSDESC_LD64_LO12_NC] + // add x0, x0, :tlsdesc_lo12:v [R_AARCH64_TLSDESC_ADD_LO12_NC] + // .tlsdesccall [R_AARCH64_TLSDESC_CALL] + // And it can be optimized to: + // movz x0, #0x0, lsl #16 + // movk x0, #0x10 + // nop + // nop + + uint64_t TPOff = llvm::alignTo(TcbSize, Out::TlsPhdr->p_align); + uint64_t X = SA + TPOff; + checkUInt<32>(X, Type); + + uint32_t NewInst; + switch (Type) { + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case R_AARCH64_TLSDESC_CALL: + // nop + NewInst = 0xd503201f; + break; + case R_AARCH64_TLSDESC_ADR_PAGE21: + // movz + NewInst = 0xd2a00000 | (((X >> 16) & 0xffff) << 5); + break; + case R_AARCH64_TLSDESC_LD64_LO12_NC: + // movk + NewInst = 0xf2800000 | ((X & 0xffff) << 5); + break; + default: + llvm_unreachable("Unsupported Relocation for TLS GD to LE relax"); + } + write32le(Loc, NewInst); +} + +// Initial-Exec relocations can be relaxed to Local-Exec if the symbol is final +// (can not be preempted). +void AArch64TargetInfo::relocateTlsIeToLe(unsigned Type, uint8_t *Loc, + uint8_t *BufEnd, uint64_t P, + uint64_t SA) const { + uint64_t TPOff = llvm::alignTo(TcbSize, Out::TlsPhdr->p_align); + uint64_t X = SA + TPOff; + checkUInt<32>(X, Type); + + uint32_t Inst = read32le (Loc); + uint32_t NewInst; + if (Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { + // Generate movz. 
+ unsigned RegNo = (Inst & 0x1f); + NewInst = (0xd2a00000 | RegNo) | (((X >> 16) & 0xffff) << 5); + } else if (Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { + // Generate movk + unsigned RegNo = (Inst & 0x1f); + NewInst = (0xf2800000 | RegNo) | ((X & 0xffff) << 5); + } else { + llvm_unreachable("Invalid Relocation for TLS IE to LE Relax"); + } + write32le(Loc, NewInst); +} + + // Implementing relocations for AMDGPU is low priority since most // programs don't use relocations now. Thus, this function is not // actually called (relocateOne is called for each relocation). Index: test/ELF/aarch64-tls-gdle.s =================================================================== --- /dev/null +++ test/ELF/aarch64-tls-gdle.s @@ -0,0 +1,26 @@ +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %p/Inputs/aarch64-tls-ie.s -o %ttlsie.o +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %s -o %tmain.o +# RUN: ld.lld %tmain.o %ttlsie.o -o %tout +# RUN: llvm-objdump -d %tout | FileCheck %s +# RUN: llvm-readobj -s -r %tout | FileCheck -check-prefix=RELOC %s +# REQUIRES: aarch64 + +#Global-Dynamic (TLSDESC) to Local-Exec relax creates no dynamic relocations. +#RELOC: Relocations [ +#RELOC-NEXT: ] + +# TCB size = 0x10 and foo is the first element from the TLS register. 
+#CHECK: Disassembly of section .text: +#CHECK: _start: +#CHECK: 11000: 00 00 a0 d2 movz x0, #0, lsl #16 +#CHECK: 11004: 00 02 80 f2 movk x0, #0x10 +#CHECK: 11008: 1f 20 03 d5 nop +#CHECK: 1100c: 1f 20 03 d5 nop + +.globl _start +_start: + adrp x0, :tlsdesc:foo + ldr x1, [x0, :tlsdesc_lo12:foo] + add x0, x0, :tlsdesc_lo12:foo + .tlsdesccall foo + blr x1 Index: test/ELF/aarch64-tls-iele.s =================================================================== --- /dev/null +++ test/ELF/aarch64-tls-iele.s @@ -0,0 +1,21 @@ +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %p/Inputs/aarch64-tls-ie.s -o %ttlsie.o +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %s -o %tmain.o +# RUN: ld.lld %tmain.o %ttlsie.o -o %tout +# RUN: llvm-objdump -d %tout | FileCheck %s +# RUN: llvm-readobj -s -r %tout | FileCheck -check-prefix=RELOC %s +# REQUIRES: aarch64 + +#Initial-Exec to Local-Exec relax creates no dynamic relocations. +#RELOC: Relocations [ +#RELOC-NEXT: ] + +# TCB size = 0x10 and foo is the first element from the TLS register. +#CHECK: Disassembly of section .text: +#CHECK: _start: +#CHECK: 11000: 00 00 a0 d2 movz x0, #0, lsl #16 +#CHECK: 11004: 00 02 80 f2 movk x0, #0x10 + +.globl _start +_start: + adrp x0, :gottprel:foo + ldr x0, [x0, :gottprel_lo12:foo] Index: test/ELF/aarch64-tls-le.s =================================================================== --- /dev/null +++ test/ELF/aarch64-tls-le.s @@ -0,0 +1,31 @@ +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %tmain.o +# RUN: ld.lld %tmain.o -o %tout +# RUN: llvm-objdump -d %tout | FileCheck %s +# RUN: llvm-readobj -s -r %tout | FileCheck -check-prefix=RELOC %s +# REQUIRES: aarch64 + +#Local-Exec access creates no dynamic relocations. +#RELOC: Relocations [ +#RELOC-NEXT: ] + +.globl _start +_start: + mrs x0, TPIDR_EL0 + add x0, x0, :tprel_hi12:v1 + add x0, x0, :tprel_lo12_nc:v1 + +# TCB size = 0x10 and v1 is the first element from the TLS register. 
+#CHECK: Disassembly of section .text: +#CHECK: _start: +#CHECK: 11000: 40 d0 3b d5 mrs x0, TPIDR_EL0 +#CHECK: 11004: 00 00 00 91 add x0, x0, #0 +#CHECK: 11008: 00 40 00 91 add x0, x0, #16 + +.type v1,@object +.section .tbss,"awT",@nobits +.globl v1 +.p2align 2 +v1: +.word 0 +.size v1, 4 +