Index: lld/ELF/Arch/PPC64.cpp =================================================================== --- lld/ELF/Arch/PPC64.cpp +++ lld/ELF/Arch/PPC64.cpp @@ -51,6 +51,7 @@ void writeGotHeader(uint8_t *Buf) const override; bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; + void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; }; } // namespace @@ -156,6 +157,42 @@ return 2; } +void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { + // Relax a general-dynamic TLS access to local-exec. The GD sequence is: + // addis r3, r2, a@got@tlsgd@ha [R_PPC64_GOT_TLSGD16_HA] + // addi r3, r3, a@got@tlsgd@l [R_PPC64_GOT_TLSGD16_LO] + // bl __tls_get_addr(a@tlsgd) [R_PPC64_TLSGD] + // nop + // and it is rewritten, one relocation at a time, to: + // nop + // addis r3, r13, a@tprel@ha [R_PPC64_TPREL16_HA] + // nop + // addi r3, r3, a@tprel@l [R_PPC64_TPREL16_LO] + // Offset of the relocated halfword within its instruction: 2 on BE, else 0. + unsigned Offset = (Config->EKind == ELF64BEKind) ? 2 : 0; + + switch (Type) { + case R_PPC64_GOT_TLSGD16_HA: { + write32(Loc - Offset, 0x60000000); // addis r3, r2 -> nop + break; + } + case R_PPC64_GOT_TLSGD16_LO: { + write32(Loc - Offset, 0x3c6d0000); // addi -> addis r3, r13 + relocateOne(Loc, R_PPC64_TPREL16_HA, Val); + break; + } + case R_PPC64_TLSGD: { + write32(Loc, 0x60000000); // bl -> nop + write32(Loc + 4, 0x38630000); // nop -> addi r3, r3 + relocateOne(Loc + 4 + Offset, R_PPC64_TPREL16_LO, Val); + break; + } + default: { + llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + } + } +} + RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S, const uint8_t *Loc) const { switch (Type) { @@ -213,6 +250,7 @@ case R_PPC64_DTPREL16_LO_DS: return R_ABS; case R_PPC64_TLSGD: + return R_TLSGD_HINT; case R_PPC64_TLSLD: case R_PPC64_TLS: return R_HINT; Index: lld/ELF/InputSection.cpp =================================================================== --- lld/ELF/InputSection.cpp +++ lld/ELF/InputSection.cpp @@ -512,6 +512,7 @@ case R_RELAX_TLS_GD_TO_IE: return
Sym.getGotVA() + A - P; case R_HINT: + case R_TLSGD_HINT: case R_NONE: case R_TLSDESC_CALL: llvm_unreachable("cannot relocate hint relocs"); @@ -778,6 +779,12 @@ Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); break; case R_PPC_CALL: + // If this is a call to __tls_get_addr, it may be part of a TLS + // sequence that has been relaxed and turned into a nop. In this + // case, we don't want to handle it as a call. + if (read32(BufLoc) == 0x60000000) // nop + break; + // Patch a nop (0x60000000) to a ld. if (Rel.Sym->NeedsTocRestore) { if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) { Index: lld/ELF/Relocations.h =================================================================== --- lld/ELF/Relocations.h +++ lld/ELF/Relocations.h @@ -79,6 +79,7 @@ R_TLSDESC_PAGE, R_TLSGD_GOT, R_TLSGD_GOT_FROM_END, + R_TLSGD_HINT, R_TLSGD_PC, R_TLSLD_GOT_FROM_END, R_TLSLD_GOT, Index: lld/ELF/Relocations.cpp =================================================================== --- lld/ELF/Relocations.cpp +++ lld/ELF/Relocations.cpp @@ -210,8 +210,10 @@ } if (isRelExprOneOf(Expr)) { + R_TLSGD_GOT_FROM_END, R_TLSGD_PC, R_TLSGD_HINT>(Expr)) { if (Config->Shared) { + if (Expr == R_TLSGD_HINT) + return 1; if (InX::Got->addDynTlsEntry(Sym)) { uint64_t Off = InX::Got->getGlobalDynOffset(Sym); InX::RelaDyn->addReloc(Target->TlsModuleIndexRel, InX::Got, Off, &Sym); @@ -340,7 +342,7 @@ R_MIPS_GOT_GP_PC, R_MIPS_TLSGD, R_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTONLY_PC_FROM_END, R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOT_FROM_END, R_TLSGD_PC, R_PPC_CALL_PLT, - R_TLSDESC_CALL, R_TLSDESC_PAGE, R_HINT>(E)) + R_TLSDESC_CALL, R_TLSDESC_PAGE, R_HINT, R_TLSGD_HINT>(E)) return true; // These never do, except if the entire file is position dependent or if @@ -933,6 +935,8 @@ RelExpr Expr = Target->getRelExpr(Type, Sym, RelocatedAddr); // Ignore "hint" relocations because they are only markers for relaxation. 
+ // The TLS hint relocations are not ignored since they are required to be + // processed for relaxation. if (isRelExprOneOf(Expr)) return; Index: lld/test/ELF/ppc64-tls-gd-le.s =================================================================== --- /dev/null +++ lld/test/ELF/ppc64-tls-gd-le.s @@ -0,0 +1,83 @@ +// REQUIRES: ppc + +// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +// RUN: llvm-readelf -relocations --wide %t.o | FileCheck --check-prefix=InputRelocs %s +// RUN: ld.lld %t.o -o %t +// RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s +// RUN: llvm-readelf -relocations --wide %t | FileCheck --check-prefix=OutputRelocs %s + +// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +// RUN: llvm-readelf -relocations --wide %t.o | FileCheck --check-prefix=InputRelocs %s +// RUN: ld.lld %t.o -o %t +// RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s +// RUN: llvm-readelf -relocations --wide %t | FileCheck --check-prefix=OutputRelocs %s + + .text + .abiversion 2 + .globl _start # -- Begin function _start + .p2align 4 + .type _start,@function +_start: # @_start +.Lfunc_begin0: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry _start, .Lfunc_lep0-.Lfunc_gep0 +# %bb.0: # %entry + mflr 0 + std 31, -8(1) + std 0, 16(1) + stdu 1, -64(1) + mr 31, 1 + std 30, 48(31) # 8-byte Folded Spill + li 3, 0 + stw 3, 44(31) + addis 3, 2, a@got@tlsgd@ha + addi 3, 3, a@got@tlsgd@l + bl __tls_get_addr(a@tlsgd) + nop + lwz 30, 0(3) + extsw 3, 30 + ld 30, 48(31) # 8-byte Folded Reload + addi 1, 1, 64 + ld 0, 16(1) + ld 31, -8(1) + mtlr 0 + blr + .long 0 + .quad 0 +.Lfunc_end0: + .size _start, .Lfunc_end0-.Lfunc_begin0 + +.globl __tls_get_addr +.type __tls_get_addr,@function +__tls_get_addr: + + # -- End function + .type a,@object # @a + .section .tdata,"awT",@progbits + .globl a + .p2align 2 +a: + .long 55 # 0x37 + .size a, 4 + +// Verify that the input has 
general-dynamic TLS relocation types +// InputRelocs: Relocation section '.rela.text' +// InputRelocs: R_PPC64_GOT_TLSGD16_HA {{0+}} a + 0 +// InputRelocs: R_PPC64_GOT_TLSGD16_LO {{0+}} a + 0 +// InputRelocs: R_PPC64_TLSGD {{0+}} a + 0 + +// Verify that the general-dynamic sequence is relaxed to local-exec. +// #ha(a@tprel) --> (0 - 0x7000 + 0x8000) >> 16 = 0 +// #lo(a@tprel) --> (0 - 0x7000) & 0xFFFF = 0x9000, i.e. -0x7000 = -28672 as signed 16-bit +// Dis: _start: +// Dis: nop +// Dis: addis 3, 13, 0 +// Dis: nop +// Dis: addi 3, 3, -28672 + +// Verify that no general-dynamic relocations exist for the dynamic linker. +// OutputRelocs-NOT: R_PPC64_DTPMOD64 +// OutputRelocs-NOT: R_PPC64_DTPREL64