Index: lld/trunk/ELF/Arch/PPC64.cpp
===================================================================
--- lld/trunk/ELF/Arch/PPC64.cpp
+++ lld/trunk/ELF/Arch/PPC64.cpp
@@ -54,6 +54,7 @@
   RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
                           RelExpr Expr) const override;
   void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+  void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
 };
 } // namespace
 
@@ -159,6 +160,42 @@
   return 2;
 }
 
+void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+  // GD -> LE TLS relaxation; see 3.7.4.2 of the 64-bit ELF V2 ABI supplement.
+  // The general-dynamic code sequence for a global `x` looks like:
+  // Instruction                    Relocation                Symbol
+  // addis r3, r2, x@got@tlsgd@ha   R_PPC64_GOT_TLSGD16_HA      x
+  // addi  r3, r3, x@got@tlsgd@l    R_PPC64_GOT_TLSGD16_LO      x
+  // bl __tls_get_addr(x@tlsgd)     R_PPC64_TLSGD               x
+  //                                R_PPC64_REL24               __tls_get_addr
+  // nop                            None                       None
+
+  // Relaxing to local exec entails converting:
+  // addis r3, r2, x@got@tlsgd@ha    into      nop
+  // addi  r3, r3, x@got@tlsgd@l     into      addis r3, r13, x@tprel@ha
+  // bl __tls_get_addr(x@tlsgd)      into      nop
+  // nop                             into      addi r3, r3, x@tprel@l
+
+  uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
+  // TLSGD16 cases patch at Loc - EndianOffset; R_PPC64_TLSGD patches at Loc.
+  switch (Type) {
+  case R_PPC64_GOT_TLSGD16_HA:
+    write32(Loc - EndianOffset, 0x60000000); // nop (replaces addis)
+    break;
+  case R_PPC64_GOT_TLSGD16_LO:
+    write32(Loc - EndianOffset, 0x3c6d0000); // addis r3, r13 (replaces addi)
+    relocateOne(Loc, R_PPC64_TPREL16_HA, Val); // fills in x@tprel@ha
+    break;
+  case R_PPC64_TLSGD:
+    write32(Loc, 0x60000000);     // nop (replaces bl)
+    write32(Loc + 4, 0x38630000); // addi r3, r3 (replaces nop)
+    relocateOne(Loc + 4 + EndianOffset, R_PPC64_TPREL16_LO, Val); // x@tprel@l
+    break;
+  default:
+    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
+  }
+}
+
 RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
                           const uint8_t *Loc) const {
   switch (Type) {
Index: lld/trunk/test/ELF/ppc64-tls-gd-le.s
===================================================================
--- lld/trunk/test/ELF/ppc64-tls-gd-le.s
+++ lld/trunk/test/ELF/ppc64-tls-gd-le.s
@@ -0,0 +1,83 @@
+// REQUIRES: ppc
+
+// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
+// RUN: llvm-readelf -relocations --wide %t.o | FileCheck --check-prefix=InputRelocs %s
+// RUN: ld.lld %t.o -o %t
+// RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s
+// RUN: llvm-readelf -relocations --wide %t | FileCheck --check-prefix=OutputRelocs %s
+
+// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
+// RUN: llvm-readelf -relocations --wide %t.o | FileCheck --check-prefix=InputRelocs %s
+// RUN: ld.lld %t.o -o %t
+// RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s
+// RUN: llvm-readelf -relocations --wide %t | FileCheck --check-prefix=OutputRelocs %s
+
+        .text
+        .abiversion 2
+        .globl _start # -- Begin function _start
+        .p2align 4
+        .type _start,@function
+_start: # @_start
+.Lfunc_begin0:
+.Lfunc_gep0:
+        addis 2, 12, .TOC.-.Lfunc_gep0@ha
+        addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+        .localentry _start, .Lfunc_lep0-.Lfunc_gep0
+# %bb.0: # %entry
+        mflr 0
+        std 31, -8(1)
+        std 0, 16(1)
+        stdu 1, -64(1)
+        mr 31, 1
+        std 30, 48(31) # 8-byte Folded Spill
+        li 3, 0
+        stw 3, 44(31)
+        addis 3, 2, a@got@tlsgd@ha
+        addi 3, 3, a@got@tlsgd@l
+        bl __tls_get_addr(a@tlsgd)
+        nop
+        lwz 30, 0(3)
+        extsw 3, 30
+        ld 30, 48(31) # 8-byte Folded Reload
+        addi 1, 1, 64
+        ld 0, 16(1)
+        ld 31, -8(1)
+        mtlr 0
+        blr
+        .long 0
+        .quad 0
+.Lfunc_end0:
+        .size _start, .Lfunc_end0-.Lfunc_begin0
+
+.globl __tls_get_addr
+.type __tls_get_addr,@function
+__tls_get_addr:
+
+        # -- End function
+        .type a,@object # @a
+        .section .tdata,"awT",@progbits
+        .globl a
+        .p2align 2
+a:
+        .long 55 # 0x37
+        .size a, 4
+
+// Verify that the input has general-dynamic TLS relocation types.
+// InputRelocs: Relocation section '.rela.text'
+// InputRelocs: R_PPC64_GOT_TLSGD16_HA {{0+}} a + 0
+// InputRelocs: R_PPC64_GOT_TLSGD16_LO {{0+}} a + 0
+// InputRelocs: R_PPC64_TLSGD {{0+}} a + 0
+
+// Verify that the general-dynamic sequence is relaxed to local exec.
+// #ha(a@tprel) --> (0 - 0x7000 + 0x8000) >> 16 = 0
+// #lo(a@tprel) --> (0 - 0x7000) & 0xFFFF = -0x7000 = -28672
+// Dis: _start:
+// Dis: nop
+// Dis: addis 3, 13, 0
+// Dis: nop
+// Dis: addi 3, 3, -28672
+
+// Verify that no general-dynamic relocations exist for the dynamic linker.
+// OutputRelocs-NOT: R_PPC64_DTPMOD64
+// OutputRelocs-NOT: R_PPC64_DTPREL64