Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -51,6 +51,9 @@ void writeGotHeader(uint8_t *Buf) const override; bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; + RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, + RelExpr Expr) const override; + void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; }; } // namespace @@ -213,6 +216,7 @@ case R_PPC64_DTPREL16_LO_DS: return R_ABS; case R_PPC64_TLSGD: + return R_TLSDESC_CALL; case R_PPC64_TLSLD: case R_PPC64_TLS: return R_HINT; @@ -402,6 +406,54 @@ return Type == R_PPC64_REL24 && S.isInPlt(); } +RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data, + RelExpr Expr) const { + if (Expr == R_RELAX_TLS_GD_TO_IE) + return R_RELAX_TLS_GD_TO_IE_ABS; + return Expr; +} + +// Reference: 3.7.4.1 of the 64-bit ELF V2 ABI supplement. +// The general dynamic code sequence for a global `x` will look like: +// Instruction Relocation Symbol +// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x +// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x +// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x +// R_PPC64_REL24 __tls_get_addr +// nop None None +// +// Relaxing to initial-exec entails: +// 1) Convert the addis/addi pair that builds the address of the 'x' tls_index +// struct to an addis/ld pair that loads an offset from a got-entry. +// 2) Convert the call to __tls_get_addr to a nop. +// 3) Convert the nop following the call to an add of the loaded offset to the +// thread pointer. + +void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { + switch (Type) { + case R_PPC64_GOT_TLSGD16_HA: + // This is relaxed from addis rT, r2, sym@got@tlsgd@ha to + // addis rT, r2, sym@got@tprel@ha. 
+ write16(Loc, applyPPCHa(Val - getPPC64TocBase())); + return; + case R_PPC64_GOT_TLSGD16_LO: { + // Relax from addi r3, rA, sym@got@tlsgd@l to + // ld r3, sym@got@tprel@l(rA) + uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U; + uint32_t InputRegister = (read32(Loc - EndianOffset) & (31 << 16)); + write32(Loc - EndianOffset, + 0xE8600000 | InputRegister | applyPPCLo(Val - getPPC64TocBase())); + return; + } + case R_PPC64_TLSGD: + write32(Loc, 0x60000000); // bl __tls_get_addr(sym@tlsgd) --> nop + write32(Loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 + return; + default: + llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); + } +} + TargetInfo *elf::getPPC64TargetInfo() { static PPC64 Target; return &Target; Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -778,6 +778,12 @@ Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); break; case R_PPC_CALL: + // If this is a call to __tls_get_addr, it may be part of a TLS + // sequence that has been relaxed and turned into a nop. In this + // case, we don't want to handle it as a call. + if (read32(BufLoc) == 0x60000000) // nop + break; + + // Patch a nop (0x60000000) to a ld. 
if (Rel.Sym->NeedsTocRestore) { if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) { Index: test/ELF/ppc64-gd-to-ie.s =================================================================== --- /dev/null +++ test/ELF/ppc64-gd-to-ie.s @@ -0,0 +1,104 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-tls.s -o %t2.o +# RUN: ld.lld -shared %t2.o -o %t3.so +# RUN: ld.lld %t.o %t3.so -o %t +# RUN: llvm-objdump --section-headers %t | FileCheck --check-prefix=CheckGot %s +# RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s +# RUN: llvm-readelf -relocations --wide %t | FileCheck --check-prefix=OutputRelocs %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-tls.s -o %t2.o +# RUN: ld.lld -shared %t2.o -o %t3.so +# RUN: ld.lld %t.o %t3.so -o %t +# RUN: llvm-objdump --section-headers %t | FileCheck --check-prefix=CheckGot %s +# RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s +# RUN: llvm-readelf -relocations --wide %t | FileCheck --check-prefix=OutputRelocs %s + + .text + .abiversion 2 + .globl _start + .p2align 4 + .type _start,@function +_start: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry _start, .Lfunc_lep0-.Lfunc_gep0 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + addis 3, 2, a@got@tlsgd@ha + addi 3, 3, a@got@tlsgd@l + bl __tls_get_addr(a@tlsgd) + nop + lwa 3, 0(3) + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr + + + .globl other_reg + .p2align 4 + .type other_reg,@function +other_reg: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry other_reg, .Lfunc_lep1-.Lfunc_gep1 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + addis 5, 2, a@got@tlsgd@ha + addi 3, 5, a@got@tlsgd@l + bl __tls_get_addr(a@tlsgd) + nop + lwa 
4, 0(3) + addis 30, 2, b@got@tlsgd@ha + addi 3, 30, b@got@tlsgd@l + bl __tls_get_addr(b@tlsgd) + nop + lwa 3, 0(3) + add 3, 4, 3 + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr + + .globl __tls_get_addr + .type __tls_get_addr,@function +__tls_get_addr: + + +# CheckGot: .got 00000018 00000000100200c0 DATA +# .got is at 0x100200c0 so the toc-base is 0x100280c0. +# `a` is at .got[1], we expect the offsets to be: +# Ha(a) = ((0x100200c8 - 0x100280c0) + 0x8000) >> 16 = 0 +# Lo(a) = (0x100200c8 - 0x100280c0) = -32760 + +# Dis-LABEL: _start +# Dis: addis 3, 2, 0 +# Dis-NEXT: ld 3, -32760(3) +# Dis-NEXT: nop +# Dis-NEXT: add 3, 3, 13 + +# Dis-LABEL: other_reg +# Dis: addis 5, 2, 0 +# Dis-NEXT: ld 3, -32760(5) +# Dis-NEXT: nop +# Dis-NEXT: add 3, 3, 13 +# Dis: addis 30, 2, 0 +# Dis: ld 3, -32752(30) +# Dis-NEXT: nop +# Dis-NEXT: add 3, 3, 13 + +# Verify that the only dynamic relocations we emit are TPREL ones rather than +# the DTPMOD64/DTPREL64 pair for general-dynamic. +# OutputRelocs: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 2 entries: +# OutputRelocs-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# OutputRelocs-NEXT: {{[0-9a-f]+}} {{[0-9a-f]+}} R_PPC64_TPREL64 {{[0-9a-f]+}} a + 0 +# OutputRelocs-NEXT: {{[0-9a-f]+}} {{[0-9a-f]+}} R_PPC64_TPREL64 {{[0-9a-f]+}} b + 0