Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -21,6 +21,7 @@ using namespace lld::elf; static uint64_t PPC64TocOffset = 0x8000; +static uint64_t DynamicThreadPointerOffset = 0x8000; uint64_t elf::getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The @@ -180,6 +181,17 @@ case R_PPC64_GOT_TLSLD16_HI: case R_PPC64_GOT_TLSLD16_LO: return R_TLSLD_GOT; + case R_PPC64_DTPREL16: + case R_PPC64_DTPREL16_DS: + case R_PPC64_DTPREL16_HA: + case R_PPC64_DTPREL16_HI: + case R_PPC64_DTPREL16_HIGHER: + case R_PPC64_DTPREL16_HIGHERA: + case R_PPC64_DTPREL16_HIGHEST: + case R_PPC64_DTPREL16_HIGHESTA: + case R_PPC64_DTPREL16_LO: + case R_PPC64_DTPREL16_LO_DS: + return R_ABS; case R_PPC64_TLSGD: case R_PPC64_TLSLD: return R_HINT; @@ -224,28 +236,56 @@ } static std::pair toAddr16Rel(RelType Type, uint64_t Val) { - uint64_t V = Val - PPC64TocOffset; + // Relocations relative to the toc-base need to be adjusted by the Toc offset. + uint64_t TocBiasedVal = Val - PPC64TocOffset; + // Relocations relative to dtv[dtpmod] need to be adjusted by the DTP offset. + uint64_t DTPBiasedVal = Val - DynamicThreadPointerOffset; + switch (Type) { + // TOC biased relocation. 
case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSLD16: case R_PPC64_TOC16: - return {R_PPC64_ADDR16, V}; + return {R_PPC64_ADDR16, TocBiasedVal}; case R_PPC64_TOC16_DS: - return {R_PPC64_ADDR16_DS, V}; + return {R_PPC64_ADDR16_DS, TocBiasedVal}; case R_PPC64_GOT_TLSGD16_HA: case R_PPC64_GOT_TLSLD16_HA: case R_PPC64_TOC16_HA: - return {R_PPC64_ADDR16_HA, V}; + return {R_PPC64_ADDR16_HA, TocBiasedVal}; case R_PPC64_GOT_TLSGD16_HI: case R_PPC64_GOT_TLSLD16_HI: case R_PPC64_TOC16_HI: - return {R_PPC64_ADDR16_HI, V}; + return {R_PPC64_ADDR16_HI, TocBiasedVal}; case R_PPC64_GOT_TLSGD16_LO: case R_PPC64_GOT_TLSLD16_LO: case R_PPC64_TOC16_LO: - return {R_PPC64_ADDR16_LO, V}; + return {R_PPC64_ADDR16_LO, TocBiasedVal}; case R_PPC64_TOC16_LO_DS: - return {R_PPC64_ADDR16_LO_DS, V}; + return {R_PPC64_ADDR16_LO_DS, TocBiasedVal}; + + // Dynamic Thread pointer biased relocation types. + case R_PPC64_DTPREL16: + return {R_PPC64_ADDR16, DTPBiasedVal}; + case R_PPC64_DTPREL16_DS: + return {R_PPC64_ADDR16_DS, DTPBiasedVal}; + case R_PPC64_DTPREL16_HA: + return {R_PPC64_ADDR16_HA, DTPBiasedVal}; + case R_PPC64_DTPREL16_HI: + return {R_PPC64_ADDR16_HI, DTPBiasedVal}; + case R_PPC64_DTPREL16_HIGHER: + return {R_PPC64_ADDR16_HIGHER, DTPBiasedVal}; + case R_PPC64_DTPREL16_HIGHERA: + return {R_PPC64_ADDR16_HIGHERA, DTPBiasedVal}; + case R_PPC64_DTPREL16_HIGHEST: + return {R_PPC64_ADDR16_HIGHEST, DTPBiasedVal}; + case R_PPC64_DTPREL16_HIGHESTA: + return {R_PPC64_ADDR16_HIGHESTA, DTPBiasedVal}; + case R_PPC64_DTPREL16_LO: + return {R_PPC64_ADDR16_LO, DTPBiasedVal}; + case R_PPC64_DTPREL16_LO_DS: + return {R_PPC64_ADDR16_LO_DS, DTPBiasedVal}; + default: return {Type, Val}; } Index: test/ELF/ppc64-dtprel.s =================================================================== --- /dev/null +++ test/ELF/ppc64-dtprel.s @@ -0,0 +1,158 @@ +// REQUIRES: ppc + +// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +// RUN: ld.lld -shared %t.o -o %t.so +// RUN: llvm-readelf 
-relocations --wide %t.o | FileCheck --check-prefix=InputRelocs %s +// RUN: llvm-readelf -relocations --wide %t.so | FileCheck --check-prefix=OutputRelocs %s +// RUN: llvm-objdump -D %t.so | FileCheck --check-prefix=Dis %s + +// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +// RUN: ld.lld -shared %t.o -o %t.so +// RUN: llvm-readelf -relocations --wide %t.o | FileCheck --check-prefix=InputRelocs %s +// RUN: llvm-readelf -relocations --wide %t.so | FileCheck --check-prefix=OutputRelocs %s +// RUN: llvm-objdump -D %t.so | FileCheck --check-prefix=Dis %s + + .text + .abiversion 2 + .globl test + .p2align 4 + .type test,@function +test: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry test, .Lfunc_lep0-.Lfunc_gep0 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + addis 3, 2, i@got@tlsld@ha + addi 3, 3, i@got@tlsld@l + bl __tls_get_addr(i@tlsld) + nop + addi 4, 3, i@dtprel + lwa 4, i@dtprel(3) + ld 0, 16(1) + mtlr 0 + blr + + .globl test_64 + .p2align 4 + .type test_64,@function + + .globl test_adjusted + .p2align 4 + .type test_adjusted,@function +test_adjusted: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry test_adjusted, .Lfunc_lep1-.Lfunc_gep1 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + addis 3, 2, k@got@tlsld@ha + addi 3, 3, k@got@tlsld@l + bl __tls_get_addr(k@tlsld) + nop + lis 4, k@dtprel@highesta + ori 4, 4, k@dtprel@highera + lis 5, k@dtprel@ha + addi 5, 5, k@dtprel@l + sldi 4, 4, 32 + or 4, 4, 5 + add 3, 3, 4 + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr + + .globl test_not_adjusted + .p2align 4 + .type test_not_adjusted,@function +test_not_adjusted: +.Lfunc_gep2: + addis 2, 12, .TOC.-.Lfunc_gep2@ha + addi 2, 2, .TOC.-.Lfunc_gep2@l +.Lfunc_lep2: + .localentry test_not_adjusted, .Lfunc_lep2-.Lfunc_gep2 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + addis 3, 2, i@got@tlsld@ha + addi 3, 3, i@got@tlsld@l + bl 
__tls_get_addr(k@tlsld) + nop + lis 4, k@dtprel@highest + ori 4, 4, k@dtprel@higher + sldi 4, 4, 32 + oris 4, 4, k@dtprel@h + ori 4, 4, k@dtprel@l + add 3, 3, 4 + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr + + .type i,@object + .section .tdata,"awT",@progbits + .space 1024 + .p2align 2 +i: + .long 55 + .size i, 4 + + .space 1024 * 1024 * 4 + .type k,@object + .p2align 2 +k: + .long 128 + .size k,4 + +// Verify the input has all the remaining DTPREL based relocations we want to +// test. +// InputRelocs: Relocation section '.rela.text' +// InputRelocs: R_PPC64_DTPREL16 {{[0-9a-f]+}} i + 0 +// InputRelocs: R_PPC64_DTPREL16_DS {{[0-9a-f]+}} i + 0 +// InputRelocs: R_PPC64_DTPREL16_HIGHESTA {{[0-9a-f]+}} k + 0 +// InputRelocs: R_PPC64_DTPREL16_HIGHERA {{[0-9a-f]+}} k + 0 +// InputRelocs: R_PPC64_DTPREL16_HA {{[0-9a-f]+}} k + 0 +// InputRelocs: R_PPC64_DTPREL16_LO {{[0-9a-f]+}} k + 0 +// InputRelocs: R_PPC64_DTPREL16_HIGHEST {{[0-9a-f]+}} k + 0 +// InputRelocs: R_PPC64_DTPREL16_HIGHER {{[0-9a-f]+}} k + 0 +// InputRelocs: R_PPC64_DTPREL16_HI {{[0-9a-f]+}} k + 0 +// InputRelocs: R_PPC64_DTPREL16_LO {{[0-9a-f]+}} k + 0 + +// Expect a single dynamic relocation in the '.rela.dyn' section for the module id. 
+// OutputRelocs: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 1 entries: +// OutputRelocs-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +// OutputRelocs-NEXT: R_PPC64_DTPMOD64 + + +// i@dtprel --> (1024 - 0x8000) = -31744 +// Dis: test: +// Dis: addi 4, 3, -31744 +// Dis: lwa 4, -31744(3) + +// #k@dtprel(1024 + 4 + 1024 * 1024 * 4) = 0x400404 + +// #highesta(k@dtprel) --> ((0x400404 - 0x8000 + 0x8000) >> 48) & 0xffff = 0 +// #highera(k@dtprel) --> ((0x400404 - 0x8000 + 0x8000) >> 32) & 0xffff = 0 +// #ha(k@dtprel) --> ((0x400404 - 0x8000 + 0x8000) >> 16) & 0xffff = 64 +// #lo(k@dtprel) --> (0x400404 - 0x8000) & 0xffff = -31740 +// Dis: test_adjusted: +// Dis: lis 4, 0 +// Dis: ori 4, 4, 0 +// Dis: lis 5, 64 +// Dis: addi 5, 5, -31740 + +// #highest(k@dtprel) --> ((0x400404 - 0x8000) >> 48) & 0xffff = 0 +// #higher(k@dtprel) --> ((0x400404 - 0x8000) >> 32) & 0xffff = 0 +// #hi(k@dtprel) --> ((0x400404 - 0x8000) >> 16) & 0xffff = 63 +// #lo(k@dtprel) --> (0x400404 - 0x8000) & 0xffff = 33796 +// Dis: test_not_adjusted: +// Dis: lis 4, 0 +// Dis: ori 4, 4, 0 +// Dis: oris 4, 4, 63 +// Dis: ori 4, 4, 33796 Index: test/ELF/ppc64-local-dynamic.s =================================================================== --- test/ELF/ppc64-local-dynamic.s +++ test/ELF/ppc64-local-dynamic.s @@ -32,6 +32,8 @@ addi 3, 3, i@got@tlsld@l bl __tls_get_addr(i@tlsld) nop + addis 3, 3, i@dtprel@ha + lwa 3, i@dtprel@l(3) ld 0, 16(1) mtlr 0 blr @@ -71,13 +73,17 @@ .quad 66 .size k, 8 -// Verify that the input contains all the R_PPC64_GOT_TLSLD16* relocations. +// Verify that the input contains all the R_PPC64_GOT_TLSLD16* relocations, as +// well as the DTPREL relocations used in a typical medium code model +// local-dynamic variable access. 
// InputRelocs: Relocation section '.rela.text' -// InputRelocs: R_PPC64_GOT_TLSLD16_HA 0000000000000000 i + 0 -// InputRelocs: R_PPC64_GOT_TLSLD16_LO 0000000000000000 i + 0 -// InputRelocs: R_PPC64_TLSLD 0000000000000000 i + 0 -// InputRelocs: R_PPC64_GOT_TLSLD16_HI 0000000000000000 j + 0 -// InputRelocs: R_PPC64_GOT_TLSLD16 0000000000000008 k + 0 +// InputRelocs: R_PPC64_GOT_TLSLD16_HA {{[0-9a-f]+}} i + 0 +// InputRelocs: R_PPC64_GOT_TLSLD16_LO {{[0-9a-f]+}} i + 0 +// InputRelocs: R_PPC64_TLSLD {{[0-9a-f]+}} i + 0 +// InputRelocs: R_PPC64_DTPREL16_HA {{[0-9a-f]+}} i + 0 +// InputRelocs: R_PPC64_DTPREL16_LO_DS {{[0-9a-f]+}} i + 0 +// InputRelocs: R_PPC64_GOT_TLSLD16_HI {{[0-9a-f]+}} j + 0 +// InputRelocs: R_PPC64_GOT_TLSLD16 {{[0-9a-f]+}} k + 0 // The local dynamic version of tls needs to use the same mechanism to look up // a variables address as general-dynamic. ie a call to __tls_get_addr with the @@ -99,9 +105,19 @@ // #ha(i@got@tlsld) --> (0x20108 - 0x28100 + 0x8000) >> 16 = 0 // #lo(i@got@tlsld) --> (0x20108 - 0x28100) = -7ff8 = -32760 +// When calculating offset relative to the dynamic thread pointer we have to +// adjust by 0x8000 since each DTV pointer points 0x8000 bytes past the start of +// its TLS block. +// #ha(i@dtprel) --> (0x0 -0x8000 + 0x8000) >> 16 = 0 +// #lo(i@dtprel) --> (0x0 -0x8000) = -0x8000 = -32768 // Dis: test: // Dis: addis 3, 2, 0 // Dis-NEXT: addi 3, 3, -32760 +// Dis-NEXT: bl .+67108804 +// Dis-NEXT: ld 2, 24(1) +// Dis-NEXT: addis 3, 3, 0 +// Dis-NEXT: lwa 3, -32768(3) + // #hi(j@got@tlsld) --> (0x20108 - 0x28100 ) > 16 = -1 // Dis: test_hi: