diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -384,6 +384,8 @@ bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; + RelExpr adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const override; void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const override; void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, @@ -1392,15 +1394,6 @@ RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const { - if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) && - config->pcRelOptimize) { - // It only makes sense to optimize pld since paddi means that the address - // of the object in the GOT is required rather than the object itself. - assert(data && "Expecting an instruction encoding here"); - if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000) - return R_PPC64_RELAX_GOT_PC; - } - if (type != R_PPC64_GOT_TLSGD_PCREL34 && expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) @@ -1408,6 +1401,18 @@ return expr; } +RelExpr PPC64::adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const { + if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) && + config->pcRelOptimize) { + // It only makes sense to optimize pld since paddi means that the address + // of the object in the GOT is required rather than the object itself. + if ((readPrefixedInstruction(loc) & 0xfc000000) == 0xe4000000) + return R_PPC64_RELAX_GOT_PC; + } + return R_GOT_PC; +} + // Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement. // The general dynamic code sequence for a global `x` uses 4 instructions. 
// Instruction Relocation Symbol diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -40,8 +40,8 @@ void applyJumpInstrMod(uint8_t *loc, JumpModType type, unsigned size) const override; - RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, - RelExpr expr) const override; + RelExpr adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const override; void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const override; void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, @@ -728,12 +728,17 @@ } } -RelExpr X86_64::adjustRelaxExpr(RelType type, const uint8_t *data, - RelExpr relExpr) const { - if (type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX) - return relExpr; - const uint8_t op = data[-2]; - const uint8_t modRm = data[-1]; +RelExpr X86_64::adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const { + // Only R_X86_64_[REX_]GOTPCRELX can be relaxed. addend != -4 means the + // instruction does not load the full GOT entry, so we cannot relax the + // relocation. E.g. movl x@GOTPCREL+4(%rip), %eax (addend=0) loads the high 32 + // bits of the GOT entry. + if ((type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX) || + addend != -4) + return R_GOT_PC; + const uint8_t op = loc[-2]; + const uint8_t modRm = loc[-1]; // FIXME: When PIC is disabled and foo is defined locally in the // lower 32 bit address space, memory operand in mov can be converted into @@ -748,11 +753,11 @@ // We don't support test/binop instructions without a REX prefix. if (type == R_X86_64_GOTPCRELX) - return relExpr; + return R_GOT_PC; // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor. // If PIC then no relaxation is available. - return config->isPic ? relExpr : R_RELAX_GOT_PC_NOPIC; + return config->isPic ? R_GOT_PC : R_RELAX_GOT_PC_NOPIC; } // A subset of relaxations can only be applied for no-PIC. 
This method diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1361,9 +1361,7 @@ // runtime, because the main executable is always at the beginning of a search // list. We can leverage that fact. if (!sym.isPreemptible && (!sym.isGnuIFunc() || config->zIfuncNoplt)) { - if (expr == R_GOT_PC && !isAbsoluteValue(sym)) { - expr = target->adjustRelaxExpr(type, relocatedAddr, expr); - } else { + if (expr != R_GOT_PC) { // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call // stub type. It should be ignored if optimized to R_PC. if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL) @@ -1375,6 +1373,8 @@ type == R_HEX_GD_PLT_B22_PCREL_X || type == R_HEX_GD_PLT_B32_PCREL_X))) expr = fromPlt(expr); + } else if (!isAbsoluteValue(sym)) { + expr = target->adjustGotPcExpr(type, addend, relocatedAddr); } } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -150,6 +150,8 @@ virtual RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const; + virtual RelExpr adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const; virtual void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const; virtual void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -155,6 +155,11 @@ return expr; } +RelExpr TargetInfo::adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *data) const { + return R_GOT_PC; +} + void TargetInfo::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const { llvm_unreachable("Should not have claimed to be relaxable"); diff --git a/lld/test/ELF/x86-64-gotpc-offset.s b/lld/test/ELF/x86-64-gotpc-offset.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/x86-64-gotpc-offset.s @@ -0,0 +1,18 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o 
+# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -s -d %t | FileCheck %s + +# CHECK: Contents of section .got: +# CHECK-NEXT: 2021a0 9d112000 00000000 + +## The second instruction has an offset (addend!=-4). It is incorrect to relax +## movl to leal. +# CHECK: leaq 6(%rip), %rax # {{.*}} +# CHECK-NEXT: movl {{.*}}(%rip), %eax # 2021a4 + movq foo@GOTPCREL(%rip), %rax + movl foo@GOTPCREL+4(%rip), %eax + +.globl foo +foo: + nop