Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -221,6 +221,7 @@ case R_NEG_TLS: return Out::TlsPhdr->p_memsz - Body.getVA(A); case R_ABS: + case R_RELAX_GOT_PC_NOPIC: return Body.getVA(A); case R_GOT_OFF: return Body.getGotOffset() + A; @@ -325,6 +326,7 @@ switch (Expr) { case R_RELAX_GOT_PC: + case R_RELAX_GOT_PC_NOPIC: Target->relaxGot(BufLoc, SymVA); break; case R_RELAX_TLS_IE_TO_LE: Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -39,6 +39,7 @@ R_PPC_PLT_OPD, R_PPC_TOC, R_RELAX_GOT_PC, + R_RELAX_GOT_PC_NOPIC, R_RELAX_TLS_GD_TO_IE, R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_IE_TO_LE, Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -370,8 +370,8 @@ } else if (!Preemptible) { if (needsPlt(Expr)) Expr = fromPlt(Expr); - if (Expr == R_GOT_PC && Target->canRelaxGot(Type, Data + Offset)) - Expr = R_RELAX_GOT_PC; + if (Expr == R_GOT_PC) + Expr = Target->adjustRelaxGotExpr(Type, Data + Offset, Expr); } if (IsWrite || isStaticLinkTimeConstant(Expr, Type, Body)) Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -88,7 +88,8 @@ uint32_t ThunkSize = 0; - virtual bool canRelaxGot(uint32_t Type, const uint8_t *Data) const; + virtual RelExpr adjustRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const; virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; virtual void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; virtual void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -113,7 +113,8 @@ int32_t Index, unsigned RelOff) const override; void 
relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; - bool canRelaxGot(uint32_t Type, const uint8_t *Data) const override; + RelExpr adjustRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const override; void relaxGot(uint8_t *Loc, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; @@ -234,8 +235,9 @@ return false; } -bool TargetInfo::canRelaxGot(uint32_t Type, const uint8_t *Data) const { - return false; +RelExpr TargetInfo::adjustRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { + return Expr; } void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { @@ -734,16 +736,28 @@ } } -bool X86_64TargetInfo::canRelaxGot(uint32_t Type, const uint8_t *Data) const { +RelExpr X86_64TargetInfo::adjustRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr RelExpr) const { if (Type != R_X86_64_GOTPCRELX && Type != R_X86_64_REX_GOTPCRELX) - return false; + return RelExpr; const uint8_t Op = Data[-2]; const uint8_t ModRm = Data[-1]; - // Relax mov. + // FIXME: When PIC is disabled and foo is defined locally in the + // lower 32 bit address space, memory operand in mov can be converted into + // immediate operand. Otherwise, mov must be changed to lea. We support only + // latter relaxation at this moment. if (Op == 0x8b) - return true; + return R_RELAX_GOT_PC; // Relax call and jmp. - return Op == 0xff && (ModRm == 0x15 || ModRm == 0x25); + if (Op == 0xff && (ModRm == 0x15 || ModRm == 0x25)) + return R_RELAX_GOT_PC; + + // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor. + // If PIC then no relaxation is available. + // We also don't relax test/binop instructions without REX byte, + // they are 32bit operations and not common to have. + return (!Config->Pic && Type == R_X86_64_REX_GOTPCRELX) ? 
R_RELAX_GOT_PC_NOPIC + : RelExpr; } void X86_64TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { @@ -757,22 +771,95 @@ return; } - assert(Op == 0xff); - if (ModRm == 0x15) { - // ABI says we can convert call *foo@GOTPCREL(%rip) to nop call foo. - // Instead we convert to addr32 call foo, where addr32 is instruction - // prefix. That makes result expression to be a single instruction. - *(Loc - 2) = 0x67; // addr32 prefix - *(Loc - 1) = 0xe8; // call - } else { - assert(ModRm == 0x25); - // Convert jmp *foo@GOTPCREL(%rip) to jmp foo nop. - // jmp doesn't return, so it is fine to use nop here, it is just a stub. - *(Loc - 2) = 0xe9; // jmp - *(Loc + 3) = 0x90; // nop - Loc -= 1; - Val += 1; + // Convert call/jmp instructions. + if (Op == 0xff) { + if (ModRm == 0x15) { + // ABI says we can convert call *foo@GOTPCREL(%rip) to nop call foo. + // Instead we convert to addr32 call foo, where addr32 is instruction + // prefix. That makes result expression to be a single instruction. + *(Loc - 2) = 0x67; // addr32 prefix + *(Loc - 1) = 0xe8; // call + } + else { + assert(ModRm == 0x25); + // Convert jmp *foo@GOTPCREL(%rip) to jmp foo nop. + // jmp doesn't return, so it is fine to use nop here, it is just a stub. + *(Loc - 2) = 0xe9; // jmp + *(Loc + 3) = 0x90; // nop + Loc -= 1; + Val += 1; + } + relocateOne(Loc, R_X86_64_PC32, Val); + return; } + + assert(!Config->Pic); + // We are relaxing a rip relative to an absolute, so compensate for the old + // -4 addend. + Val += 4; + // "Intel 64 and IA-32 Architectures Software Developer's Manual V2" + // (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/ + // 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf) + // can be used as reference. + + const uint8_t Rex = Loc[-3]; + // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg". + if (Op == 0x85) { + // See "TEST-Logical Compare" (4-428 Vol. 
2B), + // TEST r/m64, r64 uses "full" ModR/M byte (no opcode extension). + + // ModR/M byte has form XX YYY ZZZ, where + // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1). + // XX has different meanings: + // 00: The operand's memory address is in reg1. + // 01: The operand's memory address is reg1 + a byte-sized displacement. + // 10: The operand's memory address is reg1 + a 32-bit displacement. + // 11: The operand is reg1 itself. + // If an instruction requires only one operand, the unused reg2 field + // holds extra opcode bits rather than a register code. + // 0xC0 == 11 000 000 binary. + // 0x38 == 00 111 000 binary. + // We transfer reg2 to reg1 here as operand. + // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3). + *(Loc - 1) = 0xc0 | (ModRm & 0x38) >> 3; // ModR/M byte. + + // Change opcode from TEST r/m64, r64 to TEST r/m64, imm32. + // See "TEST-Logical Compare" (4-428 Vol. 2B). + *(Loc - 2) = 0xf7; + + // Move R bit to the B bit in REX byte. + // REX byte is encoded as 0100WRXB, where + // 0100 is a 4-bit fixed pattern. + // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the + // default operand size is used (which is 32-bit for most but not all + // instructions). + // REX.R This 1-bit value is an extension to the MODRM.reg field. + // REX.X This 1-bit value is an extension to the SIB.index field. + // REX.B This 1-bit value is an extension to the MODRM.rm field or the + // SIB.base field. + // See "2.2.1.2 More on REX Prefix Fields" (2-8 Vol. 2A). + *(Loc - 3) = (Rex & ~0x4) | (Rex & 0x4) >> 2; + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub + // or xor operations. + + // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg". + // The logic is close to that of the test instruction above, but we also + // write opcode extension here, see below for details. + *(Loc - 1) = 0xc0 | (ModRm & 0x38) >> 3 | (Op & 0x3c); // ModR/M byte. 
+ + // Primary opcode is 0x81, opcode extension is one of: + // 000b is ADD, 001b is OR, 010b is ADC, 011b is SBB, + // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP. + // This value was written to MODRM.reg in the line above. + // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15), + // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for + // descriptions of each operation. + *(Loc - 2) = 0x81; + *(Loc - 3) = (Rex & ~0x4) | (Rex & 0x4) >> 2; relocateOne(Loc, R_X86_64_PC32, Val); } Index: test/ELF/gotpc-relax-nopic.s =================================================================== --- test/ELF/gotpc-relax-nopic.s +++ test/ELF/gotpc-relax-nopic.s @@ -0,0 +1,82 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=x86_64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t1 +# RUN: llvm-readobj -symbols -r %t1 | FileCheck --check-prefix=SYMRELOC %s +# RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s + +## There are no relocations. +# SYMRELOC: Relocations [ +# SYMRELOC-NEXT: ] +# SYMRELOC: Symbols [ +# SYMRELOC: Symbol { +# SYMRELOC: Name: bar +# SYMRELOC-NEXT: Value: 0x12000 + +## 73728 = 0x12000 (bar) +## Note that 32-bit versions of the operations are not relaxed. 
+# DISASM: Disassembly of section .text: +# DISASM-NEXT: _start: +# DISASM-NEXT: 11000: 48 81 d0 00 20 01 00 adcq $73728, %rax +# DISASM-NEXT: 11007: 48 81 c3 00 20 01 00 addq $73728, %rbx +# DISASM-NEXT: 1100e: 48 81 e1 00 20 01 00 andq $73728, %rcx +# DISASM-NEXT: 11015: 48 81 fa 00 20 01 00 cmpq $73728, %rdx +# DISASM-NEXT: 1101c: 48 81 cf 00 20 01 00 orq $73728, %rdi +# DISASM-NEXT: 11023: 48 81 de 00 20 01 00 sbbq $73728, %rsi +# DISASM-NEXT: 1102a: 48 81 ed 00 20 01 00 subq $73728, %rbp +# DISASM-NEXT: 11031: 49 81 f0 00 20 01 00 xorq $73728, %r8 +# DISASM-NEXT: 11038: 49 f7 c7 00 20 01 00 testq $73728, %r15 + +# RUN: ld.lld -shared %t.o -o %t2 +# RUN: llvm-readobj -s %t2 | FileCheck --check-prefix=SEC-PIC %s +# RUN: llvm-objdump -d %t2 | FileCheck --check-prefix=DISASM-PIC %s +# SEC-PIC: Section { +# SEC-PIC: Index: +# SEC-PIC: Name: .got +# SEC-PIC-NEXT: Type: SHT_PROGBITS +# SEC-PIC-NEXT: Flags [ +# SEC-PIC-NEXT: SHF_ALLOC +# SEC-PIC-NEXT: SHF_WRITE +# SEC-PIC-NEXT: ] +# SEC-PIC-NEXT: Address: 0x2090 +# SEC-PIC-NEXT: Offset: 0x2090 +# SEC-PIC-NEXT: Size: 8 +# SEC-PIC-NEXT: Link: +# SEC-PIC-NEXT: Info: +# SEC-PIC-NEXT: AddressAlignment: +# SEC-PIC-NEXT: EntrySize: +# SEC-PIC-NEXT: } + +## Check that there was no relaxation performed. All values refer to got entry. 
+## Ex: 0x1000 + 4233 + 7 = 0x2090 +## 0x102a + 4191 + 7 = 0x2090 +# DISASM-PIC: Disassembly of section .text: +# DISASM-PIC-NEXT: _start: +# DISASM-PIC-NEXT: 1000: 48 13 05 89 10 00 00 adcq 4233(%rip), %rax +# DISASM-PIC-NEXT: 1007: 48 03 1d 82 10 00 00 addq 4226(%rip), %rbx +# DISASM-PIC-NEXT: 100e: 48 23 0d 7b 10 00 00 andq 4219(%rip), %rcx +# DISASM-PIC-NEXT: 1015: 48 3b 15 74 10 00 00 cmpq 4212(%rip), %rdx +# DISASM-PIC-NEXT: 101c: 48 0b 3d 6d 10 00 00 orq 4205(%rip), %rdi +# DISASM-PIC-NEXT: 1023: 48 1b 35 66 10 00 00 sbbq 4198(%rip), %rsi +# DISASM-PIC-NEXT: 102a: 48 2b 2d 5f 10 00 00 subq 4191(%rip), %rbp +# DISASM-PIC-NEXT: 1031: 4c 33 05 58 10 00 00 xorq 4184(%rip), %r8 +# DISASM-PIC-NEXT: 1038: 4c 85 3d 51 10 00 00 testq 4177(%rip), %r15 + +.data +.type bar, @object +bar: + .byte 1 + .size bar, .-bar + +.text +.globl _start +.type _start, @function +_start: + adcq bar@GOTPCREL(%rip), %rax + addq bar@GOTPCREL(%rip), %rbx + andq bar@GOTPCREL(%rip), %rcx + cmpq bar@GOTPCREL(%rip), %rdx + orq bar@GOTPCREL(%rip), %rdi + sbbq bar@GOTPCREL(%rip), %rsi + subq bar@GOTPCREL(%rip), %rbp + xorq bar@GOTPCREL(%rip), %r8 + testq %r15, bar@GOTPCREL(%rip)