Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -257,6 +257,8 @@ case R_PC: case R_RELAX_GOT_PC: return Body.getVA(A) - P; + case R_RELAX_GOT_PC_NOPIC: + return Body.getVA(); case R_PAGE_PC: return getAArch64Page(Body.getVA(A)) - getAArch64Page(P); } @@ -325,6 +327,7 @@ switch (Expr) { case R_RELAX_GOT_PC: + case R_RELAX_GOT_PC_NOPIC: Target->relaxGot(BufLoc, SymVA); break; case R_RELAX_TLS_IE_TO_LE: Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -39,6 +39,7 @@ R_PPC_PLT_OPD, R_PPC_TOC, R_RELAX_GOT_PC, + R_RELAX_GOT_PC_NOPIC, R_RELAX_TLS_GD_TO_IE, R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_IE_TO_LE, Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -244,7 +244,7 @@ // file (PC, or GOT for example). static bool isRelExpr(RelExpr Expr) { return Expr == R_PC || Expr == R_GOTREL || Expr == R_PAGE_PC || - Expr == R_RELAX_GOT_PC; + Expr == R_RELAX_GOT_PC || Expr == R_RELAX_GOT_PC_NOPIC; } template @@ -370,8 +370,8 @@ } else if (!Preemptible) { if (needsPlt(Expr)) Expr = fromPlt(Expr); - if (Expr == R_GOT_PC && Target->canRelaxGot(Type, Data + Offset)) - Expr = R_RELAX_GOT_PC; + if (Expr == R_GOT_PC) + Expr = Target->adjustRelaxGotExpr(Type, Data + Offset, Expr); } if (IsWrite || isStaticLinkTimeConstant(Expr, Type, Body)) Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -88,7 +88,8 @@ uint32_t ThunkSize = 0; - virtual bool canRelaxGot(uint32_t Type, const uint8_t *Data) const; + virtual RelExpr adjustRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const; virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; virtual void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; virtual void 
relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -113,7 +113,8 @@ int32_t Index, unsigned RelOff) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; - bool canRelaxGot(uint32_t Type, const uint8_t *Data) const override; + RelExpr adjustRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const override; void relaxGot(uint8_t *Loc, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; @@ -234,8 +235,9 @@ return false; } -bool TargetInfo::canRelaxGot(uint32_t Type, const uint8_t *Data) const { - return false; +RelExpr TargetInfo::adjustRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { + return Expr; } void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { @@ -734,16 +736,25 @@ } } -bool X86_64TargetInfo::canRelaxGot(uint32_t Type, const uint8_t *Data) const { +RelExpr X86_64TargetInfo::adjustRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr RelExpr) const { if (Type != R_X86_64_GOTPCRELX && Type != R_X86_64_REX_GOTPCRELX) - return false; + return RelExpr; const uint8_t Op = Data[-2]; const uint8_t ModRm = Data[-1]; // Relax mov. if (Op == 0x8b) - return true; + return R_RELAX_GOT_PC; // Relax call and jmp. - return Op == 0xff && (ModRm == 0x15 || ModRm == 0x25); + if (Op == 0xff && (ModRm == 0x15 || ModRm == 0x25)) + return R_RELAX_GOT_PC; + + // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor. + // If PIC then no relaxation is available. + // We also don't relax test/binop instructions without a REX byte; + // they are 32-bit operations and are uncommon. + return (!Config->Pic && Type == R_X86_64_REX_GOTPCRELX) ? 
R_RELAX_GOT_PC_NOPIC + : RelExpr; } void X86_64TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { @@ -757,22 +768,86 @@ return; } - assert(Op == 0xff); - if (ModRm == 0x15) { - // ABI says we can convert call *foo@GOTPCREL(%rip) to nop call foo. - // Instead we convert to addr32 call foo, where addr32 is instruction - // prefix. That makes result expression to be a single instruction. - *(Loc - 2) = 0x67; // addr32 prefix - *(Loc - 1) = 0xe8; // call - } else { - assert(ModRm == 0x25); - // Convert jmp *foo@GOTPCREL(%rip) to jmp foo nop. - // jmp doesn't return, so it is fine to use nop here, it is just a stub. - *(Loc - 2) = 0xe9; // jmp - *(Loc + 3) = 0x90; // nop - Loc -= 1; - Val += 1; + // Convert call/jmp instructions. + if (Op == 0xff) { + if (ModRm == 0x15) { + // ABI says we can convert call *foo@GOTPCREL(%rip) to nop call foo. + // Instead we convert to addr32 call foo, where addr32 is instruction + // prefix. That makes result expression to be a single instruction. + *(Loc - 2) = 0x67; // addr32 prefix + *(Loc - 1) = 0xe8; // call + } + else { + assert(ModRm == 0x25); + // Convert jmp *foo@GOTPCREL(%rip) to jmp foo nop. + // jmp doesn't return, so it is fine to use nop here, it is just a stub. + *(Loc - 2) = 0xe9; // jmp + *(Loc + 3) = 0x90; // nop + Loc -= 1; + Val += 1; + } + relocateOne(Loc, R_X86_64_PC32, Val); + return; } + + // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg". + // Documents used: + // 1) http://ref.x86asm.net/coder64.html + // 2) http://wiki.osdev.org/X86-64_Instruction_Encoding + // 3) http://www.swansontec.com/sintel.html + assert(!Config->Pic); + const uint8_t Rex = Loc[-3]; + if (Op == 0x85) { + // See 0x85 description in (1). Column "o" has "r", which indicates that the + // instruction uses a "full" ModR/M byte (no opcode extension). + + // ModR/M byte has form XX YYY ZZZ, where + // YYY is MODRM.reg (register 2), ZZZ is MODRM.rm (register 1). 
+ // XX has different meanings: + // 00: The operand's memory address is in reg1. + // 01: The operand's memory address is reg1 + a byte-sized displacement. + // 10: The operand's memory address is reg1 + a 32-bit displacement. + // 11: The operand is reg1 itself. + // If an instruction requires only one operand, the unused reg2 field + // holds extra opcode bits rather than a register code. + // 0xC0 == 11 000 000 binary. + // 0x38 == 00 111 000 binary. + // We transfer reg2 to reg1 here as operand. See (2), (3). + *(Loc - 1) = 0xc0 | (ModRm & 0x38) >> 3; // ModR/M byte. + + // Change opcode from test op1 = r/m16/32/64, op2 = r16/32/64 + // to test op1 = r/m16/32/64, op2 = imm16/32/64, see (1). + *(Loc - 2) = 0xf7; + + // Move R bit to the B bit in REX byte. + // REX byte is encoded as 0100WRXB, where + // 0100 is a 4-bit fixed pattern. + // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the + // default operand size is used (which is 32-bit for most but not all + // instructions). + // REX.R This 1-bit value is an extension to the MODRM.reg field. + // REX.X This 1-bit value is an extension to the SIB.index field. + // REX.B This 1-bit value is an extension to the MODRM.rm field or the + // SIB.base field, see (3). + *(Loc - 3) = (Rex & ~0x4) | (Rex & 0x4) >> 2; + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub + // or xor operations. + + // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg". + // The logic is close to the one for the test instruction above, but we also + // write opcode extension here, see below for info. + *(Loc - 1) = 0xc0 | (ModRm & 0x38) >> 3 | (Op & 0x3c); // ModR/M byte. + + // Primary opcode is 0x81, opcode extension is one of: + // 000b is ADD, 001b is OR, 010b is ADC, 011b is SBB, + // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP. + // This value was written to MODRM.reg in the line above, see (1). 
+ *(Loc - 2) = 0x81; + *(Loc - 3) = (Rex & ~0x4) | (Rex & 0x4) >> 2; relocateOne(Loc, R_X86_64_PC32, Val); } Index: test/ELF/gotpc-relax-nopic.s =================================================================== --- test/ELF/gotpc-relax-nopic.s +++ test/ELF/gotpc-relax-nopic.s @@ -0,0 +1,109 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=x86_64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t1 +# RUN: llvm-readobj -symbols -r %t1 | FileCheck --check-prefix=SYMRELOC %s +# RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s + +## There are no relocations. +# SYMRELOC: Relocations [ +# SYMRELOC-NEXT: ] +# SYMRELOC: Symbols [ +# SYMRELOC: Symbol { +# SYMRELOC: Name: bar +# SYMRELOC-NEXT: Value: 0x13000 + +## 77824 = 0x13000 (bar) +## Note that the 32-bit versions of the operations are not relaxed. +# DISASM: Disassembly of section .text: +# DISASM-NEXT: _start: +# DISASM-NEXT: 11000: 13 05 fa 0f 00 00 adcl 4090(%rip), %eax +# DISASM-NEXT: 11006: 03 1d f4 0f 00 00 addl 4084(%rip), %ebx +# DISASM-NEXT: 1100c: 23 0d ee 0f 00 00 andl 4078(%rip), %ecx +# DISASM-NEXT: 11012: 3b 15 e8 0f 00 00 cmpl 4072(%rip), %edx +# DISASM-NEXT: 11018: 0b 35 e2 0f 00 00 orl 4066(%rip), %esi +# DISASM-NEXT: 1101e: 1b 3d dc 0f 00 00 sbbl 4060(%rip), %edi +# DISASM-NEXT: 11024: 2b 2d d6 0f 00 00 subl 4054(%rip), %ebp +# DISASM-NEXT: 1102a: 44 33 05 cf 0f 00 00 xorl 4047(%rip), %r8d +# DISASM-NEXT: 11031: 44 85 3d c8 0f 00 00 testl 4040(%rip), %r15d +# DISASM-NEXT: 11038: 48 81 d0 00 30 01 00 adcq $77824, %rax +# DISASM-NEXT: 1103f: 48 81 c3 00 30 01 00 addq $77824, %rbx +# DISASM-NEXT: 11046: 48 81 e1 00 30 01 00 andq $77824, %rcx +# DISASM-NEXT: 1104d: 48 81 fa 00 30 01 00 cmpq $77824, %rdx +# DISASM-NEXT: 11054: 48 81 cf 00 30 01 00 orq $77824, %rdi +# DISASM-NEXT: 1105b: 48 81 de 00 30 01 00 sbbq $77824, %rsi +# DISASM-NEXT: 11062: 48 81 ed 00 30 01 00 subq $77824, %rbp +# DISASM-NEXT: 11069: 49 81 f0 00 30 01 00 xorq $77824, %r8 +# DISASM-NEXT: 11070: 49 
f7 c7 00 30 01 00 testq $77824, %r15 + +# RUN: ld.lld -shared %t.o -o %t2 +# RUN: llvm-readobj -s %t2 | FileCheck --check-prefix=SEC-PIC %s +# RUN: llvm-objdump -d %t2 | FileCheck --check-prefix=DISASM-PIC %s +# SEC-PIC: Section { +# SEC-PIC: Index: +# SEC-PIC: Name: .got +# SEC-PIC-NEXT: Type: SHT_PROGBITS +# SEC-PIC-NEXT: Flags [ +# SEC-PIC-NEXT: SHF_ALLOC +# SEC-PIC-NEXT: SHF_WRITE +# SEC-PIC-NEXT: ] +# SEC-PIC-NEXT: Address: 0x2090 +# SEC-PIC-NEXT: Offset: 0x2090 +# SEC-PIC-NEXT: Size: 8 +# SEC-PIC-NEXT: Link: +# SEC-PIC-NEXT: Info: +# SEC-PIC-NEXT: AddressAlignment: +# SEC-PIC-NEXT: EntrySize: +# SEC-PIC-NEXT: } + +## Check that there was no relaxation performed. All values refer to got entry. +## Ex: 0x1000 + 4234 + 6 = 0x2090 +## 0x102a + 4191 + 7 = 0x2090 +# DISASM-PIC: Disassembly of section .text: +# DISASM-PIC-NEXT: _start: +# DISASM-PIC-NEXT: 1000: 13 05 8a 10 00 00 adcl 4234(%rip), %eax +# DISASM-PIC-NEXT: 1006: 03 1d 84 10 00 00 addl 4228(%rip), %ebx +# DISASM-PIC-NEXT: 100c: 23 0d 7e 10 00 00 andl 4222(%rip), %ecx +# DISASM-PIC-NEXT: 1012: 3b 15 78 10 00 00 cmpl 4216(%rip), %edx +# DISASM-PIC-NEXT: 1018: 0b 35 72 10 00 00 orl 4210(%rip), %esi +# DISASM-PIC-NEXT: 101e: 1b 3d 6c 10 00 00 sbbl 4204(%rip), %edi +# DISASM-PIC-NEXT: 1024: 2b 2d 66 10 00 00 subl 4198(%rip), %ebp +# DISASM-PIC-NEXT: 102a: 44 33 05 5f 10 00 00 xorl 4191(%rip), %r8d +# DISASM-PIC-NEXT: 1031: 44 85 3d 58 10 00 00 testl 4184(%rip), %r15d +# DISASM-PIC-NEXT: 1038: 48 13 05 51 10 00 00 adcq 4177(%rip), %rax +# DISASM-PIC-NEXT: 103f: 48 03 1d 4a 10 00 00 addq 4170(%rip), %rbx +# DISASM-PIC-NEXT: 1046: 48 23 0d 43 10 00 00 andq 4163(%rip), %rcx +# DISASM-PIC-NEXT: 104d: 48 3b 15 3c 10 00 00 cmpq 4156(%rip), %rdx +# DISASM-PIC-NEXT: 1054: 48 0b 3d 35 10 00 00 orq 4149(%rip), %rdi +# DISASM-PIC-NEXT: 105b: 48 1b 35 2e 10 00 00 sbbq 4142(%rip), %rsi +# DISASM-PIC-NEXT: 1062: 48 2b 2d 27 10 00 00 subq 4135(%rip), %rbp +# DISASM-PIC-NEXT: 1069: 4c 33 05 20 10 00 00 xorq 4128(%rip), %r8 
+# DISASM-PIC-NEXT: 1070: 4c 85 3d 19 10 00 00 testq 4121(%rip), %r15 + +.data +.type bar, @object +bar: + .byte 1 + .size bar, .-bar + +.text +.globl _start +.type _start, @function +_start: + adcl bar@GOTPCREL(%rip), %eax + addl bar@GOTPCREL(%rip), %ebx + andl bar@GOTPCREL(%rip), %ecx + cmpl bar@GOTPCREL(%rip), %edx + orl bar@GOTPCREL(%rip), %esi + sbbl bar@GOTPCREL(%rip), %edi + subl bar@GOTPCREL(%rip), %ebp + xorl bar@GOTPCREL(%rip), %r8d + testl %r15d, bar@GOTPCREL(%rip) + adcq bar@GOTPCREL(%rip), %rax + addq bar@GOTPCREL(%rip), %rbx + andq bar@GOTPCREL(%rip), %rcx + cmpq bar@GOTPCREL(%rip), %rdx + orq bar@GOTPCREL(%rip), %rdi + sbbq bar@GOTPCREL(%rip), %rsi + subq bar@GOTPCREL(%rip), %rbp + xorq bar@GOTPCREL(%rip), %r8 + testq %r15, bar@GOTPCREL(%rip)