Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -257,6 +257,9 @@ case R_PC: case R_RELAX_GOT_PC: return Body.getVA(A) - P; + case R_RELAX_GOT_PC_NOPIC: + case R_RELAX_GOT_PC_NOPIC_REX: + return Body.getVA(); case R_PAGE_PC: return getAArch64Page(Body.getVA(A)) - getAArch64Page(P); } @@ -325,7 +328,9 @@ switch (Expr) { case R_RELAX_GOT_PC: - Target->relaxGot(BufLoc, SymVA); + case R_RELAX_GOT_PC_NOPIC: + case R_RELAX_GOT_PC_NOPIC_REX: + Target->relaxGot(BufLoc, SymVA, Expr); break; case R_RELAX_TLS_IE_TO_LE: Target->relaxTlsIeToLe(BufLoc, Type, SymVA); Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -39,6 +39,8 @@ R_PPC_PLT_OPD, R_PPC_TOC, R_RELAX_GOT_PC, + R_RELAX_GOT_PC_NOPIC, + R_RELAX_GOT_PC_NOPIC_REX, R_RELAX_TLS_GD_TO_IE, R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_IE_TO_LE, Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -244,7 +244,8 @@ // file (PC, or GOT for example). 
static bool isRelExpr(RelExpr Expr) { return Expr == R_PC || Expr == R_GOTREL || Expr == R_PAGE_PC || - Expr == R_RELAX_GOT_PC; + Expr == R_RELAX_GOT_PC || Expr == R_RELAX_GOT_PC_NOPIC || + Expr == R_RELAX_GOT_PC_NOPIC_REX; } template @@ -370,8 +371,7 @@ } else if (!Preemptible) { if (needsPlt(Expr)) Expr = fromPlt(Expr); - if (Expr == R_GOT_PC && Target->canRelaxGot(Type, Data + Offset)) - Expr = R_RELAX_GOT_PC; + Expr = Target->getRelaxGotExpr(Type, Data + Offset, Expr); } if (IsWrite || isStaticLinkTimeConstant(Expr, Type, Body)) Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -88,8 +88,8 @@ uint32_t ThunkSize = 0; - virtual bool canRelaxGot(uint32_t Type, const uint8_t *Data) const; - virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; + virtual RelExpr getRelaxGotExpr(uint32_t Type, const uint8_t *Data, RelExpr Expr) const; + virtual void relaxGot(uint8_t *Loc, uint64_t Val, RelExpr Expr) const; virtual void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; virtual void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; virtual void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -113,8 +113,9 @@ int32_t Index, unsigned RelOff) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; - bool canRelaxGot(uint32_t Type, const uint8_t *Data) const override; - void relaxGot(uint8_t *Loc, uint64_t Val) const override; + RelExpr getRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const override; + void relaxGot(uint8_t *Loc, uint64_t Val, RelExpr Expr) const override; void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, 
uint32_t Type, uint64_t Val) const override; @@ -234,11 +235,13 @@ return false; } -bool TargetInfo::canRelaxGot(uint32_t Type, const uint8_t *Data) const { - return false; +RelExpr TargetInfo::getRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { + // Return initial expression to show that no relaxation is available. + return Expr; } -void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { +void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val, RelExpr Expr) const { llvm_unreachable("Should not have claimed to be relaxable"); } @@ -734,19 +737,30 @@ } } -bool X86_64TargetInfo::canRelaxGot(uint32_t Type, const uint8_t *Data) const { +RelExpr X86_64TargetInfo::getRelaxGotExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { if (Type != R_X86_64_GOTPCRELX && Type != R_X86_64_REX_GOTPCRELX) - return false; + return Expr; const uint8_t Op = Data[-2]; const uint8_t ModRm = Data[-1]; // Relax mov. if (Op == 0x8b) - return true; + return R_RELAX_GOT_PC; // Relax call and jmp. - return Op == 0xff && (ModRm == 0x15 || ModRm == 0x25); + if (Op == 0xff && (ModRm == 0x15 || ModRm == 0x25)) + return R_RELAX_GOT_PC; + + // If PIC - no relaxation is available. + if (Config->Pic) + return Expr; + + // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor. + bool Rex = Type == R_X86_64_REX_GOTPCRELX; + return Rex ? R_RELAX_GOT_PC_NOPIC_REX : R_RELAX_GOT_PC_NOPIC; } -void X86_64TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { +void X86_64TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val, + RelExpr Expr) const { const uint8_t Op = Loc[-2]; const uint8_t ModRm = Loc[-1]; @@ -757,22 +771,87 @@ return; } - assert(Op == 0xff); - if (ModRm == 0x15) { - // ABI says we can convert call *foo@GOTPCREL(%rip) to nop call foo. - // Instead we convert to addr32 call foo, where addr32 is instruction - // prefix. That makes result expression to be a single instruction. 
- *(Loc - 2) = 0x67; // addr32 prefix - *(Loc - 1) = 0xe8; // call - } else { - assert(ModRm == 0x25); - // Convert jmp *foo@GOTPCREL(%rip) to jmp foo nop. - // jmp doesn't return, so it is fine to use nop here, it is just a stub. - *(Loc - 2) = 0xe9; // jmp - *(Loc + 3) = 0x90; // nop - Loc -= 1; - Val += 1; + // Convert call/jmp instructions. + if (Op == 0xff) { + if (ModRm == 0x15) { + // ABI says we can convert call *foo@GOTPCREL(%rip) to nop call foo. + // Instead we convert to addr32 call foo, where addr32 is instruction + // prefix. That makes result expression to be a single instruction. + *(Loc - 2) = 0x67; // addr32 prefix + *(Loc - 1) = 0xe8; // call + } + else { + assert(ModRm == 0x25); + // Convert jmp *foo@GOTPCREL(%rip) to jmp foo nop. + // jmp doesn't return, so it is fine to use nop here, it is just a stub. + *(Loc - 2) = 0xe9; // jmp + *(Loc + 3) = 0x90; // nop + Loc -= 1; + Val += 1; + } + relocateOne(Loc, R_X86_64_PC32, Val); + return; } + + assert(Expr == R_RELAX_GOT_PC_NOPIC_REX || Expr == R_RELAX_GOT_PC_NOPIC); + bool HasRex = Expr == RelExpr::R_RELAX_GOT_PC_NOPIC_REX; + // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg". + if (Op == 0x85) { + // See 0x85 description at http://ref.x86asm.net/coder64.html: + // column "o" has "r", which indicates that the instruction uses a "full" ModR/M byte + // (no opcode extension). + + // ModR/M byte has form XX YYY ZZZ, where + // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1). + // XX has different meanings: + // 00: The operand's memory address is in reg1. + // 01: The operand's memory address is reg1 + a byte-sized displacement. + // 10: The operand's memory address is reg1 + a 32-bit displacement. + // 11: The operand is reg1 itself. + // If an instruction requires only one operand, the unused reg2 field + // holds extra opcode bits rather than a register code. + // 0xC0 == 11 000 000 binary. + // 0x38 == 00 111 000 binary. + // We transfer reg2 to reg1 here as operand.
+ // (http://www.swansontec.com/sintel.html, http://wiki.osdev.org/X86-64_Instruction_Encoding) + *(Loc - 1) = 0xc0 | (ModRm & 0x38) >> 3; // ModR/M byte. + + // Change opcode from test op1 = r/m16/32/64, op2 = r16/32/64 + // to test op1 = r/m16/32/64, op2 = imm16/32. + // Instruction description available at http://ref.x86asm.net/coder64.html + *(Loc - 2) = 0xf7; + + // Move R bit to the B bit in REX byte. + // REX byte is encoded as 0100WRXB, where + // 0100 is a 4-bit fixed pattern. + // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the + // default operand size is used (which is 32-bit for most but not all instructions). + // REX.R This 1-bit value is an extension to the MODRM.reg field. + // REX.X This 1-bit value is an extension to the SIB.index field. + // REX.B This 1-bit value is an extension to the MODRM.rm field or the SIB.base field. + // (http://wiki.osdev.org/X86-64_Instruction_Encoding#64-bit_addressing) + // If we know that the instruction has a REX byte, it is safe to access and modify it: + if (HasRex) + *(Loc - 3) = (Loc[-3] & ~0x4) | (Loc[-3] & 0x4) >> 2; + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + // If we are here then we need to relax the binop, where binop + // is one of the operations mentioned above. + // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg". + // Logic similar to one for test instruction above, but we also + // write opcode extension here, see below for info. + *(Loc - 1) = 0xc0 | (ModRm & 0x38) >> 3 | (Op & 0x3c); // ModR/M byte. + + // Primary opcode is 0x81, opcode extension is one of: + // 000b is ADD, 001b is OR, 010b is ADC, 011b is SBB, + // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP. + // This value was written to MODRM.reg in the line above. + // (see http://ref.x86asm.net/coder64.html for reference).
+ *(Loc - 2) = 0x81; + if (HasRex) + *(Loc - 3) = (Loc[-3] & ~0x4) | (Loc[-3] & 0x4) >> 2; relocateOne(Loc, R_X86_64_PC32, Val); } Index: test/ELF/gotpc-relax-nopic.s =================================================================== --- test/ELF/gotpc-relax-nopic.s +++ test/ELF/gotpc-relax-nopic.s @@ -0,0 +1,111 @@ +# REQUIRES: x86 +# RUN: ld.lld %S/Inputs/gotpc-relax-nopic.o -o %t1 +# RUN: llvm-readobj -symbols -r %t1 | FileCheck --check-prefix=SYMRELOC %s +# RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s + +## There are no relocations. +# SYMRELOC: Relocations [ +# SYMRELOC-NEXT: ] +# SYMRELOC: Symbols [ +# SYMRELOC: Symbol { +# SYMRELOC: Name: bar +# SYMRELOC-NEXT: Value: 0x12000 + +## LLVM-MC is broken atm (https://llvm.org/bugs/show_bug.cgi?id=27939), +## so we are using a precompiled object file as input. It was generated using the code below and +## the following command line: +## as --64 -mrelax-relocations=yes test.s -o test.o +## .data +## .type bar, @object +## bar: +## .byte 1 +## .size bar, .-bar +## +## .text +## .globl _start +## .type _start, @function +## _start: +## adcl bar@GOTPCREL(%rip), %eax +## addl bar@GOTPCREL(%rip), %ebx +## andl bar@GOTPCREL(%rip), %ecx +## cmpl bar@GOTPCREL(%rip), %edx +## orl bar@GOTPCREL(%rip), %esi +## sbbl bar@GOTPCREL(%rip), %edi +## subl bar@GOTPCREL(%rip), %ebp +## xorl bar@GOTPCREL(%rip), %r8d +## testl %r15d, bar@GOTPCREL(%rip) +## adcq bar@GOTPCREL(%rip), %rax +## addq bar@GOTPCREL(%rip), %rbx +## andq bar@GOTPCREL(%rip), %rcx +## cmpq bar@GOTPCREL(%rip), %rdx +## orq bar@GOTPCREL(%rip), %rdi +## sbbq bar@GOTPCREL(%rip), %rsi +## subq bar@GOTPCREL(%rip), %rbp +## xorq bar@GOTPCREL(%rip), %r8 +## testq %r15, bar@GOTPCREL(%rip) + +## 73728 = 0x12000 (bar) +# DISASM: Disassembly of section .text: +# DISASM-NEXT: _start: +# DISASM-NEXT: 11000: 81 d0 00 20 01 00 adcl $73728, %eax +# DISASM-NEXT: 11006: 81 c3 00 20 01 00 addl $73728, %ebx +# DISASM-NEXT: 1100c: 81 e1 00 20 01 00 andl $73728, %ecx +# DISASM-NEXT:
11012: 81 fa 00 20 01 00 cmpl $73728, %edx +# DISASM-NEXT: 11018: 81 ce 00 20 01 00 orl $73728, %esi +# DISASM-NEXT: 1101e: 81 df 00 20 01 00 sbbl $73728, %edi +# DISASM-NEXT: 11024: 81 ed 00 20 01 00 subl $73728, %ebp +# DISASM-NEXT: 1102a: 41 81 f0 00 20 01 00 xorl $73728, %r8d +# DISASM-NEXT: 11031: 41 f7 c7 00 20 01 00 testl $73728, %r15d +# DISASM-NEXT: 11038: 48 81 d0 00 20 01 00 adcq $73728, %rax +# DISASM-NEXT: 1103f: 48 81 c3 00 20 01 00 addq $73728, %rbx +# DISASM-NEXT: 11046: 48 81 e1 00 20 01 00 andq $73728, %rcx +# DISASM-NEXT: 1104d: 48 81 fa 00 20 01 00 cmpq $73728, %rdx +# DISASM-NEXT: 11054: 48 81 cf 00 20 01 00 orq $73728, %rdi +# DISASM-NEXT: 1105b: 48 81 de 00 20 01 00 sbbq $73728, %rsi +# DISASM-NEXT: 11062: 48 81 ed 00 20 01 00 subq $73728, %rbp +# DISASM-NEXT: 11069: 49 81 f0 00 20 01 00 xorq $73728, %r8 +# DISASM-NEXT: 11070: 49 f7 c7 00 20 01 00 testq $73728, %r15 + +# RUN: ld.lld -shared %S/Inputs/gotpc-relax-nopic.o -o %t2 +# RUN: llvm-readobj -s %t2 | FileCheck --check-prefix=SEC-PIC %s +# RUN: llvm-objdump -d %t2 | FileCheck --check-prefix=DISASM-PIC %s +# SEC-PIC: Section { +# SEC-PIC: Index: +# SEC-PIC: Name: .got +# SEC-PIC-NEXT: Type: SHT_PROGBITS +# SEC-PIC-NEXT: Flags [ +# SEC-PIC-NEXT: SHF_ALLOC +# SEC-PIC-NEXT: SHF_WRITE +# SEC-PIC-NEXT: ] +# SEC-PIC-NEXT: Address: 0x2090 +# SEC-PIC-NEXT: Offset: 0x2090 +# SEC-PIC-NEXT: Size: 8 +# SEC-PIC-NEXT: Link: +# SEC-PIC-NEXT: Info: +# SEC-PIC-NEXT: AddressAlignment: +# SEC-PIC-NEXT: EntrySize: +# SEC-PIC-NEXT: } + +## Check that there was no relaxation performed. All values refer to got entry. 
+## Ex: 0x1000 + 4234 + 6 = 0x2090 +## 0x102a + 4191 + 7 = 0x2090 +# DISASM-PIC: Disassembly of section .text: +# DISASM-PIC-NEXT: _start: +# DISASM-PIC-NEXT: 1000: 13 05 8a 10 00 00 adcl 4234(%rip), %eax +# DISASM-PIC-NEXT: 1006: 03 1d 84 10 00 00 addl 4228(%rip), %ebx +# DISASM-PIC-NEXT: 100c: 23 0d 7e 10 00 00 andl 4222(%rip), %ecx +# DISASM-PIC-NEXT: 1012: 3b 15 78 10 00 00 cmpl 4216(%rip), %edx +# DISASM-PIC-NEXT: 1018: 0b 35 72 10 00 00 orl 4210(%rip), %esi +# DISASM-PIC-NEXT: 101e: 1b 3d 6c 10 00 00 sbbl 4204(%rip), %edi +# DISASM-PIC-NEXT: 1024: 2b 2d 66 10 00 00 subl 4198(%rip), %ebp +# DISASM-PIC-NEXT: 102a: 44 33 05 5f 10 00 00 xorl 4191(%rip), %r8d +# DISASM-PIC-NEXT: 1031: 44 85 3d 58 10 00 00 testl 4184(%rip), %r15d +# DISASM-PIC-NEXT: 1038: 48 13 05 51 10 00 00 adcq 4177(%rip), %rax +# DISASM-PIC-NEXT: 103f: 48 03 1d 4a 10 00 00 addq 4170(%rip), %rbx +# DISASM-PIC-NEXT: 1046: 48 23 0d 43 10 00 00 andq 4163(%rip), %rcx +# DISASM-PIC-NEXT: 104d: 48 3b 15 3c 10 00 00 cmpq 4156(%rip), %rdx +# DISASM-PIC-NEXT: 1054: 48 0b 3d 35 10 00 00 orq 4149(%rip), %rdi +# DISASM-PIC-NEXT: 105b: 48 1b 35 2e 10 00 00 sbbq 4142(%rip), %rsi +# DISASM-PIC-NEXT: 1062: 48 2b 2d 27 10 00 00 subq 4135(%rip), %rbp +# DISASM-PIC-NEXT: 1069: 4c 33 05 20 10 00 00 xorq 4128(%rip), %r8 +# DISASM-PIC-NEXT: 1070: 4c 85 3d 19 10 00 00 testq 4121(%rip), %r15