Index: ELF/Arch/AArch64.cpp =================================================================== --- ELF/Arch/AArch64.cpp +++ ELF/Arch/AArch64.cpp @@ -39,6 +39,9 @@ void writePltHeader(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; + bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, + uint64_t BranchAddr, const Symbol &S) const override; + bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; bool usesOnlyLowPageBits(RelType Type) const override; void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, @@ -66,6 +69,12 @@ // It doesn't seem to be documented anywhere, but tls on aarch64 uses variant // 1 of the tls structures and the tcb size is 16. TcbSize = 16; + NeedsThunks = true; + + // See comment in Arch/ARM.cpp for a more detailed explanation of + // ThunkSectionSpacing. For AArch64 the only branches we are permitted to + // Thunk have a range of +/- 128 MiB + ThunkSectionSpacing = (128 * 1024 * 1024) - 0x30000; } RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S, @@ -181,6 +190,31 @@ relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotPltEntryAddr); } +bool AArch64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, + uint64_t BranchAddr, const Symbol &S) const { + // ELF for the ARM 64-bit architecture, section Call and Jump relocations + // only permits range extension thunks for R_AARCH64_CALL26 and + // R_AARCH64_JUMP26 relocation types. + if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26) + return false; + uint64_t Dst = (Expr == R_PLT_PC) ? 
S.getPltVA() : S.getVA(); + return !inBranchRange(Type, BranchAddr, Dst); +} + +bool AArch64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { + if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26) + return true; + // The AArch64 call and unconditional branch instructions have a range of + // +/- 128 MiB. + uint64_t Range = 128 * 1024 * 1024; + if (Dst > Src) { + // The branch immediate is signed: max forward offset is 128 MiB - 4. + Range -= 4; + return Dst - Src <= Range; + } + return Src - Dst <= Range; +} + static void write32AArch64Addr(uint8_t *L, uint64_t Imm) { uint32_t ImmLo = (Imm & 0x3) << 29; uint32_t ImmHi = (Imm & 0x1FFFFC) << 3; Index: ELF/Thunks.cpp =================================================================== --- ELF/Thunks.cpp +++ ELF/Thunks.cpp @@ -48,6 +48,23 @@ namespace { +// AArch64 long range Thunks +class AArch64ABSLongThunk final : public Thunk { +public: + AArch64ABSLongThunk(Symbol &Dest) : Thunk(Dest) {} + uint32_t size() const override { return 16; } + void writeTo(uint8_t *Buf, ThunkSection &IS) const override; + void addSymbols(ThunkSection &IS) override; +}; + +class AArch64PILongThunk final : public Thunk { +public: + AArch64PILongThunk(Symbol &Dest) : Thunk(Dest) {} + uint32_t size() const override { return 24; } + void writeTo(uint8_t *Buf, ThunkSection &IS) const override; + void addSymbols(ThunkSection &IS) override; +}; + // Specific ARM Thunk implementations. The naming convention is: // Source State, TargetState, Target Requirement, ABS or PI, Range class ARMV7ABSLongThunk final : public Thunk { @@ -125,6 +142,59 @@ } // end anonymous namespace +// AArch64 long range Thunks + +static uint64_t getAArch64ThunkDestVA(const Symbol &S) { + uint64_t V = S.isInPlt() ? 
S.getPltVA() : S.getVA(); + return V; +} + +void AArch64ABSLongThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const { + const uint8_t Data[] = { + 0x50, 0x00, 0x00, 0x58, // ldr x16, L0 + 0x00, 0x02, 0x1f, 0xd6, // br x16 + 0x00, 0x00, 0x00, 0x00, // L0: .xword S + 0x00, 0x00, 0x00, 0x00, + }; + uint64_t S = getAArch64ThunkDestVA(Destination); + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf + 8, R_AARCH64_ABS64, S); +} + +void AArch64ABSLongThunk::addSymbols(ThunkSection &IS) { + ThunkSym = addSyntheticLocal( + Saver.save("__AArch64AbsLongThunk_" + Destination.getName()), STT_FUNC, + Offset, size(), &IS); + addSyntheticLocal("$x", STT_NOTYPE, Offset, 0, &IS); + addSyntheticLocal("$d", STT_NOTYPE, Offset + 8, 0, &IS); +} + +// Conservative implementation that can cover all of the address space. A more +// efficient implementation with a maximum range of +/- 4 GiB is possible if +// ADRP followed by add is used. +void AArch64PILongThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const { + const uint8_t Data[] = { + 0x10, 0x00, 0x00, 0x10, // P: adr x16, #0 + 0x71, 0x00, 0x00, 0x58, // ldr x17, L1 + 0x10, 0x02, 0x11, 0x8b, // add x16, x16, x17 + 0x00, 0x02, 0x1f, 0xd6, // br x16 + 0x00, 0x00, 0x00, 0x00, // L1: .xword (S - P) + 0x00, 0x00, 0x00, 0x00, + }; + uint64_t S = getAArch64ThunkDestVA(Destination); + uint64_t P = ThunkSym->getVA(); + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf + 16, R_AARCH64_PREL64, S - P); +} + +void AArch64PILongThunk::addSymbols(ThunkSection &IS) { + ThunkSym = addSyntheticLocal( + Saver.save("__AArch64PILongThunk_" + Destination.getName()), STT_FUNC, + Offset, size(), &IS); + addSyntheticLocal("$x", STT_NOTYPE, Offset, 0, &IS); + addSyntheticLocal("$d", STT_NOTYPE, Offset + 16, 0, &IS); +} + // ARM Target Thunks static uint64_t getARMThunkDestVA(const Symbol &S) { uint64_t V = S.isInPlt() ? 
S.getPltVA() : S.getVA(); @@ -309,6 +379,16 @@ Thunk::~Thunk() = default; +// Creates a range extension thunk for an AArch64 call or jump. The thunk is +// position independent when Config->Pic is set, absolute otherwise. +static Thunk *addThunkAArch64(RelType Type, Symbol &S) { + if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26) + fatal("unrecognized relocation type"); + if (Config->Pic) + return make<AArch64PILongThunk>(S); + return make<AArch64ABSLongThunk>(S); +} + // Creates a thunk for Thumb-ARM interworking. static Thunk *addThunkArm(RelType Reloc, Symbol &S) { // ARM relocations need ARM to Thumb interworking Thunks. @@ -341,7 +421,9 @@ } Thunk *addThunk(RelType Type, Symbol &S) { - if (Config->EMachine == EM_ARM) + if (Config->EMachine == EM_AARCH64) + return addThunkAArch64(Type, S); + else if (Config->EMachine == EM_ARM) return addThunkArm(Type, S); else if (Config->EMachine == EM_MIPS) return addThunkMips(Type, S); Index: test/ELF/aarch64-call26-error.s =================================================================== --- test/ELF/aarch64-call26-error.s +++ /dev/null @@ -1,11 +0,0 @@ -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t -// RUN: not ld.lld %t %tabs -o %t2 2>&1 | FileCheck %s -// REQUIRES: aarch64 - -.text -.globl _start -_start: - bl big - -// CHECK: R_AARCH64_CALL26 out of range Index: test/ELF/aarch64-call26-thunk.s =================================================================== --- /dev/null +++ test/ELF/aarch64-call26-thunk.s @@ -0,0 +1,21 @@ +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t +// RUN: ld.lld %t %tabs -o %t2 2>&1 +// RUN: llvm-objdump -d -triple=aarch64-pc-freebsd %t2 | FileCheck %s +// REQUIRES: aarch64 + +.text +.globl _start +_start: + bl big + +// CHECK: Disassembly of section .text: +// CHECK-NEXT: _start: +// CHECK-NEXT: 20000: 02 00 00 94 bl #8 +// CHECK: __AArch64AbsLongThunk_big: +// CHECK-NEXT: 20008: 50 00 00 58 ldr x16, #8 +// CHECK-NEXT: 2000c: 00 02 1f d6 br x16 +// CHECK: $d: +// 
CHECK-NEXT: 20010: 00 00 00 00 .word 0x00000000 +// CHECK-NEXT: 20014: 10 00 00 00 .word 0x00000010 + Index: test/ELF/aarch64-jump26-error.s =================================================================== --- test/ELF/aarch64-jump26-error.s +++ /dev/null @@ -1,11 +0,0 @@ -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t -// RUN: not ld.lld %t %tabs -o %t2 2>&1 | FileCheck %s -// REQUIRES: aarch64 - -.text -.globl _start -_start: - b big - -// CHECK: R_AARCH64_JUMP26 out of range Index: test/ELF/aarch64-jump26-thunk.s =================================================================== --- /dev/null +++ test/ELF/aarch64-jump26-thunk.s @@ -0,0 +1,20 @@ +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t +// RUN: ld.lld %t %tabs -o %t2 2>&1 +// RUN: llvm-objdump -d -triple=aarch64-pc-freebsd %t2 | FileCheck %s +// REQUIRES: aarch64 + +.text +.globl _start +_start: + b big + +// CHECK: Disassembly of section .text: +// CHECK-NEXT: _start: +// CHECK-NEXT: 20000: 02 00 00 14 b #8 +// CHECK: __AArch64AbsLongThunk_big: +// CHECK-NEXT: 20008: 50 00 00 58 ldr x16, #8 +// CHECK-NEXT: 2000c: 00 02 1f d6 br x16 +// CHECK: $d: +// CHECK-NEXT: 20010: 00 00 00 00 .word 0x00000000 +// CHECK-NEXT: 20014: 10 00 00 00 .word 0x00000010 Index: test/ELF/aarch64-thunk-pi.s =================================================================== --- /dev/null +++ test/ELF/aarch64-thunk-pi.s @@ -0,0 +1,104 @@ +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t +// RUN: echo "SECTIONS { \ +// RUN: .text_low : { *(.text_low) } \ +// RUN: .text_high 0x10000000 : { *(.text_high) } \ +// RUN: } " > %t.script +// RUN: ld.lld --script %t.script --shared %t -o %t2 2>&1 +// RUN: llvm-objdump -d -triple=aarch64-linux-gnu %t2 | FileCheck %s +// REQUIRES: aarch64 + +// Check that Position 
Independent thunks are generated for shared libraries. + .section .text_low, "ax", %progbits + .globl low_target + .type low_target, %function +low_target: + // Need thunk to high_target@plt + bl high_target + ret +// CHECK: low_target: +// CHECK-NEXT: 0: 04 00 00 94 bl #16 +// CHECK-NEXT: 4: c0 03 5f d6 ret + + .hidden low_target2 + .globl low_target2 + .type low_target2, %function +low_target2: + // Need thunk to high_target + bl high_target2 + ret +// CHECK: low_target2: +// CHECK-NEXT: 8: 08 00 00 94 bl #32 +// CHECK-NEXT: c: c0 03 5f d6 ret + +// Expect range extension thunks for .text_low +// Calculation is PC at adr instruction + signed offset in .words + +// CHECK: __AArch64PILongThunk_high_target: +// CHECK-NEXT: 10: 10 00 00 10 adr x16, #0 +// CHECK-NEXT: 14: 71 00 00 58 ldr x17, #12 +// CHECK-NEXT: 18: 10 02 11 8b add x16, x16, x17 +// CHECK-NEXT: 1c: 00 02 1f d6 br x16 +// CHECK: $d: +// CHECK-NEXT: 20: 10 01 00 10 .word 0x10000110 +// CHECK-NEXT: 24: 00 00 00 00 .word 0x00000000 + +// CHECK: __AArch64PILongThunk_high_target2: +// CHECK-NEXT: 28: 10 00 00 10 adr x16, #0 +// CHECK-NEXT: 2c: 71 00 00 58 ldr x17, #12 +// CHECK-NEXT: 30: 10 02 11 8b add x16, x16, x17 +// CHECK-NEXT: 34: 00 02 1f d6 br x16 +// CHECK: $d: +// CHECK-NEXT: 38: e0 ff ff 0f .word 0x0fffffe0 +// CHECK-NEXT: 3c: 00 00 00 00 .word 0x00000000 + + .section .text_high, "ax", %progbits + .globl high_target + .type high_target, %function +high_target: + // No thunk needed as we can reach low_target@plt + bl low_target + ret +// CHECK: high_target: +// CHECK-NEXT: 10000000: 4c 00 00 94 bl #304 +// CHECK-NEXT: 10000004: c0 03 5f d6 ret + + .hidden high_target2 + .globl high_target2 + .type high_target2, %function +high_target2: + // Need thunk to low_target + bl low_target2 + ret +// CHECK: high_target2: +// CHECK-NEXT: 10000008: 02 00 00 94 bl #8 +// CHECK-NEXT: 1000000c: c0 03 5f d6 ret + +// Expect Thunk for .text.high + +// CHECK: __AArch64PILongThunk_low_target2: +// CHECK-NEXT: 
10000010: 10 00 00 10 adr x16, #0 +// CHECK-NEXT: 10000014: 71 00 00 58 ldr x17, #12 +// CHECK-NEXT: 10000018: 10 02 11 8b add x16, x16, x17 +// CHECK-NEXT: 1000001c: 00 02 1f d6 br x16 +// CHECK: $d: +// CHECK-NEXT: 10000020: f8 ff ff ef .word 0xeffffff8 +// CHECK-NEXT: 10000024: ff ff ff ff .word 0xffffffff + +// CHECK: Disassembly of section .plt: +// CHECK-NEXT: .plt: +// CHECK-NEXT: 10000100: f0 7b bf a9 stp x16, x30, [sp, #-16]! +// CHECK-NEXT: 10000104: 10 00 00 90 adrp x16, #0 +// CHECK-NEXT: 10000108: 11 aa 40 f9 ldr x17, [x16, #336] +// CHECK-NEXT: 1000010c: 10 42 05 91 add x16, x16, #336 +// CHECK-NEXT: 10000110: 20 02 1f d6 br x17 +// CHECK-NEXT: 10000114: 1f 20 03 d5 nop +// CHECK-NEXT: 10000118: 1f 20 03 d5 nop +// CHECK-NEXT: 1000011c: 1f 20 03 d5 nop +// CHECK-NEXT: 10000120: 10 00 00 90 adrp x16, #0 +// CHECK-NEXT: 10000124: 11 ae 40 f9 ldr x17, [x16, #344] +// CHECK-NEXT: 10000128: 10 62 05 91 add x16, x16, #344 +// CHECK-NEXT: 1000012c: 20 02 1f d6 br x17 +// CHECK-NEXT: 10000130: 10 00 00 90 adrp x16, #0 +// CHECK-NEXT: 10000134: 11 b2 40 f9 ldr x17, [x16, #352] +// CHECK-NEXT: 10000138: 10 82 05 91 add x16, x16, #352 +// CHECK-NEXT: 1000013c: 20 02 1f d6 br x17 Index: test/ELF/aarch64-thunk-script.s =================================================================== --- /dev/null +++ test/ELF/aarch64-thunk-script.s @@ -0,0 +1,41 @@ +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t +// RUN: echo "SECTIONS { \ +// RUN: .text_low 0x2000: { *(.text_low) } \ +// RUN: .text_high 0x8002000 : { *(.text_high) } \ +// RUN: } " > %t.script +// RUN: ld.lld --script %t.script %t -o %t2 2>&1 +// RUN: llvm-objdump -d -triple=aarch64-linux-gnu %t2 | FileCheck %s +// REQUIRES: aarch64 + +// Check that we have the out of branch range calculation right. The immediate +// field is signed so we have a slightly higher negative displacement. 
+ .section .text_low, "ax", %progbits + .globl _start + .type _start, %function +_start: + // Need thunk to high_target + bl high_target + ret + + .section .text_high, "ax", %progbits + .globl high_target + .type high_target, %function +high_target: + // No Thunk needed as we are within signed immediate range + bl _start + ret + +// CHECK: Disassembly of section .text_low: +// CHECK-NEXT: _start: +// CHECK-NEXT: 2000: 02 00 00 94 bl #8 +// CHECK-NEXT: 2004: c0 03 5f d6 ret +// CHECK: __AArch64AbsLongThunk_high_target: +// CHECK-NEXT: 2008: 50 00 00 58 ldr x16, #8 +// CHECK-NEXT: 200c: 00 02 1f d6 br x16 +// CHECK: $d: +// CHECK-NEXT: 2010: 00 20 00 08 .word 0x08002000 +// CHECK-NEXT: 2014: 00 00 00 00 .word 0x00000000 +// CHECK: Disassembly of section .text_high: +// CHECK-NEXT: high_target: +// CHECK-NEXT: 8002000: 00 00 00 96 bl #-134217728 +// CHECK-NEXT: 8002004: c0 03 5f d6 ret Index: test/ELF/aarch64-thunk-section-location.s =================================================================== --- /dev/null +++ test/ELF/aarch64-thunk-section-location.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t +// RUN: ld.lld %t -o %t2 2>&1 +// RUN: llvm-objdump -d -start-address=134086664 -stop-address=134086676 -triple=aarch64-linux-gnu %t2 | FileCheck %s +// REQUIRES: aarch64 + +// Check that the range extension thunks are dumped close to the aarch64 branch +// range of 128 MiB + .section .text.1, "ax", %progbits + .balign 0x1000 + .globl _start +_start: + bl high_target + ret + + .section .text.2, "ax", %progbits + .space 0x2000000 + + .section .text.2, "ax", %progbits + .space 0x2000000 + + .section .text.3, "ax", %progbits + .space 0x2000000 + + .section .text.4, "ax", %progbits + .space 0x2000000 - 0x40000 + + .section .text.5, "ax", %progbits + .space 0x40000 + + .section .text.6, "ax", %progbits + .balign 0x1000 + + .globl high_target + .type high_target, %function +high_target: + ret + +// CHECK: __AArch64AbsLongThunk_high_target: +// 
CHECK-NEXT: 7fe0008: 50 00 00 58 ldr x16, #8 +// CHECK-NEXT: 7fe000c: 00 02 1f d6 br x16 +// CHECK: $d: +// CHECK-NEXT: 7fe0010: 00 10 02 08 .word 0x08021000