Index: lld/ELF/Thunks.cpp =================================================================== --- lld/ELF/Thunks.cpp +++ lld/ELF/Thunks.cpp @@ -44,21 +44,45 @@ namespace { -// AArch64 long range Thunks -class AArch64ABSLongThunk final : public Thunk { +// Base class for AArch64 thunks. +// +// An AArch64 thunk may be either short or long. A short thunk is simply a branch +// (B) instruction, and it may be used to call AArch64 functions when the distance +// from the thunk to the target is less than 128MB. Long thunks can branch to any +// virtual address and they are implemented in the derived classes. This class +// tries to create a short thunk if the target is in range, otherwise it creates +// a long thunk. +class AArch64Thunk : public Thunk { public: - AArch64ABSLongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} - uint32_t size() override { return 16; } + bool getMayUseShortThunk(); + AArch64Thunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} void writeTo(uint8_t *buf) override; + +private: + bool mayUseShortThunk = true; + virtual void writeLong(uint8_t *buf) = 0; +}; + +// AArch64 long range Thunks. +class AArch64ABSLongThunk final : public AArch64Thunk { +public: + AArch64ABSLongThunk(Symbol &dest, int64_t addend) + : AArch64Thunk(dest, addend) {} + uint32_t size() override { return getMayUseShortThunk() ? 4 : 16; } void addSymbols(ThunkSection &isec) override; + +private: + virtual void writeLong(uint8_t *buf) override; }; -class AArch64ADRPThunk final : public Thunk { +class AArch64ADRPThunk final : public AArch64Thunk { public: - AArch64ADRPThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} - uint32_t size() override { return 12; } - void writeTo(uint8_t *buf) override; + AArch64ADRPThunk(Symbol &dest, int64_t addend) : AArch64Thunk(dest, addend) {} + uint32_t size() override { return getMayUseShortThunk() ? 
4 : 12; } void addSymbols(ThunkSection &isec) override; + +private: + virtual void writeLong(uint8_t *buf) override; }; // Base class for ARM thunks. @@ -461,14 +485,34 @@ offset = newOffset; } -// AArch64 long range Thunks - +// AArch64 Thunk base class. static uint64_t getAArch64ThunkDestVA(const Symbol &s, int64_t a) { uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA(a); return v; } -void AArch64ABSLongThunk::writeTo(uint8_t *buf) { +bool AArch64Thunk::getMayUseShortThunk() { + if (!mayUseShortThunk) + return false; + uint64_t s = getAArch64ThunkDestVA(destination, addend); + uint64_t p = getThunkTargetSym()->getVA(); + mayUseShortThunk = llvm::isInt<28>(s - p); + return mayUseShortThunk; +} + +void AArch64Thunk::writeTo(uint8_t *buf) { + if (!getMayUseShortThunk()) { + writeLong(buf); + return; + } + uint64_t s = getAArch64ThunkDestVA(destination, addend); + uint64_t p = getThunkTargetSym()->getVA(); + write32(buf, 0x14000000); // b S + target->relocateNoSym(buf, R_AARCH64_CALL26, s - p); +} + +// AArch64 long range Thunks. +void AArch64ABSLongThunk::writeLong(uint8_t *buf) { const uint8_t data[] = { 0x50, 0x00, 0x00, 0x58, // ldr x16, L0 0x00, 0x02, 0x1f, 0xd6, // br x16 @@ -484,7 +528,8 @@ addSymbol(saver().save("__AArch64AbsLongThunk_" + destination.getName()), STT_FUNC, 0, isec); addSymbol("$x", STT_NOTYPE, 0, isec); - addSymbol("$d", STT_NOTYPE, 8, isec); + if (!getMayUseShortThunk()) + addSymbol("$d", STT_NOTYPE, 8, isec); } // This Thunk has a maximum range of 4Gb, this is sufficient for all programs @@ -492,7 +537,7 @@ // clang and gcc do not support the large code model for position independent // code so it is safe to use this for position independent thunks without // worrying about the destination being more than 4Gb away. 
-void AArch64ADRPThunk::writeTo(uint8_t *buf) { +void AArch64ADRPThunk::writeLong(uint8_t *buf) { const uint8_t data[] = { 0x10, 0x00, 0x00, 0x90, // adrp x16, Dest R_AARCH64_ADR_PREL_PG_HI21(Dest) 0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest) Index: lld/test/ELF/aarch64-cortex-a53-843419-thunk.s =================================================================== --- lld/test/ELF/aarch64-cortex-a53-843419-thunk.s +++ lld/test/ELF/aarch64-cortex-a53-843419-thunk.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o // RUN: echo "SECTIONS { \ // RUN: .text1 0x10000 : { *(.text.01) *(.text.02) *(.text.03) } \ -// RUN: .text2 0x8010000 : { *(.text.04) } } " > %t.script +// RUN: .text2 0x10010000 : { *(.text.04) } } " > %t.script // RUN: ld.lld --script %t.script -fix-cortex-a53-843419 -verbose %t.o -o %t2 \ // RUN: 2>&1 | FileCheck -check-prefix=CHECK-PRINT %s // RUN: llvm-objdump --no-print-imm-hex --no-show-raw-insn --triple=aarch64-linux-gnu -d %t2 | FileCheck %s @@ -39,7 +39,7 @@ // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 10FF8 in unpatched output. 
// CHECK: 0000000000010ff8 : -// CHECK-NEXT: adrp x0, 0x8010000 +// CHECK-NEXT: adrp x0, 0x10010000 // CHECK-NEXT: ldr x1, [x1] // CHECK-NEXT: b 0x11008 // CHECK-NEXT: ret Index: lld/test/ELF/aarch64-long-thunk-converge.s =================================================================== --- /dev/null +++ lld/test/ELF/aarch64-long-thunk-converge.s @@ -0,0 +1,53 @@ +// REQUIRES: aarch64 +// RUN: rm -rf %t && split-file %s %t +// RUN: llvm-mc -triple aarch64-none-elf -filetype=obj -o %t/a.o %t/a.s +// RUN: ld.lld --shared %t/a.o -T %t/a.t -o %t/a +// RUN: llvm-objdump --no-show-raw-insn -d --start-address=0x7001004 --stop-address=0x7001010 %t/a | FileCheck %s +// RUN: llvm-objdump --no-show-raw-insn -d --start-address=0x11001008 --stop-address=0x11001014 %t/a | FileCheck --check-prefix=CHECK2 %s + +/// This test shows that once a long-thunk has been generated it +/// cannot be written as a short thunk. This prevents oscillations +/// in size that can prevent convergence. +/// In pass 1 the distance between bl foo and foo: requires a long-range thunk +/// In pass 2 the long range thunk also inserted in pass 1 before foo: +/// shortens the distance such that a short thunk is possible. We expect that +/// the long thunk written out on pass 1 remains long. 
+ +// CHECK: <__AArch64ADRPThunk_>: +// CHECK-NEXT: 7001004: adrp x16, 0x11001000 +// CHECK-NEXT: add x16, x16, #0x14 +// CHECK-NEXT: br x16 + +// CHECK2: <__AArch64ADRPThunk_>: +// CHECK2-NEXT: 11001008: adrp x16, 0x9001000 +// CHECK2-NEXT: add x16, x16, #0x10 +// CHECK2-NEXT: br x16 + + +//--- a.t +SECTIONS { + .foo 0x1000 : { *(.foo.*) } + .bar 0x11001000 : { *(.bar.*) } +} + +//--- a.s +.section .foo.1,"ax",%progbits,unique,1 +bl bar + +.section .foo.2,"ax",%progbits,unique,1 +.space 0x7000000 + +.section .foo.3,"ax",%progbits,unique,1 +.space 0x2000000 + +.section .foo.4,"ax",%progbits,unique,1 +foo: +nop + +.section .bar.1,"ax",%progbits,unique,1 +nop +nop +.section .bar.2,"ax",%progbits,unique,1 +bar: +bl foo +.space 0x8000000 Index: lld/test/ELF/aarch64-thunk-reuse2.s =================================================================== --- lld/test/ELF/aarch64-thunk-reuse2.s +++ lld/test/ELF/aarch64-thunk-reuse2.s @@ -10,17 +10,15 @@ # CHECK: : # CHECK-NEXT: 10700: ret # CHECK: <__AArch64ADRPThunk_>: -# CHECK-NEXT: 10704: adrp x16, 0x10000 -# CHECK-NEXT: add x16, x16, #1792 -# CHECK-NEXT: br x16 +# CHECK-NEXT: 10704: b 0x10700 # CHECK-EMPTY: # CHECK: <__AArch64ADRPThunk_>: -# CHECK-NEXT: 8010710: adrp x16, 0x10000 +# CHECK-NEXT: 8010708: adrp x16, 0x10000 # CHECK-NEXT: add x16, x16, #1792 -# CHECK-NEXT: br x16 +# CHECK-NEXT: br x16 # CHECK-LABEL: : -# CHECK-NEXT: 801071c: bl 0x8010710 <__AArch64ADRPThunk_> -# CHECK-NEXT: b 0x8010710 <__AArch64ADRPThunk_> +# CHECK-NEXT: 8010714: bl 0x8010708 <__AArch64ADRPThunk_> +# CHECK-NEXT: b 0x8010708 <__AArch64ADRPThunk_> .section .text._start, "ax", %progbits .globl _start Index: lld/test/ELF/aarch64-thunk-script.s =================================================================== --- lld/test/ELF/aarch64-thunk-script.s +++ lld/test/ELF/aarch64-thunk-script.s @@ -30,20 +30,12 @@ // CHECK-EMPTY: // CHECK-NEXT: <_start>: // CHECK-NEXT: 2000: bl 0x200c <__AArch64AbsLongThunk_high_target> -// CHECK-NEXT: 2004: bl 0x201c 
<__AArch64AbsLongThunk_> +// CHECK-NEXT: 2004: bl 0x2010 <__AArch64AbsLongThunk_> // CHECK-NEXT: ret // CHECK: <__AArch64AbsLongThunk_high_target>: -// CHECK-NEXT: 200c: ldr x16, 0x2014 -// CHECK-NEXT: br x16 -// CHECK: <$d>: -// CHECK-NEXT: 2014: 00 20 00 08 .word 0x08002000 -// CHECK-NEXT: 2018: 00 00 00 00 .word 0x00000000 -// CHECK: <__AArch64AbsLongThunk_>: -// CHECK-NEXT: 201c: ldr x16, 0x2024 -// CHECK-NEXT: 2020: br x16 -// CHECK: <$d>: -// CHECK-NEXT: 2024: 04 20 00 08 .word 0x08002004 -// CHECK-NEXT: 2028: 00 00 00 00 .word 0x00000000 +// CHECK-NEXT: 200c: b 0x8002000 +// CHECK: <__AArch64AbsLongThunk_>: +// CHECK-NEXT: 2010: b 0x8002004 // CHECK: Disassembly of section .text_high: // CHECK-EMPTY: // CHECK-NEXT: : @@ -56,10 +48,8 @@ /// Local thunk symbols. // NM-NEXT: t __AArch64AbsLongThunk_high_target // NM-NEXT: t $x -// NM-NEXT: t $d // NM-NEXT: t __AArch64AbsLongThunk_{{$}} // NM-NEXT: t $x -// NM-NEXT: t $d /// Global symbols. // NM-NEXT: T _start // NM-NEXT: T high_target Index: lld/test/ELF/aarch64-thunk-section-location.s =================================================================== --- lld/test/ELF/aarch64-thunk-section-location.s +++ lld/test/ELF/aarch64-thunk-section-location.s @@ -1,7 +1,7 @@ // REQUIRES: aarch64 // RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t.o // RUN: ld.lld %t.o -o %t -// RUN: llvm-objdump -d --start-address=0x81d1008 --stop-address=0x81d1014 --no-show-raw-insn %t | FileCheck %s +// RUN: llvm-objdump -d --start-address=0x81d1008 --stop-address=0x81d100c --no-show-raw-insn %t | FileCheck %s // RUN: rm %t.o %t // Check that the range extension thunks are dumped close to the aarch64 branch // range of 128 MiB @@ -36,7 +36,4 @@ ret // CHECK: <__AArch64AbsLongThunk_high_target>: -// CHECK-NEXT: 81d1008: ldr x16, 0x81d1010 -// CHECK-NEXT: 81d100c: br x16 -// CHECK: <$d>: -// CHECK-NEXT: 81d1010: 00 20 21 08 .word 0x08212000 +// CHECK-NEXT: 81d1008: b 0x8212000