Index: lld/ELF/Relocations.cpp =================================================================== --- lld/ELF/Relocations.cpp +++ lld/ELF/Relocations.cpp @@ -1744,6 +1744,44 @@ uint64_t off) { auto *ts = make(os, off); ts->partition = os->partition; + if ((config->fixCortexA53Errata843419 || config->fixCortexA8) && + !isd->sections.empty()) { + // The errata fixes are sensitive to addresses modulo 4 KiB. When we add + // thunks we disturb the base addresses of sections placed after the thunks + // this makes patches we have generated redundant, and may cause us to + // generate more patches as different instructions are now in sensitive + // locations. When we generate more patches we may force more branches to + // go out of range, causing more thunks to be generated. In pathological + // cases this can cause the address dependent content pass not to converge. + // We fix this by rounding up the size of the ThunkSection to 4KiB, this + // limits the insertion of a ThunkSection on the addresses modulo 4 KiB, + // which means that adding Thunks to the section does not invalidate + // errata patches for following code. + uint64_t isdBase = isd->sections.front()->outSecOff; + uint64_t isdLimit = + isd->sections.back()->outSecOff + isd->sections.back()->getSize(); + // Rounding up the size to 4KiB has consequences for code-size and can + // trip up linker script defined assertions. For example the linux kernel + // has an assertion that what LLD represents as an InputSectionDescription + // does not exceed 4 KiB even if the overall OutputSection is > 128 MiB. + // We use the heuristic of rounding up the size when both of the following + // conditions are true: + // 1.) There may be more code in the OutputSection after the ThunkSection + // is inserted so inserting the thunks may affect the addresses of + // that code. + // 2.) The InputSectionDescription is larger than 4 KiB. 
This will prevent + // any assertion failures that an InputSectionDescription is < 4 KiB + // in size. + if (isdBase + target->getThunkSectionSpacing() < os->size && + isdLimit - isdBase > 4096) { + // ThunkSections have wordsize alignment but AArch64 only needs 4, we set + // to 4 so that if we start at an address that is 4 (modulo 8) we don't + // incur 4 bytes of padding, which is more difficult to account for when + // rounding the size up. + ts->alignment = 4; + ts->roundUpSizeForErrata = true; + } + } isd->thunkSections.push_back({ts, pass}); return ts; } Index: lld/ELF/SyntheticSections.h =================================================================== --- lld/ELF/SyntheticSections.h +++ lld/ELF/SyntheticSections.h @@ -1069,6 +1069,10 @@ InputSection *getTargetInputSection() const; bool assignOffsets(); + // When true, round up reported size of section to a multiple of 4 KiB. See comment + // in addThunkSection() for more details. + bool roundUpSizeForErrata = false; + private: std::vector thunks; size_t size = 0; Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -3457,13 +3457,8 @@ this->outSecOff = off; } -// When the errata patching is on, we round the size up to a 4 KiB -// boundary. This limits the effect that adding Thunks has on the addresses -// of the program modulo 4 KiB. As the errata patching is sensitive to address -// modulo 4 KiB this can prevent further patches from being needed due to -// Thunk insertion. 
size_t ThunkSection::getSize() const { - if (config->fixCortexA53Errata843419 || config->fixCortexA8) + if (roundUpSizeForErrata) return alignTo(size, 4096); return size; } Index: lld/test/ELF/aarch64-cortex-a53-843419-thunk-align.s =================================================================== --- /dev/null +++ lld/test/ELF/aarch64-cortex-a53-843419-thunk-align.s @@ -0,0 +1,76 @@ +// REQUIRES: aarch64 +// RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o +// RUN: echo "SECTIONS { \ +// RUN: .text 0x10000 : { \ +// RUN: *(.text.01) ; \ +// RUN: . += 0x8000000 ; \ +// RUN: *(.text.02) } \ +// RUN: .foo : { *(.foo_sec) } } " > %t.script +// RUN: ld.lld -pie --fix-cortex-a53-843419 --script=%t.script %t.o -o %t2 +// RUN: llvm-objdump --no-show-raw-insn -triple=aarch64-linux-gnu -d %t2 | FileCheck %s + +/// %t2 is > 128 Megabytes, so delete it early. +// RUN: rm %t2 + +/// Test case that for an OutputSection larger than the ThunkSectionSpacing +/// --fix-cortex-a53-843419 will cause the size of the ThunkSection to be +/// rounded up to the nearest 4KiB + + .section .text.01, "ax", %progbits + .balign 4096 + .globl _start + .type _start, %function +_start: +/// Range extension thunk needed, due to linker script + bl far_away + .space 4096 - 12 + +/// Erratum sequence + .globl t3_ff8_ldr + .type t3_ff8_ldr, %function +t3_ff8_ldr: + adrp x0, dat + ldr x1, [x1, #0] + ldr x0, [x0, :lo12:dat] + ret + +/// force address after nop to 4 (modulo 8) so we can check 4-byte alignment +/// of thunks. 
+ nop + +/// Expect thunk and patch to be inserted here +// CHECK: 000000000001100c __AArch64ADRPThunk_far_away: +// CHECK-NEXT: 1100c: adrp x16, #134221824 +// CHECK-NEXT: add x16, x16, #20 +// CHECK-NEXT: br x16 +// CHECK: 000000000001200c __CortexA53843419_11000: +// CHECK-NEXT: 1200c: ldr x0, [x0, #168] +// CHECK-NEXT: b #-4108 + + .section .text.02, "ax", %progbits + .globl far_away + .type far_away, function +far_away: + bl _start + +/// Expect thunk for _start not to have size rounded up to 4KiB as it is at +/// the end of the OutputSection +// CHECK: 0000000008012014 far_away: +// CHECK-NEXT: 8012014: bl #4 +// CHECK: 0000000008012018 __AArch64ADRPThunk__start: +// CHECK-NEXT: 8012018: adrp x16, #-134225920 +// CHECK-NEXT: add x16, x16, #0 +// CHECK-NEXT: br x16 +// CHECK: 0000000008012024 foo: +// CHECK-NEXT: 8012024: ret + .section .foo_sec, "ax", %progbits + .globl foo + .type foo, function +foo: + ret + + + .section .data + .balign 8 + .globl dat +dat: .quad 0 Index: lld/test/ELF/aarch64-cortex-a53-843419-thunk.s =================================================================== --- lld/test/ELF/aarch64-cortex-a53-843419-thunk.s +++ lld/test/ELF/aarch64-cortex-a53-843419-thunk.s @@ -23,11 +23,9 @@ _start: bl far_away /// Thunk to far_away, size 16-bytes goes here. - /// Thunk Section with patch enabled has its size rounded up to 4KiB - /// this leaves the address of following sections the same modulo 4 KiB .section .text.02, "ax", %progbits - .space 4096 - 12 + .space 4096 - 32 /// Erratum sequence will only line up at address 0 modulo 0xffc when /// Thunk is inserted. @@ -40,13 +38,13 @@ ldr x0, [x0, :got_lo12:dat] ret -// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 11FFC in unpatched output. -// CHECK: 0000000000011ffc t3_ff8_ldr: -// CHECK-NEXT: adrp x0, #134213632 +// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 10FF8 in unpatched output. 
+// CHECK: 0000000000010ff8 t3_ff8_ldr: +// CHECK-NEXT: adrp x0, #134217728 // CHECK-NEXT: ldr x1, [x1] // CHECK-NEXT: b #8 // CHECK-NEXT: ret -// CHECK: 000000000001200c __CortexA53843419_12004: +// CHECK: 0000000000011008 __CortexA53843419_11000: // CHECK-NEXT: ldr x0, [x0, #8] // CHECK-NEXT: b #-8 .section .text.04, "ax", %progbits Index: lld/test/ELF/arm-fix-cortex-a8-thunk-align.s =================================================================== --- /dev/null +++ lld/test/ELF/arm-fix-cortex-a8-thunk-align.s @@ -0,0 +1,41 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o +// RUN: ld.lld --fix-cortex-a8 --shared %t.o -o %t2 +// RUN: llvm-objdump -d --no-show-raw-insn %t2 | FileCheck %s + +/// Test case that for an OutputSection larger than the ThunkSectionSpacing +/// --fix-cortex-a8 will cause the size of the ThunkSection to be rounded up to +/// the nearest 4KiB + .thumb + + .section .text.01, "ax", %progbits + .balign 4096 + .globl _start + .type _start, %function +_start: + /// state change thunk required + b.w arm_func +thumb_target: + .space 4096 - 10 + /// erratum patch needed + nop.w + b.w thumb_target + +/// Expect thunk and patch to be inserted here +// CHECK: 00003004 __ThumbV7PILongThunk_arm_func: +// CHECK-NEXT: 3004: movw r12, #4088 +// CHECK-NEXT: movt r12, #256 +// CHECK-NEXT: add r12, pc +// CHECK-NEXT: bx r12 +// CHECK: 00004004 __CortexA8657417_2FFE: +// CHECK-NEXT: 4004: b.w #-8196 + .section .text.02 + /// Take us over thunk section spacing + .space 16 * 1024 * 1024 + + .section .text.03, "ax", %progbits + .arm + .balign 4 + .type arm_func, %function +arm_func: + bx lr Index: lld/test/ELF/arm-fix-cortex-a8-thunk.s =================================================================== --- lld/test/ELF/arm-fix-cortex-a8-thunk.s +++ lld/test/ELF/arm-fix-cortex-a8-thunk.s @@ -1,7 +1,7 @@ // REQUIRES: arm // RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf 
--arm-add-build-attributes %s -o %t.o // RUN: echo "SECTIONS { \ -// RUN: .text0 0x01200a : { *(.text.00) } \ +// RUN: .text0 0x011006 : { *(.text.00) } \ // RUN: .text1 0x110000 : { *(.text.01) *(.text.02) *(.text.03) \ // RUN: *(.text.04) } \ // RUN: .text2 0x210000 : { *(.text.05) } } " > %t.script @@ -32,7 +32,7 @@ // CHECK-NEXT: bx r12 .section .text.02, "ax", %progbits - .space 4096 - 10 + .space 4096 - 22 .section .text.03, "ax", %progbits .thumb_func @@ -43,21 +43,21 @@ bl target /// Expect erratum patch inserted here -// CHECK: 00111ffa target: -// CHECK-NEXT: 111ffa: nop.w +// CHECK: 00110ffa target: +// CHECK-NEXT: 110ffa: nop.w // CHECK-NEXT: bl #2 -// CHECK: 00112004 __CortexA8657417_111FFE: -// CHECK-NEXT: 112004: b.w #-14 +// CHECK: 00111004 __CortexA8657417_110FFE: +// CHECK-NEXT: 111004: b.w #-14 /// Expect range extension thunk here. -// CHECK: 00112008 __ThumbV7PILongThunk_early: -// CHECK-NEXT: 112008: b.w #-1048578 +// CHECK: 00111008 __ThumbV7PILongThunk_early: +// CHECK-NEXT: 111008: b.w #-1048582 .section .text.04, "ax", %progbits /// The erratum patch will push this branch out of range, so another /// range extension thunk will be needed. beq.w early -// CHECK: 113008: beq.w #-4100 +// CHECK: 11100c: beq.w #-8 .section .text.05, "ax", %progbits .arm