Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -153,6 +153,8 @@ ThunkSection *addThunkSection(OutputSection *OS, std::vector *, uint64_t Off); + bool normalizeExistingThunk(Relocation &Rel, uint64_t Src); + // Record all the available Thunks for a Symbol llvm::DenseMap> ThunkedSymbols; @@ -166,12 +168,18 @@ // The Mips LA25 Thunk is an example of an inline ThunkSection. llvm::DenseMap ThunkedSections; - // All the ThunkSections that we have created, organised by OutputSection - // will contain a mix of ThunkSections that have been created this pass, and - // ThunkSections that have been merged into the OutputSection on previous - // passes + // All the ThunkSections that we have created, organised by + // InputSectionDescription. This will contain a mix of ThunkSections that + // have been created in the current pass and ThunkSections that have been + // created in previous passes and hence have already been merged into + // InputSectionRanges. std::map *, std::vector> ThunkSections; + + // All the ThunkSections that we have created in the current pass and will + // need to insert at the end of the pass. This is a subset of ThunkSections. + std::map *, std::vector> + NewThunkSections; }; // Return a int64_t to make sure we get the sign extension out of the way as Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -969,19 +969,113 @@ scanRelocs(S, S.rels()); } +// Thunk Implementation +// +// Thunks (sometimes called stubs, veneers or branch islands) are small pieces +// of code that the linker inserts inbetween a caller and a callee. The thunks +// are added at link time rather than compile time as the decision on whether +// a thunk is needed, such as the caller and callee being out of range, can only +// be made at link time. 
+// +// It is straightforward to tell given the current state of the program when a +// thunk is needed for a particular call. The more difficult part is that +// the thunk needs to be placed in the program such that the caller can reach +// the thunk and the thunk can reach the callee; furthermore, adding thunks to +// the program alters addresses, which can mean more thunks etc. +// +// In lld we have a synthetic ThunkSection that can hold many Thunks. +// The decision to have a ThunkSection act as a container means that we can +// more easily handle the most common case of a single block of contiguous +// Thunks by inserting just a single ThunkSection. +// +// The implementation of Thunks in lld is split across these areas: +// Relocations.cpp : Framework for creating and placing thunks +// Thunks.cpp : The code generated for each supported thunk +// Target.cpp : Target specific hooks that the framework uses to decide when +// a thunk is used +// Synthetic.cpp : Implementation of ThunkSection +// Writer.cpp : Iteratively call framework until no more Thunks added +// +// Thunk placement requirements: +// Mips LA25 thunks. These must be placed immediately before the callee section. +// We can assume that the caller is in range of the Thunk. These are modelled +// by Thunks that return the section they must precede with +// getTargetInputSection(). +// +// ARM interworking and range extension thunks. These thunks must be placed +// within range of the caller. All implemented ARM thunks can always reach the +// callee as they use an indirect jump via a register that has no range +// restrictions. +// +// Thunk placement algorithm: +// For Mips LA25 ThunkSections the placement is explicit: it has to be before +// getTargetInputSection(). +// +// For thunks that must be placed within range of the caller there are many +// possible choices given that the maximum range from the caller is usually +// much larger than the average InputSection size. 
Desirable properties include: +// - Maximize reuse of thunks by multiple callers +// - Minimize number of ThunkSections to simplify insertion +// - Handle impact of already added Thunks on addresses +// - Simple to understand and implement +// +// In lld for the first pass, we pre-create one or more ThunkSections per +// InputSectionDescription at target-specific intervals. A ThunkSection is +// placed so that the estimated end of the ThunkSection is within range of the +// start of the InputSectionDescription or the previous ThunkSection. For +// example: +// InputSectionDescription +// Section 0 +// ... +// Section N +// ThunkSection 0 +// Section N + 1 +// ... +// Section N + K +// ThunkSection 1 +// +// The intention is that we can add a Thunk to a ThunkSection that is well +// spaced enough to service a number of callers without having to do a lot +// of work. An important principle is that it is not an error if a Thunk cannot +// be placed in a pre-created ThunkSection; when this happens we create a new +// ThunkSection placed next to the caller. This allows us to handle the vast +// majority of thunks simply, but also handle rare cases where the branch range +// is smaller than the target-specific spacing. +// +// The algorithm is expected to create all the thunks that are needed in a +// single pass, with a small number of programs needing a second pass due to +// the insertion of thunks in the first pass increasing the offset between +// callers and callees that were only just in range. +// +// A consequence of allowing new ThunkSections to be created outside of the +// pre-created ThunkSections is that in rare cases calls to Thunks that were in +// range in pass K are out of range in some pass > K due to the insertion of +// more Thunks in between the caller and callee. When this happens we retarget +// the relocation back to the original target and create another Thunk. 
+ +// Remove ThunkSections that are empty, this should only be the initial set +// precreated on pass 0. +static void +removeEmptyThunkSections(std::map *, + std::vector> &ISRToThunkMap) { + for (auto &KV : ISRToThunkMap) { + std::vector &Thunks = KV.second; + // Remove ThunkSections that contain no Thunks + llvm::erase_if(Thunks, + [](const ThunkSection *TS) { return TS->getSize() == 0; }); + } +} + // Insert the Thunks that we have generated this pass into the designated // InputSectionDescription vectors. void ThunkCreator::mergeThunks() { - for (auto &KV : ThunkSections) { + removeEmptyThunkSections(ThunkSections); + removeEmptyThunkSections(NewThunkSections); + + for (auto &KV : NewThunkSections) { std::vector *ISR = KV.first; std::vector &Thunks = KV.second; - // Remove ThunkSections that contain no Thunks. - llvm::erase_if(Thunks, - [](const ThunkSection *TS) { return TS->getSize() == 0; }); - if (Thunks.empty()) - continue; - // Order Thunks in ascending OutSecOff. auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) { return A->OutSecOff < B->OutSecOff; @@ -1110,6 +1204,7 @@ uint64_t Off) { auto *TS = make(OS, Off); ThunkSections[ISR].push_back(TS); + NewThunkSections[ISR].push_back(TS); return TS; } @@ -1143,20 +1238,54 @@ } } +// Return true if the relocation target is an in range Thunk. +// Return false if the relocation is not to a Thunk. If the relocation target +// was originally to a Thunk, but is no longer in range we revert the +// relocation back to its original non-Thunk target. +bool ThunkCreator::normalizeExistingThunk(Relocation &Rel, uint64_t Src) { + if (Thunk *ET = Thunks.lookup(Rel.Sym)) { + if (Target->inBranchRange(Rel.Type, Src, Rel.Sym->getVA())) + return true; + Rel.Sym = &ET->Destination; + if (Rel.Sym->isInPlt()) + Rel.Expr = toPlt(Rel.Expr); + } + return false; +} + // Process all relocations from the InputSections that have been assigned -// to OutputSections and redirect through Thunks if needed. 
+// to OutputSections and redirect through Thunks if needed. The function should +// be called iteratively until it returns false. +// +// PreConditions: +// All InputSections that may need a Thunk are reachable from +// OutputSectionCommands. +// +// All OutputSections have an address and all InputSections have an offset +// within the OutputSection. // -// createThunks must be called after scanRelocs has created the Relocations for -// each InputSection. It must be called before the static symbol table is -// finalized. If any Thunks are added to an OutputSection the output section -// offsets of the InputSections will change. +// The offsets between caller (relocation place) and callee +// (relocation target) will not be modified outside of createThunks(). // -// FIXME: Initial support for RangeThunks; only one pass supported. +// PostConditions: +// If return value is true then ThunkSections have been inserted into +// OutputSections. All relocations that needed a Thunk based on the information +// available to createThunks() on entry have been redirected to a Thunk. Note +// that adding Thunks changes offsets between caller and callee so more Thunks +// may be required. +// +// If return value is false then no more Thunks are needed, and createThunks has +// made no changes. If the target requires range extension thunks, currently +// ARM, then any future change in offset between caller and callee risks a +// relocation out of range error. bool ThunkCreator::createThunks(ArrayRef OutputSections) { - if (Pass > 0) - ThunkSections.clear(); - else if (Target->ThunkSectionSpacing) + bool AddressesChanged = false; + if (Pass == 0 && Target->ThunkSectionSpacing) createInitialThunkSections(OutputSections); + else if (Pass == 10) + // With Thunk Size much smaller than branch range we expect to + // converge quickly; if we get to 10 something has gone wrong. + fatal("thunk creation not converged"); // Create all the Thunks and insert them into synthetic ThunkSections. 
The // ThunkSections are later inserted back into the OutputSection. @@ -1168,15 +1297,22 @@ OutputSections, [&](OutputSection *OS, std::vector *ISR) { for (InputSection *IS : *ISR) for (Relocation &Rel : IS->Relocations) { - SymbolBody &Body = *Rel.Sym; uint64_t Src = OS->Addr + IS->OutSecOff + Rel.Offset; - if (Thunks.find(&Body) != Thunks.end() || - !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Src, Body)) + + // If we are a relocation to an existing Thunk, check if it is + // still in range. If not then Rel will be altered to point to its + // original target so another Thunk can be generated. + if (Pass > 0 && normalizeExistingThunk(Rel, Src)) + continue; + + if (!Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Src, + *Rel.Sym)) continue; Thunk *T; bool IsNew; - std::tie(T, IsNew) = getThunk(Body, Rel.Type, Src); + std::tie(T, IsNew) = getThunk(*Rel.Sym, Rel.Type, Src); if (IsNew) { + AddressesChanged = true; // Find or create a ThunkSection for the new Thunk ThunkSection *TS; if (auto *TIS = T->getTargetInputSection()) @@ -1194,7 +1330,8 @@ // Merge all created synthetic ThunkSections back into OutputSection mergeThunks(); ++Pass; - return !ThunkSections.empty(); + NewThunkSections.clear(); + return AddressesChanged; } template void elf::scanRelocations(InputSectionBase &); Index: ELF/Thunks.h =================================================================== --- ELF/Thunks.h +++ ELF/Thunks.h @@ -27,7 +27,7 @@ // Thunks are assigned to synthetic ThunkSections class Thunk { public: - Thunk(const SymbolBody &Destination); + Thunk(SymbolBody &Destination); virtual ~Thunk(); virtual uint32_t size() const { return 0; } @@ -47,9 +47,9 @@ // The alignment requirement for this Thunk, defaults to the size of the // typical code section alignment. 
- const SymbolBody &Destination; + SymbolBody &Destination; SymbolBody *ThunkSym; - uint64_t Offset; + uint64_t Offset = 0; uint32_t Alignment = 4; }; Index: ELF/Thunks.cpp =================================================================== --- ELF/Thunks.cpp +++ ELF/Thunks.cpp @@ -52,7 +52,7 @@ // Source State, TargetState, Target Requirement, ABS or PI, Range class ARMV7ABSLongThunk final : public Thunk { public: - ARMV7ABSLongThunk(const SymbolBody &Dest) : Thunk(Dest) {} + ARMV7ABSLongThunk(SymbolBody &Dest) : Thunk(Dest) {} uint32_t size() const override { return 12; } void writeTo(uint8_t *Buf, ThunkSection &IS) const override; @@ -62,7 +62,7 @@ class ARMV7PILongThunk final : public Thunk { public: - ARMV7PILongThunk(const SymbolBody &Dest) : Thunk(Dest) {} + ARMV7PILongThunk(SymbolBody &Dest) : Thunk(Dest) {} uint32_t size() const override { return 16; } void writeTo(uint8_t *Buf, ThunkSection &IS) const override; @@ -72,7 +72,7 @@ class ThumbV7ABSLongThunk final : public Thunk { public: - ThumbV7ABSLongThunk(const SymbolBody &Dest) : Thunk(Dest) { Alignment = 2; } + ThumbV7ABSLongThunk(SymbolBody &Dest) : Thunk(Dest) { Alignment = 2; } uint32_t size() const override { return 10; } void writeTo(uint8_t *Buf, ThunkSection &IS) const override; @@ -82,7 +82,7 @@ class ThumbV7PILongThunk final : public Thunk { public: - ThumbV7PILongThunk(const SymbolBody &Dest) : Thunk(Dest) { Alignment = 2; } + ThumbV7PILongThunk(SymbolBody &Dest) : Thunk(Dest) { Alignment = 2; } uint32_t size() const override { return 12; } void writeTo(uint8_t *Buf, ThunkSection &IS) const override; @@ -93,7 +93,7 @@ // MIPS LA25 thunk class MipsThunk final : public Thunk { public: - MipsThunk(const SymbolBody &Dest) : Thunk(Dest) {} + MipsThunk(SymbolBody &Dest) : Thunk(Dest) {} uint32_t size() const override { return 16; } void writeTo(uint8_t *Buf, ThunkSection &IS) const override; @@ -233,7 +233,7 @@ return dyn_cast(DR->Section); } -Thunk::Thunk(const SymbolBody &D) : Destination(D), 
Offset(0) {} +Thunk::Thunk(SymbolBody &D) : Destination(D), Offset(0) {} Thunk::~Thunk() = default; Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -1391,12 +1391,10 @@ if (Target->NeedsThunks) { ThunkCreator TC; Script->assignAddresses(); - if (TC.createThunks(OutputSections)) { + while (TC.createThunks(OutputSections)) { applySynthetic({InX::MipsGot}, [](SyntheticSection *SS) { SS->updateAllocSize(); }); Script->assignAddresses(); - if (TC.createThunks(OutputSections)) - fatal("All non-range thunks should be created in first call"); } } Index: test/ELF/arm-thumb-condbranch-thunk.s =================================================================== --- /dev/null +++ test/ELF/arm-thumb-condbranch-thunk.s @@ -0,0 +1,117 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=thumbv7a-none-linux-gnueabi %s -o %t +// RUN: ld.lld %t -o %t2 2>&1 +// The output file is large, most of it zeroes. We dissassemble only the +// parts we need to speed up the test and avoid a large output file +// RUN: llvm-objdump -d %t2 -start-address=524288 -stop-address=524316 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK1 %s +// RUN: llvm-objdump -d %t2 -start-address=1048576 -stop-address=1048584 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK2 %s +// RUN: llvm-objdump -d %t2 -start-address=1572864 -stop-address=1572872 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK3 %s +// RUN: llvm-objdump -d %t2 -start-address=5242884 -stop-address=5242894 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK4 %s +// RUN: llvm-objdump -d %t2 -start-address=5767168 -stop-address=5767174 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK5 %s +// RUN: llvm-objdump -d %t2 -start-address=16777220 -stop-address=16777240 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK6 %s +// RUN: llvm-objdump -d %t2 -start-address=17825792 
-stop-address=17825798 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK7 %s +// Test Range extension Thunks for the Thumb conditional branch instruction. +// This instruction only has a range of 1Mb whereas all the other Thumb wide +// Branch instructions have 16Mb range. We still place our pre-created Thunk +// Sections at 16Mb intervals as conditional branches to a target defined +// in a different section are rare. + .syntax unified +// Define a function aligned on a half megabyte boundary + .macro FUNCTION suff + .section .text.\suff\(), "ax", %progbits + .thumb + .balign 0x80000 + .globl tfunc\suff\() + .type tfunc\suff\(), %function +tfunc\suff\(): + bx lr + .endm + + .globl _start +_start: + FUNCTION 00 +// Long Range Thunk needed for 16Mb range branch, can reach pre-created Thunk +// Section + bl tfunc33 +// CHECK1: Disassembly of section .text: +// CHECK1-NEXT: tfunc00: +// CHECK1-NEXT: 80000: 70 47 bx lr +// CHECK1-NEXT: 80002: 7f f3 ff d7 bl #16252926 +// CHECK1: __Thumbv7ABSLongThunk_tfunc05: +// CHECK1-NEXT: 80008: 40 f2 01 0c movw r12, #1 +// CHECK1-NEXT: 8000c: c0 f2 30 0c movt r12, #48 +// CHECK1-NEXT: 80010: 60 47 bx r12 +// CHECK1: __Thumbv7ABSLongThunk_tfunc00: +// CHECK1-NEXT: 80012: 40 f2 01 0c movw r12, #1 +// CHECK1-NEXT: 80016: c0 f2 08 0c movt r12, #8 +// CHECK1-NEXT: 8001a: 60 47 bx r12 + FUNCTION 01 +// tfunc02 is within range of tfunc02 + beq.w tfunc02 +// tfunc05 is out of range, and we can't reach the pre-created Thunk Section +// create a new one. 
+ bne.w tfunc05 +// CHECK2: tfunc01: +// CHECK2-NEXT: 100000: 70 47 bx lr +// CHECK2-NEXT: 100002: 3f f0 fd a7 beq.w #524282 +// CHECK2-NEXT: 100006: 7f f4 ff a7 bne.w #-524290 <__Thumbv7ABSLongThunk_tfunc05> + FUNCTION 02 +// We can reach the Thunk Section created for bne.w tfunc05 + bne.w tfunc05 + beq.w tfunc00 +// CHECK3: 180000: 70 47 bx lr +// CHECK3-NEXT: 180002: 40 f4 01 80 bne.w #-1048574 <__Thumbv7ABSLongThunk_tfunc05> +// CHECK3-NEXT: 180006: 00 f4 04 80 beq.w #-1048568 <__Thumbv7ABSLongThunk_tfunc00> + FUNCTION 03 + FUNCTION 04 + FUNCTION 05 + FUNCTION 06 + FUNCTION 07 + FUNCTION 08 + FUNCTION 09 +// CHECK4: __Thumbv7ABSLongThunk_tfunc03: +// CHECK4-NEXT: 500004: 40 f2 01 0c movw r12, #1 +// CHECK4-NEXT: 500008: c0 f2 20 0c movt r12, #32 +// CHECK4-NEXT: 50000c: 60 47 bx r12 + FUNCTION 10 +// We can't reach any Thunk Section, create a new one + beq.w tfunc03 +// CHECK5: tfunc10: +// CHECK5-NEXT: 580000: 70 47 bx lr +// CHECK5-NEXT: 580002: 3f f4 ff a7 beq.w #-524290 <__Thumbv7ABSLongThunk_tfunc03> + FUNCTION 11 + FUNCTION 12 + FUNCTION 13 + FUNCTION 14 + FUNCTION 15 + FUNCTION 16 + FUNCTION 17 + FUNCTION 18 + FUNCTION 19 + FUNCTION 20 + FUNCTION 21 + FUNCTION 22 + FUNCTION 23 + FUNCTION 24 + FUNCTION 25 + FUNCTION 26 + FUNCTION 27 + FUNCTION 28 + FUNCTION 29 + FUNCTION 30 + FUNCTION 31 +// CHECK6: __Thumbv7ABSLongThunk_tfunc33: +// CHECK6-NEXT: 1000004: 40 f2 01 0c movw r12, #1 +// CHECK6-NEXT: 1000008: c0 f2 10 1c movt r12, #272 +// CHECK6-NEXT: 100000c: 60 47 bx r12 +// CHECK6: __Thumbv7ABSLongThunk_tfunc00: +// CHECK6-NEXT: 100000e: 40 f2 01 0c movw r12, #1 +// CHECK6-NEXT: 1000012: c0 f2 08 0c movt r12, #8 +// CHECK6-NEXT: 1000016: 60 47 bx r12 + FUNCTION 32 + FUNCTION 33 + // We should be able to reach an existing ThunkSection. 
+ b.w tfunc00 +// CHECK7: tfunc33: +// CHECK7-NEXT: 1100000: 70 47 bx lr +// CHECK7-NEXT: 1100002: 00 f7 04 b8 b.w #-1048568 <__Thumbv7ABSLongThunk_tfunc00> Index: test/ELF/arm-thumb-thunk-empty-pass.s =================================================================== --- /dev/null +++ test/ELF/arm-thumb-thunk-empty-pass.s @@ -0,0 +1,32 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=thumbv7a-none-linux-gnueabi %s -o %t +// RUN: ld.lld %t -o %t2 2>&1 +// RUN: llvm-objdump -d %t2 -start-address=69632 -stop-address=69646 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK1 %s +// RUN: llvm-objdump -d %t2 -start-address=16846860 -stop-address=16846874 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK2 %s + .syntax unified + .global _start, foo + .type _start, %function + .section .text.start,"ax",%progbits +_start: + bl _start + .section .text.dummy1,"ax",%progbits + .space 0xfffffe + .section .text.foo,"ax",%progbits + .type foo, %function +foo: + bl _start + +// CHECK1: Disassembly of section .text: +// CHECK1-NEXT: _start: +// CHECK1-NEXT: 11000: ff f7 fe ff bl #-4 +// CHECK1: __Thumbv7ABSLongThunk__start: +// CHECK1-NEXT: 11004: 41 f2 01 0c movw r12, #4097 +// CHECK1-NEXT: 11008: c0 f2 01 0c movt r12, #1 +// CHECK1-NEXT: 1100c: 60 47 bx r12 + +// CHECK2: __Thumbv7ABSLongThunk__start: +// CHECK2: 101100c: 41 f2 01 0c movw r12, #4097 +// CHECK2-NEXT: 1011010: c0 f2 01 0c movt r12, #1 +// CHECK2-NEXT: 1011014: 60 47 bx r12 +// CHECK2: foo: +// CHECK2-NEXT: 1011016: ff f7 f9 ff bl #-14 Index: test/ELF/arm-thunk-multipass.s =================================================================== --- /dev/null +++ test/ELF/arm-thunk-multipass.s @@ -0,0 +1,96 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t +// RUN: ld.lld %t -o %t2 2>&1 +// The output file is large, most of it zeroes. 
We dissassemble only the +// parts we need to speed up the test and avoid a large output file +// RUN: llvm-objdump -d %t2 -start-address=1048578 -stop-address=1048586 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK1 %s +// RUN: llvm-objdump -d %t2 -start-address=16777224 -stop-address=16777254 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK2 %s +// RUN: llvm-objdump -d %t2 -start-address=17825818 -stop-address=17825828 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK3 %s +// In this test case a branch that is in range and does not need its range +// extended can be pushed out of range by another Thunk, necessitating another +// pass + + .macro FUNCTION suff + .section .text.\suff\(), "ax", %progbits + .thumb + .balign 0x100000 + .globl tfunc\suff\() + .type tfunc\suff\(), %function +tfunc\suff\(): + bx lr + .endm + + FUNCTION 00 + .globl _start +_start: + bl target + b.w arm_target +// arm_target is in range but needs an interworking thunk +// CHECK1: _start: +// CHECK1-NEXT: 100002: 00 f3 06 d0 bl #15728652 +// CHECK1-NEXT: 100006: ff f2 ff 97 b.w #15728638 <__Thumbv7ABSLongThunk_arm_target> + nop + nop + nop + .globl target2 + .type target2, %function + nop + +target2: + FUNCTION 01 + FUNCTION 02 + FUNCTION 03 + FUNCTION 04 + FUNCTION 05 + FUNCTION 06 + FUNCTION 07 + FUNCTION 08 + FUNCTION 09 + FUNCTION 10 + FUNCTION 11 + FUNCTION 12 + FUNCTION 13 + FUNCTION 14 + FUNCTION 15 + + .section .text.16, "ax", %progbits + .arm + .globl arm_target + .type arm_target, %function +arm_target: + bx lr +// CHECK2: __Thumbv7ABSLongThunk_arm_target: +// CHECK2-NEXT: 1000008: 40 f2 02 0c movw r12, #2 +// CHECK2-NEXT: 100000c: c0 f2 00 1c movt r12, #256 +// CHECK2-NEXT: 1000010: 60 47 bx r12 +// CHECK2: __Thumbv7ABSLongThunk_target: +// CHECK2-NEXT: 1000012: 40 f2 1b 0c movw r12, #27 +// CHECK2-NEXT: 1000016: c0 f2 10 1c movt r12, #272 +// CHECK2-NEXT: 100001a: 60 47 bx r12 +// CHECK2: __Thumbv7ABSLongThunk_target2: +// 
CHECK2-NEXT: 100001c: 40 f2 13 0c movw r12, #19 +// CHECK2-NEXT: 1000020: c0 f2 10 0c movt r12, #16 +// CHECK2-NEXT: 1000024: 60 47 bx r12 + + .section .text.17, "ax", %progbits +// Just enough space so that bl target is in range if no extension thunks are +// generated. + + .space 0x100000 - 12 + + .section .text.18, "ax", %progbits + .thumb + .globl target + .type target, %function +// target is at maximum ARM branch range away from caller. +target: +// Similar case in the backwards direction + bl target2 + nop + nop + bx lr +// CHECK3: target: +// CHECK3-NEXT: 110001a: ff f6 ff ff bl #-1048578 +// CHECK3-NEXT: 110001e: 00 bf nop +// CHECK3-NEXT: 1100020: 00 bf nop +// CHECK3-NEXT: 1100022: 70 47 bx lr Index: test/ELF/arm-thunk-re-add.s =================================================================== --- /dev/null +++ test/ELF/arm-thunk-re-add.s @@ -0,0 +1,119 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=thumbv7a-none-linux-gnueabi %s -o %t +// RUN: ld.lld %t --shared -o %t.so +// The output file is large, most of it zeroes. We dissassemble only the +// parts we need to speed up the test and avoid a large output file +// RUN: llvm-objdump -d %t.so -start-address=16777220 -stop-address=16777244 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK1 %s +// RUN: llvm-objdump -d %t.so -start-address=17825800 -stop-address=17825826 -triple=thumbv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK2 %s +// RUN: llvm-objdump -d %t.so -start-address=17825824 -stop-address=17825876 -triple=armv7a-linux-gnueabihf | FileCheck -check-prefix=CHECK3 %s + +// A branch to a Thunk that we create on pass N, can drift out of range if +// other Thunks are added in between. In this case we must create a new Thunk +// for the branch that is in range. 
We also need to make sure that if the +// destination of the Thunk is in the PLT the new Thunk also targets the PLT + .syntax unified + .thumb + + .macro FUNCTION suff + .section .text.\suff\(), "ax", %progbits + .thumb + .balign 0x80000 + .globl tfunc\suff\() + .type tfunc\suff\(), %function +tfunc\suff\(): + bx lr + .endm + + .globl imported + .type imported, %function + .globl imported2 + .type imported2, %function + .globl imported3 + .type imported3, %function +.globl imported4 + .type imported4, %function + FUNCTION 00 + FUNCTION 01 + FUNCTION 02 + FUNCTION 03 + FUNCTION 04 + FUNCTION 05 + FUNCTION 06 + FUNCTION 07 + FUNCTION 08 + FUNCTION 09 + FUNCTION 10 + FUNCTION 11 + FUNCTION 12 + FUNCTION 13 + FUNCTION 14 + FUNCTION 15 + FUNCTION 16 + FUNCTION 17 + FUNCTION 18 + FUNCTION 19 + FUNCTION 20 + FUNCTION 21 + FUNCTION 22 + FUNCTION 23 + FUNCTION 24 + FUNCTION 25 + FUNCTION 26 + FUNCTION 27 + FUNCTION 28 + FUNCTION 29 + FUNCTION 30 + FUNCTION 31 +// Precreated Thunk Pool goes here +// CHECK1: 1000004: 40 f2 24 0c movw r12, #36 +// CHECK1-NEXT: 1000008: c0 f2 10 0c movt r12, #16 +// CHECK1-NEXT: 100000c: fc 44 add r12, pc +// CHECK1-NEXT: 100000e: 60 47 bx r12 +// CHECK1: __ThumbV7PILongThunk_imported2: +// CHECK1-NEXT: 1000010: 40 f2 28 0c movw r12, #40 +// CHECK1-NEXT: 1000014: c0 f2 10 0c movt r12, #16 +// CHECK1-NEXT: 1000018: fc 44 add r12, pc +// CHECK1-NEXT: 100001a: 60 47 bx r12 + + .section .text.32, "ax", %progbits + .space 0x80000 + .section .text.33, "ax", %progbits + .space 0x80000 - 0x14 + .section .text.34, "ax", %progbits + // Need a Thunk to the PLT entry, can use precreated ThunkSection + .globl callers + .type callers, %function +callers: + b.w imported + beq.w imported + b.w imported2 +// CHECK2: __ThumbV7PILongThunk_imported: +// CHECK2-NEXT: 1100008: 40 f2 20 0c movw r12, #32 +// CHECK2-NEXT: 110000c: c0 f2 00 0c movt r12, #0 +// CHECK2-NEXT: 1100010: fc 44 add r12, pc +// CHECK2-NEXT: 1100012: 60 47 bx r12 +// CHECK2: callers: +// 
CHECK2-NEXT: 1100014: ff f6 f6 bf b.w #-1048596 <__ThumbV7PILongThunk_imported> +// CHECK2-NEXT: 1100018: 3f f4 f6 af beq.w #-20 <__ThumbV7PILongThunk_imported> +// CHECK2-NEXT: 110001c: ff f6 f8 bf b.w #-1048592 <__ThumbV7PILongThunk_imported2> + +// CHECK3: Disassembly of section .plt: +// CHECK3-NEXT: $a: +// CHECK3-NEXT: 1100020: 04 e0 2d e5 str lr, [sp, #-4]! +// CHECK3-NEXT: 1100024: 04 e0 9f e5 ldr lr, [pc, #4] +// CHECK3-NEXT: 1100028: 0e e0 8f e0 add lr, pc, lr +// CHECK3-NEXT: 110002c: 08 f0 be e5 ldr pc, [lr, #8]! +// CHECK3: $d: +// CHECK3-NEXT: 1100030: d0 0f 00 00 .word 0x00000fd0 +// CHECK3: $a: +// CHECK3-NEXT: 1100034: 04 c0 9f e5 ldr r12, [pc, #4] +// CHECK3-NEXT: 1100038: 0f c0 8c e0 add r12, r12, pc +// CHECK3-NEXT: 110003c: 00 f0 9c e5 ldr pc, [r12] +// CHECK3: $d: +// CHECK3-NEXT: 1100040: cc 0f 00 00 .word 0x00000fcc +// CHECK3: $a: +// CHECK3-NEXT: 1100044: 04 c0 9f e5 ldr r12, [pc, #4] +// CHECK3-NEXT: 1100048: 0f c0 8c e0 add r12, r12, pc +// CHECK3-NEXT: 110004c: 00 f0 9c e5 ldr pc, [r12] +// CHECK3: $d: +// CHECK3-NEXT: 1100050: c0 0f 00 00 .word 0x00000fc0