Index: COFF/Chunks.h =================================================================== --- COFF/Chunks.h +++ COFF/Chunks.h @@ -273,6 +273,7 @@ private: llvm::StringTableBuilder Builder; + bool Finalized = false; }; // A chunk for common symbols. Common chunks don't have actual data. @@ -360,6 +361,17 @@ Defined *ImpSymbol; }; +class RangeExtensionThunk : public Chunk { +public: + explicit RangeExtensionThunk(Defined *T) : Target(T) {} + size_t getSize() const override; + void writeTo(uint8_t *Buf) const override; + Defined *getTarget() const { return Target; } + +private: + Defined *Target; +}; + // Windows-specific. // See comments for DefinedLocalImport class. class LocalImportChunk : public Chunk { Index: COFF/Chunks.cpp =================================================================== --- COFF/Chunks.cpp +++ COFF/Chunks.cpp @@ -655,6 +655,30 @@ applyArm64Ldr(Buf + OutputSectionOff + 4, Off); } +// A Thumb2, PIC range extension thunk. A non-PIC one would be 2 bytes +// shorter but would require a base relocation instead. +const uint8_t RangeExtensionThunkARMData[] = { + 0x40, 0xf2, 0x00, 0x0c, // P: movw ip,:lower16:S - (P + (L1-P) + 4) + 0xc0, 0xf2, 0x00, 0x0c, // movt ip,:upper16:S - (P + (L1-P) + 4) + 0xfc, 0x44, // L1: add ip, pc + 0x60, 0x47, // bx ip +}; + +size_t RangeExtensionThunk::getSize() const { + assert(Config->Machine == ARMNT); + return sizeof(RangeExtensionThunkARMData); +} + +void RangeExtensionThunk::writeTo(uint8_t *Buf) const { + assert(Config->Machine == ARMNT); + uint64_t Offset = Target->getRVA() - RVA - 12; + // The target address needs to have the Thumb bit set. 
+ Offset |= 1; + memcpy(Buf + OutputSectionOff, RangeExtensionThunkARMData, + sizeof(RangeExtensionThunkARMData)); + applyMOV32T(Buf + OutputSectionOff, uint32_t(Offset)); +} + void LocalImportChunk::getBaserels(std::vector *Res) { Res->emplace_back(getRVA()); } @@ -794,10 +818,13 @@ } void MergeChunk::finalizeContents() { - for (SectionChunk *C : Sections) - if (C->isLive()) - Builder.add(toStringRef(C->getContents())); - Builder.finalize(); + if (!Finalized) { + for (SectionChunk *C : Sections) + if (C->isLive()) + Builder.add(toStringRef(C->getContents())); + Builder.finalize(); + Finalized = true; + } for (SectionChunk *C : Sections) { if (!C->isLive()) Index: COFF/Writer.h =================================================================== --- COFF/Writer.h +++ COFF/Writer.h @@ -11,6 +11,7 @@ #define LLD_COFF_WRITER_H #include "Chunks.h" +#include "Symbols.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/COFF.h" #include @@ -36,11 +37,16 @@ void addChunk(Chunk *C); void merge(OutputSection *Other); ArrayRef getChunks() { return Chunks; } + void clear() { Chunks.clear(); } void addPermissions(uint32_t C); void setPermissions(uint32_t C); uint64_t getRVA() { return Header.VirtualAddress; } uint64_t getFileOff() { return Header.PointerToRawData; } void writeHeaderTo(uint8_t *Buf); + bool createThunks(int Pass, + llvm::DenseMap, + std::vector> &ThunksPerTarget, + llvm::DenseMap &Thunks); // Returns the size of this section in an executable memory image. 
// This may be smaller than the raw size (the raw size is multiple Index: COFF/Writer.cpp =================================================================== --- COFF/Writer.cpp +++ COFF/Writer.cpp @@ -153,6 +153,7 @@ void createExportTable(); void mergeSections(); void assignAddresses(); + void finalizeAddresses(); void removeEmptySections(); void createSymbolAndStringTable(); void openFile(StringRef OutputPath); @@ -330,6 +331,168 @@ return None; } +static bool machineRequiresThunks() { + // Only ARMNT requires range extension thunks out of the currently supported + // architectures. + return Config->Machine == ARMNT; +} + +// Check whether the target address S is in range from a relocation +// of type RelType at address P. +static bool isInRange(uint16_t RelType, uint64_t S, uint64_t P) { + assert(Config->Machine == ARMNT); + int64_t Diff = S - P - 4; + switch (RelType) { + case IMAGE_REL_ARM_BRANCH20T: + return isInt<21>(Diff); + case IMAGE_REL_ARM_BRANCH24T: + case IMAGE_REL_ARM_BLX23T: + return isInt<25>(Diff); + default: + return true; + } +} + +// Return an existing thunk which is in range, or create a new one. +static std::pair +getThunk(DenseMap, std::vector> + &ThunksPerTarget, + DenseMap &Thunks, Defined *Target, + uint64_t P, uint16_t Type) { + Chunk *TargetChunk = Target->getChunk(); + uint64_t TargetChunkRVA = TargetChunk ? TargetChunk->getRVA() : 0; + // A unique representation of the target address of a Defined symbol, + // stable across relayouts. This is represented as the base Chunk* with + // an offset, which should be stable across relayouts. + // Some symbols return a nullptr Chunk, which we should be ready to handle. + std::pair UniqueTarget = {TargetChunk, Target->getRVA() - + TargetChunkRVA}; + std::vector &TargetThunks = ThunksPerTarget[UniqueTarget]; + // For the first pass, any matches are most likely at the end of the vector, + // so by iterating in reverse order, we might find a match sooner.
As long + // as the image size only is in the same order of magnitude as the branch + // range (16 MB for ARMNT), there will in practice only be one or a few + // thunks per target. + for (Defined *Sym : llvm::reverse(TargetThunks)) + if (isInRange(Type, Sym->getRVA(), P)) + return {Sym, false}; + RangeExtensionThunk *C = make(Target); + Defined *D = make("", C); + TargetThunks.push_back(D); + Thunks[D] = C; + return {D, true}; +} + +// Check if the symbol currently points at a thunk, and if it does, if it still +// is usable. Returns true if it is a thunk and it still is usable. +static bool +normalizeExistingThunk(DenseMap &Thunks, + Symbol *&RelocTarget, uint16_t RelType, + uint64_t RelAddr) { + Defined *Sym = dyn_cast_or_null(RelocTarget); + if (!Sym) + return false; + if (RangeExtensionThunk *RET = Thunks.lookup(Sym)) { + if (isInRange(RelType, Sym->getRVA(), RelAddr)) + return true; + // The previously used thunk is out of range; don't refer to the thunk any + // longer but directly to the original target, to avoid chaining thunks. + RelocTarget = RET->getTarget(); + } + return false; +} + +bool OutputSection::createThunks( + int Pass, DenseMap, std::vector> + &ThunksPerTarget, + DenseMap &Thunks) { + bool AddressesChanged = false; + size_t ThunksSize = 0; + // Recheck Chunks.size() each iteration, since we can insert more + // elements into it. + for (size_t I = 0; I != Chunks.size(); ++I) { + SectionChunk *SC = dyn_cast_or_null(Chunks[I]); + if (!SC) + continue; + size_t ThunkInsertionSpot = I + 1; + + // Try to get a good enough estimate of where new thunks will be placed. + // Offset this by the size of the new thunks added so far, to make the + // estimate slightly better. 
+ size_t ThunkInsertionRVA = SC->getRVA() + SC->getSize() + ThunksSize; + for (size_t J = 0, E = SC->Relocs.size(); J < E; ++J) { + const coff_relocation &Rel = SC->Relocs[J]; + Symbol *&RelocTarget = SC->RelocTargets[J]; + + // The estimate of the source address P should be pretty accurate, + // but we don't know whether the target Symbol address should be + // offset by ThunkSize or not (or by some of ThunksSize but not all of + // it), giving us some uncertainty once we have added one thunk. + uint64_t P = SC->getRVA() + Rel.VirtualAddress + ThunksSize; + + // If this Symbol already is a thunk, and it is in range, no need to do + // anything. If it was a thunk but the thunk now also is out of range, + // this resets the Symbol to point to the original symbol, allowing the + // new thunk to point directly to the target. + if (Pass > 0 && normalizeExistingThunk(Thunks, RelocTarget, Rel.Type, P)) + continue; + + Defined *Sym = dyn_cast_or_null(RelocTarget); + if (!Sym) + continue; + + uint64_t S = Sym->getRVA(); + + if (isInRange(Rel.Type, S, P)) + continue; + + // If the target isn't in range, hook it up to an existing or new + // thunk. + Defined *Thunk; + bool WasNew; + std::tie(Thunk, WasNew) = + getThunk(ThunksPerTarget, Thunks, Sym, P, Rel.Type); + if (WasNew) { + Chunk *ThunkChunk = Thunk->getChunk(); + ThunkChunk->setRVA(ThunkInsertionRVA); // Estimate of where it will be located. + Chunks.insert(Chunks.begin() + ThunkInsertionSpot, ThunkChunk); + ThunkInsertionSpot++; + ThunksSize += ThunkChunk->getSize(); + ThunkInsertionRVA += ThunkChunk->getSize(); + AddressesChanged = true; + } + RelocTarget = Thunk; + } + } + return AddressesChanged; +} + +// Assign addresses and add thunks if necessary. 
+void Writer::finalizeAddresses() { + int ThunkPass = 0; + bool AddressesChanged; + DenseMap, std::vector> + ThunksPerTarget; + DenseMap Thunks; + do { + if (ThunkPass >= 10) + fatal("adding thunks hasn't converged after " + Twine(ThunkPass) + + " passes"); + assignAddresses(); + if (!machineRequiresThunks()) + return; + AddressesChanged = false; + for (OutputSection *Sec : OutputSections) + AddressesChanged |= Sec->createThunks(ThunkPass, ThunksPerTarget, Thunks); + ThunkPass++; + // Iterate until no new thunks have been added. Even if the last pass + // hooked up a relocation to a different target than before, we don't need + // to run another pass unless addresses actually have changed. + } while (AddressesChanged); + log("Added " + Twine(Thunks.size()) + " thunks in " + Twine(ThunkPass) + + " passes"); +} + // The main function of the writer. void Writer::run() { ScopedTimer T1(CodeLayoutTimer); @@ -344,7 +507,7 @@ createImportTables(); createExportTable(); mergeSections(); - assignAddresses(); + finalizeAddresses(); removeEmptySections(); setSectionPermissions(); createSymbolAndStringTable(); @@ -1317,6 +1480,7 @@ void Writer::addBaserels() { if (!Config->Relocatable) return; + RelocSec->clear(); std::vector V; for (OutputSection *Sec : OutputSections) { if (Sec->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) Index: test/COFF/Inputs/far-arm-thumb-abs.s =================================================================== --- test/COFF/Inputs/far-arm-thumb-abs.s +++ /dev/null @@ -1,2 +0,0 @@ -.global too_far1 -too_far1 = 0x1401004 Index: test/COFF/Inputs/far-arm-thumb-abs20.s =================================================================== --- test/COFF/Inputs/far-arm-thumb-abs20.s +++ /dev/null @@ -1,2 +0,0 @@ -.global too_far20 -too_far20 = 0x501004 Index: test/COFF/arm-thumb-branch-error.s =================================================================== --- test/COFF/arm-thumb-branch-error.s +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: llvm-mc 
-filetype=obj -triple=thumbv7a-windows-gnu %s -o %t -// RUN: llvm-mc -filetype=obj -triple=thumbv7a-windows-gnu %S/Inputs/far-arm-thumb-abs.s -o %tfar -// RUN: not lld-link -entry:_start -subsystem:console %t %tfar -out:%t2 2>&1 | FileCheck %s -// REQUIRES: arm - .syntax unified - .globl _start -_start: - bl too_far1 - -// CHECK: relocation out of range Index: test/COFF/arm-thumb-branch20-error.s =================================================================== --- test/COFF/arm-thumb-branch20-error.s +++ test/COFF/arm-thumb-branch20-error.s @@ -1,10 +1,16 @@ // REQUIRES: arm // RUN: llvm-mc -filetype=obj -triple=thumbv7a-windows-gnu %s -o %t.obj -// RUN: llvm-mc -filetype=obj -triple=thumbv7a-windows-gnu %S/Inputs/far-arm-thumb-abs20.s -o %t.far.obj -// RUN: not lld-link -entry:_start -subsystem:console %t.obj %t.far.obj -out:%t.exe 2>&1 | FileCheck %s +// RUN: not lld-link -entry:_start -subsystem:console %t.obj -out:%t.exe 2>&1 | FileCheck %s .syntax unified .globl _start _start: bne too_far20 + .space 0x100000 + .section .text$a, "xr" +too_far20: + bx lr -// CHECK: relocation out of range +// When trying to add a thunk at the end of the section, the thunk itself +// will be too far away, so this won't converge. 
+ // CHECK: adding thunks hasn't converged Index: test/COFF/arm-thumb-thunks.s =================================================================== --- /dev/null +++ test/COFF/arm-thumb-thunks.s @@ -0,0 +1,90 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=thumbv7-windows %s -o %t.obj +// RUN: lld-link -entry:main -subsystem:console %t.obj -out:%t.exe +// RUN: llvm-objdump -d %t.exe -start-address=0x401000 -stop-address=0x401022 | FileCheck -check-prefix=MAIN %s +// RUN: llvm-objdump -d %t.exe -start-address=0x501012 -stop-address=0x501030 | FileCheck -check-prefix=FUNC1 %s +// RUN: llvm-objdump -d %t.exe -start-address=0x601030 | FileCheck -check-prefix=FUNC2 %s + +// Pass 0: +// main->func1 in range +// main->func2 out of range, adding thunk after main +// func1->func2 (first) out of range, using thunk from main +// func1->func2 (second) in range +// Pass 1: +// main->func1 out of range, adding thunk after main +// func1->thunk from main out of range, adding new thunk after func1 +// Pass 2: +// func1->func2 (second) now out of range, using existing thunk after func1 + .syntax unified + .globl main + .globl func1 + .text +main: + bne func1 + bne func2 + nop + .section .text$a, "xr" + .space 0x100000 - 16 + .section .text$b, "xr" +func1: + bne func2 + nop + nop + nop + nop + bne func2 + bx lr + .section .text$c, "xr" + .space 0x100000 + .section .text$d, "xr" +func2: +// Test using string tail merging. This is irrelevant to the thunking itself, +// but running multiple passes of assignAddresses() calls finalizeContents() +// multiple times; check that MergeChunk handles this correctly.
+ movw r0, :lower16:"??_C@string1" + movt r0, :upper16:"??_C@string1" + movw r1, :lower16:"??_C@string2" + movt r1, :upper16:"??_C@string2" + bx lr + + .section .rdata,"dr",discard,"??_C@string1" + .globl "??_C@string1" +"??_C@string1": + .asciz "foobar" + .section .rdata,"dr",discard,"??_C@string2" + .globl "??_C@string2" +"??_C@string2": + .asciz "bar" + +// MAIN: 401000: 40 f0 03 80 bne.w #6 <.text+0xa> +// MAIN: 401004: 40 f0 07 80 bne.w #14 <.text+0x16> +// MAIN: 401008: 00 bf nop +// func2 thunk +// MAIN: 40100a: 4f f6 fd 7c movw r12, #65533 +// MAIN: 40100e: c0 f2 0f 0c movt r12, #15 +// MAIN: 401012: fc 44 add r12, pc +// MAIN: 401014: 60 47 bx r12 +// func1 thunk +// MAIN: 401016: 40 f2 0f 0c movw r12, #15 +// MAIN: 40101a: c0 f2 20 0c movt r12, #32 +// MAIN: 40101e: fc 44 add r12, pc +// MAIN: 401020: 60 47 bx r12 + +// FUNC1: 501012: 40 f0 07 80 bne.w #14 <.text+0x100024> +// FUNC1: 501016: 00 bf nop +// FUNC1: 501018: 00 bf nop +// FUNC1: 50101a: 00 bf nop +// FUNC1: 50101c: 00 bf nop +// FUNC1: 50101e: 40 f0 01 80 bne.w #2 <.text+0x100024> +// FUNC1: 501022: 70 47 bx lr +// func2 thunk +// FUNC1: 501024: 40 f2 01 0c movw r12, #1 +// FUNC1: 501028: c0 f2 10 0c movt r12, #16 +// FUNC1: 50102c: fc 44 add r12, pc +// FUNC1: 50102e: 60 47 bx r12 + +// FUNC2: 601030: 42 f2 00 00 movw r0, #8192 +// FUNC2: 601034: c0 f2 60 00 movt r0, #96 +// FUNC2: 601038: 42 f2 03 01 movw r1, #8195 +// FUNC2: 60103c: c0 f2 60 01 movt r1, #96 +// FUNC2: 601040: 70 47 bx lr