Index: ELF/Arch/ARM.cpp =================================================================== --- ELF/Arch/ARM.cpp +++ ELF/Arch/ARM.cpp @@ -61,6 +61,22 @@ // ARM uses Variant 1 TLS TcbSize = 8; NeedsThunks = true; + // Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is to + // match the most common expected case of a Thumb 2 encoded BL, BLX or B.W: + // ARM B, BL, BLX range 32MiB + // Thumb B.W, BL, BLX range 16MiB + // Thumb B<cc>.W range 1MiB + // If a branch cannot reach a pre-created ThunkSection a new one will be created + // so we can handle the rare case of a Thumb 2 conditional branch. + // FIXME: lld assumes a CPU with support for ARMv6T2 and above encodings. + // If support is added for CPUs older than ARMv6T2 then when in use this spacing should drop + // to 4MiB. + ThunkSectionSpacing = 0x1000000; + // The pre-created ThunkSections are inserted such that the end of the + // pre-created ThunkSection is almost certain to be within range of a branch + // from the start of the Section, or immediately following the previous + // ThunkSection.
Allow for 16384 12-byte Thunks per ThunkSectionSpacing. + ThunkSectionSize = 0x30000; } RelExpr ARM::getRelExpr(uint32_t Type, const SymbolBody &S, Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -133,14 +133,14 @@ private: void mergeThunks(); - ThunkSection *getOSThunkSec(OutputSection *OS, - std::vector *ISR); + ThunkSection *getISRThunkSec(OutputSection *OS, + std::vector *ISR); ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS); - void forEachExecInputSection( + void + createInitialThunkSections(ArrayRef OutputSections); + void forEachExecInputSectionRange( ArrayRef OutputSections, - std::function *, - InputSection *)> - Fn); + std::function *)> Fn); std::pair getThunk(SymbolBody &Body, uint32_t Type); ThunkSection *addThunkSection(OutputSection *OS, std::vector *, uint64_t Off); @@ -163,9 +163,6 @@ // passes std::map *, std::vector> ThunkSections; - - // The ThunkSection for this vector of InputSections - ThunkSection *CurTS; }; // Return a int64_t to make sure we get the sign extension out of the way as Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -973,6 +973,14 @@ std::vector *ISR = KV.first; std::vector &Thunks = KV.second; + // Remove ThunkSections that contain no Thunks + Thunks.erase( + llvm::remove_if( + Thunks, [](const ThunkSection *TS) { return TS->getSize() == 0; }), + Thunks.end()); + if (Thunks.empty()) + continue; + // Order Thunks in ascending OutSecOff auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) { return A->OutSecOff < B->OutSecOff; @@ -1000,18 +1008,22 @@ } } -ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS, - std::vector *ISR) { - if (CurTS == nullptr) { - uint32_t Off = 0; - for (auto *IS : OS->Sections) { - Off = IS->OutSecOff + IS->getSize(); - if ((IS->Flags & SHF_EXECINSTR) == 0) - break; - } - CurTS
= addThunkSection(OS, ISR, Off); - } - return CurTS; +// Find or create a ThunkSection within the InputSectionRange (ISR) that is in +// range of the caller. An ISR maps to a range of InputSections described by a +// linker script section pattern such as { .text .text.* }. +// FIXME: At present we assume that all ThunkSections are in range so we always +// return the first pre-created ThunkSection. +ThunkSection *ThunkCreator::getISRThunkSec(OutputSection *OS, + std::vector *ISR) { + // FIXME: When range extension thunks are supported we will need to check + // that the ThunkSection is in range of the caller. + if (!ThunkSections[ISR].empty()) + return ThunkSections[ISR].front(); + + // FIXME: When range extension thunks are supported we must handle the case + // where no pre-created ThunkSections are in range by creating a new one in + // range; for now it is unreachable. + llvm_unreachable("Must have created at least one ThunkSection per ISR"); } ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS, OutputSection *OS) { @@ -1024,7 +1036,7 @@ OutputSectionCommand *C = Script->getCmd(TOS); std::vector *Range = nullptr; for (BaseCommand *BC : C->Commands) - if (auto *ISD = dyn_cast (BC)) { + if (auto *ISD = dyn_cast(BC)) { InputSection *first = ISD->Sections.front(); InputSection *last = ISD->Sections.back(); if (IS->OutSecOff >= first->OutSecOff && @@ -1038,6 +1050,52 @@ return TS; } +// Create one or more ThunkSections per OS that can be used to place Thunks. +// We attempt to place the ThunkSections using the following desirable +// properties: +// - Within range of the maximum number of callers +// - Minimise the number of ThunkSections +// +// We follow a simple but conservative heuristic to place ThunkSections at +// offsets that are multiples of a Target-specific branch range. +// For an InputSectionRange that is smaller than the branch range, a single +// ThunkSection at the end of the range will do.
+void ThunkCreator::createInitialThunkSections( + ArrayRef OutputSections) { + bool NeedTrailingTS; + uint32_t Off; + uint32_t Limit; + InputSection *PrevIS = nullptr; + std::vector *PrevISR = nullptr; + + forEachExecInputSectionRange( + OutputSections, [&](OutputSection *OS, std::vector *ISR) { + for (InputSection *IS : *ISR) { + if (ISR != PrevISR) { + NeedTrailingTS = true; + Off = 0; + Limit = IS->OutSecOff + + (Target->ThunkSectionSpacing - Target->ThunkSectionSize); + PrevIS = nullptr; + PrevISR = ISR; + } + Off = IS->OutSecOff + IS->getSize(); + if (Off >= Limit) { + uint32_t ThunkOff = (PrevIS == nullptr) + ? IS->OutSecOff + : PrevIS->OutSecOff + PrevIS->getSize(); + addThunkSection(OS, ISR, ThunkOff); + NeedTrailingTS = false; + Limit = ThunkOff + Target->ThunkSectionSpacing; + } + PrevIS = IS; + + if (ISR->back() == IS && NeedTrailingTS) + addThunkSection(OS, ISR, Off); + } + }); +} + ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS, std::vector *ISR, uint64_t Off) { @@ -1046,7 +1104,6 @@ return TS; } - std::pair ThunkCreator::getThunk(SymbolBody &Body, uint32_t Type) { auto Res = ThunkedSymbols.insert({&Body, std::vector()}); @@ -1062,22 +1119,18 @@ return std::make_pair(T, true); } -// Call Fn on every executable InputSection accessed via the linker script -// InputSectionDescription::Sections. -void ThunkCreator::forEachExecInputSection( +// Call Fn on every executable Range of InputSections accessed via the linker +// script InputSectionDescription::Sections. 
+void ThunkCreator::forEachExecInputSectionRange( ArrayRef OutputSections, - std::function *, - InputSection *)> - Fn) { + std::function *)> Fn) { for (OutputSectionCommand *Cmd : OutputSections) { OutputSection *OS = Cmd->Sec; if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR)) continue; for (BaseCommand *BC : Cmd->Commands) if (auto *ISD = dyn_cast(BC)) { - CurTS = nullptr; - for (InputSection *IS : ISD->Sections) - Fn(OS, &ISD->Sections, IS); + Fn(OS, &ISD->Sections); } } } @@ -1096,6 +1149,8 @@ ArrayRef OutputSections) { if (Pass > 0) ThunkSections.clear(); + else if (Target->ThunkSectionSpacing) + createInitialThunkSections(OutputSections); // Create all the Thunks and insert them into synthetic ThunkSections. The // ThunkSections are later inserted back into the OutputSection. @@ -1103,31 +1158,31 @@ // We separate the creation of ThunkSections from the insertion of the // ThunkSections back into the OutputSection as ThunkSections are not always // inserted into the same OutputSection as the caller. 
- forEachExecInputSection( - OutputSections, [&](OutputSection *OS, std::vector *ISR, - InputSection *IS) { - for (Relocation &Rel : IS->Relocations) { - SymbolBody &Body = *Rel.Sym; - if (Thunks.find(&Body) != Thunks.end() || - !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body)) - continue; - Thunk *T; - bool IsNew; - std::tie(T, IsNew) = getThunk(Body, Rel.Type); - if (IsNew) { - // Find or create a ThunkSection for the new Thunk - ThunkSection *TS; - if (auto *TIS = T->getTargetInputSection()) - TS = getISThunkSec(TIS, OS); - else - TS = getOSThunkSec(OS, ISR); - TS->addThunk(T); - Thunks[T->ThunkSym] = T; + forEachExecInputSectionRange( + OutputSections, [&](OutputSection *OS, std::vector *ISR) { + for (InputSection *IS : *ISR) + for (Relocation &Rel : IS->Relocations) { + SymbolBody &Body = *Rel.Sym; + if (Thunks.find(&Body) != Thunks.end() || + !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body)) + continue; + Thunk *T; + bool IsNew; + std::tie(T, IsNew) = getThunk(Body, Rel.Type); + if (IsNew) { + // Find or create a ThunkSection for the new Thunk + ThunkSection *TS; + if (auto *TIS = T->getTargetInputSection()) + TS = getISThunkSec(TIS, OS); + else + TS = getISRThunkSec(OS, ISR); + TS->addThunk(T); + Thunks[T->ThunkSym] = T; + } + // Redirect relocation to Thunk, we never go via the PLT to a Thunk + Rel.Sym = T->ThunkSym; + Rel.Expr = fromPlt(Rel.Expr); } - // Redirect relocation to Thunk, we never go via the PLT to a Thunk - Rel.Sym = T->ThunkSym; - Rel.Expr = fromPlt(Rel.Expr); - } }); // Merge all created synthetic ThunkSections back into OutputSection mergeThunks(); Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -70,6 +70,13 @@ // end of .got uint64_t GotBaseSymOff = 0; + // On systems with range extensions we place collections of Thunks at + // regular spacings that enable the majority of branches reach the Thunks. 
+ uint32_t ThunkSectionSpacing = 0; + + // An estimate of the size of the Thunks that will be created per ThunkSection + uint32_t ThunkSectionSize = 0; + uint32_t CopyRel; uint32_t GotRel; uint32_t PltRel; Index: test/ELF/arm-thumb-thunk-symbols.s =================================================================== --- test/ELF/arm-thumb-thunk-symbols.s +++ test/ELF/arm-thumb-thunk-symbols.s @@ -25,18 +25,18 @@ b thumb_fn // CHECK: Name: __Thumbv7ABSLongThunk_arm_fn -// CHECK-NEXT: Value: 0x11005 +// CHECK-NEXT: Value: 0x12005 // CHECK-NEXT: Size: 10 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Function (0x2) // CHECK: Name: __ARMv7ABSLongThunk_thumb_fn -// CHECK-NEXT: Value: 0x11010 +// CHECK-NEXT: Value: 0x12010 // CHECK-NEXT: Size: 12 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Function (0x2) // CHECK-PI: Name: __ThumbV7PILongThunk_arm_fn -// CHECK-PI-NEXT: Value: 0x1005 +// CHECK-PI-NEXT: Value: 0x2005 // CHECK-PI-NEXT: Size: 12 // CHECK-PI-NEXT: Binding: Local (0x0) // CHECK-PI-NEXT: Type: Function (0x2)