Index: ELF/Arch/ARM.cpp =================================================================== --- ELF/Arch/ARM.cpp +++ ELF/Arch/ARM.cpp @@ -63,6 +63,34 @@ // ARM uses Variant 1 TLS TcbSize = 8; NeedsThunks = true; + + // The placing of pre-created ThunkSections is controlled by two parameters + // ThunkSectionSpacing and ThunkSectionSize. The aim is to place the + // ThunkSection such that all branches from the InputSections prior to the + // ThunkSection can reach a Thunk placed at the end of the ThunkSection. + // Graphically: + // | up to (ThunkSectionSpacing - ThunkSectionSize) .text input sections | + // | ThunkSection | + // | up to (ThunkSectionSpacing - ThunkSectionSize) .text input sections | + // | ThunkSection | + + // Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is to + // match the most common expected case of a Thumb 2 encoded BL, BLX or B.W: + // ARM B, BL, BLX range +/- 32MiB + // Thumb B.W, BL, BLX range +/- 16MiB + // Thumb B<cc>.W range +/- 1MiB + // If a branch cannot reach a pre-created ThunkSection a new one will be + // created so we can handle the rare cases of a Thumb 2 conditional branch. + + // FIXME: lld assumes that the Thumb BL and BLX encoding permits the J1 and + // J2 bits to be used to extend the branch range. On earlier Architectures + // such as ARMv4, ARMv5 and ARMv6 (except ARMv6T2) the range is +/- 4MiB. If + // support for the earlier encodings is added then when they are used the + // ThunkSectionSpacing will need lowering. 
+ ThunkSectionSpacing = 0x1000000; + + // Allow for 16384 12 byte Thunks per ThunkSectionSpacing + ThunkSectionSize = 0x30000; } RelExpr ARM::getRelExpr(uint32_t Type, const SymbolBody &S, Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -134,17 +134,22 @@ private: void mergeThunks(); - ThunkSection *getOSThunkSec(OutputSection *Cmd, - std::vector *ISR); + ThunkSection *getISRThunkSec(std::vector *ISR); ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS); - void forEachExecInputSection( + + void + createInitialThunkSections(ArrayRef OutputSections); + + void forEachExecInputSectionRange( ArrayRef OutputSections, - std::function *, - InputSection *)> + std::function *)> Fn); + std::pair getThunk(SymbolBody &Body, uint32_t Type); - ThunkSection *addThunkSection(OutputSection *Cmd, + + ThunkSection *addThunkSection(OutputSection *OS, std::vector *, uint64_t Off); + // Record all the available Thunks for a Symbol llvm::DenseMap> ThunkedSymbols; @@ -164,9 +169,6 @@ // passes std::map *, std::vector> ThunkSections; - - // The ThunkSection for this vector of InputSections - ThunkSection *CurTS; }; // Return a int64_t to make sure we get the sign extension out of the way as Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -978,13 +978,19 @@ std::vector *ISR = KV.first; std::vector &Thunks = KV.second; - // Order Thunks in ascending OutSecOff + // Remove ThunkSections that contain no Thunks. + llvm::erase_if(Thunks, + [](const ThunkSection *TS) { return TS->getSize() == 0; }); + if (Thunks.empty()) + continue; + + // Order Thunks in ascending OutSecOff. 
auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) { return A->OutSecOff < B->OutSecOff; }; std::stable_sort(Thunks.begin(), Thunks.end(), ThunkCmp); - // Merge sorted vectors of Thunks and InputSections by OutSecOff + // Merge sorted vectors of Thunks and InputSections by OutSecOff. std::vector Tmp; Tmp.reserve(ISR->size() + Thunks.size()); auto MergeCmp = [](const InputSection *A, const InputSection *B) { @@ -1005,22 +1011,21 @@ } } -static uint32_t findEndOfFirstNonExec(OutputSection &Cmd) { - for (BaseCommand *Base : Cmd.Commands) - if (auto *ISD = dyn_cast(Base)) - for (auto *IS : ISD->Sections) - if ((IS->Flags & SHF_EXECINSTR) == 0) - return IS->OutSecOff + IS->getSize(); - return 0; -} - -ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *Cmd, - std::vector *ISR) { - if (CurTS == nullptr) { - uint32_t Off = findEndOfFirstNonExec(*Cmd); - CurTS = addThunkSection(Cmd, ISR, Off); - } - return CurTS; +// Find or create a ThunkSection within the InputSectionRange (ISR) that is in +// range of the caller. An ISR maps to a range of InputSections described by a +// linker script section pattern such as { .text .text.* }. +// FIXME: At present we assume that all ThunkSections are in range so we always +// return the first pre-created ThunkSection. +ThunkSection *ThunkCreator::getISRThunkSec(std::vector *ISR) { + // FIXME: When range extension thunks are supported we will need to check + // that the ThunkSection is in range of the caller. + if (!ThunkSections[ISR].empty()) + return ThunkSections[ISR].front(); + + // FIXME: When range extension thunks are supported we must handle the case + // where no pre-created ThunkSections are in range by creating a new one in + // range; for now, it is unreachable. 
+ llvm_unreachable("Must have created at least one ThunkSection per ISR"); } ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS, OutputSection *OS) { @@ -1028,7 +1033,7 @@ if (TS) return TS; - // Find InputSectionRange within TOS that IS is in + // Find InputSectionRange within TOS that IS is in. OutputSection *C = IS->getParent(); std::vector *Range = nullptr; for (BaseCommand *BC : C->Commands) @@ -1046,10 +1051,51 @@ return TS; } -ThunkSection *ThunkCreator::addThunkSection(OutputSection *Cmd, +// Create one or more ThunkSections per OS that can be used to place Thunks. +// We attempt to place the ThunkSections using the following desirable +// properties: +// - Within range of the maximum number of callers +// - Minimise the number of ThunkSections +// +// We follow a simple but conservative heuristic to place ThunkSections at +// offsets that are multiples of a Target specific branch range. +// For an InputSectionRange that is smaller than the range, a single +// ThunkSection at the end of the range will do. +void ThunkCreator::createInitialThunkSections( + ArrayRef OutputSections) { + forEachExecInputSectionRange( + OutputSections, [&](OutputSection *OS, std::vector *ISR) { + if (ISR->empty()) + return; + bool NeedTrailingTS = true; + uint32_t Off; + uint32_t Limit = + ISR->front()->OutSecOff + + (Target->ThunkSectionSpacing - Target->ThunkSectionSize); + const InputSection *PrevIS = nullptr; + + for (const InputSection *IS : *ISR) { + Off = IS->OutSecOff + IS->getSize(); + if (Off >= Limit) { + uint32_t ThunkOff = (PrevIS == nullptr) + ? 
IS->OutSecOff + IS->getSize() + : PrevIS->OutSecOff + PrevIS->getSize(); + addThunkSection(OS, ISR, ThunkOff); + NeedTrailingTS = false; + Limit = ThunkOff + Target->ThunkSectionSpacing - + Target->ThunkSectionSize; + } + PrevIS = IS; + } + if (NeedTrailingTS) + addThunkSection(OS, ISR, Off); + }); +} + +ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS, std::vector *ISR, uint64_t Off) { - auto *TS = make(Cmd, Off); + auto *TS = make(OS, Off); ThunkSections[ISR].push_back(TS); return TS; } @@ -1069,22 +1115,17 @@ return std::make_pair(T, true); } -// Call Fn on every executable InputSection accessed via the linker script -// InputSectionDescription::Sections. -void ThunkCreator::forEachExecInputSection( +// Call Fn on every executable Range of InputSections accessed via the linker +// script InputSectionDescription::Sections. +void ThunkCreator::forEachExecInputSectionRange( ArrayRef OutputSections, - std::function *, - InputSection *)> - Fn) { + std::function *)> Fn) { for (OutputSection *OS : OutputSections) { if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR)) continue; for (BaseCommand *BC : OS->Commands) - if (auto *ISD = dyn_cast(BC)) { - CurTS = nullptr; - for (InputSection *IS : ISD->Sections) - Fn(OS, &ISD->Sections, IS); - } + if (auto *ISD = dyn_cast(BC)) + Fn(OS, &ISD->Sections); } } @@ -1101,6 +1142,8 @@ bool ThunkCreator::createThunks(ArrayRef OutputSections) { if (Pass > 0) ThunkSections.clear(); + else if (Target->ThunkSectionSpacing) + createInitialThunkSections(OutputSections); // Create all the Thunks and insert them into synthetic ThunkSections. The // ThunkSections are later inserted back into the OutputSection. @@ -1108,32 +1151,32 @@ // We separate the creation of ThunkSections from the insertion of the // ThunkSections back into the OutputSection as ThunkSections are not always // inserted into the same OutputSection as the caller. 
- forEachExecInputSection(OutputSections, [&](OutputSection *Cmd, - std::vector *ISR, - InputSection *IS) { - for (Relocation &Rel : IS->Relocations) { - SymbolBody &Body = *Rel.Sym; - if (Thunks.find(&Body) != Thunks.end() || - !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body)) - continue; - Thunk *T; - bool IsNew; - std::tie(T, IsNew) = getThunk(Body, Rel.Type); - if (IsNew) { - // Find or create a ThunkSection for the new Thunk - ThunkSection *TS; - if (auto *TIS = T->getTargetInputSection()) - TS = getISThunkSec(TIS, Cmd); - else - TS = getOSThunkSec(Cmd, ISR); - TS->addThunk(T); - Thunks[T->ThunkSym] = T; - } - // Redirect relocation to Thunk, we never go via the PLT to a Thunk - Rel.Sym = T->ThunkSym; - Rel.Expr = fromPlt(Rel.Expr); - } - }); + forEachExecInputSectionRange( + OutputSections, [&](OutputSection *OS, std::vector *ISR) { + for (InputSection *IS : *ISR) + for (Relocation &Rel : IS->Relocations) { + SymbolBody &Body = *Rel.Sym; + if (Thunks.find(&Body) != Thunks.end() || + !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body)) + continue; + Thunk *T; + bool IsNew; + std::tie(T, IsNew) = getThunk(Body, Rel.Type); + if (IsNew) { + // Find or create a ThunkSection for the new Thunk + ThunkSection *TS; + if (auto *TIS = T->getTargetInputSection()) + TS = getISThunkSec(TIS, OS); + else + TS = getISRThunkSec(ISR); + TS->addThunk(T); + Thunks[T->ThunkSym] = T; + } + // Redirect relocation to Thunk, we never go via the PLT to a Thunk + Rel.Sym = T->ThunkSym; + Rel.Expr = fromPlt(Rel.Expr); + } + }); // Merge all created synthetic ThunkSections back into OutputSection mergeThunks(); ++Pass; Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -2308,6 +2308,8 @@ } InputSection *ThunkSection::getTargetInputSection() const { + if (Thunks.empty()) + return nullptr; const Thunk *T = Thunks.front(); return T->getTargetInputSection(); } Index: 
ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -74,6 +74,13 @@ // end of .got uint64_t GotBaseSymOff = 0; + // On systems with range extensions we place collections of Thunks at + // regular spacings that enable the majority of branches to reach the Thunks. + uint32_t ThunkSectionSpacing = 0; + + // An estimate of the size of the Thunks that will be created per ThunkSection + uint32_t ThunkSectionSize = 0; + uint32_t CopyRel; uint32_t GotRel; uint32_t PltRel; Index: test/ELF/arm-thumb-thunk-symbols.s =================================================================== --- test/ELF/arm-thumb-thunk-symbols.s +++ test/ELF/arm-thumb-thunk-symbols.s @@ -25,18 +25,18 @@ b thumb_fn // CHECK: Name: __Thumbv7ABSLongThunk_arm_fn -// CHECK-NEXT: Value: 0x11005 +// CHECK-NEXT: Value: 0x12005 // CHECK-NEXT: Size: 10 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Function (0x2) // CHECK: Name: __ARMv7ABSLongThunk_thumb_fn -// CHECK-NEXT: Value: 0x11010 +// CHECK-NEXT: Value: 0x12010 // CHECK-NEXT: Size: 12 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Function (0x2) // CHECK-PI: Name: __ThumbV7PILongThunk_arm_fn -// CHECK-PI-NEXT: Value: 0x1005 +// CHECK-PI-NEXT: Value: 0x2005 // CHECK-PI-NEXT: Size: 12 // CHECK-PI-NEXT: Binding: Local (0x0) // CHECK-PI-NEXT: Type: Function (0x2)