Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -125,7 +125,8 @@ private: void mergeThunks(); - ThunkSection *getOSThunkSec(ThunkSection *&TS, OutputSection *OS); + void createInitialThunkSections(ArrayRef OutputSections); + ThunkSection *getOSThunkSec(OutputSection *OS, InputSection *IS); ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS); std::pair getThunk(SymbolBody &Body, uint32_t Type); ThunkSection *addThunkSection(OutputSection *OS, uint64_t Off); Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -938,15 +938,24 @@ OutputSection *OS = KV.first; std::vector &Thunks = KV.second; + // Remove ThunkSections with no Thunks from the range that is sorted and merged below (std::remove_if only compacts the range; the Thunks vector is not resized in the lines shown) + auto ThunkBegin = Thunks.begin(); + auto ThunkEnd = Thunks.end(); + ThunkEnd = std::remove_if(ThunkBegin, ThunkEnd, [](const ThunkSection *TS) { + return TS->getSize() == 0; + }); + if (ThunkBegin == ThunkEnd) + continue; + // Order Thunks in ascending OutSecOff auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) { return A->OutSecOff < B->OutSecOff; }; - std::stable_sort(Thunks.begin(), Thunks.end(), ThunkCmp); + std::stable_sort(ThunkBegin, ThunkEnd, ThunkCmp); // Merge sorted vectors of Thunks and InputSections by OutSecOff std::vector Tmp; - Tmp.reserve(OS->Sections.size() + Thunks.size()); + Tmp.reserve(OS->Sections.size() + std::distance(ThunkBegin, ThunkEnd)); auto MergeCmp = [](const InputSection *A, const InputSection *B) { // std::merge requires a strict weak ordering. 
if (A->OutSecOff < B->OutSecOff) @@ -959,8 +968,8 @@ return true; return false; }; - std::merge(OS->Sections.begin(), OS->Sections.end(), Thunks.begin(), - Thunks.end(), std::back_inserter(Tmp), MergeCmp); + std::merge(OS->Sections.begin(), OS->Sections.end(), ThunkBegin, ThunkEnd, + std::back_inserter(Tmp), MergeCmp); OS->Sections = std::move(Tmp); // We need to insert the sections into the linker script input section @@ -970,19 +979,53 @@ } } +// Create one or more ThunkSections per OS that can be used to place +// non-inline Thunks. We attempt to place the ThunkSections using the following +// desirable properties: +// - Within range of the maximum number of callers +// - Minimise number of ThunkSections that need inserting +// +// We follow a simple but conservative heuristic to place ThunkSections close +// to a Target specific branch range. For an OutputSection that is smaller than +// the range, a single ThunkSection at the end will do. NOTE(review): NeedTrailingTS is initialised once, before the loop over OutputSections, and is never reset to true, so after any OutputSection inserts a ThunkSection no later OutputSection gets a trailing one; PrevIS is still nullptr, and is dereferenced, if the first InputSection already reaches Limit. Confirm whether both are intended. template -ThunkSection *ThunkCreator::getOSThunkSec(ThunkSection *&TS, - OutputSection *OS) { - if (TS == nullptr) { +void elf::ThunkCreator::createInitialThunkSections( + ArrayRef OutputSections) { + + bool NeedTrailingTS = true; + for (OutputSection *Base : OutputSections) { + auto *OS = dyn_cast(Base); + if (OS == nullptr) + continue; uint32_t Off = 0; + uint32_t Limit = Target->ThunkSectionSpacing - Target->ThunkSectionSize; + InputSection *PrevIS = nullptr; for (auto *IS : OS->Sections) { Off = IS->OutSecOff + IS->getSize(); + if (Off >= Limit) { + uint32_t ThunkOff = PrevIS->OutSecOff + PrevIS->getSize(); + addThunkSection(OS, ThunkOff); + NeedTrailingTS = false; + Limit = ThunkOff + Target->ThunkSectionSpacing; + } if ((IS->Flags & SHF_EXECINSTR) == 0) break; + PrevIS = IS; } - TS = addThunkSection(OS, Off); + if (NeedTrailingTS) + addThunkSection(OS, Off); } - return TS; +} + +template +ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS, + InputSection *IS) { + // Until range extension thunks are implemented, just return 
the first section + for (ThunkSection *TS : ThunkSections[OS]) { + return TS; + } + // No suitable ThunkSection exists; create one at IS->OutSecOff. + return addThunkSection(OS, IS->OutSecOff); } template @@ -1047,6 +1090,8 @@ // With existing Thunks pass 0 will create Thunks, pass 1 will // create no more Thunks so if we get to 2 something has gone wrong. fatal("Thunk creation not converged in sufficient number of passes"); + } else if (Target->ThunkSectionSpacing) { + createInitialThunkSections(OutputSections); } // Create all the Thunks and insert them into synthetic ThunkSections. The @@ -1056,7 +1101,6 @@ // ThunkSections back into the OutputSection as ThunkSections are not always // inserted into the same OutputSection as the caller. for (OutputSection *OS : OutputSections) { - ThunkSection *OSTS = nullptr; for (InputSection *IS : OS->Sections) { if (dyn_cast(IS) != nullptr) // Do not create Thunks for relocations from Thunks @@ -1077,7 +1121,7 @@ if (auto *TIS = T->getTargetInputSection()) TS = getISThunkSec(TIS, OS); else - TS = getOSThunkSec(OSTS, OS); + TS = getOSThunkSec(OS, IS); TS->addThunk(T); Thunks[T->ThunkSym] = T; } Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -68,6 +68,13 @@ // Given that, the smallest value that can be used in here is 0x10000. uint64_t DefaultImageBase = 0x10000; + // On systems with range extensions we place collections of Thunks at + // regular spacings that enable the majority of branches to reach the Thunks. + uint32_t ThunkSectionSpacing = 0x0; + // An estimate of how many thunks will be needed in a given + // ThunkSectionSpacing. NOTE(review): createInitialThunkSections subtracts this value from ThunkSectionSpacing to form an offset limit, which suggests a size in bytes rather than a count; confirm. + uint32_t ThunkSectionSize = 0x0; + uint32_t CopyRel; uint32_t GotRel; uint32_t PltRel;