Index: lld/trunk/ELF/Arch/ARM.cpp =================================================================== --- lld/trunk/ELF/Arch/ARM.cpp +++ lld/trunk/ELF/Arch/ARM.cpp @@ -63,6 +63,37 @@ // ARM uses Variant 1 TLS TcbSize = 8; NeedsThunks = true; + + // The placing of pre-created ThunkSections is controlled by the + // ThunkSectionSpacing parameter. The aim is to place the + // ThunkSection such that all branches from the InputSections prior to the + // ThunkSection can reach a Thunk placed at the end of the ThunkSection. + // Graphically: + // | up to ThunkSectionSpacing .text input sections | + // | ThunkSection | + // | up to ThunkSectionSpacing .text input sections | + // | ThunkSection | + + // Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is to + // match the most common expected case of a Thumb 2 encoded BL, BLX or B.W + // ARM B, BL, BLX range +/- 32MiB + // Thumb B.W, BL, BLX range +/- 16MiB + // Thumb conditional B.W range +/- 1MiB + // If a branch cannot reach a pre-created ThunkSection a new one will be + // created so we can handle the rare cases of a Thumb 2 conditional branch. + // We intentionally use a lower size for ThunkSectionSpacing than the maximum + // branch range so the end of the ThunkSection is more likely to be within + // range of the branch instruction that is furthest away. The value we shorten + // ThunkSectionSpacing by is set conservatively to allow us to create 16,384 + // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to + // one of the Thunks going out of range. + + // FIXME: lld assumes that the Thumb BL and BLX encoding permits the J1 and + // J2 bits to be used to extend the branch range. On earlier Architectures + // such as ARMv4, ARMv5 and ARMv6 (except ARMv6T2) the range is +/- 4MiB. If + // support for the earlier encodings is added then when they are used the + // ThunkSectionSpacing will need lowering. 
+ ThunkSectionSpacing = 0x1000000 - 0x30000; } uint32_t ARM::calcEFlags() const { Index: lld/trunk/ELF/Relocations.h =================================================================== --- lld/trunk/ELF/Relocations.h +++ lld/trunk/ELF/Relocations.h @@ -139,16 +139,20 @@ private: void mergeThunks(ArrayRef OutputSections); - ThunkSection *getOSThunkSec(OutputSection *OS, InputSectionDescription *ISD); + ThunkSection *getISDThunkSec(OutputSection *OS, InputSectionDescription *ISD); ThunkSection *getISThunkSec(InputSection *IS); + void createInitialThunkSections(ArrayRef OutputSections); + void forEachInputSectionDescription( ArrayRef OutputSections, std::function Fn); std::pair getThunk(SymbolBody &Body, RelType Type); + ThunkSection *addThunkSection(OutputSection *OS, InputSectionDescription *, uint64_t Off); + // Record all the available Thunks for a Symbol llvm::DenseMap> ThunkedSymbols; Index: lld/trunk/ELF/Relocations.cpp =================================================================== --- lld/trunk/ELF/Relocations.cpp +++ lld/trunk/ELF/Relocations.cpp @@ -1055,7 +1055,11 @@ if (ISD->ThunkSections.empty()) return; - // Order Thunks in ascending OutSecOff + // Remove any zero sized precreated Thunks. + llvm::erase_if(ISD->ThunkSections, [](const ThunkSection *TS) { + return TS->getSize() == 0; + }); + // Order Thunks in ascending OutSecOff. 
std::stable_sort(ISD->ThunkSections.begin(), ISD->ThunkSections.end(), [](const ThunkSection *A, const ThunkSection *B) { return A->OutSecOff < B->OutSecOff; @@ -1084,22 +1088,17 @@ }); } -static uint32_t findEndOfFirstNonExec(OutputSection &Cmd) { - for (BaseCommand *Base : Cmd.SectionCommands) - if (auto *ISD = dyn_cast(Base)) - for (auto *IS : ISD->Sections) - if ((IS->Flags & SHF_EXECINSTR) == 0) - return IS->OutSecOff + IS->getSize(); - return 0; -} - -ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS, - InputSectionDescription *ISD) { +ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *OS, + InputSectionDescription *ISD) { + // FIXME: When range extension thunks are supported we will need to check + // that the ThunkSection is in range of the caller. if (!ISD->ThunkSections.empty()) return ISD->ThunkSections.front(); - uint32_t Off = findEndOfFirstNonExec(*OS); - return addThunkSection(OS, ISD, Off); + // FIXME: When range extension thunks are supported we must handle the case + // where no pre-created ThunkSections are in range by creating a new one in + // range; for now, it is unreachable. + llvm_unreachable("Must have created at least one ThunkSection per ISR"); } // Add a Thunk that needs to be placed in a ThunkSection that immediately @@ -1126,6 +1125,38 @@ return TS; } +// Create one or more ThunkSections per OS that can be used to place Thunks. +// We attempt to place the ThunkSections using the following desirable +// properties: +// - Within range of the maximum number of callers +// - Minimise the number of ThunkSections +// +// We follow a simple but conservative heuristic to place ThunkSections at +// offsets that are multiples of a Target specific branch range. +// For an InputSectionRange that is smaller than the range, a single +// ThunkSection at the end of the range will do. 
+void ThunkCreator::createInitialThunkSections( + ArrayRef OutputSections) { + forEachInputSectionDescription( + OutputSections, [&](OutputSection *OS, InputSectionDescription *ISD) { + if (ISD->Sections.empty()) + return; + uint32_t ISLimit; + uint32_t PrevISLimit = ISD->Sections.front()->OutSecOff; + uint32_t ThunkUpperBound = PrevISLimit + Target->ThunkSectionSpacing; + + for (const InputSection *IS : ISD->Sections) { + ISLimit = IS->OutSecOff + IS->getSize(); + if (ISLimit > ThunkUpperBound) { + addThunkSection(OS, ISD, PrevISLimit); + ThunkUpperBound = PrevISLimit + Target->ThunkSectionSpacing; + } + PrevISLimit = ISLimit; + } + addThunkSection(OS, ISD, ISLimit); + }); +} + ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS, InputSectionDescription *ISD, uint64_t Off) { @@ -1175,6 +1206,9 @@ // extension Thunks are not yet supported. bool ThunkCreator::createThunks(ArrayRef OutputSections) { bool AddressesChanged = false; + if (Pass == 0 && Target->ThunkSectionSpacing) + createInitialThunkSections(OutputSections); + // Create all the Thunks and insert them into synthetic ThunkSections. The // ThunkSections are later inserted back into InputSectionDescriptions. 
// We separate the creation of ThunkSections from the insertion of the @@ -1198,7 +1232,7 @@ if (auto *TIS = T->getTargetInputSection()) TS = getISThunkSec(TIS); else - TS = getOSThunkSec(OS, ISD); + TS = getISDThunkSec(OS, ISD); TS->addThunk(T); Thunks[T->ThunkSym] = T; } Index: lld/trunk/ELF/SyntheticSections.cpp =================================================================== --- lld/trunk/ELF/SyntheticSections.cpp +++ lld/trunk/ELF/SyntheticSections.cpp @@ -2391,6 +2391,8 @@ } InputSection *ThunkSection::getTargetInputSection() const { + if (Thunks.empty()) + return nullptr; const Thunk *T = Thunks.front(); return T->getTargetInputSection(); } Index: lld/trunk/ELF/Target.h =================================================================== --- lld/trunk/ELF/Target.h +++ lld/trunk/ELF/Target.h @@ -74,6 +74,10 @@ // end of .got uint64_t GotBaseSymOff = 0; + // On systems with range extensions we place collections of Thunks at + // regular spacings that enable the majority of branches to reach the Thunks. + uint32_t ThunkSectionSpacing = 0; + RelType CopyRel; RelType GotRel; RelType PltRel; Index: lld/trunk/test/ELF/arm-thumb-thunk-symbols.s =================================================================== --- lld/trunk/test/ELF/arm-thumb-thunk-symbols.s +++ lld/trunk/test/ELF/arm-thumb-thunk-symbols.s @@ -25,18 +25,18 @@ b thumb_fn // CHECK: Name: __Thumbv7ABSLongThunk_arm_fn -// CHECK-NEXT: Value: 0x11005 +// CHECK-NEXT: Value: 0x12005 // CHECK-NEXT: Size: 10 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Function (0x2) // CHECK: Name: __ARMv7ABSLongThunk_thumb_fn -// CHECK-NEXT: Value: 0x11010 +// CHECK-NEXT: Value: 0x12010 // CHECK-NEXT: Size: 12 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Function (0x2) // CHECK-PI: Name: __ThumbV7PILongThunk_arm_fn -// CHECK-PI-NEXT: Value: 0x1005 +// CHECK-PI-NEXT: Value: 0x2005 // CHECK-PI-NEXT: Size: 12 // CHECK-PI-NEXT: Binding: Local (0x0) // CHECK-PI-NEXT: Type: Function (0x2)