diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -74,6 +74,12 @@
 
 private:
   uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+  // Estimates how many bytes to keep free below the forward branch limit of
+  // `isec` for thunks created for its call sites. The first candidate is
+  // `initialFactor * thunkSize`; it is doubled until a simulated layout fits.
+  unsigned computeSlopUpperBound(size_t finalIdx, size_t endIdx,
+                                 const ConcatInputSection *isec,
+                                 unsigned initialFactor = 64) const;
 
   std::vector<ConcatInputSection *> thunks;
 };
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -246,7 +246,8 @@
     // contains several branch instructions in succession, then the distance
     // from the current position to the position where the thunks are inserted
     // grows. So leave room for a bunch of thunks.
-    unsigned slop = 1024 * thunkSize;
+    unsigned slop = computeSlopUpperBound(finalIdx, endIdx, isec);
+
     while (finalIdx < endIdx && addr + size + inputs[finalIdx]->getSize() <
                                     isecVA + forwardBranchRange - slop)
       finalizeOne(inputs[finalIdx++]);
@@ -308,13 +309,12 @@
         }
       }
       // ... otherwise, create a new thunk.
       if (addr + size > highVA) {
-        // There were too many consecutive branch instructions for `slop`
-        // above. If you hit this: For the current algorithm, just bumping up
-        // slop above and trying again is probably simplest. (See also PR51578
-        // comment 5).
+        // computeSlopUpperBound() is only an estimate. Keep this a hard error
+        // rather than an assert, so that an underestimate cannot silently
+        // produce out-of-range branches in builds without assertions.
         fatal(Twine(__FUNCTION__) + ": FIXME: thunk range overrun");
       }
       thunkInfo.isec =
           makeSyntheticInputSection(isec->getSegName(), isec->getName());
       thunkInfo.isec->parent = this;
@@ -378,6 +378,70 @@
   }
 }
 
+// Estimates how many bytes finalize() should keep free below the forward
+// branch limit of `isec` so that thunks created for its call sites stay in
+// range. Candidates start at `initialFactor * thunkSize` and double until a
+// rough simulation of the layout fits. The result is capped at the forward
+// branch range; it is 0 only when the initial candidate is 0.
+unsigned TextOutputSection::computeSlopUpperBound(
+    const size_t initialFinalIdx, const size_t endIdx,
+    const ConcatInputSection *isec, const unsigned initialFactor) const {
+  const uint64_t thunkSize = target->thunkSize;
+  const uint64_t forwardBranchRange = target->forwardBranchRange;
+  const uint64_t isecVA = isec->getVA();
+  // A slop beyond the branch range would make `isecVA + forwardBranchRange -
+  // slop` wrap around, and the result has to fit the `unsigned` return type.
+  const uint64_t maxSlop =
+      forwardBranchRange < ~0u ? forwardBranchRange : uint64_t(~0u);
+
+  uint64_t slop = uint64_t(initialFactor) * thunkSize;
+  if (!slop)
+    return 0;
+  if (slop > maxSlop)
+    slop = maxSlop;
+  if (!isec->hasCallSites)
+    return static_cast<unsigned>(slop);
+
+  // Space charged per simulated thunk. The section size is a deliberately
+  // generous stand-in, but it must never be smaller than a real thunk, or
+  // the estimate stops being an upper bound for tiny sections.
+  const uint64_t isecSize = isec->getSize();
+  const uint64_t perThunkSize = isecSize > thunkSize ? isecSize : thunkSize;
+
+  // Returns true if, with `candidate` bytes of slop, every branch in `isec`
+  // could still reach a thunk appended after the inputs that finalize() would
+  // place first. Every branch is assumed to need its own thunk.
+  auto fitsWithSlop = [&](const uint64_t candidate) {
+    uint64_t computedSize = size;
+    for (size_t idx = initialFinalIdx;
+         idx < endIdx && addr + computedSize + inputs[idx]->getSize() <
+                             isecVA + forwardBranchRange - candidate;
+         ++idx) {
+      const ConcatInputSection *next = inputs[idx];
+      computedSize = alignTo(computedSize, next->align) + next->getSize();
+    }
+    for (const Reloc &r : reverse(isec->relocs)) {
+      if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
+        continue;
+      const uint64_t highVA = isecVA + r.offset + forwardBranchRange;
+      if (addr + computedSize > highVA)
+        return false;
+      computedSize = alignTo(computedSize, isec->align) + perThunkSize;
+    }
+    return true;
+  };
+
+  // Double iteratively instead of recursing. The loop ends at the cap and
+  // does not rely on unsigned wraparound to 0, which would return no slop
+  // at all exactly when the most slop is needed.
+  for (;; slop *= 2) {
+    if (slop >= maxSlop)
+      return static_cast<unsigned>(maxSlop);
+    if (fitsWithSlop(slop))
+      return static_cast<unsigned>(slop);
+  }
+}
+
 void ConcatOutputSection::finalizeFlags(InputSection *input) {
   switch (sectionType(input->getFlags())) {
   default /*type-unspec'ed*/: