diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLD_MACHO_MERGED_OUTPUT_SECTION_H
-#define LLD_MACHO_MERGED_OUTPUT_SECTION_H
+#ifndef LLD_MACHO_CONCAT_OUTPUT_SECTION_H
+#define LLD_MACHO_CONCAT_OUTPUT_SECTION_H
 
 #include "InputSection.h"
 #include "OutputSection.h"
@@ -24,7 +24,7 @@
 // files that are labeled with the same segment and section name. This class
 // contains all such sections and writes the data from each section sequentially
 // in the final binary.
-class ConcatOutputSection final : public OutputSection {
+class ConcatOutputSection : public OutputSection {
 public:
   explicit ConcatOutputSection(StringRef name)
       : OutputSection(ConcatKind, name) {}
@@ -37,27 +37,48 @@
   uint64_t getSize() const override { return size; }
   uint64_t getFileSize() const override { return fileSize; }
 
-  void addInput(ConcatInputSection *input);
-  void finalize() override;
-  bool needsThunks() const;
-  uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+  // Assign values to InputSection::outSecOff. In contrast to TextOutputSection,
+  // which does this in its implementation of `finalize()`, we can do this
+  // without `finalize()`'s sequential guarantees detailed in the block comment
+  // of `OutputSection::finalize()`.
+  virtual void finalizeContents();
 
+  void addInput(ConcatInputSection *input);
   void writeTo(uint8_t *buf) const override;
 
-  std::vector<ConcatInputSection *> inputs;
-  std::vector<ConcatInputSection *> thunks;
-
   static bool classof(const OutputSection *sec) {
     return sec->kind() == ConcatKind;
   }
 
   static ConcatOutputSection *getOrCreateForInput(const InputSection *);
 
-private:
-  void finalizeFlags(InputSection *input);
+  std::vector<ConcatInputSection *> inputs;
 
+protected:
   size_t size = 0;
   uint64_t fileSize = 0;
+  // Assigns `isec` its offset in this section and grows size/fileSize.
+  void finalizeOne(ConcatInputSection *);
+
+private:
+  void finalizeFlags(InputSection *input);
+};
+
+// ConcatOutputSections that contain code (text) require special handling to
+// support thunk insertion.
+class TextOutputSection : public ConcatOutputSection {
+public:
+  explicit TextOutputSection(StringRef name) : ConcatOutputSection(name) {}
+  // Intentionally empty: offsets are assigned in finalize() instead.
+  void finalizeContents() override {}
+  void finalize() override;
+  bool needsThunks() const;
+  void writeTo(uint8_t *buf) const override;
+
+private:
+  uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+
+  std::vector<ConcatInputSection *> thunks;
 };
 
 // We maintain one ThunkInfo per real function.
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -121,7 +121,7 @@
 // instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs
 // thunks for programs so large that branch source & destination addresses
 // might differ more than the range of branch instruction(s).
-bool ConcatOutputSection::needsThunks() const {
+bool TextOutputSection::needsThunks() const {
   if (!target->usesThunks())
     return false;
   uint64_t isecAddr = addr;
@@ -138,7 +138,7 @@
       auto *sym = r.referent.get<Symbol *>();
       // Pre-populate the thunkMap and memoize call site counts for every
       // InputSection and ThunkInfo. We do this for the benefit of
-      // ConcatOutputSection::estimateStubsInRangeVA()
+      // estimateStubsInRangeVA().
       ThunkInfo &thunkInfo = thunkMap[sym];
       // Knowing ThunkInfo call site count will help us know whether or not we
       // might need to create more for this referent at the time we are
@@ -154,7 +154,7 @@
 // Since __stubs is placed after __text, we must estimate the address
 // beyond which stubs are within range of a simple forward branch.
 // This is called exactly once, when the last input section has been finalized.
-uint64_t ConcatOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
+uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
   // Tally the functions which still have call sites remaining to process,
   // which yields the maximum number of thunks we might yet place.
   size_t maxPotentialThunks = 0;
@@ -193,23 +193,27 @@
   return stubsInRangeVA;
 }
 
-void ConcatOutputSection::finalize() {
-  uint64_t isecAddr = addr;
-  uint64_t isecFileOff = fileOff;
-  auto finalizeOne = [&](ConcatInputSection *isec) {
-    isecAddr = alignTo(isecAddr, isec->align);
-    isecFileOff = alignTo(isecFileOff, isec->align);
-    isec->outSecOff = isecAddr - addr;
-    isec->isFinal = true;
-    isecAddr += isec->getSize();
-    isecFileOff += isec->getFileSize();
-  };
+// Assign `isec` the next aligned offset within this section, mark it final,
+// and advance the running size and file size past it.
+void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) {
+  size = alignTo(size, isec->align);
+  fileSize = alignTo(fileSize, isec->align);
+  isec->outSecOff = size;
+  isec->isFinal = true;
+  size += isec->getSize();
+  fileSize += isec->getFileSize();
+}
+
+// Finalize every input in order; no thunks are inserted on this path.
+void ConcatOutputSection::finalizeContents() {
+  for (ConcatInputSection *isec : inputs)
+    finalizeOne(isec);
+}
 
+void TextOutputSection::finalize() {
   if (!needsThunks()) {
     for (ConcatInputSection *isec : inputs)
       finalizeOne(isec);
-    size = isecAddr - addr;
-    fileSize = isecFileOff - fileOff;
     return;
   }
 
@@ -225,7 +229,7 @@
   // Walk all sections in order. Finalize all sections that are less than
   // forwardBranchRange in front of it.
   // isecVA is the address of the current section.
-  // isecAddr is the start address of the first non-finalized section.
+  // addr + size is the start address of the first non-finalized section.
 
   // inputs[finalIdx] is for finalization (address-assignment)
   size_t finalIdx = 0;
@@ -246,7 +250,7 @@
     // from the current position to the position where the thunks are inserted
     // grows. So leave room for a bunch of thunks.
     unsigned slop = 256 * thunkSize;
-    while (finalIdx < endIdx && isecAddr + inputs[finalIdx]->getSize() <
+    while (finalIdx < endIdx && addr + size + inputs[finalIdx]->getSize() <
                                 isecVA + forwardBranchRange - slop)
       finalizeOne(inputs[finalIdx++]);
 
@@ -307,7 +311,7 @@
         }
       }
       // ... otherwise, create a new thunk.
-      if (isecAddr > highVA) {
+      if (addr + size > highVA) {
         // There were too many consecutive branch instructions for `slop`
         // above. If you hit this: For the current algorithm, just bumping up
         // slop above and trying again is probably simplest. (See also PR51578
@@ -342,12 +346,10 @@
       thunkInfo.sym->used = true;
       target->populateThunk(thunkInfo.isec, funcSym);
       finalizeOne(thunkInfo.isec);
       thunks.push_back(thunkInfo.isec);
       ++thunkCount;
     }
   }
-  size = isecAddr - addr;
-  fileSize = isecFileOff - fileOff;
 
   log("thunks for " + parent->name + "," + name +
       ": funcs = " + std::to_string(thunkMap.size()) +
@@ -358,6 +360,12 @@
 }
 
+// Write each input at its previously assigned offset within `buf`.
 void ConcatOutputSection::writeTo(uint8_t *buf) const {
+  for (ConcatInputSection *isec : inputs)
+    isec->writeTo(buf + isec->outSecOff);
+}
+
+void TextOutputSection::writeTo(uint8_t *buf) const {
   // Merge input sections from thunk & ordinary vectors
   size_t i = 0, ie = inputs.size();
   size_t t = 0, te = thunks.size();
@@ -402,8 +410,16 @@
 ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
   NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
   ConcatOutputSection *&osec = concatOutputSections[names];
-  if (!osec)
-    osec = make<ConcatOutputSection>(names.second);
+  if (!osec) {
+    // Only text-segment sections get thunk-aware handling; gccExceptTab and
+    // ehFrame are excluded (presumably they hold no code -- confirm).
+    if (isec->getSegName() == segment_names::text &&
+        isec->getName() != section_names::gccExceptTab &&
+        isec->getName() != section_names::ehFrame)
+      osec = make<TextOutputSection>(names.second);
+    else
+      osec = make<ConcatOutputSection>(names.second);
+  }
   return osec;
 }
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -972,6 +972,21 @@
 void Writer::finalizeAddresses() {
   TimeTraceScope timeScope("Finalize addresses");
   uint64_t pageSize = target->getPageSize();
+
+  // We could parallelize this loop, but local benchmarking indicates it is
+  // faster to do it all in the main thread.
+  for (OutputSegment *seg : outputSegments) {
+    if (seg == linkEditSegment)
+      continue;
+    for (OutputSection *osec : seg->getSections()) {
+      if (!osec->isNeeded())
+        continue;
+      // Other kinds of OutputSections have already been finalized.
+      if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec))
+        concatOsec->finalizeContents();
+    }
+  }
+
   // Ensure that segments (and the sections they contain) are allocated
   // addresses in ascending order, which dyld requires.
   //