diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -33,6 +33,7 @@ void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, uint64_t entryAddr) const override; const RelocAttrs &getRelocAttrs(uint8_t type) const override; + void populateThunk(InputSection *thunk, Defined *defined) override; }; } // namespace @@ -108,10 +109,28 @@ cpuSubtype = CPU_SUBTYPE_ARM64_ALL; stubSize = sizeof(stubCode); + thunkSize = llvm::alignTo<16>(stubSize); + branchRange = llvm::maxIntN(28); stubHelperHeaderSize = sizeof(stubHelperHeaderCode); stubHelperEntrySize = sizeof(stubHelperEntryCode); } +void ARM64::populateThunk(InputSection *thunk, Defined *defined) { + thunk->align = 4; + // FIXME: populate data + thunk->data = {reinterpret_cast(stubCode), sizeof(stubCode)}; + thunk->relocs.push_back({/*type=*/ARM64_RELOC_PAGEOFF12, + /*length=*/2, /*pcrel=*/false, + /*thunkable=*/false, + /*offset=*/4, /*addend=*/0, + /*referent=*/defined}); + thunk->relocs.push_back({/*type=*/ARM64_RELOC_PAGE21, + /*length=*/2, /*pcrel=*/true, + /*thunkable=*/false, + /*offset=*/0, /*addend=*/0, + /*referent=*/defined}); +} + TargetInfo *macho::createARM64TargetInfo() { static ARM64 t; return &t; diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -186,6 +186,8 @@ cpuSubtype = CPU_SUBTYPE_X86_64_ALL; stubSize = sizeof(stub); + thunkSize = 0; + branchRange = std::numeric_limits::max(); stubHelperHeaderSize = sizeof(stubHelperHeader); stubHelperEntrySize = sizeof(stubHelperEntry); } diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -74,6 +74,7 @@ struct Configuration { Symbol *entry; + bool verbose = false; bool hasReexports = false; bool allLoad = false; bool forceLoadObjC = false; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp 
@@ -876,6 +876,11 @@ MachOOptTable parser; InputArgList args = parser.parse(argsArr.slice(1)); + errorHandler().errorLimitExceededMsg = + "too many errors emitted, stopping now " + "(use --error-limit=0 to see all errors)"; + errorHandler().errorLimit = args::getInteger(args, OPT_error_limit_eq, 20); + if (args.hasArg(OPT_help_hidden)) { parser.printHelp(argsArr[0], /*showHidden=*/true); return true; @@ -935,6 +940,7 @@ for (const Arg *arg : args.filtered(OPT_U)) symtab->addDynamicLookup(arg->getValue()); + config->verbose = args.hasArg(OPT_verbose); config->mapFile = args.getLastArgValue(OPT_map); config->outputFile = args.getLastArgValue(OPT_o, "a.out"); config->astPaths = args.getAllArgValues(OPT_add_ast_path); diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -44,6 +44,7 @@ uint32_t align = 1; uint32_t flags = 0; + bool thunkable = false; ArrayRef data; std::vector relocs; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -34,20 +34,16 @@ uint64_t InputSection::getVA() const { return parent->addr + outSecOff; } -static uint64_t resolveSymbolVA(uint8_t *loc, const Symbol &sym, uint8_t type) { +static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { const RelocAttrs &relocAttrs = target->getRelocAttrs(type); if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) { - if (sym.isInStubs()) - return in.stubs->addr + sym.stubsIndex * target->stubSize; + return sym->resolveBranchVA(); } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) { - if (sym.isInGot()) - return in.got->addr + sym.gotIndex * target->wordSize; + return sym->resolveGotVA(); } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) { - if (sym.isInGot()) - return in.tlvPointers->addr + sym.gotIndex * target->wordSize; - assert(isa(&sym)); + return sym->resolveTlvVA(); } - return sym.getVA(); + return sym->getVA(); } void 
InputSection::writeTo(uint8_t *buf) { @@ -73,7 +69,7 @@ if (target->hasAttr(r.type, RelocAttrBits::LOAD) && !referentSym->isInGot()) target->relaxGotLoad(loc, r.type); - referentVA = resolveSymbolVA(loc, *referentSym, r.type); + referentVA = resolveSymbolVA(referentSym, r.type); if (isThreadLocalVariables(flags)) { // References from thread-local variable sections are treated as offsets diff --git a/lld/MachO/MergedOutputSection.h b/lld/MachO/MergedOutputSection.h --- a/lld/MachO/MergedOutputSection.h +++ b/lld/MachO/MergedOutputSection.h @@ -17,6 +17,10 @@ namespace lld { namespace macho { +class Defined; + +using InputsVector = std::vector; + // Linking multiple files will inevitably mean resolving sections in different // files that are labeled with the same segment and section name. This class // contains all such sections and writes the data from each section sequentially @@ -37,7 +41,14 @@ void writeTo(uint8_t *buf) const override; - std::vector inputs; + InputsVector inputs; + InputsVector thunks; + bool thunkable = false; + + size_t createThunks(); + Defined *makeThunk(StringRef name, uint64_t maxAddr); + uint64_t placeThunk(uint64_t callSiteAddr, uint64_t nearestThunkI) const; + bool needsThunk(Symbol funcSym, uint64_t callSiteAddr) const; static bool classof(const OutputSection *sec) { return sec->kind() == MergedKind; diff --git a/lld/MachO/MergedOutputSection.cpp b/lld/MachO/MergedOutputSection.cpp --- a/lld/MachO/MergedOutputSection.cpp +++ b/lld/MachO/MergedOutputSection.cpp @@ -7,6 +7,10 @@ //===----------------------------------------------------------------------===// #include "MergedOutputSection.h" +#include "Config.h" +#include "OutputSegment.h" +#include "Symbols.h" +#include "Target.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/BinaryFormat/MachO.h" @@ -24,29 +28,57 @@ mergeFlags(input->flags); align = std::max(align, input->align); } - inputs.push_back(input); input->parent = this; + if 
(input->thunkable) + thunkable = true; } void MergedOutputSection::finalize() { uint64_t isecAddr = addr; uint64_t isecFileOff = fileOff; - for (InputSection *isec : inputs) { + auto finalizeOne = [=, &isecAddr, &isecFileOff](InputSection *isec) { isecAddr = alignTo(isecAddr, isec->align); isecFileOff = alignTo(isecFileOff, isec->align); isec->outSecOff = isecAddr - addr; isec->outSecFileOff = isecFileOff - fileOff; isecAddr += isec->getSize(); isecFileOff += isec->getFileSize(); + }; + + // Merge input sections from thunk & ordinary vectors + // When tentative addresses collide, place thunks first + InputsVector::const_iterator iIt = inputs.begin(); + InputsVector::const_iterator tIt = thunks.begin(); + while (iIt < inputs.end() || tIt < thunks.end()) { + while (iIt < inputs.end() && + (tIt == thunks.end() || (*iIt)->outSecOff < (*tIt)->outSecOff)) + finalizeOne(*iIt++); + while (tIt < thunks.end() && + (iIt == inputs.end() || (*tIt)->outSecOff <= (*iIt)->outSecOff)) + finalizeOne(*tIt++); } + size = isecAddr - addr; fileSize = isecFileOff - fileOff; } void MergedOutputSection::writeTo(uint8_t *buf) const { - for (InputSection *isec : inputs) - isec->writeTo(buf + isec->outSecFileOff); + // Merge input sections from thunk & ordinary vectors + InputsVector::const_iterator iIt = inputs.cbegin(); + InputsVector::const_iterator tIt = thunks.cbegin(); + while (iIt < inputs.cend() || tIt < thunks.cend()) { + while (iIt < inputs.cend() && + (tIt == thunks.cend() || (*iIt)->outSecOff < (*tIt)->outSecOff)) { + InputSection *isec = *iIt++; + isec->writeTo(buf + isec->outSecFileOff); + } + while (tIt < thunks.cend() && + (iIt == inputs.cend() || (*tIt)->outSecOff <= (*iIt)->outSecOff)) { + InputSection *isec = *tIt++; + isec->writeTo(buf + isec->outSecFileOff); + } + } } // TODO: this is most likely wrong; reconsider how section flags @@ -70,3 +102,179 @@ flags |= inputFlags; flags &= pureMask; } + +// A branch-range-extension thunk comprises ... 
+// +// (1) new Defined privateExtern symbol for the thunk, which references ... +// (2) new InputSection, which contains ... +// (3) new data for the instructions to load & branch to the far address +// +// (4) new Relocs on instructions to load the far address, which reference ... +// (5) old Defined extern symbol for the real function +// +// Optimal thunk-placement algorithm features: +// +// * Single pass: O(n) on the number of call sites. +// +// * Accounts for the exact space overhead of thunks - no heuristics +// +// * Exploits the full range of call instructions - forward & backward +// +// Data: +// +// * DenseMap<Symbol *, ThunkInfo>: Map the function symbol to its +// thunk bookkeeper. +// +// * struct ThunkInfo (bookkeeper): Call instructions have limited range, +// and distant call sites might be unable to reach the same thunk, +// so multiple thunks are necessary to serve all call sites in a +// very large program. The thunk bookkeeper stores state for all +// thunks associated with a function: (a) the Defined symbol and (b) +// the InputSection of the active thunk incarnation, plus (c) the +// sequence number that names the next incarnation. +// +// * A thunk incarnation comprises (a) a private-extern Defined symbol +// pointing to (b) an InputSection holding machine instructions +// (same code as a MachO stub), and (c) Reloc(s) that reference the +// real function for fixing-up the stub code. +// +// * std::vector<InputSection *> MergedOutputSection::thunks: a vector +// parallel to inputs. We can store new thunks via cheap vector +// append, rather than costly insertion into the inputs vector. +// +// Control Flow: +// +// * Scan the call sites by ascending address; i.e., +// call-site addresses increase monotonically. +// +// * When a function is beyond the range of a call site, we need a +// thunk. Place it at the maximum forward address from the call +// site. 
Call sites increase monotonically and thunks are always +// placed at maximum distance from them; thus, we place thunks at +// monotonically increasing addresses. This implies that once a thunk +// is placed, all earlier addresses are fixed and stable. +// +// * The number of thunks between a call site and its thunk is known +// and stable, so we can precisely determine reachability. +// +// * MergedInputSection::finalize() and MergedInputSection::writeTo() +// merge the inputs and thunks vectors (both ordered by asending +// address), which is simple and cheap. + +struct ThunkInfo { + Defined *sym = nullptr; + InputSection *isec = nullptr; + // InputsVector::iterator it; + uint8_t sequence = 0; +}; + +static DenseMap thunkMap; + +class ThunksVectorBounds { +public: + ThunksVectorBounds(InputsVector &v) : v(v) {} + + size_t getLowThunksSize() { return (midI - lowI) * target->thunkSize; } + size_t getHighThunksSize() { return (highI - midI) * target->thunkSize; } + + bool reachable(uint64_t va) { return lowVA < va && va < highVA; } + + void set(uint64_t va) { + midVA = va; + lowVA = alignTo<16>(va - target->branchRange + 16); + highVA = alignTo<16>(va + target->branchRange - 16); + size_t n = v.size(); + if (n == 0) + return; + while (lowI < n && v[lowI]->getVA() < lowVA) + lowI++; + while (midI < n && v[midI]->getVA() < midVA) + midI++; + while (highI < n && v[highI]->getVA() < highVA) + highI++; + while (lowI < n && v[lowI]->getVA() < lowVA + getLowThunksSize()) + lowI++; + while (highI >= n && v[highI - 1]->getVA() > highVA - getHighThunksSize()) + highI--; + lowVA = alignTo<16>(va - target->branchRange + getLowThunksSize()); + highVA = alignTo<16>(va + target->branchRange - getHighThunksSize()); + } + + InputsVector &v; + size_t lowI = 0; + size_t midI = 0; + size_t highI = 0; + uint64_t lowVA = 0; + uint64_t midVA = 0; + uint64_t highVA = 0; +}; + +size_t MergedOutputSection::createThunks() { + size_t relocCount = 0; + size_t callSiteCount = 0; + size_t 
thunkCallCount = 0; + size_t thunkCount = 0; + thunks.reserve(inputs.size()); + ThunksVectorBounds bounds(thunks); + const InputSection *peer = nullptr; + for (auto *isec : inputs) { + if (!isec->thunkable) + continue; + if (!peer) + peer = isec; + uint64_t isecVA = isec->getVA(); + // Relocs are sequenced by descending address. Iterate in + // reverse so we can process call sites by ascending address. + for (Reloc &r : reverse(isec->relocs)) { + relocCount++; + if (!r.thunkable) + continue; + auto *funcSym = r.referent.dyn_cast(); + assert(funcSym); + assert(!funcSym->isWeakDef()); + if (auto *defined = dyn_cast(funcSym)) + assert(defined->isExternal()); + else if (auto *dylibSym = dyn_cast(funcSym)) + assert(!dylibSym->isWeakDef()); + else + assert(isa(funcSym) || isa(funcSym)); + + callSiteCount++; + bounds.set(isecVA + r.offset); + + uint64_t funcVA = funcSym->resolveBranchVA(); + if (bounds.reachable(funcVA)) + continue; + thunkCallCount++; + ThunkInfo &thunkInfo = thunkMap[funcSym]; + uint64_t thunkVA = thunkInfo.sym ? thunkInfo.isec->getVA() + : std::numeric_limits::max(); + if (!bounds.reachable(thunkVA)) { + thunkInfo.isec = make(); + thunkInfo.isec->name = peer->name; + thunkInfo.isec->segname = peer->segname; + thunkInfo.isec->parent = this; + thunkInfo.isec->outSecOff = bounds.highVA - addr; + Twine thunkName = funcSym->getName() + ".thunk." 
+ + std::to_string(thunkInfo.sequence++); + thunkInfo.sym = make(saver.save(thunkName), /*file=*/nullptr, + thunkInfo.isec, /*value=*/0, + /*size=*/target->thunkSize, + /*isWeakDef=*/false, /*isExternal=*/true, + /*isPrivateExtern=*/true); + target->populateThunk(thunkInfo.isec, thunkInfo.sym); + thunks.push_back(thunkInfo.isec); + thunkCount++; + } + + r.referent = thunkInfo.sym; + } + } + if (config->verbose) + warn("thunks for " + parent->name + "," + name + + ": funcs = " + std::to_string(thunkMap.size()) + + ", relocs = " + std::to_string(relocCount) + + ", all calls = " + std::to_string(callSiteCount) + + ", thunk calls = " + std::to_string(thunkCallCount) + + ", thunks = " + std::to_string(thunkCount)); + return thunkCount; +} diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -10,6 +10,11 @@ def help_hidden : Flag<["--"], "help-hidden">, HelpText<"Display help for hidden options">, Group; +def verbose : Flag<["--"], "verbose">, + Group; +def error_limit_eq : Joined<["--"], "error-limit=">, + HelpText<"Maximum number of errors to print before exiting (default: 20)">, + Group; def color_diagnostics: Flag<["--"], "color-diagnostics">, HelpText<"Alias for --color-diagnostics=always">, Group; diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h --- a/lld/MachO/Relocations.h +++ b/lld/MachO/Relocations.h @@ -52,8 +52,10 @@ struct Reloc { uint8_t type = llvm::MachO::GENERIC_RELOC_INVALID; - bool pcrel = false; uint8_t length = 0; + bool pcrel = false; + bool thunkable = false; + // The offset from the start of the subsection that this relocation belongs // to. uint64_t offset = 0; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -72,6 +72,15 @@ // Whether this symbol is in the StubsSection. 
bool isInStubs() const { return stubsIndex != UINT32_MAX; } + uint64_t getStubVA() const; + uint64_t getGotVA() const; + uint64_t getTlvVA() const; + uint64_t resolveBranchVA() const { + return isInStubs() ? getStubVA() : getVA(); + } + uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } + uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } + // The index of this symbol in the GOT or the TLVPointer section, depending // on whether it is a thread-local. A given symbol cannot be referenced by // both these sections at once. @@ -239,11 +248,13 @@ }; union SymbolUnion { - alignas(Defined) char a[sizeof(Defined)]; - alignas(Undefined) char b[sizeof(Undefined)]; - alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; - alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; - alignas(LazySymbol) char e[sizeof(LazySymbol)]; +#define X(T) alignas(T) char u_##T[sizeof(T)] + X(Defined); + X(Undefined); + X(CommonSymbol); + X(DylibSymbol); + X(LazySymbol); +#undef X }; template diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -27,6 +27,10 @@ return demangle(b.getName()); } +uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); } +uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); } +uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); } + uint64_t Defined::getVA() const { if (isAbsolute()) return value; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -122,6 +122,10 @@ void addEntry(Symbol *sym); + uint64_t getVA(uint32_t gotIndex) { + return addr + gotIndex * target->wordSize; + } + private: llvm::SetVector entries; }; @@ -289,6 +293,9 @@ // Returns whether the symbol was added. Note that every stubs entry will // have a corresponding entry in the LazyPointerSection. 
bool addEntry(Symbol *); + uint64_t getVA(uint32_t stubsIndex) const { + return addr + stubsIndex * target->stubSize; + } private: llvm::SetVector entries; diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -24,6 +24,7 @@ LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); class Symbol; +class Defined; class DylibSymbol; class InputSection; @@ -63,6 +64,8 @@ virtual uint64_t getPageSize() const = 0; + virtual void populateThunk(InputSection *thunk, Defined *defined) {} + bool hasAttr(uint8_t type, RelocAttrBits bit) const { return getRelocAttrs(type).hasAttr(bit); } @@ -72,9 +75,11 @@ uint64_t pageZeroSize; size_t stubSize; + size_t thunkSize; size_t stubHelperHeaderSize; size_t stubHelperEntrySize; size_t wordSize; + ssize_t branchRange; }; TargetInfo *createX86_64TargetInfo(); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/xxhash.h" #include +#include using namespace llvm; using namespace llvm::MachO; @@ -51,9 +52,9 @@ void scanSymbols(); template void createOutputSections(); template void createLoadCommands(); - void finalizeAddresses(); void finalizeLinkEditSegment(); void assignAddresses(OutputSegment *); + void assignAddresses(); void openFile(); void writeSections(); @@ -511,9 +512,16 @@ } // namespace -// Adds stubs and bindings where necessary (e.g. if the symbol is a -// DylibSymbol.) -static void prepareBranchTarget(Symbol *sym) { +// Add stubs and bindings where necessary (e.g. if the symbol is a +// DylibSymbol). Return TRUE if this is a call that might need a +// branch-range-extension thunk. This is something of a kludge for +// sake of performance: We can't process thunks until address +// assigment in output-sections finalize() time, at which time we must +// iterate over all relocs again. 
Here, we dig deeply enough into the +// attributes of a branch reloc that we can determine its eligibility +// for thunking, so it is wise to memoize the result. + +static bool prepareBranchTarget(Symbol *sym) { if (auto *dysym = dyn_cast(sym)) { if (in.stubs->addEntry(dysym)) { if (sym->isWeakDef()) { @@ -525,6 +533,7 @@ in.lazyBinding->addEntry(dysym); } } + return target->thunkSize > 0; } else if (auto *defined = dyn_cast(sym)) { if (defined->isExternalWeakDef()) { if (in.stubs->addEntry(sym)) { @@ -533,8 +542,11 @@ in.weakBinding->addEntry(sym, in.lazyPointers->isec, sym->stubsIndex * target->wordSize); } + } else { + return target->thunkSize > 0; } } + return false; } // Can a symbol's address can only be resolved at runtime? @@ -546,12 +558,24 @@ return false; } -static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, - const Reloc &r) { +static size_t createThunks() { + size_t thunkCount = 0; + for (const OutputSegment *seg : outputSegments) { + for (OutputSection *osec : seg->getSections()) { + auto *merged = dyn_cast(osec); + if (merged && merged->thunkable) + thunkCount += merged->createThunks(); + } + } + return thunkCount; +} + +static void prepareSymbolRelocation(Symbol *sym, InputSection *isec, Reloc &r) { const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type); if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) { - prepareBranchTarget(sym); + if (prepareBranchTarget(sym)) + isec->thunkable = r.thunkable = true; } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) { if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym)) in.got->addEntry(sym); @@ -588,7 +612,10 @@ if (auto *undefined = dyn_cast(sym)) treatUndefinedSymbol(*undefined); // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. 
- if (!isa(sym) && validateSymbolRelocation(sym, isec, r)) + if (isa(sym)) + warn("relocation to lazy symbol: " + toString(*sym) + + "\n>>> defined in " + toString(sym->getFile())); + else if (!isa(sym) && validateSymbolRelocation(sym, isec, r)) prepareSymbolRelocation(sym, isec, r); } else { assert(r.referent.is()); @@ -927,8 +954,8 @@ linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); } -void Writer::finalizeAddresses() { - TimeTraceScope timeScope("Finalize addresses"); +void Writer::assignAddresses() { + TimeTraceScope timeScope("Assign addresses"); uint64_t pageSize = target->getPageSize(); // Ensure that segments (and the sections they contain) are allocated // addresses in ascending order, which dyld requires. @@ -949,8 +976,6 @@ seg->vmSize = addr - seg->firstSection()->addr; seg->fileSize = fileOff - seg->fileOff; } - - // FIXME(gkm): create branch-extension thunks here, then adjust addresses } void Writer::finalizeLinkEditSegment() { @@ -1052,10 +1077,16 @@ in.stubHelper->setup(); scanSymbols(); createOutputSections(); - // No more sections nor segments are created beyond this point. 
+ // After this point, we create no new segments; HOWEVER, we might + // yet create branch-range extension thunks for architectures whose + // hardware call instructions have limited range, e.g., ARM(64). + // The thunks are created as input sections interspersed among the + // ordinary text input sections. sortSegmentsAndSections(); createLoadCommands(); - finalizeAddresses(); + assignAddresses(); + if (createThunks()) + assignAddresses(); finalizeLinkEditSegment(); writeMapFile(); writeOutputFile(); diff --git a/lld/test/MachO/archive.s b/lld/test/MachO/archive.s --- a/lld/test/MachO/archive.s +++ b/lld/test/MachO/archive.s @@ -1,11 +1,13 @@ # REQUIRES: x86 # RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/1.s -o %t/1.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/5.s -o %t/5.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/main.s -o %t/main.o -# RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o +# RUN: llvm-ar rcs %t/test.a %t/1.o %t/2.o %t/3.o %t/4.o %t/5.o # RUN: %lld %t/main.o %t/test.a -o %t/test.out ## TODO: Run llvm-nm -p to validate symbol order @@ -18,7 +20,10 @@ # RUN: %lld %t/test.a %t/main.o -o %t/test.out # RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST # ARCHIVE-FIRST: T _bar +# ARCHIVE-FIRST: T _barf +# ARCHIVE-FIRST: T _baz # ARCHIVE-FIRST: T _boo +# ARCHIVE-FIRST: T _foo # ARCHIVE-FIRST: T _main # RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix VISIBLE @@ -32,9 +37,15 @@ # ALL-LOAD: T _main # ALL-LOAD: T _unused +#--- 1.s +.globl _undefined, _unused +_unused: + ret + #--- 2.s .globl _boo _boo: + callq _bar ret #--- 3.s @@ -42,14 +53,24 @@ _bar: ret +.globl _barf +_barf: + callq _baz + ret + #--- 4.s -.globl _undefined, _unused -_unused: +.globl 
def generate_addrs(addr, sizes):
    """Yield the start address of each function when laid out back-to-back.

    The first address yielded is `addr`; every later one is the previous
    address plus the size of the previous function. Exactly one address is
    produced per entry of `sizes`, so an empty `sizes` yields nothing.
    """
    next_addr = addr
    for size in sizes:
        yield next_addr
        next_addr += size
windows, or linux') + global args + args = parser.parse_args() + triples = { + "macos": "arm64-apple-macos", + "linux": "aarch64-pc-linux", + "windows": "aarch64-pc-windows" + } + global triple + triple = triples.get(args.os) + + print("""\ +### seed=%s triple=%s +""" % (args.seed, triple)) + + random.seed(args.seed) + + base = 0x4010 + megabytes = (int(args.size) if args.size else 512) * 1024 * 1024 + sizes = [size for size in generate_sizes(base, megabytes)] + addrs = [addr for addr in generate_addrs(base, sizes)] + + for i in range(len(addrs)): + print_function(addrs[i], sizes[i], addrs) + + print_here_head("main") + print("""\ +### _x%08x +""" % (addrs[-1] + sizes[-1])) + print_function_head(14 if args.os == "macos" else 4, "main") + print(" ret") + print_here_tail() + print("wait") + + +if __name__ == '__main__': + main()