diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -41,6 +41,7 @@ void relaxGotLoad(uint8_t *loc, uint8_t type) const override; const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 16 * 1024; } + void populateThunk(InputSection *thunk, Defined *defined) override; }; } // namespace @@ -297,10 +298,28 @@ cpuSubtype = CPU_SUBTYPE_ARM64_ALL; stubSize = sizeof(stubCode); + thunkSize = stubSize; + branchRange = llvm::maxIntN(28); stubHelperHeaderSize = sizeof(stubHelperHeaderCode); stubHelperEntrySize = sizeof(stubHelperEntryCode); } +void ARM64::populateThunk(InputSection *thunk, Defined *defined) { + thunk->align = 4; + // FIXME: populate data + thunk->data = {reinterpret_cast(stubCode), sizeof(stubCode)}; + thunk->relocs.push_back({/*type=*/ARM64_RELOC_PAGEOFF12, + /*length=*/2, /*pcrel=*/false, + /*thunkable=*/false, + /*offset=*/4, /*addend=*/0, + /*referent=*/defined}); + thunk->relocs.push_back({/*type=*/ARM64_RELOC_PAGE21, + /*length=*/2, /*pcrel=*/false, + /*thunkable=*/false, + /*offset=*/0, /*addend=*/0, + /*referent=*/defined}); +} + TargetInfo *macho::createARM64TargetInfo() { static ARM64 t; return &t; diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -187,6 +187,8 @@ cpuSubtype = CPU_SUBTYPE_X86_64_ALL; stubSize = sizeof(stub); + thunkSize = 0; + branchRange = std::numeric_limits::max(); stubHelperHeaderSize = sizeof(stubHelperHeader); stubHelperEntrySize = sizeof(stubHelperEntry); } diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -73,6 +73,7 @@ struct Configuration { Symbol *entry; + bool verbose = false; bool hasReexports = false; bool allLoad = false; bool forceLoadObjC = false; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -914,6 +914,7 @@ for (const Arg *arg : args.filtered(OPT_U)) symtab->addDynamicLookup(arg->getValue()); + config->verbose = args.hasArg(OPT_verbose); config->mapFile = args.getLastArgValue(OPT_map); config->outputFile = args.getLastArgValue(OPT_o, "a.out"); config->astPaths = args.getAllArgValues(OPT_add_ast_path); diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -44,6 +44,7 @@ uint32_t align = 1; uint32_t flags = 0; + bool thunkable = false; ArrayRef data; std::vector relocs; diff --git a/lld/MachO/MergedOutputSection.h b/lld/MachO/MergedOutputSection.h --- a/lld/MachO/MergedOutputSection.h +++ b/lld/MachO/MergedOutputSection.h @@ -17,6 +17,8 @@ namespace lld { namespace macho { +class Defined; + // Linking multiple files will inevitably mean resolving sections in different // files that are labeled with the same segment and section name. This class // contains all such sections and writes the data from each section sequentially @@ -38,6 +40,11 @@ void writeTo(uint8_t *buf) const override; std::vector inputs; + std::vector thunks; + bool thunkable = false; + + Defined *makeThunk(StringRef name, uint64_t maxAddr); + uint64_t placeThunk(uint64_t callSiteAddr, uint64_t nearestThunkI) const; static bool classof(const OutputSection *sec) { return sec->kind() == MergedKind; diff --git a/lld/MachO/MergedOutputSection.cpp b/lld/MachO/MergedOutputSection.cpp --- a/lld/MachO/MergedOutputSection.cpp +++ b/lld/MachO/MergedOutputSection.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "MergedOutputSection.h" +#include "Symbols.h" +#include "Target.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/BinaryFormat/MachO.h" @@ -24,22 +26,70 @@ mergeFlags(input->flags); align = std::max(align, input->align); } - inputs.push_back(input); input->parent = this; + thunkable = thunkable || input->thunkable; +} + +uint64_t MergedOutputSection::placeThunk(uint64_t callSiteAddr, + uint64_t nearestThunkI) const { + uint64_t maxAddr = callSiteAddr + target->branchRange; + auto findAddr = [&](InputSection *a, uint64_t addr) { + return addr < getSegmentOffset() + a->outSecOff; + }; + auto it = llvm::lower_bound( + thunks, maxAddr - target->thunkSize * (thunks.size() - nearestThunkI), + findAddr); + InputSection *isec = *it; + uint64_t beginAddr = isec->getVA(); + uint64_t endAddr = beginAddr + isec->getSize(); + return maxAddr >= endAddr ? endAddr : beginAddr; +} + +Defined *MergedOutputSection::makeThunk(StringRef name, uint64_t addr) { + // FIXME: convert addr to Reloc.offset + InputSection *thunk = make(); + const InputSection *peer = firstSection(); + thunk->segname = peer->segname; + thunk->name = peer->name; + thunk->parent = this; + Defined *defined = make( + saver.save(name), /*file=*/nullptr, thunk, /*value=*/0, + /*size=*/target->thunkSize, /*isWeakDef=*/false, /*isExternal=*/true, + /*isPrivateExtern=*/true); + target->populateThunk(thunk, defined); + thunks.push_back(thunk); + return defined; } void MergedOutputSection::finalize() { uint64_t isecAddr = addr; uint64_t isecFileOff = fileOff; - for (InputSection *isec : inputs) { + auto finalizeOne = [=, &isecAddr, &isecFileOff](InputSection *isec) { isecAddr = alignTo(isecAddr, isec->align); isecFileOff = alignTo(isecFileOff, isec->align); isec->outSecOff = isecAddr - addr; isec->outSecFileOff = isecFileOff - fileOff; isecAddr += isec->getSize(); isecFileOff += isec->getFileSize(); + }; + + size_t i = 0, ie = inputs.size(); + size_t t = 0, te = thunks.size(); + // Merge input sections from thunk & ordinary vectors + // When tentative addresses collide, place thunks first + while (i < ie && t < te) { + while (t < te && inputs[t]->outSecOff <= thunks[i]->outSecOff) + finalizeOne(inputs[t++]); + while (i < ie && inputs[i]->outSecOff < thunks[t]->outSecOff) + finalizeOne(inputs[i++]); } + // Merge the tail of whichever vector has leftovers + while (i < ie) + finalizeOne(inputs[i++]); + while (t < te) + finalizeOne(inputs[t++]); + size = isecAddr - addr; fileSize = isecFileOff - fileOff; } diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -10,6 +10,8 @@ def help_hidden : Flag<["--"], "help-hidden">, HelpText<"Display help for hidden options">, Group; +def verbose : Flag<["--"], "verbose">, + Group; def color_diagnostics: Flag<["--"], "color-diagnostics">, HelpText<"Alias for --color-diagnostics=always">, Group; diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h --- a/lld/MachO/Relocations.h +++ b/lld/MachO/Relocations.h @@ -51,8 +51,10 @@ struct Reloc { uint8_t type = llvm::MachO::GENERIC_RELOC_INVALID; - bool pcrel = false; uint8_t length = 0; + bool pcrel = false; + bool thunkable = false; + // The offset from the start of the subsection that this relocation belongs // to. uint64_t offset = 0; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -234,11 +234,13 @@ }; union SymbolUnion { - alignas(Defined) char a[sizeof(Defined)]; - alignas(Undefined) char b[sizeof(Undefined)]; - alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; - alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; - alignas(LazySymbol) char e[sizeof(LazySymbol)]; +#define X(T) alignas(T) char u_##T[sizeof(T)] + X(Defined); + X(Undefined); + X(CommonSymbol); + X(DylibSymbol); + X(LazySymbol); +#undef X }; template diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -24,6 +24,7 @@ LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); class Symbol; +class Defined; class DylibSymbol; class InputSection; @@ -63,6 +64,8 @@ virtual uint64_t getPageSize() const = 0; + virtual void populateThunk(InputSection *thunk, Defined *defined) {} + bool hasAttr(uint8_t type, RelocAttrBits bit) const { return getRelocAttrs(type).hasAttr(bit); } @@ -72,9 +75,11 @@ uint64_t pageZeroSize; size_t stubSize; + size_t thunkSize; size_t stubHelperHeaderSize; size_t stubHelperEntrySize; size_t wordSize; + ssize_t branchRange; }; TargetInfo *createX86_64TargetInfo(); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/xxhash.h" #include +#include using namespace llvm; using namespace llvm::MachO; @@ -51,9 +52,9 @@ void scanSymbols(); template void createOutputSections(); template void createLoadCommands(); - void finalizeAddresses(); void finalizeLinkEditSegment(); void assignAddresses(OutputSegment *); + void assignAddresses(); void openFile(); void writeSections(); @@ -457,7 +458,7 @@ // Adds stubs and bindings where necessary (e.g. if the symbol is a // DylibSymbol.) -static void prepareBranchTarget(Symbol *sym) { +static bool prepareBranchTarget(Symbol *sym) { if (auto *dysym = dyn_cast(sym)) { if (in.stubs->addEntry(dysym)) { if (sym->isWeakDef()) { @@ -469,6 +470,7 @@ in.lazyBinding->addEntry(dysym); } } + return target->thunkSize > 0; } else if (auto *defined = dyn_cast(sym)) { if (defined->isExternalWeakDef()) { if (in.stubs->addEntry(sym)) { @@ -477,8 +479,11 @@ in.weakBinding->addEntry(sym, in.lazyPointers->isec, sym->stubsIndex * target->wordSize); } + } else { + return target->thunkSize > 0; } } + return false; } // Can a symbol's address can only be resolved at runtime? @@ -490,12 +495,99 @@ return false; } -static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, - const Reloc &r) { +// A thunk comprises: +// Defined privateExtern symbol for the thunk, which references +// InputSection, which contains +// data for the instruction sequence to load & branch to the far address +// Relocs on the instructions to load the far address, which reference +// Defined extern symbol for the real function + +struct Thunk { + Defined *sym = nullptr; + uint64_t index = 0; + uint64_t addr = 0; + uint8_t sequence = 0; +}; + +static DenseMap thunkMap; + +static uint64_t getSymVA(Symbol *sym) { + if (sym->isInStubs()) + return in.stubs->addr + sym->stubsIndex * target->stubSize; + return sym->getVA(); +} + +// FIXME(gkm): add more comments!! +// FIXME(gkm): verify handling of dylib stubs + +// account for intervening thunks when placing a new thunk at a forward addr +// * count = (thunks.size() - nearest-thunk-index) +// do not account for intervening thunks when reusing one with greater address +// account for intevening thunks when reusing a thunk at an earlier addr +// * count = (thunk.thunkIndex - nearest-thunk-index) + +static size_t createThunks() { + size_t totalThunks = 0; + size_t totalCallSites = 0; + size_t nearestThunkI = 0; + uint64_t nearestThunkVA = 0; + for (const OutputSegment *seg : outputSegments) { + for (OutputSection *osec : seg->getSections()) { + auto *merged = dyn_cast(osec); + if (!merged || !merged->thunkable) + continue; + for (auto *isec : merged->inputs) { + if (!isec->thunkable) + continue; + uint64_t isecVA = isec->getVA(); + // Relocs are sequenced by descending address. Iterate in + // reverse so we can process call sites by ascending address. + for (Reloc &r : reverse(isec->relocs)) { + if (!r.thunkable) + continue; + uint64_t callSiteAddr = isecVA + r.offset; + auto *realSym = r.referent.dyn_cast(); + ssize_t delta = callSiteAddr - getSymVA(realSym); + if (std::abs(delta) < target->branchRange) + continue; + + totalCallSites++; + while (nearestThunkI < merged->thunks.size() && + callSiteAddr > nearestThunkVA) + nearestThunkVA = merged->thunks[nearestThunkI++]->getVA(); + + assert(realSym); + // assert: defined->isExternal() + // assert: dylibsym-> ??? + Thunk &thunk = thunkMap[realSym]; + if (!thunk.sym || callSiteAddr > thunk.addr + target->branchRange - + (nearestThunkI - thunk.index) * + target->thunkSize) { + totalThunks++; + thunk.index = merged->thunks.size(); + thunk.addr = merged->placeThunk(callSiteAddr, nearestThunkI); + thunk.sym = merged->makeThunk((realSym->getName() + ".thunk." + + std::to_string(thunk.sequence++)) + .str(), + thunk.addr); + } + r.referent = thunk.sym; + } + } + } + } + if (config->verbose) + warn("thunks = " + std::to_string(totalThunks) + + ", call sites = " + std::to_string(totalCallSites)); + return totalThunks; +} + +static void prepareSymbolRelocation(Symbol *sym, InputSection *isec, Reloc &r) { const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type); if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) { - prepareBranchTarget(sym); + if (prepareBranchTarget(sym)) + isec->thunkable = r.thunkable = true; } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) { if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym)) in.got->addEntry(sym); @@ -533,7 +625,10 @@ if (auto *undefined = dyn_cast(sym)) treatUndefinedSymbol(*undefined); // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. - if (!isa(sym) && validateSymbolRelocation(sym, isec, r)) + if (isa(sym)) + warn("relocation to lazy symbol: " + toString(*sym) + + "\n>>> defined in " + toString(sym->getFile())); + else if (!isa(sym) && validateSymbolRelocation(sym, isec, r)) prepareSymbolRelocation(sym, isec, r); } else { assert(r.referent.is()); @@ -852,8 +947,9 @@ linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); } -void Writer::finalizeAddresses() { - TimeTraceScope timeScope("Finalize addresses"); +void Writer::assignAddresses() { + static int pass = 0; + TimeTraceScope timeScope("Assign addresses " + std::to_string(++pass)); // Ensure that segments (and the sections they contain) are allocated // addresses in ascending order, which dyld requires. // @@ -967,10 +1063,16 @@ in.stubHelper->setup(); scanSymbols(); createOutputSections(); - // No more sections nor segments are created beyond this point. + // After this point, we create no new segments; HOWEVER, we might + // yet create branch-range extention thunks for architectures whose + // hardware call instructions have limited range, e.g., ARM(64) + // Since the thunks are interspersed with text input sections, + // we create the t sortSegmentsAndSections(); createLoadCommands(); - finalizeAddresses(); + assignAddresses(); + if (createThunks()) + assignAddresses(); finalizeLinkEditSegment(); writeMapFile(); writeOutputFile(); diff --git a/lld/test/MachO/archive.s b/lld/test/MachO/archive.s --- a/lld/test/MachO/archive.s +++ b/lld/test/MachO/archive.s @@ -1,11 +1,13 @@ # REQUIRES: x86 # RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/1.s -o %t/1.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/5.s -o %t/5.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/main.s -o %t/main.o -# RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o +# RUN: llvm-ar rcs %t/test.a %t/1.o %t/2.o %t/3.o %t/4.o %t/5.o # RUN: %lld %t/main.o %t/test.a -o %t/test.out ## TODO: Run llvm-nm -p to validate symbol order @@ -18,7 +20,10 @@ # RUN: %lld %t/test.a %t/main.o -o %t/test.out # RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST # ARCHIVE-FIRST: T _bar +# ARCHIVE-FIRST: T _barf +# ARCHIVE-FIRST: T _baz # ARCHIVE-FIRST: T _boo +# ARCHIVE-FIRST: T _foo # ARCHIVE-FIRST: T _main # RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix VISIBLE @@ -32,9 +37,15 @@ # ALL-LOAD: T _main # ALL-LOAD: T _unused +#--- 1.s +.globl _undefined, _unused +_unused: + ret + #--- 2.s .globl _boo _boo: + callq _bar ret #--- 3.s @@ -42,14 +53,24 @@ _bar: ret +.globl _barf +_barf: + callq _baz + ret + #--- 4.s -.globl _undefined, _unused -_unused: +.globl _foo +_foo: + ret + +#--- 5.s +.globl _baz +_baz: + callq _foo ret #--- main.s .globl _main _main: callq _boo - callq _bar ret diff --git a/lld/test/MachO/tools/generate-thunkable-program.py b/lld/test/MachO/tools/generate-thunkable-program.py new file mode 100755 --- /dev/null +++ b/lld/test/MachO/tools/generate-thunkable-program.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 + +"""Generate many skeletal functions with a thick call graph spanning a +large address space to induce lld to create branch-islands for arm64. + +""" +from __future__ import print_function +import random +import argparse +import string +from pprint import pprint +from math import factorial +from itertools import permutations + +def print_here_head(name): + print("""\ +llvm-mc -filetype=obj -triple %s -o %s.o <>12) + print_here_head(name) + print("""\ +### %s size=%x calls=%x""" % (name, size, calls)) + print_function_head(4, name) + for i in range(calls): + print(" bl %sx%08x" % ("_" if args.os == "macos" else "", addrs[random.randint(0, len(addrs)-1)])) + fill = size - 4 * (calls + 1) + assert fill > 0 + print("""\ + .fill 0x%x + ret""" % (fill)) + print_here_tail() + +def random_seed(): + """Generate a seed that can easily be passsed back in via --seed=STRING""" + return ''.join(random.choice(string.ascii_lowercase) for i in range(10)) + +def generate_sizes(base, megabytes): + total = 0 + while total < megabytes: + size = random.randint(0x100, 0x10000) * 0x10 + yield size + total += size + +def generate_addrs(addr, sizes): + i = 0 + while i < len(sizes): + yield addr + addr += sizes[i] + i += 1 + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + epilog="""\ +WRITEME +""") + parser.add_argument('--seed', type=str, default=random_seed(), + help='Seed the random number generator') + parser.add_argument('--size', type=int, default=None, + help='Total text size to generate, in megabytes') + parser.add_argument('--os', type=str, default="macos", + help='Target OS: macos, windows, or linux') + global args + args = parser.parse_args() + triples = { + "macos": "arm64-apple-macos", + "linux": "aarch64-pc-linux", + "windows": "aarch64-pc-windows" + } + global triple + triple = triples.get(args.os) + + print("""\ +### seed=%s triple=%s +""" % (args.seed, triple)) + + random.seed(args.seed) + + base = 0x4010 + megabytes = (int(args.size) if args.size else 512) * 1024 * 1024 + sizes = [size for size in generate_sizes(base, megabytes)] + addrs = [addr for addr in generate_addrs(base, sizes)] + + for i in range(len(addrs)): + print_function(addrs[i], sizes[i], addrs) + + print_here_head("main") + print("""\ +### _x%08x +""" % (addrs[-1] + sizes[-1])) + print_function_head(14 if args.os == "macos" else 4, "main") + print(" ret") + print_here_tail() + print("wait") + + +if __name__ == '__main__': + main()