diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -33,6 +33,7 @@
   void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
                             uint64_t entryAddr) const override;
   const RelocAttrs &getRelocAttrs(uint8_t type) const override;
+  void populateThunk(InputSection *thunk, Symbol *funcSym) override;
 };
 
 } // namespace
@@ -108,10 +109,27 @@
   cpuSubtype = CPU_SUBTYPE_ARM64_ALL;
   stubSize = sizeof(stubCode);
+  thunkSize = stubSize;
+  branchRange = llvm::maxIntN(28) - thunkSize;
   stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
   stubHelperEntrySize = sizeof(stubHelperEntryCode);
 }
 
+void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
+  thunk->align = 4;
+  thunk->data = {reinterpret_cast<const uint8_t *>(stubCode),
+                 sizeof(stubCode)};
+  thunk->relocs.push_back({/*type=*/ARM64_RELOC_PAGEOFF12,
+                           /*length=*/2, /*pcrel=*/false,
+                           /*isCallSite=*/false,
+                           /*offset=*/4, /*addend=*/0,
+                           /*referent=*/funcSym});
+  thunk->relocs.push_back({/*type=*/ARM64_RELOC_PAGE21,
+                           /*length=*/2, /*pcrel=*/true,
+                           /*isCallSite=*/false,
+                           /*offset=*/0, /*addend=*/0,
+                           /*referent=*/funcSym});
+}
+
 TargetInfo *macho::createARM64TargetInfo() {
   static ARM64 t;
   return &t;
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -74,6 +74,7 @@
 struct Configuration {
   Symbol *entry;
+  bool verbose = false;
   bool hasReexports = false;
   bool allLoad = false;
   bool forceLoadObjC = false;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -971,6 +971,7 @@
   for (const Arg *arg : args.filtered(OPT_U))
     symtab->addDynamicLookup(arg->getValue());
 
+  config->verbose = errorHandler().verbose = args.hasArg(OPT_verbose);
   config->mapFile = args.getLastArgValue(OPT_map);
   config->outputFile = args.getLastArgValue(OPT_o, "a.out");
   config->astPaths = args.getAllArgValues(OPT_add_ast_path);
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -44,6 +44,8 @@
   uint32_t align = 1;
   uint32_t flags = 0;
+  uint32_t callSiteCount = 0;
+  bool isFinal = false; // is address assigned?
 
   ArrayRef<uint8_t> data;
   std::vector<Reloc> relocs;
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -34,20 +34,15 @@
 uint64_t InputSection::getVA() const { return parent->addr + outSecOff; }
 
-static uint64_t resolveSymbolVA(uint8_t *loc, const Symbol &sym, uint8_t type) {
+static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) {
   const RelocAttrs &relocAttrs = target->getRelocAttrs(type);
-  if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
-    if (sym.isInStubs())
-      return in.stubs->addr + sym.stubsIndex * target->stubSize;
-  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
-    if (sym.isInGot())
-      return in.got->addr + sym.gotIndex * target->wordSize;
-  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
-    if (sym.isInGot())
-      return in.tlvPointers->addr + sym.gotIndex * target->wordSize;
-    assert(isa<Defined>(&sym));
-  }
-  return sym.getVA();
+  if (relocAttrs.hasAttr(RelocAttrBits::BRANCH))
+    return sym->resolveBranchVA();
+  else if (relocAttrs.hasAttr(RelocAttrBits::GOT))
+    return sym->resolveGotVA();
+  else if (relocAttrs.hasAttr(RelocAttrBits::TLV))
+    return sym->resolveTlvVA();
+  return sym->getVA();
 }
 
 void InputSection::writeTo(uint8_t *buf) {
@@ -73,7 +68,7 @@
     if (target->hasAttr(r.type, RelocAttrBits::LOAD) &&
         !referentSym->isInGot())
       target->relaxGotLoad(loc, r.type);
-    referentVA = resolveSymbolVA(loc, *referentSym, r.type);
+    referentVA = resolveSymbolVA(referentSym, r.type);
 
     if (isThreadLocalVariables(flags)) {
       // References from thread-local variable sections are treated as offsets
diff --git a/lld/MachO/MergedOutputSection.h b/lld/MachO/MergedOutputSection.h
--- a/lld/MachO/MergedOutputSection.h
+++ b/lld/MachO/MergedOutputSection.h
@@ -17,6 +17,8 @@
 namespace lld {
 namespace macho {
 
+class Defined;
+
 // Linking multiple files will inevitably mean resolving sections in different
 // files that are labeled with the same segment and section name. This class
 // contains all such sections and writes the data from each section sequentially
@@ -34,10 +36,13 @@
   void mergeInput(InputSection *input);
   void finalize() override;
+  bool needsThunks() const;
 
   void writeTo(uint8_t *buf) const override;
 
   std::vector<InputSection *> inputs;
+  std::vector<InputSection *> thunks;
+  uint64_t callSiteCount = 0;
 
   static bool classof(const OutputSection *sec) {
     return sec->kind() == MergedKind;
diff --git a/lld/MachO/MergedOutputSection.cpp b/lld/MachO/MergedOutputSection.cpp
--- a/lld/MachO/MergedOutputSection.cpp
+++ b/lld/MachO/MergedOutputSection.cpp
@@ -7,12 +7,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "MergedOutputSection.h"
+#include "Config.h"
 #include "OutputSegment.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Target.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/ScopedPrinter.h"
 
+#include <algorithm>
+
 using namespace llvm;
 using namespace llvm::MachO;
 using namespace lld;
@@ -26,29 +32,223 @@
     align = std::max(align, input->align);
     mergeFlags(input);
   }
-
   inputs.push_back(input);
   input->parent = this;
+  callSiteCount += input->callSiteCount;
+}
+
+// A branch-range-extension thunk comprises ...
+//
+// (1) new Defined privateExtern symbol for the thunk named
+//     <FUNCTION>.thunk.<SEQUENCE#>, which references ...
+// (2) new InputSection, which contains ...
+// (3.1) new data for the instructions to load & branch to the far address +
+// (3.2) new Relocs on instructions to load the far address, which reference ...
+// (4.1) existing Defined extern symbol for the real function in __text, or
+// (4.2) existing DylibSymbol for the real function in a dylib
+//
+// Optimal thunk-placement algorithm features:
+//
+// * Single pass: O(n) on the number of call sites.
+//
+// * Accounts for the exact space overhead of thunks - no heuristics
+//
+// * Exploits the full range of call instructions - forward & backward
+//
+// Data:
+//
+// * DenseMap<Symbol *, ThunkInfo> thunkMap: Maps the function symbol
+//   to its thunk bookkeeper.
+//
+// * struct ThunkInfo (bookkeeper): Call instructions have limited
+//   range, and distant call sites might be unable to reach the same
+//   thunk, so multiple thunks are necessary to serve all call sites
+//   in a very large program. A ThunkInfo stores state for all thunks
+//   associated with a particular function: (a) thunk symbol, (b)
+//   input section containing stub code, and (c) sequence number for
+//   the active thunk incarnation. When an old thunk goes out of
+//   range, we increment the sequence number and create a new thunk
+//   named <FUNCTION>.thunk.<SEQUENCE#>.
+//
+// * A thunk incarnation comprises (a) private-extern Defined symbol
+//   pointing to (b) an InputSection holding machine instructions
+//   (same code as a MachO stub), and (c) Reloc(s) that reference the
+//   real function for fixing-up the stub code.
+//
+// * std::vector<InputSection *> MergedOutputSection::thunks: A vector
+//   parallel to the inputs vector. We store new thunks via cheap
+//   vector append, rather than costly insertion into the inputs
+//   vector.
+//
+// Control Flow:
+//
+// * Writer::scanRelocations(), via its helpers
+//   prepareSymbolRelocation() and prepareBranchTarget(), digs into
+//   Reloc records. Reloc::isCallSite, InputSection::callSiteCount,
+//   and MergedOutputSection::callSiteCount memoize paths to call
+//   sites that might need thunks, so that
+//   MergedOutputSection::finalize() can skip any Reloc, InputSection,
+//   or MergedOutputSection that needs no attention.
+//
+// * During address assignment, MergedOutputSection::finalize()
+//   examines call sites by ascending address and creates thunks.
+//   When a function is beyond the range of a call site, we need a
+//   thunk. Place it at the largest available forward address from the
+//   call site. Call sites increase monotonically and thunks are
+//   always placed as far forward as possible; thus, we place thunks
+//   at monotonically increasing addresses. Once a thunk is placed, it
+//   and all previous input-section addresses are final.
+//
+// * MergedOutputSection::finalize() and MergedOutputSection::writeTo()
+//   merge the inputs and thunks vectors (both ordered by ascending
+//   address), which is simple and cheap.
+
+bool MergedOutputSection::needsThunks() const {
+  if (!target->usesThunks())
+    return false;
+  uint64_t isecAddr = addr;
+  for (InputSection *isec : inputs)
+    isecAddr = alignTo(isecAddr, isec->align) + isec->getSize();
+  uint64_t totalSize = isecAddr - addr;
+  return totalSize > target->branchRange;
 }
 
+struct ThunkInfo {
+  Defined *sym = nullptr;
+  InputSection *isec = nullptr;
+  uint8_t sequence = 0;
+};
+
+static DenseMap<Symbol *, ThunkInfo> thunkMap;
+
 void MergedOutputSection::finalize() {
   uint64_t isecAddr = addr;
   uint64_t isecFileOff = fileOff;
-  for (InputSection *isec : inputs) {
+  auto finalizeOne = [&](InputSection *isec) {
     isecAddr = alignTo(isecAddr, isec->align);
     isecFileOff = alignTo(isecFileOff, isec->align);
     isec->outSecOff = isecAddr - addr;
     isec->outSecFileOff = isecFileOff - fileOff;
+    isec->isFinal = true;
     isecAddr += isec->getSize();
     isecFileOff += isec->getFileSize();
+  };
+
+  if (!needsThunks()) {
+    for (InputSection *isec : inputs)
+      finalizeOne(isec);
+    size = isecAddr - addr;
+    fileSize = isecFileOff - fileOff;
+    return;
+  }
+
+  uint64_t branchRange = target->branchRange;
+  size_t thunkSize = target->thunkSize;
+  size_t relocCount = 0;
+  size_t callSiteCount = 0;
+  size_t thunkCallCount = 0;
+  size_t thunkCount = 0;
+
+  // inputs[finalIdx] is for finalization (address assignment)
+  size_t finalIdx = 0;
+  // Kick off by ensuring that the first input section has an address
+  finalizeOne(inputs[finalIdx++]);
+  for (size_t callIdx = 0, endIdx = inputs.size(); callIdx < endIdx;
+       ++callIdx) {
+    InputSection *isec = inputs[callIdx];
+    assert(isec->isFinal);
+    uint64_t isecVA = isec->getVA();
+    // Assign addresses up to the forward branch-range limit
+    while (finalIdx < endIdx &&
+           isecAddr + inputs[finalIdx]->getSize() < isecVA + branchRange)
+      finalizeOne(inputs[finalIdx++]);
+    if (isec->callSiteCount == 0)
+      continue;
+    // Process relocs by ascending address, i.e., ascending offset within isec
+    std::vector<Reloc> &relocs = isec->relocs;
+    assert(std::is_sorted(relocs.begin(), relocs.end(), [](Reloc &a, Reloc &b) {
+      return a.offset > b.offset;
+    }));
+    for (Reloc &r : reverse(relocs)) {
+      ++relocCount;
+      if (!r.isCallSite)
+        continue;
+      ++callSiteCount;
+      // Calculate branch reachability boundaries
+      uint64_t callVA = isecVA + r.offset;
+      uint64_t lowVA = branchRange < callVA ? callVA - branchRange : 0;
+      uint64_t highVA = callVA + branchRange;
+      // Calculate our call referent address
+      auto *funcSym = r.referent.get<Symbol *>();
+      uint64_t funcVA = funcSym->resolveBranchVA();
+      // Is the referent reachable with a simple call instruction?
+      if (lowVA < funcVA && funcVA < highVA)
+        continue;
+      // The referent is not reachable, so we need to use a thunk
+      ++thunkCallCount;
+      ThunkInfo &thunkInfo = thunkMap[funcSym];
+      // If an existing thunk is reachable, use it ...
+      if (thunkInfo.sym) {
+        uint64_t thunkVA = thunkInfo.isec->getVA();
+        if (lowVA < thunkVA && thunkVA < highVA) {
+          r.referent = thunkInfo.sym;
+          continue;
+        }
+      }
+      // ... otherwise, create a new thunk
+      if (isecAddr > highVA) {
+        // When there is little-to-no margin between highVA and
+        // isecAddr, and the distance between consecutive call sites is
+        // smaller than thunkSize, a new thunk can go out of
+        // range. Fix by unfinalizing inputs[finalIdx] to reduce the
+        // distance between callVA and highVA, then shift some thunks
+        // to occupy address space formerly occupied by the
+        // unfinalized inputs[finalIdx].
+        fatal(Twine(__FUNCTION__) + ": FIXME: thunk range overrun");
+      }
+      thunkInfo.isec = make<InputSection>();
+      thunkInfo.isec->name = isec->name;
+      thunkInfo.isec->segname = isec->segname;
+      thunkInfo.isec->parent = this;
+      StringRef thunkName = saver.save(funcSym->getName() + ".thunk." +
+                                       std::to_string(thunkInfo.sequence++));
+      r.referent = thunkInfo.sym = symtab->addDefined(
+          thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0,
+          /*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true,
+          /*isThumb=*/false);
+      target->populateThunk(thunkInfo.isec, funcSym);
+      finalizeOne(thunkInfo.isec);
+      thunks.push_back(thunkInfo.isec);
+      ++thunkCount;
+    }
   }
 
   size = isecAddr - addr;
   fileSize = isecFileOff - fileOff;
+
+  if (config->verbose)
+    warn("thunks for " + parent->name + "," + name +
+         ": funcs = " + std::to_string(thunkMap.size()) +
+         ", relocs = " + std::to_string(relocCount) +
+         ", all calls = " + std::to_string(callSiteCount) +
+         ", thunk calls = " + std::to_string(thunkCallCount) +
+         ", thunks = " + std::to_string(thunkCount));
 }
 
 void MergedOutputSection::writeTo(uint8_t *buf) const {
-  for (InputSection *isec : inputs)
-    isec->writeTo(buf + isec->outSecFileOff);
+  // Merge input sections from thunk & ordinary vectors
+  size_t i = 0, ie = inputs.size();
+  size_t t = 0, te = thunks.size();
+  while (i < ie || t < te) {
+    while (i < ie && (t == te || inputs[i]->getSize() == 0 ||
+                      inputs[i]->outSecOff < thunks[t]->outSecOff)) {
+      inputs[i]->writeTo(buf + inputs[i]->outSecFileOff);
+      ++i;
+    }
+    while (t < te && (i == ie || thunks[t]->outSecOff < inputs[i]->outSecOff)) {
+      thunks[t]->writeTo(buf + thunks[t]->outSecFileOff);
+      ++t;
+    }
+  }
 }
 
 // TODO: this is most likely wrong; reconsider how section flags
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -10,6 +10,8 @@
 def help_hidden : Flag<["--"], "help-hidden">,
     HelpText<"Display help for hidden options">,
     Group<grp_lld>;
+def verbose : Flag<["--"], "verbose">,
+    Group<grp_lld>;
 def error_limit_eq : Joined<["--"], "error-limit=">,
     HelpText<"Maximum number of errors to print before exiting (default: 20)">,
     Group<grp_lld>;
diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -52,8 +52,10 @@
 struct Reloc {
   uint8_t type = llvm::MachO::GENERIC_RELOC_INVALID;
-  bool pcrel = false;
   uint8_t length = 0;
+  bool pcrel = false;
+  bool isCallSite = false;
+
   // The offset from the start of the subsection that this relocation belongs
   // to.
   uint64_t offset = 0;
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -72,6 +72,16 @@
   // Whether this symbol is in the StubsSection.
   bool isInStubs() const { return stubsIndex != UINT32_MAX; }
 
+  uint64_t getStubVA() const;
+  uint64_t getGotVA() const;
+  uint64_t getTlvVA() const;
+  uint64_t resolveBranchVA() const {
+    assert(isa<Defined>(this) || isa<DylibSymbol>(this));
+    return isInStubs() ? getStubVA() : getVA();
+  }
+  uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
+  uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
+
   // The index of this symbol in the GOT or the TLVPointer section, depending
   // on whether it is a thread-local. A given symbol cannot be referenced by
   // both these sections at once.
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -27,9 +27,29 @@
   return demangle(b.getName());
 }
 
+uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); }
+uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); }
+uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); }
+
 uint64_t Defined::getVA() const {
   if (isAbsolute())
     return value;
+
+  if (!isec->isFinal) {
+    // A target arch that does not use thunks should never ask for
+    // the address of a function that has not yet been finalized.
+    assert(target->usesThunks());
+
+    // std::numeric_limits<uint64_t>::max() is off limits because it is
+    // the tombstone value for DenseMap<>.
+    uint64_t outOfRangeVA = std::numeric_limits<uint64_t>::max() - 3;
+
+    // MergedOutputSection::finalize() can seek the address of a
+    // function that does not yet have an address assigned. The
+    // thunking algorithm guarantees that unfinalized functions will
+    // be out of range, so just return a contrived out-of-range
+    // address now.
+    return outOfRangeVA;
+  }
   return isec->getVA() + value;
 }
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -123,6 +123,10 @@
   void addEntry(Symbol *sym);
 
+  uint64_t getVA(uint32_t gotIndex) {
+    return addr + gotIndex * target->wordSize;
+  }
+
 private:
   llvm::SetVector<Symbol *> entries;
 };
@@ -285,11 +289,18 @@
   StubsSection();
   uint64_t getSize() const override;
   bool isNeeded() const override { return !entries.empty(); }
+  void finalize() override;
   void writeTo(uint8_t *buf) const override;
   const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
   // Returns whether the symbol was added. Note that every stubs entry will
   // have a corresponding entry in the LazyPointerSection.
   bool addEntry(Symbol *);
+  uint64_t getVA(uint32_t stubsIndex) const {
+    assert(isFinal);
+    return addr + stubsIndex * target->stubSize;
+  }
+
+  bool isFinal = false; // is address assigned?
 
 private:
   llvm::SetVector<Symbol *> entries;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -64,6 +64,7 @@
   // Setting the index to 1 to pretend that this section is the text
   // section.
   index = 1;
+  isec->isFinal = true;
 }
 
 void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
@@ -425,6 +426,8 @@
   }
 }
 
+void StubsSection::finalize() { isFinal = true; }
+
 bool StubsSection::addEntry(Symbol *sym) {
   bool inserted = entries.insert(sym);
   if (inserted)
@@ -1101,12 +1104,12 @@
     //   __TEXT, __text)
     // Otherwise, it's an absolute symbol.
     if (config->isPic)
-      symtab->addSynthetic("__mh_execute_header", in.header->isec, 0,
+      symtab->addSynthetic("__mh_execute_header", in.header->isec, /*value=*/0,
                            /*privateExtern=*/false, /*includeInSymtab=*/true);
     else
       symtab->addSynthetic("__mh_execute_header",
-                           /*isec*/ nullptr, 0,
+                           /*isec*/ nullptr, /*value=*/0,
                            /*privateExtern=*/false, /*includeInSymtab=*/true);
     break;
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -24,6 +24,7 @@
 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
 
 class Symbol;
+class Defined;
 class DylibSymbol;
 class InputSection;
 
@@ -65,10 +66,16 @@
   virtual uint64_t getPageSize() const = 0;
 
+  virtual void populateThunk(InputSection *thunk, Symbol *funcSym) {
+    llvm_unreachable("target does not use thunks");
+  }
+
   bool hasAttr(uint8_t type, RelocAttrBits bit) const {
     return getRelocAttrs(type).hasAttr(bit);
   }
 
+  bool usesThunks() { return thunkSize > 0; }
+
   uint32_t magic;
   uint32_t cpuType;
   uint32_t cpuSubtype;
@@ -79,6 +86,9 @@
   size_t stubHelperHeaderSize;
   size_t stubHelperEntrySize;
   size_t wordSize;
+
+  size_t thunkSize = 0;
+  uint64_t branchRange = 0;
 };
 
 TargetInfo *createX86_64TargetInfo();
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/xxhash.h"
 
 #include
+#include
 
 using namespace llvm;
 using namespace llvm::MachO;
@@ -511,8 +512,15 @@
 
 } // namespace
 
-// Adds stubs and bindings where necessary (e.g. if the symbol is a
-// DylibSymbol.)
+// Add stubs and bindings where necessary (e.g. if the symbol is a
+// DylibSymbol). Branch relocs that might need a branch-range-extension
+// thunk are also flagged as call sites (see prepareSymbolRelocation()).
+// This is something of a kludge for the sake of performance: we can't
+// process thunks until address assignment at output-section finalize()
+// time, at which point we must iterate over all relocs again. Here we
+// dig deeply enough into the attributes of a branch reloc to determine
+// its eligibility for thunking, so it is wise to memoize the result.
+
 static void prepareBranchTarget(Symbol *sym) {
   if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
     if (in.stubs->addEntry(dysym)) {
@@ -534,6 +542,8 @@
                                       sym->stubsIndex * target->wordSize);
       }
     }
+  } else if (sym != config->entry) {
+    llvm_unreachable("invalid branch target symbol type");
   }
 }
 
@@ -546,12 +556,13 @@
   return false;
 }
 
-static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
-                                    const Reloc &r) {
+static void prepareSymbolRelocation(Symbol *sym, InputSection *isec, Reloc &r) {
   const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
 
   if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
     prepareBranchTarget(sym);
+    r.isCallSite = true;
+    isec->callSiteCount++;
   } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
     if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym))
       in.got->addEntry(sym);
@@ -950,8 +961,6 @@
     seg->vmSize = addr - seg->firstSection()->addr;
     seg->fileSize = fileOff - seg->fileOff;
   }
-
-  // FIXME(gkm): create branch-extension thunks here, then adjust addresses
 }
 
 void Writer::finalizeLinkEditSegment() {
@@ -1053,7 +1062,11 @@
   in.stubHelper->setup();
   scanSymbols();
   createOutputSections();
-  // No more sections nor segments are created beyond this point.
+  // After this point, we create no new segments; HOWEVER, we might
+  // yet create branch-range extension thunks for architectures whose
+  // hardware call instructions have limited range, e.g., ARM(64).
+  // The thunks are created as InputSections interspersed among
+  // the ordinary __TEXT,__text InputSections.
   sortSegmentsAndSections();
   createLoadCommands();
   finalizeAddresses();
diff --git a/lld/test/MachO/tools/generate-thunkable-program.py b/lld/test/MachO/tools/generate-thunkable-program.py
new file mode 100755
--- /dev/null
+++ b/lld/test/MachO/tools/generate-thunkable-program.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+
+"""Generate many skeletal functions with a thick call graph spanning a
+large address space to induce lld to create branch-islands for arm64.
+
+"""
+from __future__ import print_function
+import random
+import argparse
+import string
+from pprint import pprint
+from math import factorial
+from itertools import permutations
+
+def print_here_head(name):
+    print("""\
+(tee %s.s |llvm-mc -filetype=obj -triple %s -o %s.o) <>12)
+    print_here_head(name)
+    print("""\
+### %s size=%x calls=%x""" % (name, size, calls))
+    print_function_head(4, name)
+    for i in range(calls):
+        print(" bl %sx%08x\n .p2align 4" % ("_" if args.os == "macos" else "", addrs[random.randint(0, len(addrs)-1)]))
+    fill = size - 4 * (calls + 1)
+    assert fill > 0
+    print("""\
+ .fill 0x%x
+ ret""" % (fill))
+    print_here_tail()
+
+def random_seed():
+    """Generate a seed that can easily be passed back in via --seed=STRING"""
+    return ''.join(random.choice(string.ascii_lowercase) for i in range(10))
+
+def generate_sizes(base, megabytes):
+    total = 0
+    while total < megabytes:
+        size = random.randint(0x100, 0x10000) * 0x10
+        yield size
+        total += size
+
+def generate_addrs(addr, sizes):
+    i = 0
+    while i < len(sizes):
+        yield addr
+        addr += sizes[i]
+        i += 1
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        epilog="""\
+WRITEME
+""")
+    parser.add_argument('--seed', type=str, default=random_seed(),
+                        help='Seed the random number generator')
+    parser.add_argument('--size', type=int, default=None,
+                        help='Total text size to generate, in megabytes')
+    parser.add_argument('--os', type=str, default="macos",
+                        help='Target OS: macos, windows, or linux')
+    global args
+    args = parser.parse_args()
+    triples = {
+        "macos": "arm64-apple-macos",
+        "linux": "aarch64-pc-linux",
+        "windows": "aarch64-pc-windows"
+    }
+    global triple
+    triple = triples.get(args.os)
+
+    print("""\
+### seed=%s triple=%s
+""" % (args.seed, triple))
+
+    random.seed(args.seed)
+
+    base = 0x4010
+    megabytes = (int(args.size) if args.size else 512) * 1024 * 1024
+    sizes = [size for size in generate_sizes(base, megabytes)]
+    addrs = [addr for addr in generate_addrs(base, sizes)]
+
+    for i in range(len(addrs)):
+        print_function(addrs[i], sizes[i], addrs)
+
+    print_here_head("main")
+    print("""\
+### _x%08x
+""" % (addrs[-1] + sizes[-1]))
+    print_function_head(14 if args.os == "macos" else 4, "main")
+    print(" ret")
+    print_here_tail()
+    print("wait")
+
+
+if __name__ == '__main__':
+    main()
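A quick standalone sketch (not part of the patch, and not lld code) of the reachability test that MergedOutputSection::finalize() applies above: each call site is bracketed by a +/-branchRange window, and any referent outside that window is routed through a thunk. The 12-byte thunk size and the signed 28-bit branch reach mirror the values set in the ARM64 constructor; everything else here is a stand-in for illustration only.

    #include <cstdint>
    #include <iostream>

    // Stand-ins for target->thunkSize and target->branchRange: the ARM64 stub is
    // three 4-byte instructions, and b/bl reaches roughly +/- 2^27 bytes.
    constexpr uint64_t kThunkSize = 12;
    constexpr uint64_t kBranchRange = ((1ULL << 27) - 1) - kThunkSize;

    // Mirrors the lowVA/highVA bracket computed per call site in finalize().
    bool reachable(uint64_t callVA, uint64_t funcVA) {
      uint64_t lowVA = kBranchRange < callVA ? callVA - kBranchRange : 0;
      uint64_t highVA = callVA + kBranchRange;
      return lowVA < funcVA && funcVA < highVA;
    }

    int main() {
      std::cout << reachable(0x4000, 0x1000000) << '\n';  // 1: direct bl is fine
      std::cout << reachable(0x4000, 0x20000000) << '\n'; // 0: needs a thunk
    }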
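Likewise, a self-contained sketch of the two-pointer walk that the new writeTo() uses to interleave the inputs and thunks vectors, both already sorted by outSecOff. The toy Section struct and emitMerged() below are illustrative stand-ins rather than lld types; only the shape of the merge matches the loop in MergedOutputSection::writeTo().

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Toy stand-in for InputSection: just an output-section offset and a name.
    struct Section {
      uint64_t outSecOff;
      const char *name;
    };

    // Emit two already-sorted vectors in a single ascending pass over both.
    void emitMerged(const std::vector<Section> &inputs,
                    const std::vector<Section> &thunks) {
      size_t i = 0, ie = inputs.size();
      size_t t = 0, te = thunks.size();
      while (i < ie || t < te) {
        while (i < ie && (t == te || inputs[i].outSecOff < thunks[t].outSecOff))
          std::cout << inputs[i++].name << '\n';
        while (t < te && (i == ie || thunks[t].outSecOff < inputs[i].outSecOff))
          std::cout << thunks[t++].name << '\n';
      }
    }

    int main() {
      emitMerged({{0x0, "_a"}, {0x40, "_b"}, {0x100, "_c"}},
                 {{0x80, "_b.thunk.0"}});  // prints _a, _b, _b.thunk.0, _c
    }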