Index: lld/MachO/Config.h =================================================================== --- lld/MachO/Config.h +++ lld/MachO/Config.h @@ -131,6 +131,7 @@ bool emitBitcodeBundle = false; bool emitDataInCodeInfo = false; bool emitEncryptionInfo = false; + bool emitInitOffsets = false; bool timeTraceEnabled = false; bool dataConst = false; bool dedupLiterals = true; Index: lld/MachO/Driver.cpp =================================================================== --- lld/MachO/Driver.cpp +++ lld/MachO/Driver.cpp @@ -1103,6 +1103,11 @@ if (auto *isec = dyn_cast(subsection.isec)) { if (isec->isCoalescedWeak()) continue; + if (config->emitInitOffsets && + sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) { + in.initOffsets->addInput(isec); + continue; + } isec->outSecOff = inputOrder++; if (!osec) osec = ConcatOutputSection::getOrCreateForInput(isec); @@ -1432,6 +1437,7 @@ config->emitBitcodeBundle = args.hasArg(OPT_bitcode_bundle); config->emitDataInCodeInfo = args.hasFlag(OPT_data_in_code_info, OPT_no_data_in_code_info, true); + config->emitInitOffsets = args.hasArg(OPT_init_offsets); config->icfLevel = getICFLevel(args); config->dedupLiterals = args.hasFlag(OPT_deduplicate_literals, OPT_icf_eq, false) || Index: lld/MachO/InputSection.h =================================================================== --- lld/MachO/InputSection.h +++ lld/MachO/InputSection.h @@ -314,6 +314,7 @@ constexpr const char got[] = "__got"; constexpr const char header[] = "__mach_header"; constexpr const char indirectSymbolTable[] = "__ind_sym_tab"; +constexpr const char initOffsets[] = "__init_offsets"; constexpr const char const_[] = "__const"; constexpr const char lazySymbolPtr[] = "__la_symbol_ptr"; constexpr const char lazyBinding[] = "__lazy_binding"; Index: lld/MachO/MarkLive.cpp =================================================================== --- lld/MachO/MarkLive.cpp +++ lld/MachO/MarkLive.cpp @@ -284,6 +284,9 @@ } } + for (ConcatInputSection *isec : 
+ in.initOffsets->inputs()) + marker->enqueue(isec, 0); + marker->markTransitively(); } Index: lld/MachO/Options.td =================================================================== --- lld/MachO/Options.td +++ lld/MachO/Options.td @@ -1273,8 +1273,7 @@ HelpText<"Ignore Linker Optimization Hints">, Group; def init_offsets : Flag<["-"], "init_offsets">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, + HelpText<"Emit offsets to initializer functions">, Group; def keep_dwarf_unwind : Flag<["-"], "keep_dwarf_unwind">, HelpText<"This option is undocumented in ld64">, Index: lld/MachO/OutputSegment.cpp =================================================================== --- lld/MachO/OutputSegment.cpp +++ lld/MachO/OutputSegment.cpp @@ -84,10 +84,11 @@ // Sections are uniquely identified by their segment + section name. if (segname == segment_names::text) { return StringSwitch(osec->name) - .Case(section_names::header, -4) - .Case(section_names::text, -3) - .Case(section_names::stubs, -2) - .Case(section_names::stubHelper, -1) + .Case(section_names::header, -5) + .Case(section_names::text, -4) + .Case(section_names::stubs, -3) + .Case(section_names::stubHelper, -2) + .Case(section_names::initOffsets, -1) .Case(section_names::unwindInfo, std::numeric_limits::max() - 1) .Case(section_names::ehFrame, std::numeric_limits::max()) .Default(osec->inputOrder); Index: lld/MachO/Symbols.h =================================================================== --- lld/MachO/Symbols.h +++ lld/MachO/Symbols.h @@ -346,6 +346,14 @@ return sym; } +// Can a symbol's address only be resolved at runtime? 
+inline bool needsBinding(const Symbol *sym) { + if (isa(sym)) + return true; + if (const auto *defined = dyn_cast(sym)) + return defined->isExternalWeakDef() || defined->interposable; + return false; +} } // namespace macho std::string toString(const macho::Symbol &); Index: lld/MachO/SyntheticSections.h =================================================================== --- lld/MachO/SyntheticSections.h +++ lld/MachO/SyntheticSections.h @@ -647,6 +647,31 @@ std::vector files; // files with image info }; +// This section stores 32-bit __TEXT segment offsets of initializer functions. +// +// The compiler stores initializers as pointers in __mod_init_func. These need +// to be fixed up at load time, which takes time and dirties memory. By +// synthesizing InitOffsetsSection from them, this data can live in the +// read-only __TEXT segment instead. +// +// There is no similar counterpart to __mod_term_func, as that format is +// deprecated, and static destructors are handled by registering them via +// __cxa_atexit from an autogenerated initializer function (see D121736). 
+class InitOffsetsSection final : public SyntheticSection { +public: + InitOffsetsSection(); + bool isNeeded() const override { return !sections.empty(); } + uint64_t getSize() const override; + void writeTo(uint8_t *buf) const override; + void setup(); + + void addInput(ConcatInputSection *isec) { sections.push_back(isec); } + const std::vector &inputs() const { return sections; } + +private: + std::vector sections; +}; + struct InStruct { const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; @@ -668,6 +693,7 @@ UnwindInfoSection *unwindInfo = nullptr; ObjCImageInfoSection *objCImageInfo = nullptr; ConcatInputSection *imageLoaderCache = nullptr; + InitOffsetsSection *initOffsets = nullptr; }; extern InStruct in; Index: lld/MachO/SyntheticSections.cpp =================================================================== --- lld/MachO/SyntheticSections.cpp +++ lld/MachO/SyntheticSections.cpp @@ -1816,6 +1816,73 @@ write32le(buf + 4, flags); } +InitOffsetsSection::InitOffsetsSection() + : SyntheticSection(segment_names::text, section_names::initOffsets) { + flags = S_INIT_FUNC_OFFSETS; +} + +uint64_t InitOffsetsSection::getSize() const { + size_t count = 0; + for (const ConcatInputSection *isec : sections) + count += isec->relocs.size(); + return count * sizeof(uint32_t); +} + +void InitOffsetsSection::writeTo(uint8_t *buf) const { + uint64_t textVA = 0; + for (const OutputSegment *oseg : outputSegments) + if (oseg->name == segment_names::text) { + textVA = oseg->addr; + break; + } + + for (ConcatInputSection *isec : sections) { + for (const Reloc &rel : isec->relocs) { + const Symbol *referent = rel.referent.dyn_cast(); + assert(referent && "section relocation should have been rejected"); + uint64_t offset = referent->getVA() - textVA; + // FIXME: Can we handle this gracefully? 
+ if (offset > UINT32_MAX) + fatal(isec->getLocation(rel.offset) + ": offset to initializer " + + referent->getName() + " (" + utohexstr(offset) + + ") does not fit in 32 bits"); + + // Entries need to be added in the order they appear in the section, but + // relocations aren't guaranteed to be sorted. + size_t index = rel.offset >> target->p2WordSize; + write32le(&buf[index * sizeof(uint32_t)], offset); + } + buf += isec->relocs.size() * sizeof(uint32_t); + } +} + +// The inputs are __mod_init_func sections, which contain pointers to +// initializer functions, therefore all relocations have to be of the UNSIGNED +// type. Offsets can only point to local functions, so we may have to generate +// stubs. +void InitOffsetsSection::setup() { + for (const ConcatInputSection *isec : sections) { + for (const Reloc &rel : isec->relocs) { + RelocAttrs attrs = target->getRelocAttrs(rel.type); + if (!attrs.hasAttr(RelocAttrBits::UNSIGNED)) + error(isec->getLocation(rel.offset) + + ": unsupported relocation type: " + attrs.name); + if (rel.addend != 0) + error(isec->getLocation(rel.offset) + + ": relocation addend is not representable in __init_offsets"); + if (rel.referent.is()) + error(isec->getLocation(rel.offset) + + ": unexpected section relocation"); + + Symbol *sym = rel.referent.dyn_cast(); + if (auto *undefined = dyn_cast(sym)) + treatUndefinedSymbol(*undefined, isec, rel.offset); + if (needsBinding(sym)) + in.stubs->addEntry(sym); + } + } +} + void macho::createSyntheticSymbols() { auto addHeaderSymbol = [](const char *name) { symtab->addSynthetic(name, in.header->isec, /*value=*/0, Index: lld/MachO/Writer.cpp =================================================================== --- lld/MachO/Writer.cpp +++ lld/MachO/Writer.cpp @@ -575,15 +575,6 @@ } } -// Can a symbol's address can only be resolved at runtime? 
-static bool needsBinding(const Symbol *sym) { - if (isa(sym)) - return true; - if (const auto *defined = dyn_cast(sym)) - return defined->isExternalWeakDef() || defined->interposable; - return false; -} - static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, const lld::macho::Reloc &r) { assert(sym->isLive()); @@ -1141,6 +1132,8 @@ if (in.objcStubs->isNeeded()) in.objcStubs->setup(); scanRelocations(); + if (in.initOffsets->isNeeded()) + in.initOffsets->setup(); // Do not proceed if there was an undefined symbol. reportPendingUndefinedSymbols(); @@ -1204,6 +1197,7 @@ in.objcStubs = make(); in.unwindInfo = makeUnwindInfoSection(); in.objCImageInfo = make(); + in.initOffsets = make(); // This section contains space for just a single word, and will be used by // dyld to cache an address to the image loader it uses. Index: lld/test/MachO/init-offsets.s =================================================================== --- /dev/null +++ lld/test/MachO/init-offsets.s @@ -0,0 +1,73 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/second.s -o %t/second.o + +# RUN: %lld -lSystem -init_offsets -undefined dynamic_lookup %t/first.o %t/second.o -o %t/out +# RUN: llvm-otool -lv %t/out | FileCheck --check-prefix=FLAGS --implicit-check-not=__mod_init_func %s +# RUN: llvm-otool -l %t/out > %t/dump.txt +# RUN: llvm-objdump --macho --print-imm-hex --section=__TEXT,__stubs %t/out >> %t/dump.txt +# RUN: llvm-objdump --macho --syms %t/out >> %t/dump.txt +# RUN: llvm-objcopy --dump-section=__TEXT,__init_offsets=%t/section.bin %t/out +# RUN: echo "__TEXT,__init_offsets contents:" >> %t/dump.txt +# RUN: od -An -txI %t/section.bin >> %t/dump.txt +# RUN: FileCheck --check-prefix=CONTENT %s < %t/dump.txt + +## This test checks that: +## - __mod_init_func is replaced by __init_offsets. 
+## - __init_offsets has type 0x16 (S_INIT_FUNC_OFFSETS). +## - initializers show up in the order their parent objects are specified on the +## command line, and in the order they show up within __mod_init_func. +## - for undefined and dylib symbols, stubs are created, and the offsets point to those. +## - offsets are relative to __TEXT's address, they aren't an absolute virtual address. + +# FLAGS: sectname __init_offsets +# FLAGS-NEXT: segname __TEXT +# FLAGS-NEXT: addr +# FLAGS-NEXT: size 0x0000000000000010 +# FLAGS-NEXT: offset +# FLAGS-NEXT: align +# FLAGS-NEXT: reloff 0 +# FLAGS-NEXT: nreloc 0 +# FLAGS-NEXT: type S_INIT_FUNC_OFFSETS + +# CONTENT: segname __TEXT +# CONTENT-NEXT: 0x[[#%x, TEXT:]] + +# CONTENT: Contents of (__TEXT,__stubs) section +# CONTENT-NEXT: [[#%x, ISNAN:]]: {{.*}} ## literal pool symbol address: ___isnan +# CONTENT-NEXT: [[#%x, UNDEF:]]: {{.*}} ## literal pool symbol address: _undefined + +# CONTENT: SYMBOL TABLE: +# CONTENT: [[#%x, FIRST:]] g F __TEXT,__text _first_init +# CONTENT: [[#%x, SECOND:]] g F __TEXT,__text _second_init + +# CONTENT: __TEXT,__init_offsets contents: +# CONTENT: [[#%.8x, FIRST - TEXT]] [[#%.8x, ISNAN - TEXT]] [[#%.8x, UNDEF - TEXT]] [[#%.8x, SECOND - TEXT]] + +#--- first.s +.globl _first_init, ___isnan, _main +.text +_first_init: + ret +_main: + ret + +.section __DATA,__mod_init_func,mod_init_funcs +.quad _first_init +.quad ___isnan + +.subsections_via_symbols + +#--- second.s +.globl _second_init, _undefined +.text +_second_init: + ret + +.section __DATA,__mod_init_func,mod_init_funcs +.quad _undefined +.quad _second_init + +.subsections_via_symbols Index: llvm/include/llvm/BinaryFormat/MachO.h =================================================================== --- llvm/include/llvm/BinaryFormat/MachO.h +++ llvm/include/llvm/BinaryFormat/MachO.h @@ -175,8 +175,11 @@ /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local /// variable initialization pointers to functions. 
S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u, + /// S_INIT_FUNC_OFFSETS - Section with 32-bit offsets to initializer + /// functions. + S_INIT_FUNC_OFFSETS = 0x16u, - LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS + LAST_KNOWN_SECTION_TYPE = S_INIT_FUNC_OFFSETS }; enum : uint32_t { Index: llvm/lib/MC/MCSectionMachO.cpp =================================================================== --- llvm/lib/MC/MCSectionMachO.cpp +++ llvm/lib/MC/MCSectionMachO.cpp @@ -62,6 +62,8 @@ StringLiteral("S_THREAD_LOCAL_VARIABLE_POINTERS")}, // 0x14 {StringLiteral("thread_local_init_function_pointers"), StringLiteral("S_THREAD_LOCAL_INIT_FUNCTION_POINTERS")}, // 0x15 + {StringLiteral("") /*FIXME??*/, + StringLiteral("S_INIT_FUNC_OFFSETS")}, // 0x16 }; /// SectionAttrDescriptors - This is an array of descriptors for section Index: llvm/tools/llvm-objdump/MachODump.cpp =================================================================== --- llvm/tools/llvm-objdump/MachODump.cpp +++ llvm/tools/llvm-objdump/MachODump.cpp @@ -8955,6 +8955,8 @@ outs() << " S_THREAD_LOCAL_VARIABLE_POINTERS\n"; else if (section_type == MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS) outs() << " S_THREAD_LOCAL_INIT_FUNCTION_POINTERS\n"; + else if (section_type == MachO::S_INIT_FUNC_OFFSETS) + outs() << " S_INIT_FUNC_OFFSETS\n"; else outs() << format("0x%08" PRIx32, section_type) << "\n"; outs() << "attributes";