diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -34,7 +34,8 @@
   void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
                             uint64_t entryAddr) const override;
 
-  void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override;
+  void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *,
+                               const Reloc &) override;
   uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
 };
 
@@ -208,8 +209,9 @@
                    in.stubHelper->addr);
 }
 
-void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
-  switch (type) {
+void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
+                                     const InputSection *isec, const Reloc &r) {
+  switch (r.type) {
   case X86_64_RELOC_GOT_LOAD:
     // TODO: implement mov -> lea relaxation for non-dynamic symbols
   case X86_64_RELOC_GOT:
@@ -220,7 +222,17 @@
       in.stubs->addEntry(*dysym);
     break;
   }
-  case X86_64_RELOC_UNSIGNED:
+  case X86_64_RELOC_UNSIGNED: {
+    if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
+      if (r.length != 3) {
+        error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " +
+              dysym->getName() + " must have r_length = 3");
+        return;
+      }
+      in.binding->addEntry(dysym, isec, r.offset, r.addend);
+    }
+    break;
+  }
   case X86_64_RELOC_SIGNED:
   case X86_64_RELOC_SIGNED_1:
   case X86_64_RELOC_SIGNED_2:
@@ -228,7 +240,7 @@
     break;
   case X86_64_RELOC_SUBTRACTOR:
   case X86_64_RELOC_TLV:
-    fatal("TODO: handle relocation type " + std::to_string(type));
+    fatal("TODO: handle relocation type " + std::to_string(r.type));
     break;
   default:
     llvm_unreachable("unexpected relocation type");
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -94,6 +94,18 @@
   llvm::SetVector<const Symbol *> entries;
 };
 
+// A location (`offset` bytes into `isec`) that dyld must fill in at load time
+// with the address of `dysym` plus `addend`.
+struct BindingEntry {
+  const DylibSymbol *dysym;
+  const InputSection *isec;
+  uint64_t offset;
+  int64_t addend;
+  BindingEntry(const DylibSymbol *dysym, const InputSection *isec,
+               uint64_t offset, int64_t addend)
+      : dysym(dysym), isec(isec), offset(offset), addend(addend) {}
+};
+
 // Stores bind opcodes for telling dyld which symbols to load non-lazily.
 class BindingSection : public SyntheticSection {
 public:
@@ -107,6 +119,14 @@
   bool isNeeded() const override;
   void writeTo(uint8_t *buf) const override;
 
+  // Record a binding for the pointer-sized slot at `offset` within `isec`.
+  void addEntry(const DylibSymbol *dysym, const InputSection *isec,
+                uint64_t offset, int64_t addend) {
+    bindings.emplace_back(dysym, isec, offset, addend);
+  }
+
+private:
+  std::vector<BindingEntry> bindings;
   SmallVector<char, 128> contents;
 };
 
@@ -256,6 +276,7 @@
 };
 
 struct InStruct {
+  BindingSection *binding = nullptr;
   GotSection *got = nullptr;
   LazyPointerSection *lazyPointers = nullptr;
   StubsSection *stubs = nullptr;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -11,6 +11,7 @@
 #include "ExportTrie.h"
 #include "InputFiles.h"
 #include "MachOStructs.h"
+#include "MergedOutputSection.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
@@ -95,7 +96,68 @@
 BindingSection::BindingSection()
     : SyntheticSection(segment_names::linkEdit, section_names::binding) {}
 
-bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
+bool BindingSection::isNeeded() const {
+  return !bindings.empty() || in.got->isNeeded();
+}
+
+namespace {
+struct Binding {
+  OutputSegment *segment = nullptr;
+  uint64_t offset = 0;
+  int64_t addend = 0;
+  uint8_t ordinal = 0;
+};
+} // namespace
+
+// Encode a sequence of opcodes that tell dyld to write the address of dysym +
+// addend at osec->addr + outSecOff.
+//
+// The bind opcode "interpreter" remembers the values of each binding field, so
+// we only need to encode the differences between bindings. Hence the use of
+// lastBinding.
+static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
+                          uint64_t outSecOff, int64_t addend,
+                          Binding &lastBinding, raw_svector_ostream &os) {
+  using namespace llvm::MachO;
+  OutputSegment *seg = osec->parent;
+  uint64_t offset = osec->getSegmentOffset() + outSecOff;
+  if (lastBinding.segment != seg) {
+    os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+                               seg->index);
+    encodeULEB128(offset, os);
+    lastBinding.segment = seg;
+    lastBinding.offset = offset;
+  } else if (lastBinding.offset != offset) {
+    assert(lastBinding.offset <= offset);
+    os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
+    encodeULEB128(offset - lastBinding.offset, os);
+    lastBinding.offset = offset;
+  }
+
+  if (lastBinding.ordinal != dysym.file->ordinal) {
+    if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
+      os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                                 dysym.file->ordinal);
+    } else {
+      error("TODO: Support larger dylib symbol ordinals");
+      return;
+    }
+    lastBinding.ordinal = dysym.file->ordinal;
+  }
+
+  if (lastBinding.addend != addend) {
+    os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
+    encodeSLEB128(addend, os);
+    lastBinding.addend = addend;
+  }
+
+  os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
+     << dysym.getName() << '\0'
+     << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
+     << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
+  // DO_BIND causes dyld to both perform the binding and increment the offset
+  lastBinding.offset += WordSize;
+}
 
 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
 // interprets to update a record with the following fields:
@@ -111,44 +173,41 @@
 // entry. It does *not* clear the record state after doing the bind, so
 // subsequent opcodes only need to encode the differences between bindings.
 void BindingSection::finalizeContents() {
-  if (!isNeeded())
-    return;
-
   raw_svector_ostream os{contents};
-  os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
-                             in.got->parent->index);
-  encodeULEB128(in.got->getSegmentOffset(), os);
-  uint32_t entries_to_skip = 0;
+  Binding lastBinding;
+  bool didEncode = false;
+  size_t gotIdx = 0;
+  // Each GOT slot is WordSize bytes; bind only the dylib-symbol slots.
   for (const Symbol *sym : in.got->getEntries()) {
     if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
-      if (entries_to_skip != 0) {
-        os << static_cast<uint8_t>(MachO::BIND_OPCODE_ADD_ADDR_ULEB);
-        encodeULEB128(WordSize * entries_to_skip, os);
-        entries_to_skip = 0;
-      }
-
-      // TODO: Implement compact encoding -- we only need to encode the
-      // differences between consecutive symbol entries.
-      if (dysym->file->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
-        os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                                   dysym->file->ordinal);
-      } else {
-        error("TODO: Support larger dylib symbol ordinals");
-        continue;
-      }
-      os << static_cast<uint8_t>(
-                MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
-         << dysym->getName() << '\0'
-         << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_TYPE_IMM |
-                                 MachO::BIND_TYPE_POINTER)
-         << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND);
-    } else {
-      // We have a defined symbol with a pre-populated address; skip over it.
-      ++entries_to_skip;
+      didEncode = true;
+      encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
     }
+    ++gotIdx;
   }
 
-  os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
+  // Sorting the relocations by segment and address allows us to encode them
+  // more compactly.
+  llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
+    OutputSegment *segA = a.isec->parent->parent;
+    OutputSegment *segB = b.isec->parent->parent;
+    if (segA != segB)
+      return segA->fileOff < segB->fileOff;
+    OutputSection *osecA = a.isec->parent;
+    OutputSection *osecB = b.isec->parent;
+    if (osecA != osecB)
+      return osecA->addr < osecB->addr;
+    if (a.isec != b.isec)
+      return a.isec->outSecOff < b.isec->outSecOff;
+    return a.offset < b.offset;
+  });
+  for (const BindingEntry &b : bindings) {
+    didEncode = true;
+    encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
+                  b.addend, lastBinding, os);
+  }
+  if (didEncode)
+    os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
 }
 
 void BindingSection::writeTo(uint8_t *buf) const {
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -53,7 +53,8 @@
   // depending on the relocation type. prepareSymbolRelocation() will set up the
   // GOT/stubs entries, and getSymbolVA() will return the addresses of those
   // entries.
-  virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0;
+  virtual void prepareSymbolRelocation(Symbol &, const InputSection *,
+                                       const Reloc &) = 0;
   virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
 
   uint32_t cpuType;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -54,7 +54,6 @@
   uint64_t addr = 0;
   uint64_t fileOff = 0;
   MachHeaderSection *headerSection = nullptr;
-  BindingSection *bindingSection = nullptr;
   LazyBindingSection *lazyBindingSection = nullptr;
   ExportSection *exportSection = nullptr;
   StringTableSection *stringTableSection = nullptr;
@@ -254,7 +253,7 @@
           error("undefined symbol " + s->getName() + ", referenced from " +
                 sys::path::filename(isec->file->getName()));
         else
-          target->prepareSymbolRelocation(*s, r.type);
+          target->prepareSymbolRelocation(*s, isec, r);
       }
     }
   }
@@ -262,7 +261,7 @@
 
 void Writer::createLoadCommands() {
   headerSection->addLoadCommand(
-      make<LCDyldInfo>(bindingSection, lazyBindingSection, exportSection));
+      make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection));
   headerSection->addLoadCommand(
       make<LCSymtab>(symtabSection, stringTableSection));
   headerSection->addLoadCommand(make<LCDysymtab>());
@@ -404,7 +403,6 @@
 void Writer::createOutputSections() {
   // First, create hidden sections
   headerSection = make<MachHeaderSection>();
-  bindingSection = make<BindingSection>();
   lazyBindingSection = make<LazyBindingSection>();
   stringTableSection = make<StringTableSection>();
   symtabSection = make<SymtabSection>(*stringTableSection);
@@ -513,7 +511,7 @@
     assignAddresses(seg);
 
   // Fill __LINKEDIT contents.
-  bindingSection->finalizeContents();
+  in.binding->finalizeContents();
   lazyBindingSection->finalizeContents();
   exportSection->finalizeContents();
   symtabSection->finalizeContents();
@@ -535,6 +533,7 @@
 void macho::writeResult() { Writer().run(); }
 
 void macho::createSyntheticSections() {
+  in.binding = make<BindingSection>();
   in.got = make<GotSection>();
   in.lazyPointers = make<LazyPointerSection>();
   in.stubs = make<StubsSection>();
diff --git a/lld/test/MachO/dylink.s b/lld/test/MachO/dylink.s
--- a/lld/test/MachO/dylink.s
+++ b/lld/test/MachO/dylink.s
@@ -31,9 +31,12 @@
 # CHECK-NEXT: [[#%x, GOODBYE_RIP:]]: popq %rsi
 
 # CHECK-LABEL: Bind table:
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello _hello_its_me
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world
+# CHECK-DAG: __DATA_CONST __got  0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]]               pointer   0 libhello   _hello_world
+# CHECK-DAG: __DATA_CONST __got  0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer   0 libhello   _hello_its_me
+# CHECK-DAG: __DATA_CONST __got  0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]]           pointer   0 libgoodbye _goodbye_world
+# CHECK-DAG: __DATA       __data 0x[[#%x, DATA_ADDR:]]                                pointer   0 libhello   _hello_world
+# CHECK-DAG: __DATA       __data 0x{{0*}}[[#%x, DATA_ADDR + 8]]                       pointer   8 libhello   _hello_its_me
+# CHECK-DAG: __DATA       __data 0x{{0*}}[[#%x, DATA_ADDR + 16]]                      pointer -15 libgoodbye _goodbye_world
 
 .section __TEXT,__text
 .globl _main
@@ -59,3 +62,8 @@
   syscall
   mov $0, %rax
   ret
+
+.data
+.quad _hello_world
+.quad _hello_its_me + 0x8
+.quad _goodbye_world - 0xf