diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -222,12 +222,14 @@ switch (r.type) { case X86_64_RELOC_GOT_LOAD: // TODO: implement mov -> lea relaxation for non-dynamic symbols - case X86_64_RELOC_GOT: + case X86_64_RELOC_GOT: { in.got->addEntry(sym); + if (sym->isTlv()) error("found GOT relocation referencing thread-local variable in " + toString(isec)); break; + } case X86_64_RELOC_BRANCH: { // TODO: weak dysyms should go into the weak binding section instead if (auto *dysym = dyn_cast(sym)) @@ -241,31 +243,26 @@ dysym->getName() + " must have r_length = 3"); return; } - in.binding->addEntry(dysym, isec, r.offset, r.addend); } + addNonLazyBindingEntries(sym, isec, r.offset, r.addend); break; } case X86_64_RELOC_SIGNED: case X86_64_RELOC_SIGNED_1: case X86_64_RELOC_SIGNED_2: case X86_64_RELOC_SIGNED_4: + // TODO: warn if they refer to a weak global break; - case X86_64_RELOC_TLV: - if (isa(sym)) { + case X86_64_RELOC_TLV: { + if (sym->isWeakDef() || isa(sym)) in.tlvPointers->addEntry(sym); - } else { - assert(isa(sym)); - // TLV relocations on x86_64 are always used with a movq opcode, which - // can be converted to leaq opcodes if they reference a defined symbol. - // (This is in contrast to GOT relocations, which can be used with - // non-movq opcodes.) As such, there is no need to add an entry to - // tlvPointers here. - } + if (!sym->isTlv()) error( "found X86_64_RELOC_TLV referencing a non-thread-local variable in " + toString(isec)); break; + } case X86_64_RELOC_SUBTRACTOR: fatal("TODO: handle relocation type " + std::to_string(r.type)); break; @@ -291,7 +288,7 @@ case X86_64_RELOC_SIGNED_4: return sym.getVA(); case X86_64_RELOC_TLV: { - if (isa(&sym)) + if (sym.isInGot()) return in.tlvPointers->addr + sym.gotIndex * WordSize; // Convert the movq to a leaq. 
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -236,7 +236,8 @@ // Global defined symbol return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF); // Local defined symbol - return make(name, isec, value, sym.n_desc & N_WEAK_DEF); + return make(name, isec, value, sym.n_desc & N_WEAK_DEF, + /*isExternal=*/false); }; for (size_t i = 0, n = nList.size(); i < n; ++i) { diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -53,7 +53,7 @@ // of a name conflict, we fall through to the replaceSymbol() call below. } - replaceSymbol(s, name, isec, value, isWeakDef); + replaceSymbol(s, name, isec, value, isWeakDef, /*isExternal=*/true); return s; } diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -57,6 +57,9 @@ virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } + // Whether this symbol is in the GOT or TLVPointer sections. + bool isInGot() const { return gotIndex != UINT32_MAX; } + // The index of this symbol in the GOT or the TLVPointer section, depending // on whether it is a thread-local. A given symbol cannot be referenced by // both these sections at once. 
@@ -71,14 +74,17 @@ class Defined : public Symbol { public: - Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef) - : Symbol(DefinedKind, name), isec(isec), value(value), - weakDef(isWeakDef) {} + Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef, + bool isExternal) + : Symbol(DefinedKind, name), isec(isec), value(value), weakDef(isWeakDef), + external(isExternal) {} bool isWeakDef() const override { return weakDef; } bool isTlv() const override { return isThreadLocalVariables(isec->flags); } + bool isExternal() const { return external; } + static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } uint64_t getVA() const override { return isec->getVA() + value; } @@ -92,6 +98,7 @@ private: const bool weakDef; + const bool external; }; class Undefined : public Symbol { diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -28,6 +28,7 @@ constexpr const char pageZero[] = "__pagezero"; constexpr const char header[] = "__mach_header"; constexpr const char binding[] = "__binding"; +constexpr const char weakBinding[] = "__weak_binding"; constexpr const char lazyBinding[] = "__lazy_binding"; constexpr const char export_[] = "__export"; constexpr const char symbolTable[] = "__symbol_table"; @@ -140,19 +141,24 @@ using SectionPointerUnion = llvm::PointerUnion; -struct BindingEntry { - const DylibSymbol *dysym; +struct BindingTarget { SectionPointerUnion section; uint64_t offset; int64_t addend; - BindingEntry(const DylibSymbol *dysym, SectionPointerUnion section, - uint64_t offset, int64_t addend) - : dysym(dysym), section(section), offset(offset), addend(addend) {} + BindingTarget(SectionPointerUnion section, uint64_t offset, int64_t addend) + : section(section), offset(offset), addend(addend) {} uint64_t getVA() const; }; +struct BindingEntry { + const DylibSymbol *dysym; + BindingTarget target; + 
BindingEntry(const DylibSymbol *dysym, BindingTarget target) + : dysym(dysym), target(std::move(target)) {} +}; + // Stores bind opcodes for telling dyld which symbols to load non-lazily. class BindingSection : public LinkEditSection { public: @@ -168,7 +174,7 @@ void addEntry(const DylibSymbol *dysym, SectionPointerUnion section, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(dysym, section, offset, addend); + bindings.emplace_back(dysym, BindingTarget(section, offset, addend)); } private: @@ -176,6 +182,43 @@ SmallVector contents; }; +struct WeakBindingEntry { + const Symbol *symbol; + BindingTarget target; + WeakBindingEntry(const Symbol *symbol, BindingTarget target) + : symbol(symbol), target(std::move(target)) {} +}; + +// Stores bind opcodes for telling dyld which weak symbols to load. Note that +// the bind opcodes will only refer to these symbols by name, but will not +// specify which dylib to load them from. +class WeakBindingSection : public LinkEditSection { +public: + WeakBindingSection(); + void finalizeContents(); + uint64_t getRawSize() const override { return contents.size(); } + // Like other sections in __LINKEDIT, the weak binding section is special: + // its offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of + // in section headers. + bool isHidden() const override { return true; } + bool isNeeded() const override { return !bindings.empty(); } + + void writeTo(uint8_t *buf) const override; + + void addEntry(const Symbol *symbol, SectionPointerUnion section, + uint64_t offset, int64_t addend = 0) { + bindings.emplace_back(symbol, BindingTarget(section, offset, addend)); + } + +private: + std::vector bindings; + SmallVector contents; +}; + +// Add bindings for symbols that need weak or non-lazy bindings. +void addNonLazyBindingEntries(const Symbol *, SectionPointerUnion, + uint64_t offset, int64_t addend = 0); + // The following sections implement lazy symbol binding -- very similar to the // PLT mechanism in ELF. 
// @@ -324,6 +367,7 @@ struct InStruct { MachHeaderSection *header = nullptr; BindingSection *binding = nullptr; + WeakBindingSection *weakBinding = nullptr; GotSection *got = nullptr; TlvPointerSection *tlvPointers = nullptr; LazyPointerSection *lazyPointers = nullptr; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -91,11 +91,10 @@ void NonLazyPointerSectionBase::addEntry(Symbol *sym) { if (entries.insert(sym)) { - assert(sym->gotIndex == UINT32_MAX); + assert(!sym->isInGot()); sym->gotIndex = entries.size() - 1; - if (auto *dysym = dyn_cast(sym)) - in.binding->addEntry(dysym, this, sym->gotIndex * WordSize); + addNonLazyBindingEntries(sym, this, sym->gotIndex * WordSize); } } @@ -117,13 +116,13 @@ }; } // namespace -// Encode a sequence of opcodes that tell dyld to write the address of dysym + +// Encode a sequence of opcodes that tell dyld to write the address of symbol + // addend at osec->addr + outSecOff. // // The bind opcode "interpreter" remembers the values of each binding field, so // we only need to encode the differences between bindings. Hence the use of // lastBinding. 
-static void encodeBinding(const DylibSymbol *dysym, const OutputSection *osec, +static void encodeBinding(const Symbol *sym, const OutputSection *osec, uint64_t outSecOff, int64_t addend, Binding &lastBinding, raw_svector_ostream &os) { using namespace llvm::MachO; @@ -141,17 +140,6 @@ lastBinding.offset = offset; } - if (lastBinding.ordinal != dysym->file->ordinal) { - if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) { - os << static_cast(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | - dysym->file->ordinal); - } else { - os << static_cast(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - encodeULEB128(dysym->file->ordinal, os); - } - lastBinding.ordinal = dysym->file->ordinal; - } - if (lastBinding.addend != addend) { os << static_cast(BIND_OPCODE_SET_ADDEND_SLEB); encodeSLEB128(addend, os); @@ -159,18 +147,33 @@ } os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << dysym->getName() << '\0' + << sym->getName() << '\0' << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) << static_cast(BIND_OPCODE_DO_BIND); // DO_BIND causes dyld to both perform the binding and increment the offset lastBinding.offset += WordSize; } -uint64_t BindingEntry::getVA() const { +// Non-weak bindings need to have their dylib ordinal encoded as well. 
+static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding &lastBinding, + raw_svector_ostream &os) { + using namespace llvm::MachO; + if (lastBinding.ordinal != dysym->file->ordinal) { + if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) { + os << static_cast(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + dysym->file->ordinal); + } else { + os << static_cast(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + encodeULEB128(dysym->file->ordinal, os); + } + lastBinding.ordinal = dysym->file->ordinal; + } +} + +uint64_t BindingTarget::getVA() const { if (auto *isec = section.dyn_cast()) return isec->getVA() + offset; - auto *osec = section.get(); - return osec->addr + offset; + return section.get()->addr + offset; } // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld @@ -194,15 +197,17 @@ // result. Note that sorting by address alone ensures that bindings for the // same segment / section are located together. llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { - return a.getVA() < b.getVA(); + return a.target.getVA() < b.target.getVA(); }); for (const BindingEntry &b : bindings) { - if (auto *isec = b.section.dyn_cast()) { - encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.offset, b.addend, - lastBinding, os); + encodeDylibOrdinal(b.dysym, lastBinding, os); + if (auto *isec = b.target.section.dyn_cast()) { + encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset, + b.target.addend, lastBinding, os); } else { - auto *osec = b.section.get(); - encodeBinding(b.dysym, osec, b.offset, b.addend, lastBinding, os); + auto *osec = b.target.section.get(); + encodeBinding(b.dysym, osec, b.target.offset, b.target.addend, + lastBinding, os); } } if (!bindings.empty()) @@ -213,6 +218,56 @@ memcpy(buf, contents.data(), contents.size()); } +WeakBindingSection::WeakBindingSection() + : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {} + +void WeakBindingSection::finalizeContents() { + raw_svector_ostream 
os{contents}; + Binding lastBinding; + + // Since bindings are delta-encoded, sorting them allows for a more compact + // result. + llvm::sort(bindings, + [](const WeakBindingEntry &a, const WeakBindingEntry &b) { + return a.target.getVA() < b.target.getVA(); + }); + for (const WeakBindingEntry &b : bindings) { + if (auto *isec = b.target.section.dyn_cast()) { + encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset, + b.target.addend, lastBinding, os); + } else { + auto *osec = b.target.section.get(); + encodeBinding(b.symbol, osec, b.target.offset, b.target.addend, + lastBinding, os); + } + } + if (!bindings.empty()) + os << static_cast(MachO::BIND_OPCODE_DONE); +} + +void WeakBindingSection::writeTo(uint8_t *buf) const { + memcpy(buf, contents.data(), contents.size()); +} + +void macho::addNonLazyBindingEntries(const Symbol *sym, + SectionPointerUnion section, + uint64_t offset, int64_t addend) { + if (auto *dysym = dyn_cast(sym)) { + in.binding->addEntry(dysym, section, offset, addend); + if (dysym->isWeakDef()) + in.weakBinding->addEntry(sym, section, offset, addend); + } else if (auto *defined = dyn_cast(sym)) { + if (defined->isWeakDef() && defined->isExternal()) + in.weakBinding->addEntry(sym, section, offset, addend); + } else if (isa(sym)) { + error("cannot bind to " + DSOHandle::name); + } else { + // Undefined symbols are filtered out in scanRelocations(); we should never + // get here + llvm_unreachable("cannot bind to an undefined symbol"); + } +} + StubsSection::StubsSection() : SyntheticSection(segment_names::text, "__stubs") {} diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -65,10 +65,11 @@ class LCDyldInfo : public LoadCommand { public: LCDyldInfo(BindingSection *bindingSection, + WeakBindingSection *weakBindingSection, LazyBindingSection *lazyBindingSection, ExportSection *exportSection) - : bindingSection(bindingSection), lazyBindingSection(lazyBindingSection), - 
exportSection(exportSection) {} + : bindingSection(bindingSection), weakBindingSection(weakBindingSection), + lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} uint32_t getSize() const override { return sizeof(dyld_info_command); } @@ -80,6 +81,10 @@ c->bind_off = bindingSection->fileOff; c->bind_size = bindingSection->getFileSize(); } + if (weakBindingSection->isNeeded()) { + c->weak_bind_off = weakBindingSection->fileOff; + c->weak_bind_size = weakBindingSection->getFileSize(); + } if (lazyBindingSection->isNeeded()) { c->lazy_bind_off = lazyBindingSection->fileOff; c->lazy_bind_size = lazyBindingSection->getFileSize(); @@ -91,6 +96,7 @@ } BindingSection *bindingSection; + WeakBindingSection *weakBindingSection; LazyBindingSection *lazyBindingSection; ExportSection *exportSection; }; @@ -321,8 +327,8 @@ } void Writer::createLoadCommands() { - in.header->addLoadCommand( - make(in.binding, lazyBindingSection, exportSection)); + in.header->addLoadCommand(make( + in.binding, in.weakBinding, lazyBindingSection, exportSection)); in.header->addLoadCommand(make(symtabSection, stringTableSection)); in.header->addLoadCommand(make()); for (StringRef path : config->runtimePaths) @@ -414,7 +420,8 @@ return -1; } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) - .Case(section_names::binding, -5) + .Case(section_names::binding, -6) + .Case(section_names::weakBinding, -5) .Case(section_names::lazyBinding, -4) .Case(section_names::export_, -3) .Case(section_names::symbolTable, -2) @@ -577,6 +584,7 @@ // Fill __LINKEDIT contents. 
in.binding->finalizeContents(); + in.weakBinding->finalizeContents(); lazyBindingSection->finalizeContents(); exportSection->finalizeContents(); symtabSection->finalizeContents(); @@ -600,6 +608,7 @@ void macho::createSyntheticSections() { in.header = make(); in.binding = make(); + in.weakBinding = make(); in.got = make(); in.tlvPointers = make(); in.lazyPointers = make(); diff --git a/lld/test/MachO/weak-binding.s b/lld/test/MachO/weak-binding.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/weak-binding.s @@ -0,0 +1,107 @@ +# REQUIRES: x86 +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o +# RUN: lld -flavor darwinnew -syslibroot %S/Inputs/MacOSX.sdk -dylib %t/libfoo.o -o %t/libfoo.dylib +# RUN: lld -flavor darwinnew -syslibroot %S/Inputs/MacOSX.sdk %t/test.o -L%t -lfoo -o %t/test -lSystem +# RUN: llvm-objdump -d --no-show-raw-insn --bind --weak-bind --full-contents %t/test | \ +# RUN: FileCheck %s + +# CHECK: Contents of section __got: +## Check that this section contains a nonzero pointer. It should point to +## _weak_external_for_gotpcrel, but we don't have a good way of testing the exact +## value as the bytes here are in little-endian order. 
+# CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} + +# CHECK: <_main>: +# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_DY_GOT_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_GLOB_GOT_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_TLV_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_DY_TLV_ADDR:]] + +# CHECK-LABEL: Bind table: +# CHECK-DAG: __DATA __data 0x[[#%x,WEAK_DY:]] pointer 0 libfoo _weak_dysym +# CHECK-DAG: __DATA __thread_vars 0x{{[0-9a-f]*}} pointer 0 libSystem __tlv_bootstrap +# CHECK-DAG: __DATA __thread_ptrs 0x[[#WEAK_DY_TLV_ADDR]] pointer 0 libfoo _weak_dysym_tlv +# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_DY_GOT_ADDR]] pointer 0 libfoo _weak_dysym_for_gotpcrel +## Check that we don't have any other bindings +# CHECK-NOT: pointer + +# CHECK-LABEL: Weak bind table: +# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_DY_GOT_ADDR]] pointer 0 _weak_dysym_for_gotpcrel +# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_GLOB_GOT_ADDR]] pointer 0 _weak_external_for_gotpcrel +# CHECK-DAG: __DATA __data 0x[[#WEAK_DY]] pointer 0 _weak_dysym +# CHECK-DAG: __DATA __thread_ptrs 0x[[#WEAK_TLV_ADDR]] pointer 0 _weak_tlv +# CHECK-DAG: __DATA __thread_ptrs 0x[[#WEAK_DY_TLV_ADDR]] pointer 0 _weak_dysym_tlv +# CHECK-DAG: __DATA __data 0x{{[0-9a-f]*}} pointer 2 _weak_external +## Check that we don't have any other bindings +# CHECK-NOT: pointer + +## Weak internal symbols don't get bindings +# RUN: llvm-objdump --macho --bind --weak-bind %t/test | FileCheck %s --check-prefix=WEAK-internal +# WEAK-internal-NOT: _weak_internal +# WEAK-internal-NOT: _weak_internal_tlv + +#--- libfoo.s + +.globl _weak_dysym +.weak_definition _weak_dysym +_weak_dysym: + .quad 0x1234 + +.globl _weak_dysym_for_gotpcrel +.weak_definition _weak_dysym_for_gotpcrel +_weak_dysym_for_gotpcrel: + .quad 0x1234 + +.section __DATA,__thread_vars,thread_local_variables + +.globl _weak_dysym_tlv +.weak_definition _weak_dysym_tlv +_weak_dysym_tlv: + .quad 0x1234 + +#--- test.s + 
+.globl _main, _weak_external, _weak_external_for_gotpcrel +.weak_definition _weak_external, _weak_external_for_gotpcrel, _weak_internal + +_main: + mov _weak_dysym_for_gotpcrel@GOTPCREL(%rip), %rax + mov _weak_external_for_gotpcrel@GOTPCREL(%rip), %rax + mov _weak_tlv@TLVP(%rip), %rax + mov _weak_dysym_tlv@TLVP(%rip), %rax + mov _weak_internal_tlv@TLVP(%rip), %rax + mov $0, %rax + ret + +_weak_external: + .quad 0x1234 + +_weak_external_for_gotpcrel: + .quad 0x1234 + +_weak_internal: + .quad 0x1234 + +.data + .quad _weak_dysym + .quad _weak_external + 2 + .quad _weak_internal + +.tbss _weak_tlv$tlv$init, 4, 2 +.tbss _weak_internal_tlv$tlv$init, 4, 2 + +.section __DATA,__thread_vars,thread_local_variables +.globl _weak_tlv +.weak_definition _weak_tlv, _weak_internal_tlv + +_weak_tlv: + .quad __tlv_bootstrap + .quad 0 + .quad _weak_tlv$tlv$init + +_weak_internal_tlv: + .quad __tlv_bootstrap + .quad 0 + .quad _weak_internal_tlv$tlv$init