diff --git a/lld/MachO/Arch/ARM.cpp b/lld/MachO/Arch/ARM.cpp --- a/lld/MachO/Arch/ARM.cpp +++ b/lld/MachO/Arch/ARM.cpp @@ -34,7 +34,7 @@ void writeStub(uint8_t *buf, const Symbol &) const override; void writeStubHelperHeader(uint8_t *buf) const override; - void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, + void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const override; void relaxGotLoad(uint8_t *loc, uint8_t type) const override; @@ -148,7 +148,7 @@ fatal("TODO: implement this"); } -void ARM::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym, +void ARM::writeStubHelperEntry(uint8_t *buf, const Symbol &sym, uint64_t entryAddr) const { fatal("TODO: implement this"); } diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -31,7 +31,7 @@ ARM64(); void writeStub(uint8_t *buf, const Symbol &) const override; void writeStubHelperHeader(uint8_t *buf) const override; - void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, + void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const override; const RelocAttrs &getRelocAttrs(uint8_t type) const override; void populateThunk(InputSection *thunk, Symbol *funcSym) override; @@ -100,7 +100,7 @@ 0x00000000, // 08: l0: .long 0 }; -void ARM64::writeStubHelperEntry(uint8_t *buf8, const DylibSymbol &sym, +void ARM64::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym, uint64_t entryVA) const { ::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA); } diff --git a/lld/MachO/Arch/ARM64Common.h b/lld/MachO/Arch/ARM64Common.h --- a/lld/MachO/Arch/ARM64Common.h +++ b/lld/MachO/Arch/ARM64Common.h @@ -128,7 +128,7 @@ inline void writeStubHelperEntry(uint8_t *buf8, const uint32_t stubHelperEntryCode[3], - const DylibSymbol &sym, uint64_t entryVA) { + const Symbol &sym, uint64_t entryVA) { auto *buf32 = reinterpret_cast(buf8); auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); }; uint64_t stubHelperHeaderVA = in.stubHelper->addr; diff --git a/lld/MachO/Arch/ARM64_32.cpp b/lld/MachO/Arch/ARM64_32.cpp --- a/lld/MachO/Arch/ARM64_32.cpp +++ b/lld/MachO/Arch/ARM64_32.cpp @@ -31,7 +31,7 @@ ARM64_32(); void writeStub(uint8_t *buf, const Symbol &) const override; void writeStubHelperHeader(uint8_t *buf) const override; - void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, + void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const override; const RelocAttrs &getRelocAttrs(uint8_t type) const override; }; @@ -96,7 +96,7 @@ 0x00000000, // 08: l0: .long 0 }; -void ARM64_32::writeStubHelperEntry(uint8_t *buf8, const DylibSymbol &sym, +void ARM64_32::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym, uint64_t entryVA) const { ::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA); } diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -32,7 +32,7 @@ void writeStub(uint8_t *buf, const Symbol &) const override; void writeStubHelperHeader(uint8_t *buf) const override; - void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, + void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const override; void relaxGotLoad(uint8_t *loc, uint8_t type) const override; @@ -166,7 +166,7 @@ 0xe9, 0, 0, 0, 0, // 0x5: jmp <__stub_helper> }; -void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym, +void X86_64::writeStubHelperEntry(uint8_t *buf, const Symbol &sym, uint64_t entryAddr) const { memcpy(buf, stubHelperEntry, sizeof(stubHelperEntry)); write32le(buf + 1, sym.lazyBindOffset); diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -96,10 +96,14 @@ // of a name conflict, we fall through to the replaceSymbol() call below. } + // With -flat_namespace, all extern symbols in dylibs are interposable. + bool interposable = config->namespaceKind == NamespaceKind::flat && + (config->outputType == MachO::MH_DYLIB || + config->outputType == MachO::MH_BUNDLE); Defined *defined = replaceSymbol( s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, isPrivateExtern, isThumb, isReferencedDynamically, noDeadStrip, - overridesWeakDef, isWeakDefCanBeHidden); + overridesWeakDef, isWeakDefCanBeHidden, interposable); return defined; } diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -63,7 +63,7 @@ // Only undefined or dylib symbols can be weak references. A weak reference // need not be satisfied at runtime, e.g. due to the symbol not being // available on a given target platform. - virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } + virtual bool isWeakRef() const { return false; } virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } @@ -87,9 +87,9 @@ // on whether it is a thread-local. A given symbol cannot be referenced by // both these sections at once. uint32_t gotIndex = UINT32_MAX; - + uint32_t lazyBindOffset = UINT32_MAX; + uint32_t stubsHelperIndex = UINT32_MAX; uint32_t stubsIndex = UINT32_MAX; - uint32_t symtabIndex = UINT32_MAX; InputFile *getFile() const { return file; } @@ -118,7 +118,8 @@ Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, bool isThumb, bool isReferencedDynamically, bool noDeadStrip, - bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false); + bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false, + bool interposable = false); bool isWeakDef() const override { return weakDef; } bool isExternalWeakDef() const { @@ -158,6 +159,14 @@ // metadata. This is information only for the static linker and not written // to the output. bool noDeadStrip : 1; + // Whether references to this symbol can be interposed at runtime to point to + // a different symbol definition (with the same name). For example, if both + // dylib A and B define an interposable symbol _foo, and we load A before B at + // runtime, then all references to _foo within dylib B will point to the + // definition in dylib A. + // + // Only extern symbols may be interposable. + bool interposable : 1; bool weakDefCanBeHidden : 1; @@ -258,9 +267,6 @@ static bool classof(const Symbol *s) { return s->kind() == DylibKind; } - uint32_t stubsHelperIndex = UINT32_MAX; - uint32_t lazyBindOffset = UINT32_MAX; - RefState getRefState() const { return refState; } void reference(RefState newState) { diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -15,13 +15,13 @@ using namespace lld; using namespace lld::macho; -static_assert(sizeof(void *) != 8 || sizeof(Symbol) == 48, +static_assert(sizeof(void *) != 8 || sizeof(Symbol) == 56, "Try to minimize Symbol's size; we create many instances"); // The Microsoft ABI doesn't support using parent class tail padding for child // members, hence the _MSC_VER check. #if !defined(_MSC_VER) -static_assert(sizeof(void *) != 8 || sizeof(Defined) == 80, +static_assert(sizeof(void *) != 8 || sizeof(Defined) == 88, "Try to minimize Defined's size; we create many instances"); #endif @@ -44,12 +44,14 @@ uint64_t value, uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, bool isThumb, bool isReferencedDynamically, bool noDeadStrip, - bool canOverrideWeakDef, bool isWeakDefCanBeHidden) + bool canOverrideWeakDef, bool isWeakDefCanBeHidden, + bool interposable) : Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef), privateExtern(isPrivateExtern), includeInSymtab(true), thumb(isThumb), referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip), - weakDefCanBeHidden(isWeakDefCanBeHidden), weakDef(isWeakDef), - external(isExternal), isec(isec), value(value), size(size) { + interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden), + weakDef(isWeakDef), external(isExternal), isec(isec), value(value), + size(size) { if (isec) { isec->symbols.push_back(this); // Maintain sorted order. diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -192,13 +192,13 @@ bool isNeeded() const override { return !bindingsMap.empty(); } void writeTo(uint8_t *buf) const override; - void addEntry(const DylibSymbol *dysym, const InputSection *isec, - uint64_t offset, int64_t addend = 0) { + void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset, + int64_t addend = 0) { bindingsMap[dysym].emplace_back(addend, Location(isec, offset)); } private: - BindingsMap bindingsMap; + BindingsMap bindingsMap; SmallVector contents; }; @@ -331,13 +331,13 @@ void writeTo(uint8_t *buf) const override; // Note that every entry here will by referenced by a corresponding entry in // the StubHelperSection. - void addEntry(DylibSymbol *dysym); - const llvm::SetVector &getEntries() const { return entries; } + void addEntry(Symbol *dysym); + const llvm::SetVector &getEntries() const { return entries; } private: - uint32_t encode(const DylibSymbol &); + uint32_t encode(const Symbol &); - llvm::SetVector entries; + llvm::SetVector entries; SmallVector contents; llvm::raw_svector_ostream os{contents}; }; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -235,6 +235,8 @@ in.rebase->addEntry(isec, offset); if (defined->isExternalWeakDef()) in.weakBinding->addEntry(sym, isec, offset, addend); + else if (defined->interposable) + in.binding->addEntry(sym, isec, offset, addend); } else { // Undefined symbols are filtered out in scanRelocations(); we should never // get here @@ -417,6 +419,13 @@ return dysym.getFile()->ordinal; } +static int16_t ordinalForSymbol(const Symbol &sym) { + if (const auto *dysym = dyn_cast(&sym)) + return ordinalForDylibSymbol(*dysym); + assert(cast(&sym)->interposable); + return BIND_SPECIAL_DYLIB_FLAT_LOOKUP; +} + static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) { if (ordinal <= 0) { os << static_cast(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @@ -486,14 +495,14 @@ int16_t lastOrdinal = 0; for (auto &p : sortBindings(bindingsMap)) { - const DylibSymbol *sym = p.first; + const Symbol *sym = p.first; std::vector &bindings = p.second; uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; if (sym->isWeakRef()) flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; os << flags << sym->getName() << '\0' << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); - int16_t ordinal = ordinalForDylibSymbol(*sym); + int16_t ordinal = ordinalForSymbol(*sym); if (ordinal != lastOrdinal) { encodeDylibOrdinal(ordinal, os); lastOrdinal = ordinal; @@ -596,7 +605,7 @@ void StubHelperSection::writeTo(uint8_t *buf) const { target->writeStubHelperHeader(buf); size_t off = target->stubHelperHeaderSize; - for (const DylibSymbol *sym : in.lazyBinding->getEntries()) { + for (const Symbol *sym : in.lazyBinding->getEntries()) { target->writeStubHelperEntry(buf + off, *sym, addr + off); off += target->stubHelperEntrySize; } @@ -667,7 +676,7 @@ void LazyBindingSection::finalizeContents() { // TODO: Just precompute output size here instead of writing to a temporary // buffer - for (DylibSymbol *sym : entries) + for (Symbol *sym : entries) sym->lazyBindOffset = encode(*sym); } @@ -675,11 +684,11 @@ memcpy(buf, contents.data(), contents.size()); } -void LazyBindingSection::addEntry(DylibSymbol *dysym) { - if (entries.insert(dysym)) { - dysym->stubsHelperIndex = entries.size() - 1; +void LazyBindingSection::addEntry(Symbol *sym) { + if (entries.insert(sym)) { + sym->stubsHelperIndex = entries.size() - 1; in.rebase->addEntry(in.lazyPointers->isec, - dysym->stubsIndex * target->wordSize); + sym->stubsIndex * target->wordSize); } } @@ -689,7 +698,7 @@ // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case, // we cannot encode just the differences between symbols; we have to emit the // complete bind information for each symbol. -uint32_t LazyBindingSection::encode(const DylibSymbol &sym) { +uint32_t LazyBindingSection::encode(const Symbol &sym) { uint32_t opstreamOffset = contents.size(); OutputSegment *dataSeg = in.lazyPointers->parent; os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @@ -697,7 +706,7 @@ uint64_t offset = in.lazyPointers->addr - dataSeg->addr + sym.stubsIndex * target->wordSize; encodeULEB128(offset, os); - encodeDylibOrdinal(ordinalForDylibSymbol(sym), os); + encodeDylibOrdinal(ordinalForSymbol(sym), os); uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; if (sym.isWeakRef()) diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -52,7 +52,7 @@ // details. virtual void writeStub(uint8_t *buf, const Symbol &) const = 0; virtual void writeStubHelperHeader(uint8_t *buf) const = 0; - virtual void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, + virtual void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const = 0; // Symbols may be referenced via either the GOT or the stubs section, diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -594,6 +594,9 @@ in.weakBinding->addEntry(sym, in.lazyPointers->isec, sym->stubsIndex * target->wordSize); } + } else if (defined->interposable) { + if (in.stubs->addEntry(sym)) + in.lazyBinding->addEntry(sym); } } else { llvm_unreachable("invalid branch target symbol type"); @@ -605,7 +608,7 @@ if (isa(sym)) return true; if (const auto *defined = dyn_cast(sym)) - return defined->isExternalWeakDef(); + return defined->isExternalWeakDef() || defined->interposable; return false; } diff --git a/lld/test/MachO/flat-namespace.s b/lld/test/MachO/flat-namespace-dysyms.s rename from lld/test/MachO/flat-namespace.s rename to lld/test/MachO/flat-namespace-dysyms.s diff --git a/lld/test/MachO/flat-namespace-interposable.s b/lld/test/MachO/flat-namespace-interposable.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/flat-namespace-interposable.s @@ -0,0 +1,73 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t + +## With -flat_namespace, non-weak extern symbols in dylibs become interposable. +## Check that we generate the correct bindings for them. The test also includes +## other symbol types like weak externs to verify we continue to do the same +## (correct) thing even when `-flat_namespace` is enabled, instead of generating +## spurious bindings. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/foo.o %t/foo.s +# RUN: %lld -lSystem -flat_namespace -o %t/foo %t/foo.o +# RUN: %lld -lSystem -dylib -flat_namespace -o %t/foo.dylib %t/foo.o +# RUN: %lld -lSystem -bundle -flat_namespace -o %t/foo.bundle %t/foo.o +# COM: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/foo | FileCheck %s --check-prefix=EXEC +# RUN: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/foo.dylib | FileCheck %s --check-prefix=DYLIB +# RUN: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/foo.bundle | FileCheck %s --check-prefix=DYLIB + +## Executables with -flat_namespace don't have interposable externs. +# EXEC: Bind table: +# EXEC-NEXT: segment section address type addend dylib symbol +# EXEC-EMPTY: +# EXEC-NEXT: Lazy bind table: +# EXEC-NEXT: segment section address dylib symbol +# EXEC-EMPTY: +# EXEC-NEXT: Weak bind table: +# EXEC-NEXT: segment section address type addend symbol +# EXEC-NEXT: __DATA __la_symbol_ptr {{.*}} pointer 0 _weak_extern +# EXEC-NEXT: __DATA __data {{.*}} pointer 0 _weak_extern +# EXEC-EMPTY: + +# DYLIB: Bind table: +# DYLIB-NEXT: segment section address type addend dylib symbol +# DYLIB-DAG: __DATA __data {{.*}} pointer 0 flat-namespace _extern +# DYLIB-DAG: __DATA __thread_ptrs {{.*}} pointer 0 flat-namespace _tlv +# DYLIB-DAG: __DATA_CONST __got {{.*}} pointer 0 flat-namespace dyld_stub_binder +# DYLIB-EMPTY: +# DYLIB-NEXT: Lazy bind table: +# DYLIB-NEXT: segment section address dylib symbol +# DYLIB-NEXT: __DATA __la_symbol_ptr {{.*}} flat-namespace _extern +# DYLIB-EMPTY: +# DYLIB-NEXT: Weak bind table: +# DYLIB-NEXT: segment section address type addend symbol +# DYLIB-NEXT: __DATA __la_symbol_ptr {{.*}} pointer 0 _weak_extern +# DYLIB-NEXT: __DATA __data {{.*}} pointer 0 _weak_extern + +#--- foo.s + +.globl _main, _extern, _weak_extern, _tlv +.weak_definition _weak_extern + +_extern: + retq +_weak_extern: + retq +_local: + retq + +_main: + callq _extern + callq _weak_extern + callq _local + mov _tlv@TLVP(%rip), %rax + retq + +.data +.quad _extern +.quad _weak_extern +.quad _local + +.section __DATA,__thread_vars,thread_local_variables +_tlv: + +.subsections_via_symbols