diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1031,7 +1031,14 @@ } createSyntheticSections(); - symtab->addDSOHandle(in.header); + + // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit + // which does e.g. cleanup of static global variables. The ABI document says + // that the pointer can point to any address in one of the dylib's segments, + // but in practice ld64 seems to set it to point to the header, so that's + // what's implemented here. + symtab->addSynthetic("___dso_handle", in.header->isec, 0, + /*privateExtern=*/true, /*linkerInternal=*/true); for (const Arg *arg : args.filtered(OPT_sectcreate)) { StringRef segName = arg->getValue(0); diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -24,6 +24,7 @@ class InputSection; class MachHeaderSection; class Symbol; +class Defined; class Undefined; /* @@ -34,8 +35,8 @@ */ class SymbolTable { public: - Symbol *addDefined(StringRef name, InputFile *, InputSection *, - uint32_t value, bool isWeakDef, bool isPrivateExtern); + Defined *addDefined(StringRef name, InputFile *, InputSection *, + uint32_t value, bool isWeakDef, bool isPrivateExtern); Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef); @@ -48,7 +49,8 @@ Symbol *addLazy(StringRef name, ArchiveFile *file, const llvm::object::Archive::Symbol &sym); - Symbol *addDSOHandle(const MachHeaderSection *); + Defined *addSynthetic(StringRef name, InputSection *, uint32_t value, + bool isPrivateExtern, bool isLinkerInternal); ArrayRef getSymbols() const { return symVector; } Symbol *find(StringRef name); diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -37,9 +37,9 @@ return {sym, true}; } -Symbol *SymbolTable::addDefined(StringRef name, InputFile *file, - InputSection *isec, uint32_t value, - bool isWeakDef, bool isPrivateExtern) { +Defined *SymbolTable::addDefined(StringRef name, InputFile *file, + InputSection *isec, uint32_t value, + bool isWeakDef, bool isPrivateExtern) { Symbol *s; bool wasInserted; bool overridesWeakDef = false; @@ -52,7 +52,7 @@ // If one of them isn't private extern, the merged symbol isn't. if (defined->isWeakDef()) defined->privateExtern &= isPrivateExtern; - return s; + return defined; } if (!defined->isWeakDef()) { error("duplicate symbol: " + name + "\n>>> defined in " + @@ -70,7 +70,7 @@ replaceSymbol(s, name, file, isec, value, isWeakDef, /*isExternal=*/true, isPrivateExtern); defined->overridesWeakDef = overridesWeakDef; - return s; + return defined; } Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file, @@ -158,18 +158,12 @@ return s; } -Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) { - Symbol *s; - bool wasInserted; - std::tie(s, wasInserted) = insert(DSOHandle::name); - if (!wasInserted) { - // FIXME: Make every symbol (including absolute symbols) contain a - // reference to their originating file, then add that file name to this - // error message. dynamic_lookup symbols don't have an originating file. - if (isa(s)) - error("found defined symbol with illegal name " + DSOHandle::name); - } - replaceSymbol(s, header); +Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec, + uint32_t value, bool isPrivateExtern, + bool isLinkerInternal) { + auto s = addDefined(name, nullptr, isec, value, /*isWeakDef=*/false, + isPrivateExtern); + s->linkerInternal = isLinkerInternal; return s; } diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -39,7 +39,6 @@ CommonKind, DylibKind, LazyKind, - DSOHandleKind, }; virtual ~Symbol() {} @@ -100,7 +99,7 @@ bool isWeakDef, bool isExternal, bool isPrivateExtern) : Symbol(DefinedKind, name, file), isec(isec), value(value), overridesWeakDef(false), privateExtern(isPrivateExtern), - weakDef(isWeakDef), external(isExternal) {} + linkerInternal(false), weakDef(isWeakDef), external(isExternal) {} bool isWeakDef() const override { return weakDef; } bool isExternalWeakDef() const { @@ -123,7 +122,10 @@ uint32_t value; bool overridesWeakDef : 1; + // Whether this symbol should appear in the output binary's export trie. bool privateExtern : 1; + // Whether this symbol should appear in the output binary's symbol table. + bool linkerInternal : 1; private: const bool weakDef : 1; @@ -228,44 +230,12 @@ const llvm::object::Archive::Symbol sym; }; -// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which -// does e.g. cleanup of static global variables. The ABI document says that the -// pointer can point to any address in one of the dylib's segments, but in -// practice ld64 seems to set it to point to the header, so that's what's -// implemented here. -// -// The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet -// tested this on an ARM platform. -// -// DSOHandle effectively functions like a Defined symbol, but it doesn't belong -// to an InputSection. -class DSOHandle : public Symbol { -public: - DSOHandle(const MachHeaderSection *header) - : Symbol(DSOHandleKind, name, nullptr), header(header) {} - - const MachHeaderSection *header; - - uint64_t getVA() const override; - - uint64_t getFileOffset() const override; - - bool isWeakDef() const override { return false; } - - bool isTlv() const override { return false; } - - static constexpr StringRef name = "___dso_handle"; - - static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; } -}; - union SymbolUnion { alignas(Defined) char a[sizeof(Defined)]; alignas(Undefined) char b[sizeof(Undefined)]; alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; alignas(LazySymbol) char e[sizeof(LazySymbol)]; - alignas(DSOHandle) char f[sizeof(DSOHandle)]; }; template diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -45,9 +45,3 @@ } void LazySymbol::fetchArchiveMember() { getFile()->fetch(sym); } - -uint64_t DSOHandle::getVA() const { return header->addr; } - -uint64_t DSOHandle::getFileOffset() const { return header->fileOff; } - -constexpr StringRef DSOHandle::name; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -73,6 +73,9 @@ } const StringRef segname; + // This fake InputSection makes it easier for us to write code that applies + // generically to both user inputs and synthetics. + InputSection *isec; }; // All sections in __LINKEDIT should inherit from this. @@ -165,16 +168,13 @@ section_names::threadPtrs) {} }; -using SectionPointerUnion = - llvm::PointerUnion; - struct Location { - SectionPointerUnion section = nullptr; - uint64_t offset = 0; + const InputSection *isec; + uint64_t offset; - Location(SectionPointerUnion section, uint64_t offset) - : section(section), offset(offset) {} - uint64_t getVA() const; + Location(const InputSection *isec, uint64_t offset) + : isec(isec), offset(offset) {} + uint64_t getVA() const { return isec->getVA() + offset; } }; // Stores rebase opcodes, which tell dyld where absolute addresses have been @@ -188,9 +188,9 @@ bool isNeeded() const override { return !locations.empty(); } void writeTo(uint8_t *buf) const override; - void addEntry(SectionPointerUnion section, uint64_t offset) { + void addEntry(const InputSection *isec, uint64_t offset) { if (config->isPic) - locations.push_back({section, offset}); + locations.push_back({isec, offset}); } private: @@ -215,9 +215,9 @@ bool isNeeded() const override { return !bindings.empty(); } void writeTo(uint8_t *buf) const override; - void addEntry(const DylibSymbol *dysym, SectionPointerUnion section, + void addEntry(const DylibSymbol *dysym, const InputSection *isec, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(dysym, addend, Location(section, offset)); + bindings.emplace_back(dysym, addend, Location(isec, offset)); } private: @@ -254,9 +254,9 @@ void writeTo(uint8_t *buf) const override; - void addEntry(const Symbol *symbol, SectionPointerUnion section, - uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(symbol, addend, Location(section, offset)); + void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset, + int64_t addend = 0) { + bindings.emplace_back(symbol, addend, Location(isec, offset)); } bool hasEntry() const { return !bindings.empty(); } @@ -277,7 +277,7 @@ bool needsBinding(const Symbol *); // Add bindings for symbols that need weak or non-lazy bindings. -void addNonLazyBindingEntries(const Symbol *, SectionPointerUnion, +void addNonLazyBindingEntries(const Symbol *, const InputSection *, uint64_t offset, int64_t addend = 0); // The following sections implement lazy symbol binding -- very similar to the diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -42,6 +42,11 @@ SyntheticSection::SyntheticSection(const char *segname, const char *name) : OutputSection(SyntheticKind, name), segname(segname) { + isec = make(); + isec->segname = segname; + isec->name = name; + isec->parent = this; + isec->outSecOff = 0; syntheticSections.push_back(this); } @@ -118,12 +123,6 @@ PageZeroSection::PageZeroSection() : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} -uint64_t Location::getVA() const { - if (const auto *isec = section.dyn_cast()) - return isec->getVA() + offset; - return section.get()->addr + offset; -} - RebaseSection::RebaseSection() : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} @@ -186,16 +185,11 @@ os << static_cast(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); llvm::sort(locations, [](const Location &a, const Location &b) { - return a.getVA() < b.getVA(); + return a.isec->getVA() < b.isec->getVA(); }); - for (const Location &loc : locations) { - if (const auto *isec = loc.section.dyn_cast()) { - encodeRebase(isec->parent, isec->outSecOff + loc.offset, lastRebase, os); - } else { - const auto *osec = loc.section.get(); - encodeRebase(osec, loc.offset, lastRebase, os); - } - } + for (const Location &loc : locations) + encodeRebase(loc.isec->parent, loc.isec->outSecOff + loc.offset, lastRebase, + os); if (lastRebase.consecutiveCount != 0) encodeDoRebase(lastRebase, os); @@ -218,7 +212,7 @@ assert(!sym->isInGot()); sym->gotIndex = entries.size() - 1; - addNonLazyBindingEntries(sym, this, sym->gotIndex * WordSize); + addNonLazyBindingEntries(sym, isec, sym->gotIndex * WordSize); } } @@ -336,14 +330,9 @@ encodeDylibOrdinal(ordinal, os); lastBinding.ordinal = ordinal; } - if (auto *isec = b.target.section.dyn_cast()) { - encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset, - b.addend, /*isWeakBinding=*/false, lastBinding, os); - } else { - auto *osec = b.target.section.get(); - encodeBinding(b.dysym, osec, b.target.offset, b.addend, - /*isWeakBinding=*/false, lastBinding, os); - } + encodeBinding(b.dysym, b.target.isec->parent, + b.target.isec->outSecOff + b.target.offset, b.addend, + /*isWeakBinding=*/false, lastBinding, os); } if (!bindings.empty()) os << static_cast(BIND_OPCODE_DONE); @@ -369,16 +358,10 @@ [](const WeakBindingEntry &a, const WeakBindingEntry &b) { return a.target.getVA() < b.target.getVA(); }); - for (const WeakBindingEntry &b : bindings) { - if (const auto *isec = b.target.section.dyn_cast()) { - encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset, - b.addend, /*isWeakBinding=*/true, lastBinding, os); - } else { - const auto *osec = b.target.section.get(); - encodeBinding(b.symbol, osec, b.target.offset, b.addend, - /*isWeakBinding=*/true, lastBinding, os); - } - } + for (const WeakBindingEntry &b : bindings) + encodeBinding(b.symbol, b.target.isec->parent, + b.target.isec->outSecOff + b.target.offset, b.addend, + /*isWeakBinding=*/true, lastBinding, os); if (!bindings.empty() || !definitions.empty()) os << static_cast(BIND_OPCODE_DONE); } @@ -396,23 +379,21 @@ } void macho::addNonLazyBindingEntries(const Symbol *sym, - SectionPointerUnion section, - uint64_t offset, int64_t addend) { + const InputSection *isec, uint64_t offset, + int64_t addend) { if (auto *dysym = dyn_cast(sym)) { - in.binding->addEntry(dysym, section, offset, addend); + in.binding->addEntry(dysym, isec, offset, addend); if (dysym->isWeakDef()) - in.weakBinding->addEntry(sym, section, offset, addend); + in.weakBinding->addEntry(sym, isec, offset, addend); } else if (auto *defined = dyn_cast(sym)) { - in.rebase->addEntry(section, offset); + in.rebase->addEntry(isec, offset); if (defined->isExternalWeakDef()) - in.weakBinding->addEntry(sym, section, offset, addend); - } else if (!isa(sym)) { + in.weakBinding->addEntry(sym, isec, offset, addend); + } else { // Undefined symbols are filtered out in scanRelocations(); we should never // get here llvm_unreachable("cannot bind to an undefined symbol"); } - // TODO: understand the DSOHandle case better. - // Is it bindable? Add a new test? } StubsSection::StubsSection() @@ -538,7 +519,7 @@ void LazyBindingSection::addEntry(DylibSymbol *dysym) { if (entries.insert(dysym)) { dysym->stubsHelperIndex = entries.size() - 1; - in.rebase->addEntry(in.lazyPointers, dysym->stubsIndex * WordSize); + in.rebase->addEntry(in.lazyPointers->isec, dysym->stubsIndex * WordSize); } } @@ -572,9 +553,9 @@ if (auto *dysym = dyn_cast(sym)) { if (in.stubs->addEntry(dysym)) { if (sym->isWeakDef()) { - in.binding->addEntry(dysym, in.lazyPointers, + in.binding->addEntry(dysym, in.lazyPointers->isec, sym->stubsIndex * WordSize); - in.weakBinding->addEntry(sym, in.lazyPointers, + in.weakBinding->addEntry(sym, in.lazyPointers->isec, sym->stubsIndex * WordSize); } else { in.lazyBinding->addEntry(dysym); @@ -583,8 +564,8 @@ } else if (auto *defined = dyn_cast(sym)) { if (defined->isExternalWeakDef()) { if (in.stubs->addEntry(sym)) { - in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize); - in.weakBinding->addEntry(sym, in.lazyPointers, + in.rebase->addEntry(in.lazyPointers->isec, sym->stubsIndex * WordSize); + in.weakBinding->addEntry(sym, in.lazyPointers->isec, sym->stubsIndex * WordSize); } } @@ -786,9 +767,10 @@ for (Symbol *sym : symtab->getSymbols()) { if (auto *defined = dyn_cast(sym)) { + if (defined->linkerInternal) + continue; assert(defined->isExternal()); - (void)defined; - addSymbol(externalSymbols, sym); + addSymbol(externalSymbols, defined); } else if (auto *dysym = dyn_cast(sym)) { if (dysym->isReferenced()) addSymbol(undefinedSymbols, sym); diff --git a/lld/test/MachO/dso-handle.s b/lld/test/MachO/dso-handle.s --- a/lld/test/MachO/dso-handle.s +++ b/lld/test/MachO/dso-handle.s @@ -1,19 +1,31 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o -# RUN: %lld %t.o -o %t +# RUN: %lld -lSystem %t.o -o %t # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s # CHECK: leaq {{.*}} # 100000000 # CHECK-NEXT: leaq {{.*}} # 100000000 # RUN: %lld -dylib %t.o -o %t.dylib -# RUN: llvm-objdump -d --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK +# RUN: llvm-objdump -d --no-show-raw-insn --rebase --section-headers %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK # DYLIB-CHECK: leaq {{.*}} # 0 # DYLIB-CHECK-NEXT: leaq {{.*}} # 0 +# DYLIB-LABEL: Sections: +# DYLIB: __data 00000008 [[#%x,DATA:]] DATA +# DYLIB-LABEL: Rebase table: +# DYLIB-NEXT: segment section address type +# DYLIB-NEXT: __DATA __data 0x{{0*}}[[#DATA]] pointer + +# RUN: llvm-objdump --syms %t.dylib | FileCheck %s --check-prefix=SYMS +# SYMS-NOT: ___dso_handle + .globl _main .text _main: leaq ___dso_handle(%rip), %rdx movq ___dso_handle@GOTPCREL(%rip), %rdx ret + +.data +.quad ___dso_handle diff --git a/lld/test/MachO/invalid/dso-handle-duplicate.s b/lld/test/MachO/invalid/dso-handle-duplicate.s --- a/lld/test/MachO/invalid/dso-handle-duplicate.s +++ b/lld/test/MachO/invalid/dso-handle-duplicate.s @@ -7,8 +7,10 @@ ## far-out edge case that should be safe to ignore. # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o -# RUN: not %lld -dylib %t.o -o %t.dylib 2>&1 | FileCheck %s -# CHECK: error: found defined symbol with illegal name ___dso_handle +# RUN: not %lld -dylib %t.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t.o +# CHECK: error: duplicate symbol: ___dso_handle +# CHECK-NEXT: >>> defined in [[FILE]] +# CHECK-NEXT: >>> defined in .globl _main, ___dso_handle .text