diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -13,6 +13,7 @@ #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "Target.h" #include "Writer.h" @@ -429,6 +430,7 @@ } createSyntheticSections(); + symtab->addDSOHandle(in.header); // Initialize InputSections. for (InputFile *file : inputFiles) { diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -20,6 +20,7 @@ class ArchiveFile; class DylibFile; class InputSection; +class MachHeaderSection; class Symbol; class SymbolTable { @@ -34,6 +35,8 @@ Symbol *addLazy(StringRef name, ArchiveFile *file, const llvm::object::Archive::Symbol &sym); + Symbol *addDSOHandle(const MachHeaderSection *); + ArrayRef<Symbol *> getSymbols() const { return symVector; } Symbol *find(StringRef name); diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -94,4 +94,14 @@ return s; } +// Binds ___dso_handle to the Mach-O header. wasInserted is ignored on purpose: +// replaceSymbol runs unconditionally on whatever insert() hands back. +Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) { + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(DSOHandle::name); + replaceSymbol<DSOHandle>(s, header); + return s; +} + SymbolTable *macho::symtab; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -19,6 +19,7 @@ namespace macho { class InputSection; +class MachHeaderSection; class DylibFile; class ArchiveFile; @@ -37,15 +38,20 @@ UndefinedKind, DylibKind, LazyKind, + DSOHandleKind, }; + virtual ~Symbol() {} + Kind kind() const { return static_cast<Kind>(symbolKind); } StringRef getName() const { return {name.data, name.size}; } - uint64_t getVA() const; + virtual uint64_t getVA() const { return 0; } - uint64_t getFileOffset() const; + virtual uint64_t getFileOffset() const { + llvm_unreachable("attempt to get an offset from a non-defined symbol"); + }
uint32_t gotIndex = UINT32_MAX; @@ -65,6 +71,12 @@ Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef) : Symbol(DefinedKind, name, isWeakDef), isec(isec), value(value) {} + uint64_t getVA() const override { return isec->getVA() + value; } + + uint64_t getFileOffset() const override { + return isec->getFileOffset() + value; + } + InputSection *isec; uint32_t value; @@ -105,17 +117,29 @@ const llvm::object::Archive::Symbol sym; }; -inline uint64_t Symbol::getVA() const { - if (auto *d = dyn_cast<Defined>(this)) - return d->isec->getVA() + d->value; - return 0; -} +// The C++ ABI requires dylibs to pass a pointer to __cxa_atexit which does +// e.g. cleanup of static global variables. The C++ spec says that the pointer +// can point to any address in one of the dylib's segments, but in practice +// ld64 seems to set it to point to the header, so that's what's implemented +// here. +// +// This effectively functions like a Defined symbol, but it doesn't belong to +// an InputSection, and it has precedence over Defined symbols.
+class DSOHandle : public Symbol { +public: + DSOHandle(const MachHeaderSection *header) + : Symbol(DSOHandleKind, name, /*isWeakDef*/ false), header(header) {} -inline uint64_t Symbol::getFileOffset() const { - if (auto *d = dyn_cast<Defined>(this)) - return d->isec->getFileOffset() + d->value; - llvm_unreachable("attempt to get an offset from an undefined symbol"); -} + const MachHeaderSection *header; + + uint64_t getVA() const override; + + uint64_t getFileOffset() const override; + + static const constexpr StringRef name = "___dso_handle"; + // LLVM-style RTTI hook; must match the kind passed to Symbol's constructor. + static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; } +}; union SymbolUnion { alignas(Defined) char a[sizeof(Defined)]; diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -8,6 +8,7 @@ #include "Symbols.h" #include "InputFiles.h" +#include "SyntheticSections.h" using namespace llvm; using namespace lld; @@ -21,3 +22,9 @@ return *s; return std::string(sym.getName()); } + +uint64_t DSOHandle::getVA() const { return header->addr; } + +uint64_t DSOHandle::getFileOffset() const { return header->fileOff; } + +constexpr StringRef DSOHandle::name; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -273,6 +273,7 @@ }; struct InStruct { + MachHeaderSection *header = nullptr; BindingSection *binding = nullptr; GotSection *got = nullptr; LazyPointerSection *lazyPointers = nullptr; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -53,7 +53,7 @@ std::unique_ptr<FileOutputBuffer> &buffer; uint64_t addr = 0; uint64_t fileOff = 0; - MachHeaderSection *headerSection = nullptr; + MachHeaderSection *header = nullptr; LazyBindingSection *lazyBindingSection = nullptr; ExportSection *exportSection = nullptr; StringTableSection *stringTableSection = nullptr; @@ -260,20 +260,18 @@ } void Writer::createLoadCommands() { -
headerSection->addLoadCommand( + in.header->addLoadCommand( make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection)); - headerSection->addLoadCommand( - make<LCSymtab>(symtabSection, stringTableSection)); - headerSection->addLoadCommand(make<LCDysymtab>()); + in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection)); + in.header->addLoadCommand(make<LCDysymtab>()); switch (config->outputType) { case MH_EXECUTE: - headerSection->addLoadCommand(make<LCMain>()); - headerSection->addLoadCommand(make<LCLoadDylinker>()); + in.header->addLoadCommand(make<LCMain>()); + in.header->addLoadCommand(make<LCLoadDylinker>()); break; case MH_DYLIB: - headerSection->addLoadCommand( - make<LCDylib>(LC_ID_DYLIB, config->installName)); + in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName)); break; default: llvm_unreachable("unhandled output file type"); @@ -281,19 +279,19 @@ uint8_t segIndex = 0; for (OutputSegment *seg : outputSegments) { - headerSection->addLoadCommand(make<LCSegment>(seg->name, seg)); + in.header->addLoadCommand(make<LCSegment>(seg->name, seg)); seg->index = segIndex++; } uint64_t dylibOrdinal = 1; for (InputFile *file : inputFiles) { if (auto *dylibFile = dyn_cast<DylibFile>(file)) { - headerSection->addLoadCommand( + in.header->addLoadCommand( make<LCDylib>(LC_LOAD_DYLIB, dylibFile->dylibName)); dylibFile->ordinal = dylibOrdinal++; if (dylibFile->reexport) - headerSection->addLoadCommand( + in.header->addLoadCommand( make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->dylibName)); } } @@ -402,7 +400,6 @@ void Writer::createOutputSections() { // First, create hidden sections - headerSection = make<MachHeaderSection>(); lazyBindingSection = make<LazyBindingSection>(); stringTableSection = make<StringTableSection>(); symtabSection = make<SymtabSection>(*stringTableSection); @@ -533,6 +530,8 @@ void macho::writeResult() { Writer().run(); } void macho::createSyntheticSections() { + // Built here (not in Writer) so the Driver can register ___dso_handle early. + in.header = make<MachHeaderSection>(); in.binding = make<BindingSection>(); in.got = make<GotSection>(); in.lazyPointers = make<LazyPointerSection>(); diff --git a/lld/test/MachO/dso-handle-no-override.s b/lld/test/MachO/dso-handle-no-override.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/dso-handle-no-override.s @@ -0,0 +1,21 @@ +# REQUIRES: x86
+ +## If an input file defines its own ___dso_handle, that definition should be +## ignored in favor of the linker's. So far this is only implemented for a +## conflicting global symbol: a local symbol of the same name still takes +## priority in our implementation, unlike in ld64. That is a pretty far-out +## edge case, though, and should be safe to ignore. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib +# RUN: llvm-objdump -d --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK +# DYLIB-CHECK: leaq {{.*}} # 0 + +.globl _main, ___dso_handle +.text +_main: + leaq ___dso_handle(%rip), %rdx + ret + +___dso_handle: + .space 1 diff --git a/lld/test/MachO/dso-handle.s b/lld/test/MachO/dso-handle.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/dso-handle.s @@ -0,0 +1,16 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +## Executable: the leaq of ___dso_handle should target address 100000000. +# RUN: lld -flavor darwinnew %t.o -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +# CHECK: leaq {{.*}} # 100000000 +## Dylib: the same leaq should target address 0. +# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib +# RUN: llvm-objdump -d --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK +# DYLIB-CHECK: leaq {{.*}} # 0 + +.globl _main +.text +_main: + leaq ___dso_handle(%rip), %rdx + ret