diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -13,6 +13,7 @@
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
+#include "SyntheticSections.h"
 #include "Target.h"
 #include "Writer.h"
 
@@ -479,6 +480,7 @@
   }
 
   createSyntheticSections();
+  symtab->addDSOHandle(in.header);
 
   // Initialize InputSections.
   for (InputFile *file : inputFiles) {
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -20,6 +20,7 @@
 class ArchiveFile;
 class DylibFile;
 class InputSection;
+class MachHeaderSection;
 class Symbol;
 
 /*
@@ -40,6 +41,8 @@
   Symbol *addLazy(StringRef name, ArchiveFile *file,
                   const llvm::object::Archive::Symbol &sym);
 
+  Symbol *addDSOHandle(const MachHeaderSection *);
+
   ArrayRef getSymbols() const { return symVector; }
   Symbol *find(StringRef name);
 
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -94,4 +94,20 @@
   return s;
 }
 
+// Registers the linker-synthesized ___dso_handle symbol, which resolves to the
+// Mach-O header. A pre-existing Defined symbol of that name is reported via
+// error(); in every case the table entry ends up being a DSOHandle.
+Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) {
+  Symbol *s;
+  bool wasInserted;
+  std::tie(s, wasInserted) = insert(DSOHandle::name);
+  if (!wasInserted) {
+    if (auto *defined = dyn_cast<Defined>(s))
+      error("found defined symbol from " + defined->isec->file->getName() +
+            " with illegal name " + DSOHandle::name);
+  }
+  replaceSymbol<DSOHandle>(s, header);
+  return s;
+}
+
 SymbolTable *macho::symtab;
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -19,6 +19,7 @@
 namespace macho {
 
 class InputSection;
+class MachHeaderSection;
 class DylibFile;
 class ArchiveFile;
 
@@ -37,6 +38,7 @@
     UndefinedKind,
     DylibKind,
     LazyKind,
+    DSOHandleKind,
   };
 
   virtual ~Symbol() {}
@@ -45,9 +47,11 @@
 
   StringRef getName() const { return {name.data, name.size}; }
 
-  uint64_t getVA() const;
+  virtual uint64_t getVA() const { return 0; }
 
-  uint64_t getFileOffset() const;
+  virtual uint64_t getFileOffset() const {
+    llvm_unreachable("attempt to get an offset from a non-defined symbol");
+  }
 
   virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); }
 
@@ -70,6 +74,12 @@
 
   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
 
+  uint64_t getVA() const override { return isec->getVA() + value; }
+
+  uint64_t getFileOffset() const override {
+    return isec->getFileOffset() + value;
+  }
+
   InputSection *isec;
   uint32_t value;
 
@@ -115,17 +125,33 @@
   const llvm::object::Archive::Symbol sym;
 };
 
-inline uint64_t Symbol::getVA() const {
-  if (auto *d = dyn_cast<Defined>(this))
-    return d->isec->getVA() + d->value;
-  return 0;
-}
+// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
+// does e.g. cleanup of static global variables. The ABI document says that the
+// pointer can point to any address in one of the dylib's segments, but in
+// practice ld64 seems to set it to point to the header, so that's what's
+// implemented here.
+//
+// The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
+// tested this on an ARM platform.
+//
+// DSOHandle effectively functions like a Defined symbol, but it doesn't belong
+// to an InputSection.
+class DSOHandle : public Symbol {
+public:
+  DSOHandle(const MachHeaderSection *header)
+      : Symbol(DSOHandleKind, name), header(header) {}
 
-inline uint64_t Symbol::getFileOffset() const {
-  if (auto *d = dyn_cast<Defined>(this))
-    return d->isec->getFileOffset() + d->value;
-  llvm_unreachable("attempt to get an offset from an undefined symbol");
-}
+  const MachHeaderSection *header;
+
+  uint64_t getVA() const override;
+
+  uint64_t getFileOffset() const override;
+
+  static constexpr StringRef name = "___dso_handle";
+
+  // LLVM-style RTTI hook; must match the kind passed to Symbol() above.
+  static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; }
+};
 
 union SymbolUnion {
   alignas(Defined) char a[sizeof(Defined)];
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -8,6 +8,7 @@
 
 #include "Symbols.h"
 #include "InputFiles.h"
+#include "SyntheticSections.h"
 
 using namespace llvm;
 using namespace lld;
@@ -21,3 +22,9 @@
     return *s;
   return std::string(sym.getName());
 }
+
+uint64_t DSOHandle::getVA() const { return header->addr; }
+
+uint64_t DSOHandle::getFileOffset() const { return header->fileOff; }
+
+constexpr StringRef DSOHandle::name;
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -273,6 +273,7 @@
 };
 
 struct InStruct {
+  MachHeaderSection *header = nullptr;
   BindingSection *binding = nullptr;
   GotSection *got = nullptr;
   LazyPointerSection *lazyPointers = nullptr;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -53,7 +53,7 @@
   std::unique_ptr &buffer;
   uint64_t addr = 0;
   uint64_t fileOff = 0;
-  MachHeaderSection *headerSection = nullptr;
+  MachHeaderSection *header = nullptr;
   LazyBindingSection *lazyBindingSection = nullptr;
   ExportSection *exportSection = nullptr;
   StringTableSection *stringTableSection = nullptr;
@@ -264,20 +264,18 @@
 }
 
 void Writer::createLoadCommands() {
-  headerSection->addLoadCommand(
+  in.header->addLoadCommand(
       make(in.binding, lazyBindingSection, exportSection));
-  headerSection->addLoadCommand(
-      make(symtabSection, stringTableSection));
-  headerSection->addLoadCommand(make());
+  in.header->addLoadCommand(make(symtabSection, stringTableSection));
+  in.header->addLoadCommand(make());
 
   switch (config->outputType) {
   case MH_EXECUTE:
-    headerSection->addLoadCommand(make());
-    headerSection->addLoadCommand(make());
+    in.header->addLoadCommand(make());
+    in.header->addLoadCommand(make());
     break;
   case MH_DYLIB:
-    headerSection->addLoadCommand(
-        make(LC_ID_DYLIB, config->installName));
+    in.header->addLoadCommand(make(LC_ID_DYLIB, config->installName));
     break;
   default:
     llvm_unreachable("unhandled output file type");
@@ -285,19 +283,19 @@
 
   uint8_t segIndex = 0;
   for (OutputSegment *seg : outputSegments) {
-    headerSection->addLoadCommand(make(seg->name, seg));
+    in.header->addLoadCommand(make(seg->name, seg));
     seg->index = segIndex++;
   }
 
   uint64_t dylibOrdinal = 1;
   for (InputFile *file : inputFiles) {
     if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
-      headerSection->addLoadCommand(
+      in.header->addLoadCommand(
           make(LC_LOAD_DYLIB, dylibFile->dylibName));
       dylibFile->ordinal = dylibOrdinal++;
 
       if (dylibFile->reexport)
-        headerSection->addLoadCommand(
+        in.header->addLoadCommand(
             make(LC_REEXPORT_DYLIB, dylibFile->dylibName));
     }
   }
@@ -406,7 +404,6 @@
 
 void Writer::createOutputSections() {
   // First, create hidden sections
-  headerSection = make<MachHeaderSection>();
   lazyBindingSection = make<LazyBindingSection>();
   stringTableSection = make<StringTableSection>();
   symtabSection = make(*stringTableSection);
@@ -539,6 +536,7 @@
 void macho::writeResult() { Writer().run(); }
 
 void macho::createSyntheticSections() {
+  in.header = make<MachHeaderSection>();
   in.binding = make<BindingSection>();
   in.got = make<GotSection>();
   in.lazyPointers = make<LazyPointerSection>();
diff --git a/lld/test/MachO/dso-handle.s b/lld/test/MachO/dso-handle.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/dso-handle.s
@@ -0,0 +1,16 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+
+# RUN: lld -flavor darwinnew %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+# CHECK: leaq {{.*}} # 100000000
+
+# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib
+# RUN: llvm-objdump -d --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK
+# DYLIB-CHECK: leaq {{.*}} # 0
+
+.globl _main
+.text
+_main:
+  leaq ___dso_handle(%rip), %rdx
+  ret
diff --git a/lld/test/MachO/invalid/dso-handle-duplicate.s b/lld/test/MachO/invalid/dso-handle-duplicate.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/invalid/dso-handle-duplicate.s
@@ -0,0 +1,20 @@
+# REQUIRES: x86
+
+## If for some bizarre reason the input file defines its own ___dso_handle, we
+## should raise an error. At least, we've implemented this behavior if the
+## conflicting symbol is a global. A local symbol of the same name will still
+## take priority in our implementation, unlike in ld64. But that's a pretty
+## far-out edge case that should be safe to ignore.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: not lld -flavor darwinnew -dylib %t.o -o %t.dylib 2>&1 | FileCheck %s -DFILE=%t.o
+# CHECK: error: found defined symbol from [[FILE]] with illegal name ___dso_handle
+
+.globl _main, ___dso_handle
+.text
+_main:
+  leaq ___dso_handle(%rip), %rdx
+  ret
+
+___dso_handle:
+  .space 1