diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1043,14 +1043,7 @@ } createSyntheticSections(); - - // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit - // which does e.g. cleanup of static global variables. The ABI document says - // that the pointer can point to any address in one of the dylib's segments, - // but in practice ld64 seems to set it to point to the header, so that's - // what's implemented here. - symtab->addSynthetic("___dso_handle", in.header->isec, 0, - /*privateExtern=*/true, /*linkerInternal=*/true); + createSyntheticSymbols(); for (const Arg *arg : args.filtered(OPT_sectcreate)) { StringRef segName = arg->getValue(0); diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -9,6 +9,8 @@ #ifndef LLD_MACHO_SYMBOL_TABLE_H #define LLD_MACHO_SYMBOL_TABLE_H +#include "Symbols.h" + #include "lld/Common/LLVM.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" @@ -49,8 +51,8 @@ Symbol *addLazy(StringRef name, ArchiveFile *file, const llvm::object::Archive::Symbol &sym); - Defined *addSynthetic(StringRef name, InputSection *, uint32_t value, - bool isPrivateExtern, bool isLinkerInternal); + Symbol *addSynthetic(StringRef name, InputSection *, uint32_t value, + bool isPrivateExtern, bool includeInSymtab); ArrayRef<Symbol *> getSymbols() const { return symVector; } Symbol *find(llvm::CachedHashStringRef name); diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -157,12 +157,12 @@ return s; } -Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec, - uint32_t value, bool isPrivateExtern, - bool isLinkerInternal) { - Defined *s = addDefined(name, nullptr, isec, value, /*isWeakDef=*/false, - isPrivateExtern); - s->linkerInternal = isLinkerInternal; +Symbol *SymbolTable::addSynthetic(StringRef 
name, InputSection *isec, + uint32_t value, bool isPrivateExtern, + bool includeInSymtab) { + Defined *s = addDefined(name, nullptr, isec, value, + /*isWeakDef=*/false, isPrivateExtern); + s->includeInSymtab = includeInSymtab; return s; } diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -99,7 +99,7 @@ bool isWeakDef, bool isExternal, bool isPrivateExtern) : Symbol(DefinedKind, name, file), isec(isec), value(value), overridesWeakDef(false), privateExtern(isPrivateExtern), - linkerInternal(false), weakDef(isWeakDef), external(isExternal) {} + includeInSymtab(true), weakDef(isWeakDef), external(isExternal) {} bool isWeakDef() const override { return weakDef; } bool isExternalWeakDef() const { @@ -124,8 +124,8 @@ bool overridesWeakDef : 1; // Whether this symbol should appear in the output binary's export trie. bool privateExtern : 1; - // Whether this symbol should appear in the output binary's symbol table. - bool linkerInternal : 1; + // Whether this symbol should appear in the output symbol table. 
+ bool includeInSymtab : 1; private: const bool weakDef : 1; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -500,6 +500,8 @@ extern InStruct in; extern std::vector<SyntheticSection *> syntheticSections; +void createSyntheticSymbols(); + } // namespace macho } // namespace lld diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -754,7 +754,7 @@ for (Symbol *sym : symtab->getSymbols()) { if (auto *defined = dyn_cast<Defined>(sym)) { - if (defined->linkerInternal) + if (!defined->includeInSymtab) continue; assert(defined->isExternal()); addSymbol(externalSymbols, defined); @@ -993,3 +993,55 @@ memcpy(id, fileName.begin(), fileName.size()); memset(id + fileName.size(), 0, fileNamePad); } + +void macho::createSyntheticSymbols() { + auto addHeaderSymbol = [](const char *name) { + symtab->addSynthetic(name, in.header->isec, 0, + /*privateExtern=*/true, + /*includeInSymtab*/ false); + }; + + switch (config->outputType) { + // FIXME: Assign the right address value for these symbols + // (rather than 0). But we need to do that after assignAddresses(). + case MH_EXECUTE: + // If linking PIE, __mh_execute_header is a defined symbol in + // (__TEXT, __text). + // Otherwise, it's an absolute symbol. + if (config->isPic) + symtab->addSynthetic("__mh_execute_header", in.header->isec, 0, + /*privateExtern*/ false, + /*includeInSymtab*/ true); + else + symtab->addSynthetic("__mh_execute_header", + /*isec*/ nullptr, 0, + /*privateExtern*/ false, + /*includeInSymtab*/ true); + break; + + // The following symbols are + // N_SECT symbols, even though the header is not part of any section + // and they are private to the bundle/dylib/object they are part of. 
+ case MH_BUNDLE: + addHeaderSymbol("__mh_bundle_header"); + break; + case MH_DYLIB: + addHeaderSymbol("__mh_dylib_header"); + break; + case MH_DYLINKER: + addHeaderSymbol("__mh_dylinker_header"); + break; + case MH_OBJECT: + addHeaderSymbol("__mh_object_header"); + break; + default: + break; + } + + // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit + // which does e.g. cleanup of static global variables. The ABI document + // says that the pointer can point to any address in one of the dylib's + // segments, but in practice ld64 seems to set it to point to the header, + // so that's what's implemented here. + addHeaderSymbol("___dso_handle"); +} diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -746,12 +746,16 @@ uint32_t sectionIndex = 0; for (OutputSegment *seg : outputSegments) { seg->sortOutputSections(compareByOrder(sectionOrder)); + bool seenMachHeader = false; for (OutputSection *osec : seg->getSections()) { // Now that the output sections are sorted, assign the final // output section indices. 
+ if (!osec->isHidden()) osec->index = ++sectionIndex; - + if (!seenMachHeader && isa<MachHeaderSection>(osec)) { + osec->index = 1; + seenMachHeader = true; + } if (!firstTLVDataSection && isThreadLocalData(osec->flags)) firstTLVDataSection = osec; diff --git a/lld/test/MachO/export-trie.s b/lld/test/MachO/export-trie.s --- a/lld/test/MachO/export-trie.s +++ b/lld/test/MachO/export-trie.s @@ -15,7 +15,9 @@ # EXPORTS-DAG: [[#%x, HELLO_WORLD_ADDR:]] {{.*}} _hello_world # EXPORTS-DAG: [[#%x, HELLO_ITS_ME_ADDR:]] {{.*}} _hello_its_me # EXPORTS-DAG: [[#%x, HELLO_ITS_YOU_ADDR:]] {{.*}} _hello_its_you +# EXPORTS-DAG: {{0+}} g *ABS* __mh_execute_header # EXPORTS-LABEL: Exports trie: +# EXPORTS-DAG: 0x{{0+}} __mh_execute_header [absolute] # EXPORTS-DAG: 0x{{0*}}[[#%X, MAIN_ADDR]] _main # EXPORTS-DAG: 0x{{0*}}[[#%X, HELLO_ADDR]] _hello # EXPORTS-DAG: 0x{{0*}}[[#%X, HELLO_WORLD_ADDR]] _hello_world @@ -27,13 +29,16 @@ # CHECK-LABEL: ExportTrie: # CHECK: Name: '' # CHECK: Name: _ -# CHECK: Name: main -# CHECK: Name: hello +# CHECK-DAG: Name: _mh_execute_header +# CHECK-DAG: Name: main +# CHECK-DAG: Name: hello # CHECK: Name: _ # CHECK: Name: world # CHECK: Name: its_ -# CHECK: Name: you -# CHECK: Name: me +# CHECK-DAG: Name: you +# CHECK-DAG: Name: me + + .section __TEXT,__cstring .globl _hello, _hello_world, _hello_its_me, _hello_its_you, _main diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s --- a/lld/test/MachO/map-file.s +++ b/lld/test/MachO/map-file.s @@ -29,6 +29,7 @@ # CHECK-NEXT: [[#%x,MAIN:]] g F __TEXT,__text _main # CHECK-NEXT: [[#%x,NUMBER:]] g O __DATA,__common _number # CHECK-NEXT: [[#%x,FOO:]] g O __TEXT,obj _foo +# CHECK-NEXT: {{0+}} g *ABS* __mh_execute_header # CHECK-NEXT: # Path: {{.*}}{{/|\\}}map-file.s.tmp/test-map # CHECK-NEXT: # Arch: x86_64 diff --git a/lld/test/MachO/mh_dylib_header.s b/lld/test/MachO/mh_dylib_header.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/mh_dylib_header.s @@ -0,0 +1,47 @@ +## This tests that we can link against these 
synthetic symbols even +## if they are not in the symbol table. + +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t + +## Test that in a dylib, we can link against __mh_dylib_header +## (but not in other output types) +# RUN: llvm-mc %t/dylib.s -triple=x86_64-apple-macos11.0 -filetype=obj -o %t/dylib.o +# RUN: %lld -arch x86_64 -platform_version macOS 10 11 -dylib %t/dylib.o -o %t/dylib.out +# RUN: llvm-objdump --m --syms %t/dylib.out | FileCheck %s --check-prefix DYLIB + +# RUN: not %lld -arch x86_64 -platform_version macOS 10 11 %t/dylib.o 2>&1 | FileCheck %s --check-prefix ERR-DYLIB + +# DYLIB: SYMBOL TABLE: +# DYLIB-NEXT: {{[0-9a-f]+}} g F __TEXT,__text _main +# DYLIB-EMPTY: +# ERR-DYLIB: error: undefined symbol: __mh_dylib_header + +## Test that in an executable, we can link against __mh_execute_header +# RUN: llvm-mc %t/main.s -triple=x86_64-apple-macos11.0 -filetype=obj -o %t/exec.o +# RUN: %lld -arch x86_64 -platform_version macOS 10 11 %t/exec.o -o %t/exec.out + +## But it would be an error trying to reference __mh_execute_header in a dylib +# RUN: not %lld -arch x86_64 -platform_version macOS 10 11 -dylib %t/exec.o 2>&1 | FileCheck %s --check-prefix ERR-EXEC + +# ERR-EXEC: error: undefined symbol: __mh_execute_header + +#--- main.s +.text +.globl _main +_main: +## FIXME: probably better to use +## mov __mh_*_header@GOTPCREL(%rip), %rax +## But we can't do it yet because of some GOT_LOAD-out of range error. 
+ ret +.data + .quad __mh_execute_header + +#--- dylib.s +.text +.globl _main +_main: + ret + +.data + .quad __mh_dylib_header diff --git a/lld/test/MachO/mh_execute_header.s b/lld/test/MachO/mh_execute_header.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/mh_execute_header.s @@ -0,0 +1,16 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; mkdir %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: %lld -o %t/test.pie %t/test.o -pie +# RUN: llvm-objdump --macho --syms %t/test.pie | FileCheck %s --check-prefix=PIE + +# RUN: %lld -o %t/test.no_pie %t/test.o -no_pie +# RUN: llvm-objdump --macho --syms %t/test.no_pie | FileCheck %s --check-prefix=NO-PIE + +# PIE: 0000000100000000 g F __TEXT,__text __mh_execute_header +# NO-PIE: 0000000000000000 g *ABS* __mh_execute_header + +.text +.global _main +_main: + ret diff --git a/lld/test/MachO/objc.s b/lld/test/MachO/objc.s --- a/lld/test/MachO/objc.s +++ b/lld/test/MachO/objc.s @@ -32,7 +32,7 @@ # NO-OBJC-EMPTY: # NO-OBJC-NEXT: SYMBOL TABLE: # NO-OBJC-NEXT: g F __TEXT,__text _main -# NO-OBJC-EMPTY: +# NO-OBJC-NEXT: g *ABS* __mh_execute_header #--- has-objc-symbol.s .globl _OBJC_CLASS_$_MyObject diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s --- a/lld/test/MachO/stabs.s +++ b/lld/test/MachO/stabs.s @@ -60,6 +60,7 @@ # CHECK-NEXT: [[#ZERO]] S _zero # CHECK-NEXT: [[#FOO]] T _foo # CHECK-NEXT: {{[0-9af]+}} T _no_debug +# CHECK-NEXT: {{0+}} A __mh_execute_header # CHECK-EMPTY: ## Check that we don't attempt to emit rebase opcodes for the debug sections diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -56,6 +56,16 @@ # CHECK-NEXT: ] # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: __mh_execute_header (81) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Abs (0x2) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) 
+# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: dyld_stub_binder # CHECK-NEXT: Extern @@ -82,8 +92,8 @@ # CHECK-NEXT: ilocalsym: 0 # CHECK-NEXT: nlocalsym: 2 # CHECK-NEXT: iextdefsym: 2 -# CHECK-NEXT: nextdefsym: 3 -# CHECK-NEXT: iundefsym: 5 +# CHECK-NEXT: nextdefsym: 4 +# CHECK-NEXT: iundefsym: 6 # CHECK-NEXT: nundefsym: 2 ## Verify that the first entry in the StringTable is a space, and that