diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -419,11 +419,20 @@ explicit StabsEntry(uint8_t type) : type(type) {} }; +// Symbols of the same type must be laid out contiguously: we choose to emit +// all local symbols first, then external symbols, and finally undefined +// symbols. For each symbol type, the LC_DYSYMTAB load command will record the +// range (start index and total number) of those symbols in the symbol table. class SymtabSection : public LinkEditSection { public: SymtabSection(StringTableSection &); void finalizeContents(); - size_t getNumSymbols() const { return stabs.size() + symbols.size(); } + uint32_t getNumSymbols() const; + uint32_t getNumLocalSymbols() const { + return stabs.size() + localSymbols.size(); + } + uint32_t getNumExternalSymbols() const { return externalSymbols.size(); } + uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); } uint64_t getRawSize() const override; void writeTo(uint8_t *buf) const override; @@ -434,8 +443,12 @@ void emitFunStabs(Defined *); StringTableSection &stringTableSection; + // STABS symbols are always local symbols, but we represent them with special + // entries because they may use fields like n_sect and n_desc differently. std::vector stabs; - std::vector symbols; + std::vector localSymbols; + std::vector externalSymbols; + std::vector undefinedSymbols; }; // The indirect symbol table is a list of 32-bit integers that serve as indices diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -19,6 +19,7 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/LEB128.h" @@ -630,14 +631,35 @@ } void SymtabSection::finalizeContents() { - InputFile *lastFile = nullptr; + // Local symbols aren't in the SymbolTable, so we walk the list of object + // files to gather them. + for (InputFile *file : inputFiles) { + if (auto *objFile = dyn_cast(file)) { + for (Symbol *sym : objFile->symbols) { + // TODO: when we implement -dead_strip, we should filter out symbols + // that belong to dead sections. + if (auto *defined = dyn_cast(sym)) { + if (!defined->isExternal()) { + uint32_t strx = stringTableSection.addString(sym->getName()); + localSymbols.push_back({sym, strx}); + } + } + } + } + } + for (Symbol *sym : symtab->getSymbols()) { - // TODO support other symbol types - if (isa(sym) || sym->isInGot() || sym->isInStubs()) { - sym->symtabIndex = symbols.size(); - symbols.push_back({sym, stringTableSection.addString(sym->getName())}); + uint32_t strx = stringTableSection.addString(sym->getName()); + if (auto *defined = dyn_cast(sym)) { + assert(defined->isExternal()); + externalSymbols.push_back({sym, strx}); + } else if (sym->isInGot() || sym->isInStubs()) { + undefinedSymbols.push_back({sym, strx}); } + } + InputFile *lastFile = nullptr; + for (Symbol *sym : symtab->getSymbols()) { // Emit STABS symbols so that dsymutil and/or the debugger can map address // regions in the final binary to the source and object files from which // they originated. @@ -670,13 +692,35 @@ if (!stabs.empty()) emitEndSourceStab(); + + uint32_t symtabIndex = stabs.size(); + for (const SymtabEntry &entry : + concat(localSymbols, externalSymbols, undefinedSymbols)) { + entry.sym->symtabIndex = symtabIndex++; + } +} + +uint32_t SymtabSection::getNumSymbols() const { + return stabs.size() + localSymbols.size() + externalSymbols.size() + + undefinedSymbols.size(); } void SymtabSection::writeTo(uint8_t *buf) const { auto *nList = reinterpret_cast(buf); - for (const SymtabEntry &entry : symbols) { + // Emit the stabs entries before the "real" symbols. We cannot emit them + // after as that would render Symbol::symtabIndex inaccurate. + for (const StabsEntry &entry : stabs) { + nList->n_strx = entry.strx; + nList->n_type = entry.type; + nList->n_sect = entry.sect; + nList->n_desc = entry.desc; + nList->n_value = entry.value; + ++nList; + } + + for (const SymtabEntry &entry : concat( + localSymbols, externalSymbols, undefinedSymbols)) { nList->n_strx = entry.strx; - // TODO support other symbol types // TODO populate n_desc with more flags if (auto *defined = dyn_cast(entry.sym)) { if (defined->isAbsolute()) { @@ -684,7 +728,8 @@ nList->n_sect = MachO::NO_SECT; nList->n_value = defined->value; } else { - nList->n_type = MachO::N_EXT | MachO::N_SECT; + nList->n_type = + (defined->isExternal() ? MachO::N_EXT : 0) | MachO::N_SECT; nList->n_sect = defined->isec->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol nList->n_value = defined->getVA(); @@ -693,17 +738,6 @@ } ++nList; } - - // Emit the stabs entries after the "real" symbols. We cannot emit them - // before as that would render Symbol::symtabIndex inaccurate. - for (const StabsEntry &entry : stabs) { - nList->n_strx = entry.strx; - nList->n_type = entry.type; - nList->n_sect = entry.sect; - nList->n_desc = entry.desc; - nList->n_value = entry.value; - ++nList; - } } IndirectSymtabSection::IndirectSymtabSection() diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -112,8 +112,10 @@ class LCDysymtab : public LoadCommand { public: - LCDysymtab(IndirectSymtabSection *indirectSymtabSection) - : indirectSymtabSection(indirectSymtabSection) {} + LCDysymtab(SymtabSection *symtabSection, + IndirectSymtabSection *indirectSymtabSection) + : symtabSection(symtabSection), + indirectSymtabSection(indirectSymtabSection) {} uint32_t getSize() const override { return sizeof(dysymtab_command); } @@ -121,11 +123,19 @@ auto *c = reinterpret_cast(buf); c->cmd = LC_DYSYMTAB; c->cmdsize = getSize(); + + c->ilocalsym = 0; + c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); + c->nextdefsym = symtabSection->getNumExternalSymbols(); + c->iundefsym = c->iextdefsym + c->nextdefsym; + c->nundefsym = symtabSection->getNumUndefinedSymbols(); + c->indirectsymoff = indirectSymtabSection->fileOff; c->nindirectsyms = indirectSymtabSection->getNumSymbols(); } - IndirectSymtabSection *indirectSymtabSection = nullptr; + SymtabSection *symtabSection; + IndirectSymtabSection *indirectSymtabSection; }; class LCSegment : public LoadCommand { @@ -371,7 +381,8 @@ in.header->addLoadCommand(make( in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports)); in.header->addLoadCommand(make(symtabSection, stringTableSection)); - in.header->addLoadCommand(make(indirectSymtabSection)); + in.header->addLoadCommand( + make(symtabSection, indirectSymtabSection)); for (StringRef path : config->runtimePaths) in.header->addLoadCommand(make(path)); diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s --- a/lld/test/MachO/stabs.s +++ b/lld/test/MachO/stabs.s @@ -12,18 +12,18 @@ # RUN: cd %t && %lld -lSystem test.o foo.o -o test # RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -# CHECK-DAG: [[#%x, MAIN:]] T _main -# CHECK-DAG: [[#%x, FOO: ]] T _foo # CHECK: 0000000000000000 - 00 0000 SO /tmp/test.cpp # CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/test.o -# CHECK-NEXT: [[#MAIN]] - 01 0000 FUN _main +# CHECK-NEXT: [[#%x, MAIN:]] - 01 0000 FUN _main # CHECK-NEXT: 0000000000000001 - 00 0000 FUN # CHECK-NEXT: 0000000000000000 - 01 0000 SO # CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp # CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/foo.o -# CHECK-NEXT: [[#FOO]] - 01 0000 FUN _foo +# CHECK-NEXT: [[#%x, FOO: ]] - 01 0000 FUN _foo # CHECK-NEXT: 0000000000000001 - 00 0000 FUN # CHECK-NEXT: 0000000000000000 - 01 0000 SO +# CHECK-NEXT: [[#MAIN]] T _main +# CHECK-NEXT: [[#FOO]] T _foo #--- test.s .text diff --git a/lld/test/MachO/subsections-symbol-relocs.s b/lld/test/MachO/subsections-symbol-relocs.s --- a/lld/test/MachO/subsections-symbol-relocs.s +++ b/lld/test/MachO/subsections-symbol-relocs.s @@ -22,7 +22,7 @@ # RUN: %lld -o %t/test-2 %t/test.o -order_file %t/order-file-2 # RUN: llvm-objdump -d --no-show-raw-insn %t/test-2 | FileCheck %s # CHECK-LABEL: Disassembly of section __TEXT,__text: -# CHECK: <_bar>: +# CHECK: <_ba{{r|z}}>: # CHECK-NEXT: callq {{.*}} <_foo> # CHECK-EMPTY: # CHECK-NEXT: <_qux>: @@ -30,7 +30,7 @@ # CHECK: <_foo>: # CHECK-NEXT: retq # CHECK: <_main>: -# CHECK-NEXT: callq {{.*}} <_bar> +# CHECK-NEXT: callq {{.*}} <_ba{{r|z}}> # CHECK-NEXT: movq $0, %rax # CHECK-NEXT: retq diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -1,56 +1,108 @@ # REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o -# RUN: %lld -o %t %t.o -# RUN: llvm-readobj -symbols %t | FileCheck %s +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o +# RUN: %lld -dylib %t/libfoo.o -o %t/libfoo.dylib +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test +# RUN: llvm-readobj --syms --macho-dysymtab %t/test | FileCheck %s # CHECK: Symbols [ # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _main +# CHECK-NEXT: Name: _local (1) +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main (8) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) # CHECK-NEXT: Section: __text (0x1) -# CHECK-NEXT: RefType: +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _external (54) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: bar +# CHECK-NEXT: Name: _external_weak (64) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) # CHECK-NEXT: Section: __text (0x1) -# CHECK-NEXT: RefType: +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x80) # CHECK-NEXT: WeakDef (0x80) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: foo +# CHECK-NEXT: Name: __dyld_private (102) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data -# CHECK-NEXT: RefType: +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: dyld_stub_binder (14) +# CHECK-NEXT: Type: Undef (0x0) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _dynamic (79) +# CHECK-NEXT: Type: Undef (0x0) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 # CHECK-NEXT: } # CHECK-NEXT: ] +# CHECK-NEXT: Dysymtab { +# CHECK-NEXT: ilocalsym: 0 +# CHECK-NEXT: nlocalsym: 1 +# CHECK-NEXT: iextdefsym: 1 +# CHECK-NEXT: nextdefsym: 4 +# CHECK-NEXT: iundefsym: 5 +# CHECK-NEXT: nundefsym: 2 + +#--- libfoo.s +.globl _dynamic +_dynamic: + +#--- test.s +.globl _main, _external, _external_weak .data -.global foo -foo: - .asciz "Hello world!\n" +_external: + .space 0 +_local: + .space 0 .text -.weak_definition bar -.global bar -.global _main +.weak_definition _external_weak +_external_weak: + .space 0 _main: + callq _dynamic mov $0, %rax ret - -bar: - mov $2, %rax - ret diff --git a/lld/test/MachO/tlv.s b/lld/test/MachO/tlv.s --- a/lld/test/MachO/tlv.s +++ b/lld/test/MachO/tlv.s @@ -16,10 +16,13 @@ # CHECK-EMPTY: # CHECK-NEXT: Disassembly of section __DATA,__thread_data: # CHECK-EMPTY: -# CHECK-NEXT: <__thread_data>: -# CHECK-NEXT: ef -# CHECK-NEXT: be ad de be ba -# CHECK-NEXT: fe ca +# CHECK-NEXT: <_foo$tlv$init>: +# CHECK-NEXT: 00 00 +# CHECK-NEXT: 00 00 +# CHECK-EMPTY: +# CHECK-NEXT: <_bar$tlv$init>: +# CHECK-NEXT: 00 00 +# CHECK-NEXT: 00 00 # CHECK-EMPTY: # CHECK-NEXT: Disassembly of section __DATA,__thread_vars: # CHECK-EMPTY: @@ -41,9 +44,9 @@ .section __DATA,__thread_data,thread_local_regular _foo$tlv$init: - .long 0xdeadbeef + .space 4 _bar$tlv$init: - .long 0xcafebabe + .space 4 .section __DATA,__thread_vars,thread_local_variables .globl _foo, _bar