Add basic symbol table output to the Mach-O port.

Introduces SymtabSection (emitted as __symbol_table) and implements the
StringPoolSection constructor, addString and writeTo (__string_pool). Both
sections are placed in the __LINKEDIT segment, and Writer gains members that
point at them.

diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -23,6 +23,8 @@
 constexpr const char *binding = "__binding";
 constexpr const char *header = "__mach_header";
 constexpr const char *pageZero = "__pagezero";
+constexpr const char *stringPool = "__string_pool";
+constexpr const char *symbolTable = "__symbol_table";
 
 } // namespace section_names
 
@@ -114,6 +116,28 @@
   uint32_t poolSize = 1;
 };
 
+struct SymtabEntry {
+  Symbol *sym;
+  size_t strx;
+};
+
+class SymtabSection : public InputSection {
+public:
+  SymtabSection(StringPoolSection &);
+  void finalizeContents();
+  size_t getNumSymbols() const { return symbols.size(); }
+  size_t getSize() const override;
+  // Like other sections in __LINKEDIT, the symtab section is special: its
+  // offsets are recorded in the LC_SYMTAB load command, instead of in section
+  // headers.
+  bool isHidden() const override { return true; }
+  void writeTo(uint8_t *buf) override;
+
+private:
+  StringPoolSection &stringPoolSection;
+  std::vector<SymtabEntry> symbols;
+};
+
 struct InStruct {
   GotSection *got = nullptr;
 };
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -9,6 +9,7 @@
 #include "SyntheticSections.h"
 #include "InputFiles.h"
 #include "OutputSegment.h"
+#include "SymbolTable.h"
 #include "Symbols.h"
 #include "Writer.h"
 
@@ -128,6 +129,59 @@
   memcpy(buf, contents.data(), contents.size());
 }
 
+SymtabSection::SymtabSection(StringPoolSection &stringPoolSection)
+    : stringPoolSection(stringPoolSection) {
+  segname = segment_names::linkEdit;
+  name = section_names::symbolTable;
+}
+
+size_t SymtabSection::getSize() const {
+  return symbols.size() * sizeof(nlist_64);
+}
+
+void SymtabSection::finalizeContents() {
+  // TODO: We should filter out some symbols.
+  for (Symbol *sym : symtab->getSymbols())
+    symbols.push_back({sym, stringPoolSection.addString(sym->getName())});
+}
+
+void SymtabSection::writeTo(uint8_t *buf) {
+  auto *nList = reinterpret_cast<nlist_64 *>(buf);
+  for (const SymtabEntry &entry : symbols) {
+    // TODO support other symbol types
+    // TODO populate n_desc
+    if (auto defined = dyn_cast<Defined>(entry.sym)) {
+      nList->n_strx = entry.strx;
+      nList->n_type = N_EXT | N_SECT;
+      nList->n_sect = defined->isec->sectionIndex;
+      // For the N_SECT symbol type, n_value is the address of the symbol
+      nList->n_value = defined->value + defined->isec->addr;
+    }
+
+    ++nList;
+  }
+}
+
+StringPoolSection::StringPoolSection() {
+  segname = segment_names::linkEdit;
+  name = section_names::stringPool;
+}
+
+uint32_t StringPoolSection::addString(StringRef str) {
+  uint32_t strx = poolSize;
+  pool.push_back(str);
+  poolSize += str.size() + 1; // account for null terminator
+  return strx;
+}
+
+void StringPoolSection::writeTo(uint8_t *buf) {
+  uint32_t off = 0;
+  for (StringRef str : pool) {
+    memcpy(buf + off, str.data(), str.size());
+    off += str.size() + 1; // account for null terminator
+  }
+}
+
 InStruct in;
 
 } // namespace macho
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -52,6 +52,8 @@
   uint64_t fileOff = 0;
   MachHeaderSection *headerSection = nullptr;
   BindingSection *bindingSection = nullptr;
+  SymtabSection *symtabSection = nullptr;
+  StringPoolSection *stringPoolSection = nullptr;
 };
 
 // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -163,13 +165,23 @@
 
 class LCSymtab : public LoadCommand {
 public:
+  LCSymtab(SymtabSection *symtabSection, StringPoolSection *stringPoolSection)
+      : symtabSection(symtabSection), stringPoolSection(stringPoolSection) {}
+
   uint32_t getSize() const override { return sizeof(symtab_command); }
 
   void writeTo(uint8_t *buf) const override {
     auto *c = reinterpret_cast<symtab_command *>(buf);
     c->cmd = LC_SYMTAB;
     c->cmdsize = getSize();
+    c->symoff = symtabSection->getFileOffset();
+    c->nsyms = symtabSection->getNumSymbols();
+    c->stroff = stringPoolSection->getFileOffset();
+    c->strsize = stringPoolSection->getFileSize();
   }
+
+  SymtabSection *symtabSection = nullptr;
+  StringPoolSection *stringPoolSection = nullptr;
 };
 
 class LCLoadDylib : public LoadCommand {
@@ -233,6 +245,8 @@
       // Make sure __LINKEDIT is the last segment (i.e. all its hidden
       // sections must be ordered after other sections).
       section_names::binding,
+      section_names::symbolTable,
+      section_names::stringPool,
   };
 
   for (uint32_t i = 0, n = ordering.size(); i < n; ++i)
@@ -275,7 +289,8 @@
 void Writer::createLoadCommands() {
   headerSection->addLoadCommand(make(bindingSection));
   headerSection->addLoadCommand(make());
-  headerSection->addLoadCommand(make<LCSymtab>());
+  headerSection->addLoadCommand(
+      make<LCSymtab>(symtabSection, stringPoolSection));
   headerSection->addLoadCommand(make());
   headerSection->addLoadCommand(make());
 
@@ -304,6 +319,8 @@
 void Writer::createHiddenSections() {
   headerSection = createInputSection<MachHeaderSection>();
   bindingSection = createInputSection<BindingSection>();
+  stringPoolSection = createInputSection<StringPoolSection>();
+  symtabSection = createInputSection<SymtabSection>(*stringPoolSection);
   createInputSection();
 }
 
@@ -386,6 +403,7 @@
 
   // Fill __LINKEDIT contents.
   bindingSection->finalizeContents();
+  symtabSection->finalizeContents();
 
   // Now that __LINKEDIT is filled out, do a proper calculation of its
   // addresses and offsets. We don't have to recalculate the other segments
diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/symtab.s
@@ -0,0 +1,54 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: lld -flavor darwinnew -o %t %t.o
+# RUN: llvm-readobj -symbols %t | FileCheck %s
+
+# CHECK:      Symbols [
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _main
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __text (0x1)
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value:
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: bar
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __text (0x1)
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value:
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: foo
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __data
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value:
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+.data
+.global foo
+foo:
+  .asciz "Hello world!\n"
+
+.text
+.global bar
+.global _main
+
+_main:
+  mov $0, %rax
+  ret
+
+bar:
+  mov $2, %rax
+  ret