diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -23,6 +23,8 @@ constexpr const char *pageZero = "__pagezero"; constexpr const char *header = "__mach_header"; constexpr const char *binding = "__binding"; +constexpr const char *symbolTable = "__symbol_table"; +constexpr const char *stringTable = "__string_table"; } // namespace section_names @@ -93,6 +95,49 @@ SmallVector contents; }; +// Stores the strings referenced by the symbol table. +class StringTableSection : public InputSection { +public: + StringTableSection(); + // Returns the start offset of the added string. + uint32_t addString(StringRef); + size_t getSize() const override { return size; } + // Like other sections in __LINKEDIT, the string table section is special: its + // offsets are recorded in the LC_SYMTAB load command, instead of in section + // headers. + bool isHidden() const override { return true; } + void writeTo(uint8_t *buf) override; + +private: + // An n_strx value of 0 always indicates the empty string, so we must locate + // our non-empty string values at positive offsets in the string table. + // Therefore we insert a dummy value at position zero. + std::vector<StringRef> strings{"\0"}; + size_t size = 1; // Starts at 1 to account for the dummy entry at offset 0. +}; + +struct SymtabEntry { + Symbol *sym; + size_t strx; // Offset of the symbol's name within the string table. +}; + +class SymtabSection : public InputSection { +public: + SymtabSection(StringTableSection &); + void finalizeContents(); + size_t getNumSymbols() const { return symbols.size(); } + size_t getSize() const override; + // Like other sections in __LINKEDIT, the symtab section is special: its + // offsets are recorded in the LC_SYMTAB load command, instead of in section + // headers.
+ bool isHidden() const override { return true; } + void writeTo(uint8_t *buf) override; + +private: + StringTableSection &stringTableSection; + std::vector<SymtabEntry> symbols; +}; + struct InStruct { GotSection *got = nullptr; }; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -9,6 +9,7 @@ #include "SyntheticSections.h" #include "InputFiles.h" #include "OutputSegment.h" +#include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" @@ -128,6 +129,62 @@ memcpy(buf, contents.data(), contents.size()); } +SymtabSection::SymtabSection(StringTableSection &stringTableSection) + : stringTableSection(stringTableSection) { + segname = segment_names::linkEdit; + name = section_names::symbolTable; + // TODO: When we introduce the SyntheticSections superclass, we should make + // all synthetic sections aligned to WordSize by default. + align = WordSize; +} + +size_t SymtabSection::getSize() const { + return symbols.size() * sizeof(nlist_64); +} + +void SymtabSection::finalizeContents() { + // TODO: We should filter out some symbols.
+ for (Symbol *sym : symtab->getSymbols()) + symbols.push_back({sym, stringTableSection.addString(sym->getName())}); +} + +void SymtabSection::writeTo(uint8_t *buf) { + auto *nList = reinterpret_cast<nlist_64 *>(buf); + for (const SymtabEntry &entry : symbols) { + // TODO support other symbol types + // TODO populate n_desc + if (auto defined = dyn_cast<Defined>(entry.sym)) { + nList->n_strx = entry.strx; + nList->n_type = N_EXT | N_SECT; + nList->n_sect = defined->isec->sectionIndex; + // For the N_SECT symbol type, n_value is the address of the symbol + nList->n_value = defined->value + defined->isec->addr; + } + + ++nList; + } +} + +StringTableSection::StringTableSection() { + segname = segment_names::linkEdit; + name = section_names::stringTable; +} + +uint32_t StringTableSection::addString(StringRef str) { + uint32_t strx = size; + strings.push_back(str); + size += str.size() + 1; // account for null terminator + return strx; +} + +void StringTableSection::writeTo(uint8_t *buf) { + uint32_t off = 0; + for (StringRef str : strings) { + memcpy(buf + off, str.data(), str.size()); + off += str.size() + 1; // account for null terminator + } +} + InStruct in; } // namespace macho diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -52,6 +52,8 @@ uint64_t fileOff = 0; MachHeaderSection *headerSection = nullptr; BindingSection *bindingSection = nullptr; + SymtabSection *symtabSection = nullptr; + StringTableSection *stringTableSection = nullptr; }; // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -163,13 +165,23 @@ class LCSymtab : public LoadCommand { public: + LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection) + : symtabSection(symtabSection), stringTableSection(stringTableSection) {} + uint32_t getSize() const override { return sizeof(symtab_command); } void writeTo(uint8_t *buf) const override { auto *c = reinterpret_cast<symtab_command *>(buf); c->cmd = LC_SYMTAB; c->cmdsize = getSize(); + c->symoff = symtabSection->getFileOffset(); + c->nsyms = symtabSection->getNumSymbols(); + c->stroff = stringTableSection->getFileOffset(); + c->strsize = stringTableSection->getFileSize(); } + + SymtabSection *symtabSection = nullptr; + StringTableSection *stringTableSection = nullptr; }; class LCLoadDylib : public LoadCommand { @@ -238,7 +250,12 @@ {defaultPosition, {}}, // Make sure __LINKEDIT is the last segment (i.e. all its hidden // sections must be ordered after other sections). - {segment_names::linkEdit, {section_names::binding}}, + {segment_names::linkEdit, + { + section_names::binding, + section_names::symbolTable, + section_names::stringTable, + }}, }; for (uint32_t i = 0, n = ordering.size(); i < n; ++i) { @@ -294,7 +311,8 @@ void Writer::createLoadCommands() { headerSection->addLoadCommand(make(bindingSection)); headerSection->addLoadCommand(make()); - headerSection->addLoadCommand(make<LCSymtab>()); + headerSection->addLoadCommand( + make<LCSymtab>(symtabSection, stringTableSection)); headerSection->addLoadCommand(make()); headerSection->addLoadCommand(make()); @@ -323,6 +341,8 @@ void Writer::createHiddenSections() { headerSection = createInputSection<MachHeaderSection>(); bindingSection = createInputSection<BindingSection>(); + stringTableSection = createInputSection<StringTableSection>(); + symtabSection = createInputSection<SymtabSection>(*stringTableSection); createInputSection(); } @@ -351,6 +371,9 @@ ArrayRef<InputSection *> sections = p.second; for (InputSection *isec : sections) { addr = alignTo(addr, isec->align); + // We must align the file offsets too to avoid misaligned writes of + // structs.
+ fileOff = alignTo(fileOff, isec->align); isec->addr = addr; addr += isec->getSize(); fileOff += isec->getFileSize(); @@ -376,6 +399,7 @@ uint64_t fileOff = seg->fileOff; for (auto &sect : seg->getSections()) { for (InputSection *isec : sect.second) { + fileOff = alignTo(fileOff, isec->align); isec->writeTo(buf + fileOff); fileOff += isec->getFileSize(); } @@ -405,6 +429,7 @@ // Fill __LINKEDIT contents. bindingSection->finalizeContents(); + symtabSection->finalizeContents(); // Now that __LINKEDIT is filled out, do a proper calculation of its // addresses and offsets. We don't have to recalculate the other segments diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/symtab.s @@ -0,0 +1,54 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-readobj -symbols %t | FileCheck %s + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: bar +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: foo +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: ] + +.data +.global foo +foo: + .asciz "Hello world!\n" + +.text +.global bar +.global _main + +_main: + mov $0, %rax + ret + +bar: + mov $2, %rax + ret