diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -135,9 +135,9 @@ config->outputFile = args.getLastArgValue(OPT_o, "a.out"); config->searchPaths = getSearchPaths(args); - getOrCreateOutputSegment("__TEXT", VM_PROT_READ | VM_PROT_EXECUTE); - getOrCreateOutputSegment("__DATA", VM_PROT_READ | VM_PROT_WRITE); - getOrCreateOutputSegment("__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE); + getOrCreateOutputSegment("__TEXT"); + getOrCreateOutputSegment("__DATA"); + getOrCreateOutputSegment("__DATA_CONST"); for (opt::Arg *arg : args) { switch (arg->getOption().getID()) { @@ -165,8 +165,7 @@ // Add input sections to output segments. for (InputSection *isec : inputSections) { - OutputSegment *os = - getOrCreateOutputSegment(isec->segname, VM_PROT_READ | VM_PROT_WRITE); + OutputSegment *os = getOrCreateOutputSegment(isec->segname); isec->parent = os; os->sections[isec->name].push_back(isec); } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -41,8 +41,12 @@ StringRef segname; ArrayRef data; + + // TODO these properties ought to live in an OutputSection class. + // Move them once available. uint64_t addr = 0; uint32_t align = 1; + uint32_t sectionIndex = 0; uint32_t flags = 0; std::vector relocs; diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -31,7 +31,7 @@ extern std::vector outputSegments; -OutputSegment *getOrCreateOutputSegment(StringRef name, uint32_t perms); +OutputSegment *getOrCreateOutputSegment(StringRef name); } // namespace macho } // namespace lld diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -8,14 +8,16 @@ #include "OutputSegment.h" #include "lld/Common/Memory.h" +#include "llvm/BinaryFormat/MachO.h" using namespace llvm; +using namespace llvm::MachO; using namespace lld; using namespace lld::macho; std::vector macho::outputSegments; -OutputSegment *macho::getOrCreateOutputSegment(StringRef name, uint32_t perms) { +OutputSegment *macho::getOrCreateOutputSegment(StringRef name) { for (OutputSegment *os : outputSegments) if (os->name == name) // TODO: assert that os->perms == perms, once we figure out what to do @@ -24,7 +26,13 @@ auto *os = make(); os->name = name; - os->perms = perms; + + if (name == "__TEXT") { + os->perms = VM_PROT_READ | VM_PROT_EXECUTE; + } else { + os->perms = VM_PROT_READ | VM_PROT_WRITE; + } + outputSegments.push_back(os); return os; } diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -49,6 +49,9 @@ void scanRelocations(); void assignAddresses(); + void assignSymtabAddresses(uint64_t start); + + void createSymtabContents(); void createDyldInfoContents(); void openFile(); @@ -210,7 +213,20 @@ auto *c = reinterpret_cast(buf); c->cmd = LC_SYMTAB; c->cmdsize = getSize(); + c->symoff = symOff; + c->nsyms = nSyms; + c->stroff = strOff; + c->strsize = strSize; } + + uint64_t symOff = 0; + uint64_t nSyms = 0; + uint64_t strOff = 0; + uint64_t strSize = 0; + + // As part of the address pass, the stringTable must be + // calculated. + SmallVector stringTable; }; class LCLoadDylib : public LoadCommand { @@ -312,6 +328,9 @@ sizeofCmds = size; addr += size; + // TODO This is wrong; input sections ought to be grouped into + // output sections, which are then organized like this. + uint64_t sectionIndex = 0; for (OutputSegment *seg : outputSegments) { addr = alignTo(addr, PageSize); @@ -321,6 +340,7 @@ addr = alignTo(addr, isec->align); isec->addr = addr; addr += isec->getSize(); + isec->sectionIndex = ++sectionIndex; } } } @@ -328,6 +348,67 @@ addr = alignTo(addr, PageSize); linkEditSeg->addr = addr; linkEditSeg->fileOff = addr - ImageBase; + + // Calculate the symbol table's final position + assignSymtabAddresses(linkEditSeg->getOffset()); +} + +void Writer::assignSymtabAddresses(uint64_t start) { + symtabSeg->symOff = start; + symtabSeg->nSyms = 0; + symtabSeg->strSize = 1; // For the initial empty string value + for (Symbol *sym : symtab->getSymbols()) { + ++symtabSeg->nSyms; + // To account for null terminator later + symtabSeg->strSize += sym->getName().size() + 1; + } + // The string table is located right after the symbol table. + symtabSeg->strOff = start + symtabSeg->nSyms * sizeof(nlist_64); +} + +// TODO: Implement symbol export trie. +// A symbol table is composed of two lists: +// - The symbol table: A sequence of nlist_64 structs +// - The string table: A sequence of symbol names delimited by \0 +// symbol table entry [i] corresponds to string table entry [i] +// https://github.com/aidansteele/osx-abi-macho-file-format-reference#nlist_64 +void Writer::createSymtabContents() { + SmallVector stringTable; + raw_svector_ostream stringTableOs{stringTable}; + // An n_strx value of 0 always indicates the empty string, so we must locate + // our non-empty string values at positive offsets in the string pool. + // Therefore we insert a dummy value at position zero. + stringTableOs << '\0'; + + raw_svector_ostream os{linkEditSeg->contents}; + for (Symbol *sym : symtab->getSymbols()) { + uint8_t nType = N_UNDF; + uint8_t nSect = NO_SECT; + uint16_t nDesc = 0; + uint64_t nValue = 0; + + // TODO support other symbol types + // TODO populate n_desc + if (auto defined = dyn_cast(sym)) { + nType = (N_EXT | N_SECT); + nSect = defined->isec->sectionIndex; + + // For the N_SECT symbol type, n_value is the address of the symbol + nValue = defined->value + defined->isec->addr; + } + + auto nStrx = stringTable.size(); + stringTableOs << sym->getName() << '\0'; + + // Emit one nlist_64 struct. + endian::write(os, nStrx, endianness::little); // n_strx + os << nType; // n_type + os << nSect; // n_sect + endian::write(os, nDesc, endianness::little); // n_desc + endian::write(os, nValue, endianness::little); // n_value + } + + os << stringTable; } // LC_DYLD_INFO_ONLY contains symbol import/export information. Imported @@ -362,8 +443,6 @@ dyldInfoSeg->bindOff = sectionStart; dyldInfoSeg->bindSize = linkEditSeg->getOffset() - sectionStart; - - // TODO: Implement symbol export trie. } void Writer::openFile() { @@ -413,7 +492,9 @@ assignAddresses(); // Fill __LINKEDIT contents + createSymtabContents(); createDyldInfoContents(); + fileSize = linkEditSeg->fileOff + linkEditSeg->contents.size(); openFile(); diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/symtab.s @@ -0,0 +1,54 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-readobj -symbols %t | FileCheck %s + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x[[#%x, MAIN_VALUE:]] +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: bar +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x[[#MAIN_VALUE + 8]] +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: foo +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: ] + +.data +.global foo +foo: + .asciz "Hello world!\n" + +.text +.global bar +.global _main + +_main: + mov $0, %rax + ret + +bar: + mov $2, %rax + ret