lld/MachO: emit a basic LC_SYMTAB symbol table

- getOrCreateOutputSegment() no longer takes a permissions argument; a newly
  created segment gets read+execute if it is named __TEXT and read+write
  otherwise.
- InputSection gains an nSect field, which the address-assignment loop in
  Writer.cpp fills with a running 1-based counter across all output segments.
- The LC_SYMTAB load command now records symoff/nsyms/stroff/strsize.
- Writer::createSymtabContents() appends one nlist_64 record per symbol to the
  __LINKEDIT contents, followed by the string table (which starts with a NUL
  so that an n_strx of 0 means the empty string).
- Adds lld/test/MachO/symtab_basic.s, which checks three defined external
  symbols.

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -135,9 +135,9 @@
   config->outputFile = args.getLastArgValue(OPT_o, "a.out");
   config->searchPaths = getSearchPaths(args);
 
-  getOrCreateOutputSegment("__TEXT", VM_PROT_READ | VM_PROT_EXECUTE);
-  getOrCreateOutputSegment("__DATA", VM_PROT_READ | VM_PROT_WRITE);
-  getOrCreateOutputSegment("__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE);
+  getOrCreateOutputSegment("__TEXT");
+  getOrCreateOutputSegment("__DATA");
+  getOrCreateOutputSegment("__DATA_CONST");
 
   for (opt::Arg *arg : args) {
     switch (arg->getOption().getID()) {
@@ -166,7 +166,7 @@
   // Add input sections to output segments.
   for (InputSection *isec : inputSections) {
     OutputSegment *os =
-        getOrCreateOutputSegment(isec->segname, VM_PROT_READ | VM_PROT_WRITE);
+        getOrCreateOutputSegment(isec->segname);
     isec->parent = os;
     os->sections[isec->name].push_back(isec);
   }
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -41,8 +41,12 @@
   StringRef segname;
 
   ArrayRef<uint8_t> data;
+
+  // TODO these properties ought to live in an OutputSection class.
+  // Move them once available.
   uint64_t addr = 0;
   uint32_t align = 1;
+  uint32_t nSect = 0;
   uint32_t flags = 0;
 
   std::vector<Reloc> relocs;
diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -31,7 +31,7 @@
 
 extern std::vector<OutputSegment *> outputSegments;
 
-OutputSegment *getOrCreateOutputSegment(StringRef name, uint32_t perms);
+OutputSegment *getOrCreateOutputSegment(StringRef name);
 
 } // namespace macho
 } // namespace lld
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -7,15 +7,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "OutputSegment.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "lld/Common/Memory.h"
 
 using namespace llvm;
+using namespace llvm::MachO;
 using namespace lld;
 using namespace lld::macho;
 
 std::vector<OutputSegment *> macho::outputSegments;
 
-OutputSegment *macho::getOrCreateOutputSegment(StringRef name, uint32_t perms) {
+OutputSegment *macho::getOrCreateOutputSegment(StringRef name) {
   for (OutputSegment *os : outputSegments)
     if (os->name == name)
       // TODO: assert that os->perms == perms, once we figure out what to do
@@ -24,7 +26,13 @@
 
   auto *os = make<OutputSegment>();
   os->name = name;
-  os->perms = perms;
+
+  if (name == "__TEXT") {
+    os->perms = VM_PROT_READ | VM_PROT_EXECUTE;
+  } else {
+    os->perms = VM_PROT_READ | VM_PROT_WRITE;
+  }
+
   outputSegments.push_back(os);
   return os;
 }
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -50,6 +50,7 @@
   void assignAddresses();
 
   void createDyldInfoContents();
+  void createSymtabContents();
 
   void openFile();
   void writeHeader();
@@ -210,7 +211,16 @@
     auto *c = reinterpret_cast<symtab_command *>(buf);
     c->cmd = LC_SYMTAB;
     c->cmdsize = getSize();
+    c->symoff = symOff;
+    c->nsyms = nSyms;
+    c->stroff = strOff;
+    c->strsize = strSize;
   }
+
+  uint64_t symOff = 0;
+  uint64_t nSyms = 0;
+  uint64_t strOff = 0;
+  uint64_t strSize = 0;
 };
 
 class LCLoadDylib : public LoadCommand {
@@ -312,6 +322,9 @@
   sizeofCmds = size;
   addr += size;
 
+  // TODO This is wrong; input sections ought to be grouped into
+  // output sections, which are then organized like this.
+  uint64_t nSect = 0;
   for (OutputSegment *seg : outputSegments) {
     addr = alignTo(addr, PageSize);
 
@@ -321,6 +334,7 @@
         addr = alignTo(addr, isec->align);
         isec->addr = addr;
         addr += isec->getSize();
+        isec->nSect = ++nSect;
       }
     }
   }
@@ -362,8 +376,55 @@
 
   dyldInfoSeg->bindOff = sectionStart;
   dyldInfoSeg->bindSize = linkEditSeg->getOffset() - sectionStart;
+}
+
+// TODO: Implement symbol export trie.
+void Writer::createSymtabContents() {
+  uint64_t start = linkEditSeg->getOffset();
+
+  symtabSeg->symOff = start;
+  symtabSeg->nSyms = 0;
+
+  SmallVector<char, 128> stringTable;
+  raw_svector_ostream stringTableOs{stringTable};
+  // An n_strx value of 0 always indicates the empty string, so we must locate
+  // our non-empty string values at positive offsets in the string pool.
+  // Therefore we insert a dummy value at position zero.
+  stringTableOs << '\0';
+
+  raw_svector_ostream contentsOs{linkEditSeg->contents};
+  for (Symbol *sym : symtab->getSymbols()) {
+    uint8_t nType = N_UNDF;
+    uint8_t nSect = NO_SECT;
+    uint16_t nDesc = 0;
+    uint64_t nValue = 0;
+
+    // TODO support other symbol types
+    // TODO populate n_desc
+    if (auto defined = dyn_cast<Defined>(sym)) {
+      nType = (N_EXT | N_SECT);
+      nSect = defined->isec->nSect;
+
+      // For the N_SECT symbol type, n_value is the address of the symbol
+      nValue = defined->value + defined->isec->addr;
+    }
 
-  // TODO: Implement symbol export trie.
+    ++symtabSeg->nSyms;
+    auto nStrx = stringTable.size();
+    stringTableOs << sym->getName() << '\0';
+
+    // Emit one nlist_64 struct; explicit widths keep n_strx at 4 bytes.
+    endian::write<uint32_t>(contentsOs, nStrx, endianness::little); // n_strx
+    contentsOs << nType;                                            // n_type
+    contentsOs << nSect;                                            // n_sect
+    endian::write<uint16_t>(contentsOs, nDesc, endianness::little); // n_desc
+    endian::write<uint64_t>(contentsOs, nValue, endianness::little); // n_value
+  }
+
+  // The string table is located right after the symbol table.
+  symtabSeg->strOff = start + symtabSeg->nSyms * sizeof(nlist_64);
+  symtabSeg->strSize = stringTable.size();
+  contentsOs << stringTable;
 }
 
 void Writer::openFile() {
@@ -414,6 +475,8 @@
 
   // Fill __LINKEDIT contents
   createDyldInfoContents();
+  createSymtabContents();
+
   fileSize = linkEditSeg->fileOff + linkEditSeg->contents.size();
 
   openFile();
diff --git a/lld/test/MachO/symtab_basic.s b/lld/test/MachO/symtab_basic.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/symtab_basic.s
@@ -0,0 +1,54 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: lld -flavor darwinnew -o %t %t.o
+# RUN: llvm-readobj -symbols %t | FileCheck %s
+
+# CHECK:      Symbols [
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _main
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __text (0x1)
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x[[#%x, MAIN_VALUE:]]
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: bar
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __text (0x1)
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x[[#MAIN_VALUE + 8]]
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: foo
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __data
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value:
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+.data
+.global foo
+foo:
+  .asciz "Hello world!\n"
+
+.text
+.global bar
+.global _main
+
+_main:
+  mov $0, %rax
+  ret
+
+bar:
+  mov $2, %rax
+  ret