diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -13,6 +13,7 @@ #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/Memory.h" +#include "lld/Common/Sort.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -215,17 +216,6 @@ ms->finalizeContents(); } -static void sortByOrder(MutableArrayRef in, - llvm::function_ref order) { - std::vector> v; - for (InputSection *s : in) - v.push_back({order(s), s}); - llvm::stable_sort(v, less_first()); - - for (size_t i = 0; i < v.size(); ++i) - in[i] = v[i].second; -} - uint64_t getHeaderSize() { if (config->oFormatBinary) return 0; @@ -240,7 +230,7 @@ assert(isLive()); for (BaseCommand *b : sectionCommands) if (auto *isd = dyn_cast(b)) - sortByOrder(isd->sections, order); + sortByOrder(isd->sections, order); } // Fill [Buf, Buf + Size) with Filler. diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -135,10 +135,6 @@ config->outputFile = args.getLastArgValue(OPT_o, "a.out"); config->searchPaths = getSearchPaths(args); - getOrCreateOutputSegment("__TEXT"); - getOrCreateOutputSegment("__DATA"); - getOrCreateOutputSegment("__DATA_CONST"); - for (opt::Arg *arg : args) { switch (arg->getOption().getID()) { case OPT_INPUT: @@ -163,14 +159,6 @@ for (InputSection *sec : file->sections) inputSections.push_back(sec); - // Add input sections to output segments. - for (InputSection *isec : inputSections) { - OutputSegment *os = - getOrCreateOutputSegment(isec->segname); - isec->parent = os; - os->sections[isec->name].push_back(isec); - } - // Write to an output file. 
writeResult(); diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -32,8 +32,14 @@ class InputSection { public: virtual ~InputSection() = default; - virtual void writeTo(uint8_t *buf); virtual size_t getSize() const { return data.size(); } + virtual uint64_t getFileSize() const { return getSize(); } + uint64_t getFileOffset() const; + // Hidden sections get no section_64 header, but their body is still written. virtual bool isHidden() const { return false; } + // Sections that are not needed are omitted entirely (header and body). virtual bool isNeeded() const { return true; } + virtual void writeTo(uint8_t *buf); InputFile *file = nullptr; OutputSegment *parent = nullptr; @@ -46,7 +52,7 @@ // Move them once available. uint64_t addr = 0; uint32_t align = 1; - uint32_t nSect = 0; + uint32_t sectionIndex = 0; uint32_t flags = 0; std::vector relocs; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputSection.h" +#include "OutputSegment.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -20,6 +21,10 @@ std::vector macho::inputSections; +uint64_t InputSection::getFileOffset() const { + return parent->fileOff + addr - parent->firstSection()->addr; +} + void InputSection::writeTo(uint8_t *buf) { memcpy(buf, data.data(), data.size()); diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -15,6 +15,14 @@ namespace lld { namespace macho { +namespace segment_names { + +constexpr const char *text = "__TEXT"; +constexpr const char *pageZero = "__PAGEZERO"; +constexpr const char *linkEdit = "__LINKEDIT"; + +} // namespace segment_names + class InputSection; class OutputSegment { @@ -23,14 +31,31 @@ InputSection *lastSection() const { 
return sections.back().second.back(); } + bool isNeeded() const { + return sections.size() != 0 || name == segment_names::linkEdit; + } + + void addSection(InputSection *); + + const llvm::MapVector> & + getSections() const { + return sections; + } + + uint64_t fileOff = 0; StringRef name; - uint32_t perms; + uint32_t numNonHiddenSections = 0; + uint32_t maxProt = 0; + uint32_t initProt = 0; uint8_t index; + +private: llvm::MapVector> sections; }; extern std::vector outputSegments; +OutputSegment *getOutputSegment(StringRef name); OutputSegment *getOrCreateOutputSegment(StringRef name); } // namespace macho diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -7,32 +7,58 @@ //===----------------------------------------------------------------------===// #include "OutputSegment.h" -#include "llvm/BinaryFormat/MachO.h" +#include "InputSection.h" + #include "lld/Common/Memory.h" +#include "llvm/BinaryFormat/MachO.h" using namespace llvm; using namespace llvm::MachO; using namespace lld; using namespace lld::macho; +static uint32_t initProt(StringRef name) { + if (name == segment_names::text) + return VM_PROT_READ | VM_PROT_EXECUTE; + if (name == segment_names::pageZero) + return 0; + if (name == segment_names::linkEdit) + return VM_PROT_READ; + return VM_PROT_READ | VM_PROT_WRITE; +} + +static uint32_t maxProt(StringRef name) { + if (name == segment_names::pageZero) + return 0; + return VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; +} + +void OutputSegment::addSection(InputSection *isec) { + isec->parent = this; + std::vector &vec = sections[isec->name]; + if (vec.empty() && !isec->isHidden()) { + ++numNonHiddenSections; + } + vec.push_back(isec); +} + +static llvm::DenseMap nameToOutputSegment; std::vector macho::outputSegments; +OutputSegment *macho::getOutputSegment(StringRef name) { + return nameToOutputSegment.lookup(name); +} + OutputSegment 
*macho::getOrCreateOutputSegment(StringRef name) { - for (OutputSegment *os : outputSegments) - if (os->name == name) - // TODO: assert that os->perms == perms, once we figure out what to do - // about default-created segments. - return os; - - auto *os = make(); - os->name = name; - - if (name == "__TEXT") { - os->perms = VM_PROT_READ | VM_PROT_EXECUTE; - } else { - os->perms = VM_PROT_READ | VM_PROT_WRITE; - } + OutputSegment *&segRef = nameToOutputSegment[name]; + if (segRef != nullptr) + return segRef; + + segRef = make(); + segRef->name = name; + segRef->maxProt = maxProt(name); + segRef->initProt = initProt(name); - outputSegments.push_back(os); - return os; + outputSegments.push_back(segRef); + return segRef; } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -13,10 +13,47 @@ #include "Target.h" #include "llvm/ADT/SetVector.h" +using namespace llvm::MachO; + namespace lld { namespace macho { +namespace section_names { + +constexpr const char *binding = "__binding"; +constexpr const char *header = "__mach_header"; +constexpr const char *pageZero = "__pagezero"; +constexpr const char *stringPool = "__string_pool"; +constexpr const char *symbolTable = "__symbol_table"; + +} // namespace section_names + class DylibSymbol; +class LoadCommand; + +// The header of the Mach-O file, which must have a file offset of zero. +class MachHeaderSection : public InputSection { +public: + MachHeaderSection(); + void addLoadCommand(LoadCommand *); + bool isHidden() const override { return true; } + size_t getSize() const override; + void writeTo(uint8_t *buf) override; + +private: + std::vector loadCommands; + uint32_t sizeOfCmds = 0; +}; + +// A hidden section that exists solely for the purpose of creating the +// __PAGEZERO segment, which is used to catch null pointer dereferences. 
+class PageZeroSection : public InputSection { +public: + PageZeroSection(); + bool isHidden() const override { return true; } + size_t getSize() const override { return ImageBase; } + uint64_t getFileSize() const override { return 0; } +}; // This section will be populated by dyld with addresses to non-lazily-loaded // dylib symbols. @@ -31,6 +68,8 @@ size_t getSize() const override { return entries.size() * WordSize; } + bool isNeeded() const override { return !entries.empty(); } + void writeTo(uint8_t *buf) override { // Nothing to write, GOT contains all zeros at link time; it's populated at // runtime by dyld. @@ -40,8 +79,62 @@ llvm::SetVector entries; }; +// Stores bind opcodes for telling dyld which symbols to load non-lazily. +class BindingSection : public InputSection { +public: + BindingSection(); + size_t getSize() const override { return contents.size(); } + void encode(); + // Like other sections in __LINKEDIT, the binding section is special: its + // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in + // section headers. + bool isHidden() const override { return true; } + bool isNeeded() const override; + void writeTo(uint8_t *buf) override; + + SmallVector contents; +}; + +class SymtabSection : public InputSection { +public: + SymtabSection(); + void addSymbol(uint32_t strx, uint8_t type, uint8_t sect, uint16_t desc, + uint64_t value); + size_t getSize() const override { return contents.size(); } + // Like other sections in __LINKEDIT, the symtab section is special: its + // offsets are recorded in the LC_SYMTAB load command, instead of in section + // headers. + bool isHidden() const override { return true; } + void writeTo(uint8_t *buf) override; + + uint32_t nsyms = 0; + SmallVector contents; + llvm::raw_svector_ostream os{contents}; +}; + +// Stores the strings referenced by the symbol table. +class StringPoolSection : public InputSection { +public: + StringPoolSection(); + // Returns the start offset of the added string. 
+ uint32_t addString(StringRef); + size_t getSize() const override { return poolSize; } + // Like other sections in __LINKEDIT, the string pool section is special: its + // offsets are recorded in the LC_SYMTAB load command, instead of in section + // headers. + bool isHidden() const override { return true; } + void writeTo(uint8_t *buf) override; + +private: + // An n_strx value of 0 always indicates the empty string, so we must locate + // our non-empty string values at positive offsets in the string pool. + // Therefore we insert a dummy value at position zero. + std::vector pool{"\0"}; + uint32_t poolSize = 1; +}; + struct InStruct { - GotSection *got; + GotSection *got = nullptr; }; extern InStruct in; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -7,13 +7,60 @@ //===----------------------------------------------------------------------===// #include "SyntheticSections.h" +#include "InputFiles.h" +#include "OutputSegment.h" #include "Symbols.h" +#include "Writer.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; using namespace llvm::MachO; +using namespace llvm::support; namespace lld { namespace macho { +MachHeaderSection::MachHeaderSection() { + // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts + // from the beginning of the file (i.e. the header). 
+ segname = segment_names::text; + name = section_names::header; +} + +void MachHeaderSection::addLoadCommand(LoadCommand *lc) { + loadCommands.push_back(lc); + sizeOfCmds += lc->getSize(); +} + +size_t MachHeaderSection::getSize() const { + return sizeof(mach_header_64) + sizeOfCmds; +} + +void MachHeaderSection::writeTo(uint8_t *buf) { + auto *hdr = reinterpret_cast(buf); + hdr->magic = MH_MAGIC_64; + hdr->cputype = CPU_TYPE_X86_64; + hdr->cpusubtype = CPU_SUBTYPE_X86_64_ALL | CPU_SUBTYPE_LIB64; + hdr->filetype = MH_EXECUTE; + hdr->ncmds = loadCommands.size(); + hdr->sizeofcmds = sizeOfCmds; + hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL; + + uint8_t *p = reinterpret_cast(hdr + 1); + for (LoadCommand *lc : loadCommands) { + lc->writeTo(p); + p += lc->getSize(); + } +} + +PageZeroSection::PageZeroSection() { + segname = segment_names::pageZero; + name = section_names::pageZero; +} + GotSection::GotSection() { segname = "__DATA_CONST"; name = "__got"; @@ -30,6 +77,97 @@ } } +BindingSection::BindingSection() { + segname = segment_names::linkEdit; + name = section_names::binding; +} + +bool BindingSection::isNeeded() const { return in.got->isNeeded(); } + +// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld +// interprets to update a record with the following fields: +// * segment index (of the segment to write the symbol addresses to, typically +// the __DATA_CONST segment which contains the GOT) +// * offset within the segment, indicating the next location to write a binding +// * symbol type +// * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command) +// * symbol name +// * addend +// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind +// a symbol in the GOT, and increments the segment offset to point to the next +// entry. It does *not* clear the record state after doing the bind, so +// subsequent opcodes only need to encode the differences between bindings. 
+void BindingSection::encode() { + if (!isNeeded()) + return; + + raw_svector_ostream os{contents}; + os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | + in.got->parent->index); + encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os); + for (const DylibSymbol *sym : in.got->getEntries()) { + // TODO: Implement compact encoding -- we only need to encode the + // differences between consecutive symbol entries. + if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) { + os << static_cast(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + sym->file->ordinal); + } else { + error("TODO: Support larger dylib symbol ordinals"); + continue; + } + os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) + << sym->getName() << '\0' + << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) + << static_cast(BIND_OPCODE_DO_BIND); + } + + os << static_cast(BIND_OPCODE_DONE); +} + +void BindingSection::writeTo(uint8_t *buf) { + memcpy(buf, contents.data(), contents.size()); +} + +SymtabSection::SymtabSection() { + segname = segment_names::linkEdit; + name = section_names::symbolTable; +} + +void SymtabSection::addSymbol(uint32_t strx, uint8_t type, uint8_t sect, + uint16_t desc, uint64_t value) { + ++nsyms; + // Emit one nlist_64 struct. 
+ endian::write(os, strx, endianness::little); + os << type; + os << sect; + endian::write(os, desc, endianness::little); + endian::write(os, value, endianness::little); +} + +void SymtabSection::writeTo(uint8_t *buf) { + memcpy(buf, contents.data(), contents.size()); +} + +StringPoolSection::StringPoolSection() { + segname = segment_names::linkEdit; + name = section_names::stringPool; +} + +uint32_t StringPoolSection::addString(StringRef str) { + uint32_t strx = poolSize; + pool.push_back(str); + poolSize += str.size() + 1; // account for null terminator + return strx; +} + +void StringPoolSection::writeTo(uint8_t *buf) { + uint32_t off = 0; + for (StringRef str : pool) { + memcpy(buf + off, str.data(), str.size()); + off += str.size() + 1; // account for null terminator + } +} + InStruct in; } // namespace macho diff --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h --- a/lld/MachO/Writer.h +++ b/lld/MachO/Writer.h @@ -9,9 +9,18 @@ #ifndef LLD_MACHO_WRITER_H #define LLD_MACHO_WRITER_H +#include + namespace lld { namespace macho { +class LoadCommand { +public: + virtual ~LoadCommand() = default; + virtual uint32_t getSize() const = 0; + virtual void writeTo(uint8_t *buf) const = 0; +}; + void writeResult(); void createSyntheticSections(); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -18,14 +18,12 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Sort.h" #include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/EndianStream.h" -#include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" using namespace llvm; using namespace llvm::MachO; -using namespace llvm::support; using namespace lld; using namespace lld::macho; @@ -34,91 +32,51 @@ class LCDyldInfo; class LCSymtab; -class LoadCommand { -public: - virtual ~LoadCommand() = default; - virtual uint32_t getSize() const = 0; - virtual void writeTo(uint8_t *buf) const = 0; -}; - class Writer { 
public: Writer() : buffer(errorHandler().outputBuffer) {} - void createLoadCommands(); void scanRelocations(); - void assignAddresses(); - - void createDyldInfoContents(); + void createHiddenSections(); + void sortSections(); + void createLoadCommands(); + void assignAddresses(OutputSegment *); void createSymtabContents(); void openFile(); - void writeHeader(); void writeSections(); void run(); - std::vector loadCommands; std::unique_ptr &buffer; - uint64_t fileSize = 0; - uint64_t sizeofCmds = 0; - LCLinkEdit *linkEditSeg = nullptr; - LCDyldInfo *dyldInfoSeg = nullptr; - LCSymtab *symtabSeg = nullptr; -}; - -class LCPagezero : public LoadCommand { -public: - uint32_t getSize() const override { return sizeof(segment_command_64); } - - void writeTo(uint8_t *buf) const override { - auto *c = reinterpret_cast(buf); - c->cmd = LC_SEGMENT_64; - c->cmdsize = getSize(); - strcpy(c->segname, "__PAGEZERO"); - c->vmsize = PageSize; - } -}; - -class LCLinkEdit : public LoadCommand { -public: - uint32_t getSize() const override { return sizeof(segment_command_64); } - - void writeTo(uint8_t *buf) const override { - auto *c = reinterpret_cast(buf); - c->cmd = LC_SEGMENT_64; - c->cmdsize = getSize(); - strcpy(c->segname, "__LINKEDIT"); - c->vmaddr = addr; - c->fileoff = fileOff; - c->filesize = c->vmsize = contents.size(); - c->maxprot = VM_PROT_READ | VM_PROT_WRITE; - c->initprot = VM_PROT_READ; - } - - uint64_t getOffset() const { return fileOff + contents.size(); } - - uint64_t fileOff = 0; uint64_t addr = 0; - SmallVector contents; + uint64_t fileOff = 0; + MachHeaderSection *headerSection = nullptr; + BindingSection *bindingSection = nullptr; + SymtabSection *symtabSection = nullptr; + StringPoolSection *stringPoolSection = nullptr; }; +// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. 
class LCDyldInfo : public LoadCommand { public: + LCDyldInfo(BindingSection *bindingSection) : bindingSection(bindingSection) {} + uint32_t getSize() const override { return sizeof(dyld_info_command); } void writeTo(uint8_t *buf) const override { auto *c = reinterpret_cast(buf); c->cmd = LC_DYLD_INFO_ONLY; c->cmdsize = getSize(); - c->bind_off = bindOff; - c->bind_size = bindSize; + if (bindingSection->isNeeded()) { + c->bind_off = bindingSection->getFileOffset(); + c->bind_size = bindingSection->getFileSize(); + } c->export_off = exportOff; c->export_size = exportSize; } - uint64_t bindOff = 0; - uint64_t bindSize = 0; + BindingSection *bindingSection; uint64_t exportOff = 0; uint64_t exportSize = 0; }; @@ -140,7 +98,7 @@ uint32_t getSize() const override { return sizeof(segment_command_64) + - seg->sections.size() * sizeof(section_64); + seg->numNonHiddenSections * sizeof(section_64); } void writeTo(uint8_t *buf) const override { @@ -150,22 +108,26 @@ c->cmd = LC_SEGMENT_64; c->cmdsize = getSize(); memcpy(c->segname, name.data(), name.size()); + c->fileoff = seg->fileOff; + c->maxprot = seg->maxProt; + c->initprot = seg->initProt; + + if (seg->getSections().empty()) + return; - // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts - // from the beginning of the file (i.e. the header). - // TODO: replace this logic by creating a synthetic __TEXT,__mach_header - // section instead. - c->fileoff = name == "__TEXT" ? 
0 : seg->firstSection()->addr - ImageBase; - c->vmaddr = c->fileoff + ImageBase; - c->vmsize = c->filesize = + c->vmaddr = seg->firstSection()->addr; + c->vmsize = seg->lastSection()->addr + seg->lastSection()->getSize() - c->vmaddr; - c->maxprot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; - c->initprot = seg->perms; - c->nsects = seg->sections.size(); + c->nsects = seg->numNonHiddenSections; - for (auto &p : seg->sections) { + for (auto &p : seg->getSections()) { StringRef s = p.first; - std::vector §ions = p.second; + ArrayRef sections = p.second; + for (InputSection *isec : sections) + c->filesize += isec->getFileSize(); + if (sections[0]->isHidden()) { + continue; + } auto *sectHdr = reinterpret_cast(buf); buf += sizeof(section_64); @@ -174,7 +136,7 @@ memcpy(sectHdr->segname, name.data(), name.size()); sectHdr->addr = sections[0]->addr; - sectHdr->offset = sections[0]->addr - ImageBase; + sectHdr->offset = sections[0]->getFileOffset(); sectHdr->align = sections[0]->align; uint32_t maxAlign = 0; for (const InputSection *section : sections) @@ -205,22 +167,23 @@ class LCSymtab : public LoadCommand { public: + LCSymtab(SymtabSection *symtabSection, StringPoolSection *stringPoolSection) + : symtabSection(symtabSection), stringPoolSection(stringPoolSection) {} + uint32_t getSize() const override { return sizeof(symtab_command); } void writeTo(uint8_t *buf) const override { auto *c = reinterpret_cast(buf); c->cmd = LC_SYMTAB; c->cmdsize = getSize(); - c->symoff = symOff; - c->nsyms = nSyms; - c->stroff = strOff; - c->strsize = strSize; + c->symoff = symtabSection->getFileOffset(); + c->nsyms = symtabSection->nsyms; + c->stroff = stringPoolSection->getFileOffset(); + c->strsize = stringPoolSection->getFileSize(); } - uint64_t symOff = 0; - uint64_t nSyms = 0; - uint64_t strOff = 0; - uint64_t strSize = 0; + SymtabSection *symtabSection = nullptr; + StringPoolSection *stringPoolSection = nullptr; }; class LCLoadDylib : public LoadCommand { @@ -270,25 +233,52 @@ 
// different location. const StringRef path = "/usr/lib/dyld"; }; + +// This determines the order that sections will appear in the output. Sections +// belonging to the same segment are required to be arranged contiguously. +uint32_t sectionOrder(const InputSection *isec) { + // Make sure __LINKEDIT is the last segment (i.e. all its hidden sections + // must be ordered after other sections). + constexpr uint32_t linkEditOffset = 0xffff; + + if (isec->name == section_names::pageZero) + return 0; + if (isec->name == section_names::header) + return 1; + if (isec->name == section_names::binding) + return linkEditOffset + 1; + return linkEditOffset; +} + +template +SectionType *createInputSection(ArgT &&... args) { + auto *section = make(std::forward(args)...); + inputSections.push_back(section); + return section; +} + } // namespace +void Writer::scanRelocations() { + for (InputSection *sect : inputSections) + for (Reloc &r : sect->relocs) + if (auto *s = r.target.dyn_cast()) + if (auto *dylibSymbol = dyn_cast(s)) + in.got->addEntry(*dylibSymbol); +} + void Writer::createLoadCommands() { - linkEditSeg = make(); - dyldInfoSeg = make(); - symtabSeg = make(); - - loadCommands.push_back(linkEditSeg); - loadCommands.push_back(dyldInfoSeg); - loadCommands.push_back(symtabSeg); - loadCommands.push_back(make()); - loadCommands.push_back(make()); - loadCommands.push_back(make()); - loadCommands.push_back(make()); - - uint8_t segIndex = 1; // LCPagezero is a segment load command + headerSection->addLoadCommand(make(bindingSection)); + headerSection->addLoadCommand(make()); + headerSection->addLoadCommand( + make(symtabSection, stringPoolSection)); + headerSection->addLoadCommand(make()); + headerSection->addLoadCommand(make()); + + uint8_t segIndex = 0; for (OutputSegment *seg : outputSegments) { - if (!seg->sections.empty()) { - loadCommands.push_back(make(seg->name, seg)); + if (seg->isNeeded()) { + headerSection->addLoadCommand(make(seg->name, seg)); seg->index = segIndex++; 
} } @@ -296,146 +286,82 @@ uint64_t dylibOrdinal = 1; for (InputFile *file : inputFiles) { if (auto *dylibFile = dyn_cast(file)) { - loadCommands.push_back(make(dylibFile->dylibName)); + headerSection->addLoadCommand(make(dylibFile->dylibName)); dylibFile->ordinal = dylibOrdinal++; } } // TODO: dyld requires libSystem to be loaded. libSystem is a universal // binary and we don't have support for that yet, so mock it out here. - loadCommands.push_back(make("/usr/lib/libSystem.B.dylib")); + headerSection->addLoadCommand( + make("/usr/lib/libSystem.B.dylib")); } -void Writer::scanRelocations() { - for (InputSection *sect : inputSections) - for (Reloc &r : sect->relocs) - if (auto *s = r.target.dyn_cast()) - if (auto *dylibSymbol = dyn_cast(s)) - in.got->addEntry(*dylibSymbol); +void Writer::createHiddenSections() { + headerSection = createInputSection(); + bindingSection = createInputSection(); + symtabSection = createInputSection(); + stringPoolSection = createInputSection(); + createInputSection(); } -void Writer::assignAddresses() { - uint64_t addr = ImageBase + sizeof(mach_header_64); - - uint64_t size = 0; - for (LoadCommand *lc : loadCommands) - size += lc->getSize(); - sizeofCmds = size; - addr += size; +void Writer::sortSections() { + sortByOrder(inputSections, sectionOrder); // TODO This is wrong; input sections ought to be grouped into // output sections, which are then organized like this. - uint64_t nSect = 0; - for (OutputSegment *seg : outputSegments) { - addr = alignTo(addr, PageSize); - - for (auto &p : seg->sections) { - ArrayRef sections = p.second; - for (InputSection *isec : sections) { - addr = alignTo(addr, isec->align); - isec->addr = addr; - addr += isec->getSize(); - isec->nSect = ++nSect; - } + uint32_t sectionIndex = 0; + // Add input sections to output segments. 
+ for (InputSection *isec : inputSections) { + if (isec->isNeeded()) { + if (!isec->isHidden()) + isec->sectionIndex = ++sectionIndex; + getOrCreateOutputSegment(isec->segname)->addSection(isec); } } - - addr = alignTo(addr, PageSize); - linkEditSeg->addr = addr; - linkEditSeg->fileOff = addr - ImageBase; } -// LC_DYLD_INFO_ONLY contains symbol import/export information. Imported -// symbols are described by a sequence of bind opcodes, which allow for a -// compact encoding. Exported symbols are described using a trie. -void Writer::createDyldInfoContents() { - uint64_t sectionStart = linkEditSeg->getOffset(); - raw_svector_ostream os{linkEditSeg->contents}; - - if (in.got->getSize() != 0) { - // Emit bind opcodes, which tell dyld which dylib symbols to load. - - // Tell dyld to write to the section containing the GOT. - os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | - in.got->parent->index); - encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os); - for (const DylibSymbol *sym : in.got->getEntries()) { - // TODO: Implement compact encoding -- we only need to encode the - // differences between consecutive symbol entries. 
- if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) { - os << static_cast(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | - sym->file->ordinal); - } else { - error("TODO: Support larger dylib symbol ordinals"); - continue; - } - os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << sym->getName() << '\0' - << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) - << static_cast(BIND_OPCODE_DO_BIND); +void Writer::assignAddresses(OutputSegment *seg) { + addr = alignTo(addr, PageSize); + fileOff = alignTo(fileOff, PageSize); + seg->fileOff = fileOff; + + for (auto &p : seg->getSections()) { + ArrayRef sections = p.second; + for (InputSection *isec : sections) { + addr = alignTo(addr, isec->align); + isec->addr = addr; + addr += isec->getSize(); + fileOff += isec->getFileSize(); } - - os << static_cast(BIND_OPCODE_DONE); - - dyldInfoSeg->bindOff = sectionStart; - dyldInfoSeg->bindSize = linkEditSeg->getOffset() - sectionStart; } - - // TODO: emit bind opcodes for lazy symbols. - // TODO: Implement symbol export trie. } void Writer::createSymtabContents() { - uint64_t start = linkEditSeg->getOffset(); - - symtabSeg->symOff = start; - symtabSeg->nSyms = 0; - - SmallVector stringTable; - raw_svector_ostream stringTableOs{stringTable}; - // An n_strx value of 0 always indicates the empty string, so we must locate - // our non-empty string values at positive offsets in the string pool. - // Therefore we insert a dummy value at position zero. 
- stringTableOs << '\0'; - - raw_svector_ostream contentsOs{linkEditSeg->contents}; for (Symbol *sym : symtab->getSymbols()) { - uint8_t nType = N_UNDF; - uint8_t nSect = NO_SECT; - uint16_t nDesc = 0; - uint64_t nValue = 0; + uint32_t strx = 0; + uint8_t type = N_UNDF; + uint8_t sect = NO_SECT; + uint16_t desc = 0; + uint64_t value = 0; // TODO support other symbol types // TODO populate n_desc if (auto defined = dyn_cast(sym)) { - nType = (N_EXT | N_SECT); - nSect = defined->isec->nSect; + type = (N_EXT | N_SECT); + sect = defined->isec->sectionIndex; // For the N_SECT symbol type, n_value is the address of the symbol - nValue = defined->value + defined->isec->addr; + value = defined->value + defined->isec->addr; } - - ++symtabSeg->nSyms; - auto nStrx = stringTable.size(); - stringTableOs << sym->getName() << '\0'; - - // Emit one nlist_64 struct. - endian::write(contentsOs, nStrx, endianness::little); // n_strx - contentsOs << nType; // n_type - contentsOs << nSect; // n_sect - endian::write(contentsOs, nDesc, endianness::little); // n_desc - endian::write(contentsOs, nValue, endianness::little); // n_value + strx = stringPoolSection->addString(sym->getName()); + symtabSection->addSymbol(strx, type, sect, desc, value); } - - // The string table is located right after the symbol table. 
- symtabSeg->strOff = start + symtabSeg->nSyms * sizeof(nlist_64); - symtabSeg->strSize = stringTable.size(); - contentsOs << stringTable; } void Writer::openFile() { Expected> bufferOrErr = - FileOutputBuffer::create(config->outputFile, fileSize, + FileOutputBuffer::create(config->outputFile, fileOff, FileOutputBuffer::F_executable); if (!bufferOrErr) @@ -445,51 +371,52 @@ buffer = std::move(*bufferOrErr); } -void Writer::writeHeader() { - auto *hdr = reinterpret_cast(buffer->getBufferStart()); - hdr->magic = MH_MAGIC_64; - hdr->cputype = CPU_TYPE_X86_64; - hdr->cpusubtype = CPU_SUBTYPE_X86_64_ALL | CPU_SUBTYPE_LIB64; - hdr->filetype = MH_EXECUTE; - hdr->ncmds = loadCommands.size(); - hdr->sizeofcmds = sizeofCmds; - hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL; - - uint8_t *p = reinterpret_cast(hdr + 1); - for (LoadCommand *lc : loadCommands) { - lc->writeTo(p); - p += lc->getSize(); - } -} - void Writer::writeSections() { uint8_t *buf = buffer->getBufferStart(); - - for (OutputSegment *seg : outputSegments) - for (auto § : seg->sections) - for (InputSection *isec : sect.second) - isec->writeTo(buf + isec->addr - ImageBase); - - memcpy(buf + linkEditSeg->fileOff, linkEditSeg->contents.data(), - linkEditSeg->contents.size()); + for (OutputSegment *seg : outputSegments) { + uint64_t fileOff = seg->fileOff; + for (auto § : seg->getSections()) { + for (InputSection *isec : sect.second) { + isec->writeTo(buf + fileOff); + fileOff += isec->getFileSize(); + } + } + } } void Writer::run() { - createLoadCommands(); scanRelocations(); - assignAddresses(); + createHiddenSections(); + // Sort and assign sections to their respective segments. No more sections can + // be created after this method runs. + sortSections(); + // dyld requires __LINKEDIT segment to always exist (even if empty). + getOrCreateOutputSegment(segment_names::linkEdit); + // No more segments can be created after this method runs. 
+ createLoadCommands(); + + // Ensure that segments (and the sections they contain) are allocated + // addresses in ascending order, which dyld requires. + // + // Note that at this point, __LINKEDIT sections are empty, but we need to + // determine addresses of other segments/sections before generating its + // contents. + for (OutputSegment *seg : outputSegments) + assignAddresses(seg); - // Fill __LINKEDIT contents - createDyldInfoContents(); + // Fill __LINKEDIT contents. + bindingSection->encode(); createSymtabContents(); - fileSize = linkEditSeg->fileOff + linkEditSeg->contents.size(); + // Now that __LINKEDIT is filled out, do a proper calculation of its + // addresses and offsets. We don't have to recalculate the other segments + // since sortSections() ensures that __LINKEDIT is the last segment. + assignAddresses(getOutputSegment(segment_names::linkEdit)); openFile(); if (errorCount()) return; - writeHeader(); writeSections(); if (auto e = buffer->commit()) @@ -499,6 +426,5 @@ void macho::writeResult() { Writer().run(); } void macho::createSyntheticSections() { - in.got = make(); - inputSections.push_back(in.got); + in.got = createInputSection(); } diff --git a/lld/test/MachO/segments.s b/lld/test/MachO/segments.s --- a/lld/test/MachO/segments.s +++ b/lld/test/MachO/segments.s @@ -3,13 +3,46 @@ # RUN: lld -flavor darwinnew -o %t %t.o # RUN: llvm-readobj --macho-segment %t | FileCheck %s -# These segments must always be present. -# CHECK-DAG: Name: __PAGEZERO -# CHECK-DAG: Name: __LINKEDIT -# CHECK-DAG: Name: __TEXT +## These two segments must always be present at the start of an executable. +# CHECK-NOT: Segment { +# CHECK: Segment { +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK: Name: __PAGEZERO +# CHECK: Size: 72 +# CHECK: vmaddr: +# CHECK: vmsize: +# CHECK: fileoff: 0 +# CHECK: filesize: 0 +## The kernel won't execute a binary with the wrong protections for __PAGEZERO. 
+# CHECK: maxprot: --- +# CHECK: initprot: --- +# CHECK: nsects: 0 +# CHECK: flags: 0x0 +# CHECK: } +# CHECK: Segment { +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK: Name: __TEXT +# CHECK: Size: 152 +# CHECK: vmaddr: +# CHECK: vmsize: +## dyld3 assumes that the __TEXT segment starts from the file header +# CHECK: fileoff: 0 +# CHECK: filesize: +# CHECK: maxprot: rwx +# CHECK: initprot: r-x +# CHECK: nsects: 1 +# CHECK: flags: 0x0 +# CHECK: } -# Check that we handle max-length names correctly. -# CHECK-DAG: Name: maxlen_16ch_name +## Check that we handle max-length names correctly. +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK-NEXT: Name: maxlen_16ch_name + +## This segment must always be present at the end of an executable. +# CHECK: Name: __LINKEDIT +# CHECK: maxprot: rwx +# CHECK: initprot: r-- +# CHECK-NOT: Cmd: LC_SEGMENT_64 .text .global _main diff --git a/lld/test/MachO/text-segment.s b/lld/test/MachO/text-segment.s deleted file mode 100644 --- a/lld/test/MachO/text-segment.s +++ /dev/null @@ -1,15 +0,0 @@ -# REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o -# RUN: lld -flavor darwinnew -o %t %t.o -# RUN: llvm-readobj --macho-segment %t | FileCheck %s - -# CHECK: Name: __TEXT -# CHECK-NOT: } -# dyld3 assumes that the __TEXT segment starts from the file header -# CHECK: fileoff: 0 - -.text -.global _main -_main: - mov $0, %rax - ret