diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt --- a/lld/MachO/CMakeLists.txt +++ b/lld/MachO/CMakeLists.txt @@ -8,6 +8,8 @@ ExportTrie.cpp InputFiles.cpp InputSection.cpp + MergedOutputSection.cpp + OutputSection.cpp OutputSegment.cpp SymbolTable.cpp Symbols.cpp diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -9,6 +9,7 @@ #include "Driver.h" #include "Config.h" #include "InputFiles.h" +#include "OutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" diff --git a/lld/MachO/ExportTrie.h b/lld/MachO/ExportTrie.h --- a/lld/MachO/ExportTrie.h +++ b/lld/MachO/ExportTrie.h @@ -24,7 +24,7 @@ void addSymbol(const Symbol &sym) { exported.push_back(&sym); } // Returns the size in bytes of the serialized trie. size_t build(); - void writeTo(uint8_t *buf); + void writeTo(uint8_t *buf) const; private: TrieNode *makeNode(); diff --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp --- a/lld/MachO/ExportTrie.cpp +++ b/lld/MachO/ExportTrie.cpp @@ -65,7 +65,7 @@ size_t offset = 0; bool updateOffset(size_t &nextOffset); - void writeTo(uint8_t *buf); + void writeTo(uint8_t *buf) const; }; bool TrieNode::updateOffset(size_t &nextOffset) { @@ -90,7 +90,7 @@ return result; } -void TrieNode::writeTo(uint8_t *buf) { +void TrieNode::writeTo(uint8_t *buf) const { buf += offset; if (info) { // TrieNodes with Symbol info: size, flags address @@ -209,7 +209,7 @@ return offset; } -void TrieBuilder::writeTo(uint8_t *buf) { +void TrieBuilder::writeTo(uint8_t *buf) const { for (TrieNode *node : nodes) node->writeTo(buf); } diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -43,7 +43,7 @@ #include "InputFiles.h" #include "InputSection.h" -#include "OutputSegment.h" +#include "OutputSection.h" #include "SymbolTable.h" #include "Symbols.h" #include "Target.h" diff --git a/lld/MachO/InputSection.h 
b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -19,7 +19,7 @@ class InputFile; class InputSection; -class OutputSegment; +class OutputSection; class Symbol; struct Reloc { @@ -35,26 +35,21 @@ virtual size_t getSize() const { return data.size(); } virtual uint64_t getFileSize() const { return getSize(); } uint64_t getFileOffset() const; - // Don't emit section_64 headers for hidden sections. - virtual bool isHidden() const { return false; } - // Unneeded sections are omitted entirely (header and body). - virtual bool isNeeded() const { return true; } + uint64_t getVA() const; + virtual void writeTo(uint8_t *buf); InputFile *file = nullptr; - OutputSegment *parent = nullptr; StringRef name; StringRef segname; - ArrayRef data; + OutputSection *parent = nullptr; + uint64_t outSecOff = 0; - // TODO these properties ought to live in an OutputSection class. - // Move them once available. - uint64_t addr = 0; uint32_t align = 1; - uint32_t sectionIndex = 0; uint32_t flags = 0; + ArrayRef data; std::vector relocs; }; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -22,9 +22,11 @@ std::vector macho::inputSections; uint64_t InputSection::getFileOffset() const { - return parent->fileOff + addr - parent->firstSection()->addr; + return parent->getFileOffset() + outSecOff; } +uint64_t InputSection::getVA() const { return parent->addr + outSecOff; } + void InputSection::writeTo(uint8_t *buf) { memcpy(buf, data.data(), data.size()); @@ -37,13 +39,13 @@ va = s->getVA(); } } else if (auto *isec = r.target.dyn_cast()) - va = isec->addr; + va = isec->getVA(); else llvm_unreachable("Unknown relocation target"); uint64_t val = va + r.addend; if (1) // TODO: handle non-pcrel relocations - val -= addr - ImageBase + r.offset; + val -= getVA() - ImageBase + r.offset; target->relocateOne(buf + r.offset, r.type, val); } } diff --git 
a/lld/MachO/MergedOutputSection.h b/lld/MachO/MergedOutputSection.h
new file mode 100644
--- /dev/null
+++ b/lld/MachO/MergedOutputSection.h
@@ -0,0 +1,47 @@
+//===- MergedOutputSection.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_MERGED_OUTPUT_SECTION_H
+#define LLD_MACHO_MERGED_OUTPUT_SECTION_H
+
+#include "InputSection.h"
+#include "OutputSection.h"
+#include "lld/Common/LLVM.h"
+
+namespace lld {
+namespace macho {
+
+// An OutputSection whose contents are the input sections merged into it via
+// mergeInput(), laid out in the order they were added.
+class MergedOutputSection : public OutputSection {
+public:
+  const InputSection *firstSection() const { return inputs.front(); }
+  const InputSection *lastSection() const { return inputs.back(); }
+
+  // These accessors will only be valid after finalizing the section
+  size_t getSize() const override { return size; }
+  uint64_t getFileSize() const override { return fileSize; }
+
+  void mergeInput(InputSection *input) override;
+  void finalize() override;
+
+  void writeTo(uint8_t *buf) const override;
+
+  std::vector<InputSection *> inputs;
+
+private:
+  void mergeFlags(uint32_t inputFlags);
+
+  size_t size = 0;
+  uint64_t fileSize = 0;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/MergedOutputSection.cpp b/lld/MachO/MergedOutputSection.cpp
new file mode 100644
--- /dev/null
+++ b/lld/MachO/MergedOutputSection.cpp
@@ -0,0 +1,82 @@
+//===- MergedOutputSection.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MergedOutputSection.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+using namespace lld;
+using namespace lld::macho;
+
+// Appends `input` to this section and makes this section its parent. The first
+// input seeds the alignment and flags; later inputs raise the alignment to the
+// maximum seen and have their flags merged in.
+void MergedOutputSection::mergeInput(InputSection *input) {
+  if (this->inputs.empty()) {
+    this->align = input->align;
+    this->flags = input->flags;
+  } else {
+    this->mergeFlags(input->flags);
+    this->align = std::max(this->align, input->align);
+  }
+
+  this->inputs.push_back(input);
+  input->parent = this;
+}
+
+// Assigns every input its offset within this section and computes the
+// section's address-space and file sizes. Relies on this->addr being set.
+void MergedOutputSection::finalize() {
+  uint64_t addr = this->addr;
+  this->fileSize = 0;
+
+  for (InputSection *i : inputs) {
+    // Step over the alignment padding before placing the input; otherwise the
+    // next input would be placed as if no padding existed and could overlap.
+    addr = alignTo(addr, i->align);
+    i->outSecOff = addr - this->addr;
+    addr += i->getSize();
+    // Inputs are written at outSecOff, so padding occupies file space too.
+    this->fileSize = i->outSecOff + i->getFileSize();
+  }
+  this->size = addr - this->addr;
+}
+
+// Writes each input at the offset that finalize() assigned to it, so that
+// file offsets agree with InputSection::getFileOffset() and getVA().
+void MergedOutputSection::writeTo(uint8_t *buf) const {
+  for (InputSection *i : inputs)
+    i->writeTo(buf + i->outSecOff);
+}
+
+// TODO: this is most likely wrong; reconsider how section flags
+// are actually merged.
+void MergedOutputSection::mergeFlags(uint32_t inputFlags) {
+  uint8_t sectionFlag = MachO::SECTION_TYPE & inputFlags;
+  if (sectionFlag != (MachO::SECTION_TYPE & this->flags))
+    error("Cannot add merge section; inconsistent type flags " +
+          Twine(sectionFlag));
+
+  uint32_t inconsistentFlags =
+      MachO::S_ATTR_DEBUG | MachO::S_ATTR_STRIP_STATIC_SYMS |
+      MachO::S_ATTR_NO_DEAD_STRIP | MachO::S_ATTR_LIVE_SUPPORT;
+  if ((inputFlags ^ this->flags) & inconsistentFlags)
+    error("Cannot add merge section; cannot merge inconsistent flags");
+
+  // Keep S_ATTR_PURE_INSTRUCTIONS only when both this section and the input
+  // have it, i.e. drop it as soon as any merged input isn't pure.
+  uint32_t pureMask =
+      ~MachO::S_ATTR_PURE_INSTRUCTIONS | (inputFlags & this->flags);
+
+  // Merge the rest
+  this->flags |= inputFlags;
+  this->flags &= pureMask;
+}
diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h
new file mode 100644
--- /dev/null
+++ b/lld/MachO/OutputSection.h
@@ -0,0 +1,62 @@
+//===- OutputSection.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_OUTPUT_SECTION_H
+#define LLD_MACHO_OUTPUT_SECTION_H
+
+#include "lld/Common/LLVM.h"
+
+namespace lld {
+namespace macho {
+
+class InputSection;
+class OutputSegment;
+
+class OutputSection {
+public:
+  virtual ~OutputSection() = default;
+
+  // These accessors will only be valid after finalizing the section.
+  virtual uint64_t getFileOffset() const;
+  virtual uint64_t getSegmentOffset() const;
+
+  // How much space the section occupies in the address space.
+  virtual size_t getSize() const = 0;
+  // How much space the section occupies in the file. Most sections are copied
+  // as-is so their file size is the same as their address space size.
+  virtual uint64_t getFileSize() const { return getSize(); }
+
+  // Hidden sections omit header content, but body content is still present.
+  virtual bool isHidden() const { return !this->isNeeded(); }
+  // Unneeded sections are omitted entirely (header and body).
+  virtual bool isNeeded() const { return true; }
+
+  // Some sections may allow coalescing other raw input sections.
+  virtual void mergeInput(InputSection *input);
+
+  // Specifically finalizes addresses and section size, not content.
+ virtual void finalize() { + // TODO investigate refactoring synthetic section finalization logic into overrides + // of this function. + } + + virtual void writeTo(uint8_t *buf) const = 0; + + StringRef name; + OutputSegment *parent = nullptr; + + uint32_t index = 0; + uint64_t addr = 0; + uint32_t align = 1; + uint32_t flags = 0; +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/OutputSection.cpp b/lld/MachO/OutputSection.cpp new file mode 100644 --- /dev/null +++ b/lld/MachO/OutputSection.cpp @@ -0,0 +1,28 @@ +//===- OutputSection.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "OutputSection.h" +#include "OutputSegment.h" +#include "lld/Common/ErrorHandler.h" + +using namespace llvm; +using namespace lld; +using namespace lld::macho; + +uint64_t OutputSection::getFileOffset() const { + return parent->fileOff + this->getSegmentOffset(); +} + +uint64_t OutputSection::getSegmentOffset() const { + return addr - parent->firstSection()->addr; +} + +void OutputSection::mergeInput(InputSection *input) { + error("Section " + this->name + " in segment " + parent->name + + " cannot be merged"); +} diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -9,6 +9,7 @@ #ifndef LLD_MACHO_OUTPUT_SEGMENT_H #define LLD_MACHO_OUTPUT_SEGMENT_H +#include "OutputSection.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/MapVector.h" @@ -23,34 +24,39 @@ } // namespace segment_names +class OutputSection; class InputSection; class OutputSegment { public: - InputSection *firstSection() const { return sections.front().second.at(0); } + typedef llvm::MapVector 
SectionMap; - InputSection *lastSection() const { return sections.back().second.back(); } + const OutputSection *firstSection() const { return sections.front().second; } + const OutputSection *lastSection() const { return sections.back().second; } bool isNeeded() const { - return !sections.empty() || name == segment_names::linkEdit; + if (name == segment_names::linkEdit) + return true; + for (auto i : sections) { + if (i.second->isNeeded()) { + return true; + } + } + return false; } - void addSection(InputSection *); + OutputSection *getOrCreateOutputSection(StringRef name); + void setOutputSection(OutputSection *os); - const llvm::MapVector> & - getSections() const { - return sections; - } + const SectionMap &getSections() const { return sections; } + size_t numNonHiddenSections() const; uint64_t fileOff = 0; StringRef name; - uint32_t numNonHiddenSections = 0; uint32_t maxProt = 0; uint32_t initProt = 0; uint8_t index; - -private: - llvm::MapVector> sections; + SectionMap sections; }; extern std::vector outputSegments; diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -8,6 +8,7 @@ #include "OutputSegment.h" #include "InputSection.h" +#include "MergedOutputSection.h" #include "lld/Common/Memory.h" #include "llvm/BinaryFormat/MachO.h" @@ -33,13 +34,29 @@ return VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; } -void OutputSegment::addSection(InputSection *isec) { - isec->parent = this; - std::vector &vec = sections[isec->name]; - if (vec.empty() && !isec->isHidden()) { - ++numNonHiddenSections; +size_t OutputSegment::numNonHiddenSections() const { + size_t count = 0; + for (auto i : sections) { + count += (i.second->isHidden() ? 
0 : 1); } - vec.push_back(isec); + return count; +} + +void OutputSegment::setOutputSection(OutputSection *os) { + os->parent = this; + this->sections[os->name] = os; +} + +OutputSection *OutputSegment::getOrCreateOutputSection(StringRef name) { + OutputSegment::SectionMap::iterator i = this->sections.find(name); + if (i != this->sections.end()) { + return i->second; + } + + auto *os = make(); + os->name = name; + this->setOutputSection(os); + return os; } static llvm::DenseMap nameToOutputSegment; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -81,7 +81,7 @@ inline uint64_t Symbol::getVA() const { if (auto *d = dyn_cast(this)) - return d->isec->addr + d->value - ImageBase; + return d->isec->getVA() + d->value - ImageBase; return 0; } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -10,12 +10,10 @@ #define LLD_MACHO_SYNTHETIC_SECTIONS_H #include "ExportTrie.h" -#include "InputSection.h" +#include "OutputSection.h" #include "Target.h" #include "llvm/ADT/SetVector.h" -using namespace llvm::MachO; - namespace lld { namespace macho { @@ -34,13 +32,15 @@ class LoadCommand; // The header of the Mach-O file, which must have a file offset of zero. -class MachHeaderSection : public InputSection { +class MachHeaderSection : public OutputSection { public: MachHeaderSection(); void addLoadCommand(LoadCommand *); bool isHidden() const override { return true; } size_t getSize() const override; - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; + + const char *segname; private: std::vector loadCommands; @@ -49,17 +49,20 @@ // A hidden section that exists solely for the purpose of creating the // __PAGEZERO segment, which is used to catch null pointer dereferences. 
-class PageZeroSection : public InputSection { +class PageZeroSection : public OutputSection { public: PageZeroSection(); bool isHidden() const override { return true; } size_t getSize() const override { return ImageBase; } uint64_t getFileSize() const override { return 0; } + void writeTo(uint8_t *buf) const override {} + + const char *segname; }; // This section will be populated by dyld with addresses to non-lazily-loaded // dylib symbols. -class GotSection : public InputSection { +class GotSection : public OutputSection { public: GotSection(); @@ -68,21 +71,23 @@ return entries; } - size_t getSize() const override { return entries.size() * WordSize; } - bool isNeeded() const override { return !entries.empty(); } - void writeTo(uint8_t *buf) override { + size_t getSize() const override { return entries.size() * WordSize; } + + void writeTo(uint8_t *buf) const override { // Nothing to write, GOT contains all zeros at link time; it's populated at // runtime by dyld. } + const char *segname; + private: llvm::SetVector entries; }; // Stores bind opcodes for telling dyld which symbols to load non-lazily. -class BindingSection : public InputSection { +class BindingSection : public OutputSection { public: BindingSection(); void finalizeContents(); @@ -92,13 +97,14 @@ // section headers. bool isHidden() const override { return true; } bool isNeeded() const override; - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; + const char *segname; SmallVector contents; }; // Stores a trie that describes the set of exported symbols. -class ExportSection : public InputSection { +class ExportSection : public OutputSection { public: ExportSection(); void finalizeContents(); @@ -107,7 +113,9 @@ // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in // section headers. 
bool isHidden() const override { return true; } - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; + + const char *segname; private: TrieBuilder trieBuilder; @@ -115,7 +123,7 @@ }; // Stores the strings referenced by the symbol table. -class StringPoolSection : public InputSection { +class StringPoolSection : public OutputSection { public: StringPoolSection(); // Returns the start offset of the added string. @@ -125,7 +133,9 @@ // offsets are recorded in the LC_SYMTAB load command, instead of in section // headers. bool isHidden() const override { return true; } - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; + + const char *segname; private: // An n_strx value of 0 always indicates the empty string, so we must locate @@ -140,7 +150,7 @@ size_t strx; }; -class SymtabSection : public InputSection { +class SymtabSection : public OutputSection { public: SymtabSection(StringPoolSection &); void finalizeContents(); @@ -150,7 +160,9 @@ // offsets are recorded in the LC_SYMTAB load command, instead of in section // headers. 
bool isHidden() const override { return true; } - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; + + const char *segname; private: StringPoolSection &stringPoolSection; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -42,7 +42,7 @@ return sizeof(mach_header_64) + sizeOfCmds; } -void MachHeaderSection::writeTo(uint8_t *buf) { +void MachHeaderSection::writeTo(uint8_t *buf) const { auto *hdr = reinterpret_cast(buf); hdr->magic = MH_MAGIC_64; hdr->cputype = CPU_TYPE_X86_64; @@ -107,7 +107,7 @@ raw_svector_ostream os{contents}; os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | in.got->parent->index); - encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os); + encodeULEB128(in.got->getSegmentOffset(), os); for (const DylibSymbol *sym : in.got->getEntries()) { // TODO: Implement compact encoding -- we only need to encode the // differences between consecutive symbol entries. 
@@ -127,7 +127,7 @@ os << static_cast(BIND_OPCODE_DONE); } -void BindingSection::writeTo(uint8_t *buf) { +void BindingSection::writeTo(uint8_t *buf) const { memcpy(buf, contents.data(), contents.size()); } @@ -144,7 +144,7 @@ size = trieBuilder.build(); } -void ExportSection::writeTo(uint8_t *buf) { trieBuilder.writeTo(buf); } +void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } SymtabSection::SymtabSection(StringPoolSection &stringPoolSection) : stringPoolSection(stringPoolSection) { @@ -163,7 +163,7 @@ symbols.push_back({sym, stringPoolSection.addString(sym->getName())}); } -void SymtabSection::writeTo(uint8_t *buf) { +void SymtabSection::writeTo(uint8_t *buf) const { auto *nList = reinterpret_cast(buf); for (const SymtabEntry &entry : symbols) { nList->n_strx = entry.strx; @@ -171,9 +171,9 @@ // TODO populate n_desc if (auto defined = dyn_cast(entry.sym)) { nList->n_type = N_EXT | N_SECT; - nList->n_sect = defined->isec->sectionIndex; + nList->n_sect = defined->isec->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol - nList->n_value = defined->value + defined->isec->addr; + nList->n_value = defined->value + defined->isec->getVA(); } ++nList; } @@ -191,7 +191,7 @@ return strx; } -void StringPoolSection::writeTo(uint8_t *buf) { +void StringPoolSection::writeTo(uint8_t *buf) const { uint32_t off = 0; for (StringRef str : pool) { memcpy(buf + off, str.data(), str.size()); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -37,8 +37,7 @@ Writer() : buffer(errorHandler().outputBuffer) {} void scanRelocations(); - void createHiddenSections(); - void sortSections(); + void createOutputSections(); void createLoadCommands(); void assignAddresses(OutputSegment *); void createSymtabContents(); @@ -101,7 +100,7 @@ uint32_t getSize() const override { return sizeof(segment_command_64) + - seg->numNonHiddenSections * sizeof(section_64); + 
seg->numNonHiddenSections() * sizeof(section_64); } void writeTo(uint8_t *buf) const override { @@ -115,20 +114,19 @@ c->maxprot = seg->maxProt; c->initprot = seg->initProt; - if (seg->getSections().empty()) + if (!seg->isNeeded()) return; c->vmaddr = seg->firstSection()->addr; c->vmsize = seg->lastSection()->addr + seg->lastSection()->getSize() - c->vmaddr; - c->nsects = seg->numNonHiddenSections; + c->nsects = seg->numNonHiddenSections(); for (auto &p : seg->getSections()) { StringRef s = p.first; - ArrayRef sections = p.second; - for (InputSection *isec : sections) - c->filesize += isec->getFileSize(); - if (sections[0]->isHidden()) + OutputSection *section = p.second; + c->filesize += section->getFileSize(); + if (section->isHidden()) continue; auto *sectHdr = reinterpret_cast(buf); @@ -137,16 +135,11 @@ memcpy(sectHdr->sectname, s.data(), s.size()); memcpy(sectHdr->segname, name.data(), name.size()); - sectHdr->addr = sections[0]->addr; - sectHdr->offset = sections[0]->getFileOffset(); - sectHdr->align = sections[0]->align; - uint32_t maxAlign = 0; - for (const InputSection *section : sections) - maxAlign = std::max(maxAlign, section->align); - sectHdr->align = Log2_32(maxAlign); - sectHdr->flags = sections[0]->flags; - sectHdr->size = sections.back()->addr + sections.back()->getSize() - - sections[0]->addr; + sectHdr->addr = section->addr; + sectHdr->offset = section->getFileOffset(); + sectHdr->align = Log2_32(section->align); + sectHdr->flags = section->flags; + sectHdr->size = section->getSize(); } } @@ -300,19 +293,26 @@ // Return a {segmentOrder, sectionOrder} pair. Using this as a key will // ensure that all sections in the same segment are sorted contiguously. 
- std::pair order(const InputSection *isec) { - auto it = orderMap.find(isec->segname); + std::pair order(const OutputSection *os) { + auto it = orderMap.find(os->parent->name); if (it == orderMap.end()) return {orderMap[defaultPosition].segmentOrder, 0}; OrderInfo &info = it->second; - auto sectIt = info.sectionOrdering.find(isec->name); + auto sectIt = info.sectionOrdering.find(os->name); if (sectIt != info.sectionOrdering.end()) return {info.segmentOrder, sectIt->second}; return {info.segmentOrder, info.sectionOrdering.size()}; } - bool operator()(const InputSection *a, const InputSection *b) { - return order(a) < order(b); + // Sort sections within a common segment + bool operator()(const std::pair a, + const std::pair b) { + return order(a.second) < order(b.second); + } + + // Sort segments using a representative output section. + bool operator()(const OutputSegment *a, const OutputSegment *b) { + return order(a->firstSection()) < order(b->firstSection()); } private: @@ -323,9 +323,9 @@ } // namespace template -SectionType *createInputSection(ArgT &&... args) { +SectionType *createOutputSection(ArgT &&... 
args) { auto *section = make(std::forward(args)...); - inputSections.push_back(section); + getOrCreateOutputSegment(section->segname)->setOutputSection(section); return section; } @@ -379,16 +379,16 @@ make("/usr/lib/libSystem.B.dylib")); } -void Writer::createHiddenSections() { - headerSection = createInputSection(); - bindingSection = createInputSection(); - stringPoolSection = createInputSection(); - symtabSection = createInputSection(*stringPoolSection); - exportSection = createInputSection(); - +void Writer::createOutputSections() { + // First, create hidden sections + headerSection = createOutputSection(); + bindingSection = createOutputSection(); + stringPoolSection = createOutputSection(); + symtabSection = createOutputSection(*stringPoolSection); + exportSection = createOutputSection(); switch (config->outputType) { case MH_EXECUTE: - createInputSection(); + createOutputSection(); break; case MH_DYLIB: break; @@ -396,20 +396,30 @@ error("unhandled output file type"); return; } -} -void Writer::sortSections() { - llvm::stable_sort(inputSections, SectionComparator()); + // Then merge input sections into output sections/segments. + for (InputSection *isec : inputSections) { + getOrCreateOutputSegment(isec->segname) + ->getOrCreateOutputSection(isec->name) + ->mergeInput(isec); + } + + // Sorting only can happen once all outputs have been collected. + // Since output sections are grouped by segment, sorting happens + // first over all segments, then over sections per segment. + auto comparator = SectionComparator(); + llvm::stable_sort(outputSegments, comparator); - // TODO This is wrong; input sections ought to be grouped into - // output sections, which are then organized like this. + // Now that the output sections are sorted, assign the final + // output section indices. uint32_t sectionIndex = 0; - // Add input sections to output segments. 
- for (InputSection *isec : inputSections) { - if (isec->isNeeded()) { - if (!isec->isHidden()) - isec->sectionIndex = ++sectionIndex; - getOrCreateOutputSegment(isec->segname)->addSection(isec); + for (OutputSegment *seg : outputSegments) { + llvm::stable_sort(seg->sections, comparator); + for (auto &p : seg->getSections()) { + OutputSection *section = p.second; + if (!section->isHidden()) { + section->index = ++sectionIndex; + } } } } @@ -420,13 +430,13 @@ seg->fileOff = fileOff; for (auto &p : seg->getSections()) { - ArrayRef sections = p.second; - for (InputSection *isec : sections) { - addr = alignTo(addr, isec->align); - isec->addr = addr; - addr += isec->getSize(); - fileOff += isec->getFileSize(); - } + OutputSection *section = p.second; + addr = alignTo(addr, section->align); + section->addr = addr; + section->finalize(); + + addr += section->getSize(); + fileOff += section->getFileSize(); } } @@ -446,24 +456,23 @@ uint8_t *buf = buffer->getBufferStart(); for (OutputSegment *seg : outputSegments) { uint64_t fileOff = seg->fileOff; - for (auto &sect : seg->getSections()) { - for (InputSection *isec : sect.second) { - isec->writeTo(buf + fileOff); - fileOff += isec->getFileSize(); - } + for (auto &p : seg->getSections()) { + OutputSection *section = p.second; + section->writeTo(buf + fileOff); + fileOff += section->getFileSize(); } } } void Writer::run() { - scanRelocations(); - createHiddenSections(); - // Sort and assign sections to their respective segments. No more sections can - // be created after this method runs. - sortSections(); // dyld requires __LINKEDIT segment to always exist (even if empty). - getOrCreateOutputSegment(segment_names::linkEdit); - // No more segments can be created after this method runs. + auto *linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); + + scanRelocations(); + + // Sort and assign sections to their respective segments. No more sections nor + // segments may be created after this method runs. 
+ createOutputSections(); createLoadCommands(); // Ensure that segments (and the sections they contain) are allocated @@ -473,7 +482,8 @@ // determine addresses of other segments/sections before generating its // contents. for (OutputSegment *seg : outputSegments) - assignAddresses(seg); + if (seg != linkEditSegment) + assignAddresses(seg); // Fill __LINKEDIT contents. bindingSection->finalizeContents(); @@ -481,9 +491,8 @@ symtabSection->finalizeContents(); // Now that __LINKEDIT is filled out, do a proper calculation of its - // addresses and offsets. We don't have to recalculate the other segments - // since sortSections() ensures that __LINKEDIT is the last segment. - assignAddresses(getOutputSegment(segment_names::linkEdit)); + // addresses and offsets. + assignAddresses(linkEditSegment); openFile(); if (errorCount()) @@ -498,5 +507,5 @@ void macho::writeResult() { Writer().run(); } void macho::createSyntheticSections() { - in.got = createInputSection(); + in.got = createOutputSection(); } diff --git a/lld/test/MachO/section-merge.s b/lld/test/MachO/section-merge.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/section-merge.s @@ -0,0 +1,37 @@ +# REQUIRES: x86 +# RUN: mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \ +# RUN: -o %t/libhello.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libgoodbye.s \ +# RUN: -o %t/libgoodbye.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s \ +# RUN: -o %t/main.o +# RUN: lld -flavor darwinnew -o %t/output %t/libgoodbye.o %t/libhello.o %t/main.o +# RUN: llvm-readobj -symbols %t/output | FileCheck %s + +# CHECK: Name: _goodbye_world +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __cstring (0x2) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x[[#%X,BASE:]] + +# CHECK: Name: _hello_world +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: 
Section: __cstring (0x2) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x[[#BASE + 0x10]] + +.section __TEXT,__text +.global _goodbye_world +.global _hello_world +.global _main + +_main: + mov $0, %rax + ret