diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt --- a/lld/MachO/CMakeLists.txt +++ b/lld/MachO/CMakeLists.txt @@ -8,6 +8,8 @@ ExportTrie.cpp InputFiles.cpp InputSection.cpp + MergedOutputSection.cpp + OutputSection.cpp OutputSegment.cpp SymbolTable.cpp Symbols.cpp diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -9,6 +9,7 @@ #include "Driver.h" #include "Config.h" #include "InputFiles.h" +#include "OutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" diff --git a/lld/MachO/ExportTrie.h b/lld/MachO/ExportTrie.h --- a/lld/MachO/ExportTrie.h +++ b/lld/MachO/ExportTrie.h @@ -24,7 +24,7 @@ void addSymbol(const Symbol &sym) { exported.push_back(&sym); } // Returns the size in bytes of the serialized trie. size_t build(); - void writeTo(uint8_t *buf); + void writeTo(uint8_t *buf) const; private: TrieNode *makeNode(); diff --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp --- a/lld/MachO/ExportTrie.cpp +++ b/lld/MachO/ExportTrie.cpp @@ -76,7 +76,7 @@ // Returns whether the new estimated offset differs from the old one. 
bool updateOffset(size_t &nextOffset); - void writeTo(uint8_t *buf); + void writeTo(uint8_t *buf) const; }; bool TrieNode::updateOffset(size_t &nextOffset) { @@ -108,7 +108,7 @@ return result; } -void TrieNode::writeTo(uint8_t *buf) { +void TrieNode::writeTo(uint8_t *buf) const { buf += offset; if (info) { // TrieNodes with Symbol info: size, flags address @@ -227,7 +227,7 @@ return offset; } -void TrieBuilder::writeTo(uint8_t *buf) { +void TrieBuilder::writeTo(uint8_t *buf) const { for (TrieNode *node : nodes) node->writeTo(buf); } diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -43,7 +43,7 @@ #include "InputFiles.h" #include "InputSection.h" -#include "OutputSegment.h" +#include "OutputSection.h" #include "SymbolTable.h" #include "Symbols.h" #include "Target.h" diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -19,7 +19,7 @@ class InputFile; class InputSection; -class OutputSegment; +class OutputSection; class Symbol; struct Reloc { @@ -35,26 +35,22 @@ virtual size_t getSize() const { return data.size(); } virtual uint64_t getFileSize() const { return getSize(); } uint64_t getFileOffset() const; - // Don't emit section_64 headers for hidden sections. - virtual bool isHidden() const { return false; } - // Unneeded sections are omitted entirely (header and body). - virtual bool isNeeded() const { return true; } + uint64_t getVA() const; + virtual void writeTo(uint8_t *buf); InputFile *file = nullptr; - OutputSegment *parent = nullptr; StringRef name; StringRef segname; - ArrayRef data; + OutputSection *parent = nullptr; + uint64_t outSecOff = 0; + uint64_t outSecFileOff = 0; - // TODO these properties ought to live in an OutputSection class. - // Move them once available. 
- uint64_t addr = 0; uint32_t align = 1; - uint32_t sectionIndex = 0; uint32_t flags = 0; + ArrayRef data; std::vector relocs; }; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -22,9 +22,11 @@ std::vector macho::inputSections; uint64_t InputSection::getFileOffset() const { - return parent->fileOff + addr - parent->firstSection()->addr; + return parent->fileOff + outSecFileOff; } +uint64_t InputSection::getVA() const { return parent->addr + outSecOff; } + void InputSection::writeTo(uint8_t *buf) { if (!data.empty()) memcpy(buf, data.data(), data.size()); @@ -38,14 +40,14 @@ va = s->getVA(); } } else if (auto *isec = r.target.dyn_cast()) { - va = isec->addr; + va = isec->getVA(); } else { llvm_unreachable("Unknown relocation target"); } uint64_t val = va + r.addend; if (1) // TODO: handle non-pcrel relocations - val -= addr + r.offset; + val -= getVA() + r.offset; target->relocateOne(buf + r.offset, r.type, val); } } diff --git a/lld/MachO/MergedOutputSection.h b/lld/MachO/MergedOutputSection.h new file mode 100644 --- /dev/null +++ b/lld/MachO/MergedOutputSection.h @@ -0,0 +1,51 @@ +//===- MergedOutputSection.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_MERGED_OUTPUT_SECTION_H +#define LLD_MACHO_MERGED_OUTPUT_SECTION_H + +#include "InputSection.h" +#include "OutputSection.h" +#include "lld/Common/LLVM.h" + +namespace lld { +namespace macho { + +// Linking multiple files will inevitably mean resolving sections in different +// files that are labeled with the same segment and section name. 
This class
+// contains all such sections and writes the data from each section sequentially
+// in the final binary.
+class MergedOutputSection : public OutputSection {
+public:
+  MergedOutputSection(StringRef name) : OutputSection(name) {}
+
+  const InputSection *firstSection() const { return inputs.front(); }
+  const InputSection *lastSection() const { return inputs.back(); }
+
+  // These accessors will only be valid after finalizing the section
+  size_t getSize() const override { return size; }
+  uint64_t getFileSize() const override { return fileSize; }
+
+  void mergeInput(InputSection *input) override;
+  void finalize() override;
+
+  void writeTo(uint8_t *buf) const override;
+
+  std::vector<InputSection *> inputs;
+
+private:
+  void mergeFlags(uint32_t inputFlags);
+
+  size_t size = 0;
+  uint64_t fileSize = 0;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/MergedOutputSection.cpp b/lld/MachO/MergedOutputSection.cpp
new file mode 100644
--- /dev/null
+++ b/lld/MachO/MergedOutputSection.cpp
@@ -0,0 +1,86 @@
+//===- MergedOutputSection.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MergedOutputSection.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+using namespace lld;
+using namespace lld::macho;
+
+// Adds an input section to this output section and records this section as
+// the input's parent. The first input seeds the alignment and flags; every
+// later input raises the alignment to the maximum seen so far and has its
+// flags reconciled through mergeFlags().
+void MergedOutputSection::mergeInput(InputSection *input) {
+  if (inputs.empty()) {
+    align = input->align;
+    flags = input->flags;
+  } else {
+    mergeFlags(input->flags);
+    align = std::max(align, input->align);
+  }
+
+  inputs.push_back(input);
+  input->parent = this;
+}
+
+// Assigns each input section its offset within this output section, both in
+// the address space (outSecOff) and in the file (outSecFileOff), and computes
+// the total size and file size. Expects addr and fileOff to be set already.
+void MergedOutputSection::finalize() {
+  uint64_t isecAddr = addr;
+  uint64_t isecFileOff = fileOff;
+  for (InputSection *i : inputs) {
+    // Move the cursors themselves up to the aligned position. Aligning only
+    // the stored offsets would drop the padding from the running totals, so
+    // later inputs would overlap and size / fileSize would come out too small.
+    isecAddr = alignTo(isecAddr, i->align);
+    isecFileOff = alignTo(isecFileOff, i->align);
+    i->outSecOff = isecAddr - addr;
+    i->outSecFileOff = isecFileOff - fileOff;
+    isecAddr += i->getSize();
+    isecFileOff += i->getFileSize();
+  }
+  size = isecAddr - addr;
+  fileSize = isecFileOff - fileOff;
+}
+
+// Writes every input section into buf at its file offset within this output
+// section; those offsets are the ones assigned by finalize().
+void MergedOutputSection::writeTo(uint8_t *buf) const {
+  for (InputSection *isec : inputs) {
+    isec->writeTo(buf + isec->outSecFileOff);
+  }
+}
+
+// TODO: this is most likely wrong; reconsider how section flags
+// are actually merged. The logic presented here was written without
+// any form of informed research.
+void MergedOutputSection::mergeFlags(uint32_t inputFlags) {
+  uint8_t sectionFlag = MachO::SECTION_TYPE & inputFlags;
+  if (sectionFlag != (MachO::SECTION_TYPE & flags))
+    error("Cannot add merge section; inconsistent type flags " +
+          Twine(sectionFlag));
+
+  uint32_t inconsistentFlags =
+      MachO::S_ATTR_DEBUG | MachO::S_ATTR_STRIP_STATIC_SYMS |
+      MachO::S_ATTR_NO_DEAD_STRIP | MachO::S_ATTR_LIVE_SUPPORT;
+  if ((inputFlags ^ flags) & inconsistentFlags)
+    error("Cannot add merge section; cannot merge inconsistent flags");
+
+  // Negate pure instruction presence if any section isn't pure.
+ uint32_t pureMask = ~MachO::S_ATTR_PURE_INSTRUCTIONS | (inputFlags & flags); + + // Merge the rest + flags |= inputFlags; + flags &= pureMask; +} diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h new file mode 100644 --- /dev/null +++ b/lld/MachO/OutputSection.h @@ -0,0 +1,100 @@ +//===- OutputSection.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_OUTPUT_SECTION_H +#define LLD_MACHO_OUTPUT_SECTION_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/DenseMap.h" + +namespace lld { +namespace macho { + +class InputSection; +class OutputSegment; + +// Output sections represent the finalized sections present within the final +// linked executable. They can represent special sections (like the symbol +// table), or represent coalesced sections from the various inputs given to the +// linker with the same segment / section name. +class OutputSection { +public: + OutputSection(StringRef name) : name(name) {} + virtual ~OutputSection() = default; + + // These accessors will only be valid after finalizing the section. + uint64_t getSegmentOffset() const; + + // How much space the section occupies in the address space. + virtual size_t getSize() const = 0; + // How much space the section occupies in the file. Most sections are copied + // as-is so their file size is the same as their address space size. + virtual uint64_t getFileSize() const { return getSize(); } + + // Hidden sections omit header content, but body content is still present. + virtual bool isHidden() const { return !this->isNeeded(); } + // Unneeded sections are omitted entirely (header and body). 
+ virtual bool isNeeded() const { return true; } + + // Some sections may allow coalescing other raw input sections. + virtual void mergeInput(InputSection *input); + + // Specifically finalizes addresses and section size, not content. + virtual void finalize() { + // TODO investigate refactoring synthetic section finalization logic into + // overrides of this function. + } + + virtual void writeTo(uint8_t *buf) const = 0; + + StringRef name; + OutputSegment *parent = nullptr; + + uint32_t index = 0; + uint64_t addr = 0; + uint64_t fileOff = 0; + uint32_t align = 1; + uint32_t flags = 0; +}; + +class OutputSectionComparator { +public: + OutputSectionComparator(uint32_t segmentOrder, + const std::vector §Ordering) + : segmentOrder(segmentOrder) { + for (uint32_t j = 0, m = sectOrdering.size(); j < m; ++j) + sectionOrdering[sectOrdering[j]] = j; + } + + uint32_t sectionOrder(StringRef secname) { + auto sectIt = sectionOrdering.find(secname); + if (sectIt != sectionOrdering.end()) + return sectIt->second; + return sectionOrdering.size(); + } + + // Sort sections within a common segment, which stores them in + // a MapVector of section name -> section + bool operator()(const std::pair &a, + const std::pair &b) { + return sectionOrder(a.first) < sectionOrder(b.first); + } + + bool operator<(const OutputSectionComparator &b) { + return segmentOrder < b.segmentOrder; + } + +private: + uint32_t segmentOrder; + llvm::DenseMap sectionOrdering; +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/OutputSection.cpp b/lld/MachO/OutputSection.cpp new file mode 100644 --- /dev/null +++ b/lld/MachO/OutputSection.cpp @@ -0,0 +1,23 @@ +//===- OutputSection.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "OutputSection.h" +#include "OutputSegment.h" +#include "lld/Common/ErrorHandler.h" + +using namespace llvm; +using namespace lld; +using namespace lld::macho; + +uint64_t OutputSection::getSegmentOffset() const { + return addr - parent->firstSection()->addr; +} + +void OutputSection::mergeInput(InputSection *input) { + llvm_unreachable("Cannot merge input section into unmergable output section"); +} diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -9,6 +9,7 @@ #ifndef LLD_MACHO_OUTPUT_SEGMENT_H #define LLD_MACHO_OUTPUT_SEGMENT_H +#include "OutputSection.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/MapVector.h" @@ -20,43 +21,77 @@ constexpr const char *text = "__TEXT"; constexpr const char *pageZero = "__PAGEZERO"; constexpr const char *linkEdit = "__LINKEDIT"; +constexpr const char *dataConst = "__DATA_CONST"; } // namespace segment_names +class OutputSection; +class OutputSegmentComparator; class InputSection; class OutputSegment { public: - InputSection *firstSection() const { return sections.front().second.at(0); } + using SectionMap = typename llvm::MapVector; + using SectionMapEntry = typename std::pair; - InputSection *lastSection() const { return sections.back().second.back(); } + const OutputSection *firstSection() const { return sections.front().second; } + const OutputSection *lastSection() const { return sections.back().second; } bool isNeeded() const { - return !sections.empty() || name == segment_names::linkEdit; + if (name == segment_names::linkEdit) + return true; + for (const SectionMapEntry &i : sections) { + OutputSection *os = i.second; + if (os->isNeeded()) + return true; + } + return false; } - void addSection(InputSection *); + OutputSection *getOrCreateOutputSection(StringRef name); + void 
addOutputSection(OutputSection *os); + void sortOutputSections(OutputSegmentComparator *comparator); - const llvm::MapVector> & - getSections() const { - return sections; - } + const SectionMap &getSections() const { return sections; } + size_t numNonHiddenSections() const; uint64_t fileOff = 0; StringRef name; - uint32_t numNonHiddenSections = 0; uint32_t maxProt = 0; uint32_t initProt = 0; uint8_t index; private: - llvm::MapVector> sections; + SectionMap sections; +}; + +class OutputSegmentComparator { +public: + OutputSegmentComparator(); + + OutputSectionComparator *sectionComparator(const OutputSegment *os) { + auto it = orderMap.find(os->name); + if (it == orderMap.end()) { + return defaultPositionComparator; + } + return &it->second; + } + + bool operator()(const OutputSegment *a, const OutputSegment *b) { + return *sectionComparator(a) < *sectionComparator(b); + } + +private: + const StringRef defaultPosition = StringRef(); + llvm::DenseMap orderMap; + OutputSectionComparator *defaultPositionComparator; }; extern std::vector outputSegments; OutputSegment *getOutputSegment(StringRef name); OutputSegment *getOrCreateOutputSegment(StringRef name); +void sortOutputSegmentsAndSections(); } // namespace macho } // namespace lld diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -8,7 +8,10 @@ #include "OutputSegment.h" #include "InputSection.h" +#include "MergedOutputSection.h" +#include "SyntheticSections.h" +#include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/BinaryFormat/MachO.h" @@ -33,13 +36,71 @@ return VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; } -void OutputSegment::addSection(InputSection *isec) { - isec->parent = this; - std::vector &vec = sections[isec->name]; - if (vec.empty() && !isec->isHidden()) { - ++numNonHiddenSections; +size_t OutputSegment::numNonHiddenSections() const { + size_t count = 0; + for (const 
OutputSegment::SectionMapEntry &i : sections) { + OutputSection *os = i.second; + count += (os->isHidden() ? 0 : 1); } - vec.push_back(isec); + return count; +} + +void OutputSegment::addOutputSection(OutputSection *os) { + os->parent = this; + std::pair result = + sections.insert(SectionMapEntry(os->name, os)); + if (!result.second) { + llvm_unreachable("Attempted to set section, but a section with the same " + "name already exists"); + } +} + +OutputSection *OutputSegment::getOrCreateOutputSection(StringRef name) { + OutputSegment::SectionMap::iterator i = sections.find(name); + if (i != sections.end()) { + return i->second; + } + + auto *os = make(name); + addOutputSection(os); + return os; +} + +void OutputSegment::sortOutputSections(OutputSegmentComparator *comparator) { + llvm::stable_sort(sections, *comparator->sectionComparator(this)); +} + +OutputSegmentComparator::OutputSegmentComparator() { + // This defines the order of segments and the sections within each segment. + // Segments that are not mentioned here will end up at defaultPosition; + // sections that are not mentioned will end up at the end of the section + // list for their given segment. + std::vector>> ordering{ + {segment_names::pageZero, {}}, + {segment_names::text, {section_names::header}}, + {defaultPosition, {}}, + // Make sure __LINKEDIT is the last segment (i.e. all its hidden + // sections must be ordered after other sections). + {segment_names::linkEdit, + { + section_names::binding, + section_names::export_, + section_names::symbolTable, + section_names::stringTable, + }}, + }; + + for (uint32_t i = 0, n = ordering.size(); i < n; ++i) { + auto &p = ordering[i]; + StringRef segname = p.first; + const std::vector §Ordering = p.second; + orderMap.insert(std::pair( + segname, OutputSectionComparator(i, sectOrdering))); + } + + // Cache the position for the default comparator since this is the likely + // scenario. 
+ defaultPositionComparator = &orderMap.find(defaultPosition)->second; } static llvm::DenseMap nameToOutputSegment; @@ -62,3 +123,24 @@ outputSegments.push_back(segRef); return segRef; } + +void macho::sortOutputSegmentsAndSections() { + // Sorting only can happen once all outputs have been collected. + // Since output sections are grouped by segment, sorting happens + // first over all segments, then over sections per segment. + auto comparator = OutputSegmentComparator(); + llvm::stable_sort(outputSegments, comparator); + + // Now that the output sections are sorted, assign the final + // output section indices. + uint32_t sectionIndex = 0; + for (OutputSegment *seg : outputSegments) { + seg->sortOutputSections(&comparator); + for (auto &p : seg->getSections()) { + OutputSection *section = p.second; + if (!section->isHidden()) { + section->index = ++sectionIndex; + } + } + } +} diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -81,7 +81,7 @@ inline uint64_t Symbol::getVA() const { if (auto *d = dyn_cast(this)) - return d->isec->addr + d->value; + return d->isec->getVA() + d->value; return 0; } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -10,12 +10,10 @@ #define LLD_MACHO_SYNTHETIC_SECTIONS_H #include "ExportTrie.h" -#include "InputSection.h" +#include "OutputSection.h" #include "Target.h" #include "llvm/ADT/SetVector.h" -using namespace llvm::MachO; - namespace lld { namespace macho { @@ -27,20 +25,27 @@ constexpr const char *export_ = "__export"; constexpr const char *symbolTable = "__symbol_table"; constexpr const char *stringTable = "__string_table"; +constexpr const char *got = "__got"; } // namespace section_names class DylibSymbol; class LoadCommand; +class SyntheticSection : public OutputSection { +public: + SyntheticSection(const char *segname, const char *name); + virtual ~SyntheticSection() 
= default; +}; + // The header of the Mach-O file, which must have a file offset of zero. -class MachHeaderSection : public InputSection { +class MachHeaderSection : public SyntheticSection { public: MachHeaderSection(); void addLoadCommand(LoadCommand *); bool isHidden() const override { return true; } size_t getSize() const override; - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; private: std::vector loadCommands; @@ -49,17 +54,18 @@ // A hidden section that exists solely for the purpose of creating the // __PAGEZERO segment, which is used to catch null pointer dereferences. -class PageZeroSection : public InputSection { +class PageZeroSection : public SyntheticSection { public: PageZeroSection(); bool isHidden() const override { return true; } size_t getSize() const override { return ImageBase; } uint64_t getFileSize() const override { return 0; } + void writeTo(uint8_t *buf) const override {} }; // This section will be populated by dyld with addresses to non-lazily-loaded // dylib symbols. -class GotSection : public InputSection { +class GotSection : public SyntheticSection { public: GotSection(); @@ -68,11 +74,11 @@ return entries; } - size_t getSize() const override { return entries.size() * WordSize; } - bool isNeeded() const override { return !entries.empty(); } - void writeTo(uint8_t *buf) override { + size_t getSize() const override { return entries.size() * WordSize; } + + void writeTo(uint8_t *buf) const override { // Nothing to write, GOT contains all zeros at link time; it's populated at // runtime by dyld. } @@ -82,7 +88,7 @@ }; // Stores bind opcodes for telling dyld which symbols to load non-lazily. -class BindingSection : public InputSection { +class BindingSection : public SyntheticSection { public: BindingSection(); void finalizeContents(); @@ -92,13 +98,13 @@ // section headers. 
bool isHidden() const override { return true; } bool isNeeded() const override; - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; SmallVector contents; }; // Stores a trie that describes the set of exported symbols. -class ExportSection : public InputSection { +class ExportSection : public SyntheticSection { public: ExportSection(); void finalizeContents(); @@ -107,7 +113,7 @@ // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in // section headers. bool isHidden() const override { return true; } - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; private: TrieBuilder trieBuilder; @@ -115,7 +121,7 @@ }; // Stores the strings referenced by the symbol table. -class StringTableSection : public InputSection { +class StringTableSection : public SyntheticSection { public: StringTableSection(); // Returns the start offset of the added string. @@ -125,7 +131,7 @@ // offsets are recorded in the LC_SYMTAB load command, instead of in section // headers. bool isHidden() const override { return true; } - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; private: // An n_strx value of 0 always indicates the empty string, so we must locate @@ -140,7 +146,7 @@ size_t strx; }; -class SymtabSection : public InputSection { +class SymtabSection : public SyntheticSection { public: SymtabSection(StringTableSection &); void finalizeContents(); @@ -150,7 +156,7 @@ // offsets are recorded in the LC_SYMTAB load command, instead of in section // headers. 
bool isHidden() const override { return true; } - void writeTo(uint8_t *buf) override; + void writeTo(uint8_t *buf) const override; private: StringTableSection &stringTableSection; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -26,13 +26,18 @@ namespace lld { namespace macho { -MachHeaderSection::MachHeaderSection() { - // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts - // from the beginning of the file (i.e. the header). - segname = segment_names::text; - name = section_names::header; +SyntheticSection::SyntheticSection(const char *segname, const char *name) + : OutputSection(name) { + // Synthetic sections always know which segment they belong to so hook + // them up when they're made + getOrCreateOutputSegment(segname)->addOutputSection(this); } +// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts +// from the beginning of the file (i.e. the header). 
+MachHeaderSection::MachHeaderSection() + : SyntheticSection(segment_names::text, section_names::header) {} + void MachHeaderSection::addLoadCommand(LoadCommand *lc) { loadCommands.push_back(lc); sizeOfCmds += lc->getSize(); @@ -42,7 +47,7 @@ return sizeof(mach_header_64) + sizeOfCmds; } -void MachHeaderSection::writeTo(uint8_t *buf) { +void MachHeaderSection::writeTo(uint8_t *buf) const { auto *hdr = reinterpret_cast(buf); hdr->magic = MH_MAGIC_64; hdr->cputype = CPU_TYPE_X86_64; @@ -59,14 +64,11 @@ } } -PageZeroSection::PageZeroSection() { - segname = segment_names::pageZero; - name = section_names::pageZero; -} +PageZeroSection::PageZeroSection() + : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} -GotSection::GotSection() { - segname = "__DATA_CONST"; - name = "__got"; +GotSection::GotSection() + : SyntheticSection(segment_names::dataConst, section_names::got) { align = 8; flags = S_NON_LAZY_SYMBOL_POINTERS; @@ -80,10 +82,8 @@ } } -BindingSection::BindingSection() { - segname = segment_names::linkEdit; - name = section_names::binding; -} +BindingSection::BindingSection() + : SyntheticSection(segment_names::linkEdit, section_names::binding) {} bool BindingSection::isNeeded() const { return in.got->isNeeded(); } @@ -107,7 +107,7 @@ raw_svector_ostream os{contents}; os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | in.got->parent->index); - encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os); + encodeULEB128(in.got->getSegmentOffset(), os); for (const DylibSymbol *sym : in.got->getEntries()) { // TODO: Implement compact encoding -- we only need to encode the // differences between consecutive symbol entries. 
@@ -127,14 +127,12 @@ os << static_cast(BIND_OPCODE_DONE); } -void BindingSection::writeTo(uint8_t *buf) { +void BindingSection::writeTo(uint8_t *buf) const { memcpy(buf, contents.data(), contents.size()); } -ExportSection::ExportSection() { - segname = segment_names::linkEdit; - name = section_names::export_; -} +ExportSection::ExportSection() + : SyntheticSection(segment_names::linkEdit, section_names::export_) {} void ExportSection::finalizeContents() { // TODO: We should check symbol visibility. @@ -144,12 +142,11 @@ size = trieBuilder.build(); } -void ExportSection::writeTo(uint8_t *buf) { trieBuilder.writeTo(buf); } +void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } SymtabSection::SymtabSection(StringTableSection &stringTableSection) - : stringTableSection(stringTableSection) { - segname = segment_names::linkEdit; - name = section_names::symbolTable; + : SyntheticSection(segment_names::linkEdit, section_names::symbolTable), + stringTableSection(stringTableSection) { // TODO: When we introduce the SyntheticSections superclass, we should make // all synthetic sections aligned to WordSize by default. 
align = WordSize; @@ -166,7 +163,7 @@ symbols.push_back({sym, stringTableSection.addString(sym->getName())}); } -void SymtabSection::writeTo(uint8_t *buf) { +void SymtabSection::writeTo(uint8_t *buf) const { auto *nList = reinterpret_cast(buf); for (const SymtabEntry &entry : symbols) { nList->n_strx = entry.strx; @@ -174,18 +171,16 @@ // TODO populate n_desc if (auto defined = dyn_cast(entry.sym)) { nList->n_type = N_EXT | N_SECT; - nList->n_sect = defined->isec->sectionIndex; + nList->n_sect = defined->isec->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol - nList->n_value = defined->value + defined->isec->addr; + nList->n_value = defined->value + defined->isec->getVA(); } ++nList; } } -StringTableSection::StringTableSection() { - segname = segment_names::linkEdit; - name = section_names::stringTable; -} +StringTableSection::StringTableSection() + : SyntheticSection(segment_names::linkEdit, section_names::stringTable) {} uint32_t StringTableSection::addString(StringRef str) { uint32_t strx = size; @@ -194,7 +189,7 @@ return strx; } -void StringTableSection::writeTo(uint8_t *buf) { +void StringTableSection::writeTo(uint8_t *buf) const { uint32_t off = 0; for (StringRef str : strings) { memcpy(buf + off, str.data(), str.size()); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -37,8 +37,7 @@ Writer() : buffer(errorHandler().outputBuffer) {} void scanRelocations(); - void createHiddenSections(); - void sortSections(); + void createOutputSections(); void createLoadCommands(); void assignAddresses(OutputSegment *); void createSymtabContents(); @@ -71,11 +70,11 @@ c->cmd = LC_DYLD_INFO_ONLY; c->cmdsize = getSize(); if (bindingSection->isNeeded()) { - c->bind_off = bindingSection->getFileOffset(); + c->bind_off = bindingSection->fileOff; c->bind_size = bindingSection->getFileSize(); } if (exportSection->isNeeded()) { - c->export_off = exportSection->getFileOffset(); + 
c->export_off = exportSection->fileOff; c->export_size = exportSection->getFileSize(); } } @@ -101,7 +100,7 @@ uint32_t getSize() const override { return sizeof(segment_command_64) + - seg->numNonHiddenSections * sizeof(section_64); + seg->numNonHiddenSections() * sizeof(section_64); } void writeTo(uint8_t *buf) const override { @@ -115,20 +114,19 @@ c->maxprot = seg->maxProt; c->initprot = seg->initProt; - if (seg->getSections().empty()) + if (!seg->isNeeded()) return; c->vmaddr = seg->firstSection()->addr; c->vmsize = seg->lastSection()->addr + seg->lastSection()->getSize() - c->vmaddr; - c->nsects = seg->numNonHiddenSections; + c->nsects = seg->numNonHiddenSections(); for (auto &p : seg->getSections()) { StringRef s = p.first; - ArrayRef sections = p.second; - for (InputSection *isec : sections) - c->filesize += isec->getFileSize(); - if (sections[0]->isHidden()) + OutputSection *section = p.second; + c->filesize += section->getFileSize(); + if (section->isHidden()) continue; auto *sectHdr = reinterpret_cast(buf); @@ -137,16 +135,11 @@ memcpy(sectHdr->sectname, s.data(), s.size()); memcpy(sectHdr->segname, name.data(), name.size()); - sectHdr->addr = sections[0]->addr; - sectHdr->offset = sections[0]->getFileOffset(); - sectHdr->align = sections[0]->align; - uint32_t maxAlign = 0; - for (const InputSection *section : sections) - maxAlign = std::max(maxAlign, section->align); - sectHdr->align = Log2_32(maxAlign); - sectHdr->flags = sections[0]->flags; - sectHdr->size = sections.back()->addr + sections.back()->getSize() - - sections[0]->addr; + sectHdr->addr = section->addr; + sectHdr->offset = section->fileOff; + sectHdr->align = Log2_32(section->align); + sectHdr->flags = section->flags; + sectHdr->size = section->getSize(); } } @@ -178,9 +171,9 @@ auto *c = reinterpret_cast(buf); c->cmd = LC_SYMTAB; c->cmdsize = getSize(); - c->symoff = symtabSection->getFileOffset(); + c->symoff = symtabSection->fileOff; c->nsyms = symtabSection->getNumSymbols(); - c->stroff = 
stringTableSection->getFileOffset(); + c->stroff = stringTableSection->fileOff; c->strsize = stringTableSection->getFileSize(); } @@ -259,76 +252,8 @@ // different location. const StringRef path = "/usr/lib/dyld"; }; - -class SectionComparator { -public: - struct OrderInfo { - uint32_t segmentOrder; - DenseMap sectionOrdering; - }; - - SectionComparator() { - // This defines the order of segments and the sections within each segment. - // Segments that are not mentioned here will end up at defaultPosition; - // sections that are not mentioned will end up at the end of the section - // list for their given segment. - std::vector>> ordering{ - {segment_names::pageZero, {}}, - {segment_names::text, {section_names::header}}, - {defaultPosition, {}}, - // Make sure __LINKEDIT is the last segment (i.e. all its hidden - // sections must be ordered after other sections). - {segment_names::linkEdit, - { - section_names::binding, - section_names::export_, - section_names::symbolTable, - section_names::stringTable, - }}, - }; - - for (uint32_t i = 0, n = ordering.size(); i < n; ++i) { - auto &p = ordering[i]; - StringRef segname = p.first; - const std::vector §Ordering = p.second; - OrderInfo &info = orderMap[segname]; - info.segmentOrder = i; - for (uint32_t j = 0, m = sectOrdering.size(); j < m; ++j) - info.sectionOrdering[sectOrdering[j]] = j; - } - } - - // Return a {segmentOrder, sectionOrder} pair. Using this as a key will - // ensure that all sections in the same segment are sorted contiguously. 
- std::pair order(const InputSection *isec) { - auto it = orderMap.find(isec->segname); - if (it == orderMap.end()) - return {orderMap[defaultPosition].segmentOrder, 0}; - OrderInfo &info = it->second; - auto sectIt = info.sectionOrdering.find(isec->name); - if (sectIt != info.sectionOrdering.end()) - return {info.segmentOrder, sectIt->second}; - return {info.segmentOrder, info.sectionOrdering.size()}; - } - - bool operator()(const InputSection *a, const InputSection *b) { - return order(a) < order(b); - } - -private: - const StringRef defaultPosition = StringRef(); - DenseMap orderMap; -}; - } // namespace -template -SectionType *createInputSection(ArgT &&... args) { - auto *section = make(std::forward(args)...); - inputSections.push_back(section); - return section; -} - void Writer::scanRelocations() { for (InputSection *sect : inputSections) for (Reloc &r : sect->relocs) @@ -378,37 +303,29 @@ make("/usr/lib/libSystem.B.dylib")); } -void Writer::createHiddenSections() { - headerSection = createInputSection(); - bindingSection = createInputSection(); - stringTableSection = createInputSection(); - symtabSection = createInputSection(*stringTableSection); - exportSection = createInputSection(); +void Writer::createOutputSections() { + // First, create hidden sections + headerSection = make(); + bindingSection = make(); + stringTableSection = make(); + symtabSection = make(*stringTableSection); + exportSection = make(); switch (config->outputType) { case MH_EXECUTE: - createInputSection(); + make(); break; case MH_DYLIB: break; default: llvm_unreachable("unhandled output file type"); } -} - -void Writer::sortSections() { - llvm::stable_sort(inputSections, SectionComparator()); - // TODO This is wrong; input sections ought to be grouped into - // output sections, which are then organized like this. - uint32_t sectionIndex = 0; - // Add input sections to output segments. + // Then merge input sections into output sections/segments. 
for (InputSection *isec : inputSections) { - if (isec->isNeeded()) { - if (!isec->isHidden()) - isec->sectionIndex = ++sectionIndex; - getOrCreateOutputSegment(isec->segname)->addSection(isec); - } + getOrCreateOutputSegment(isec->segname) + ->getOrCreateOutputSection(isec->name) + ->mergeInput(isec); } } @@ -418,16 +335,17 @@ seg->fileOff = fileOff; for (auto &p : seg->getSections()) { - ArrayRef sections = p.second; - for (InputSection *isec : sections) { - addr = alignTo(addr, isec->align); - // We must align the file offsets too to avoid misaligned writes of - // structs. - fileOff = alignTo(fileOff, isec->align); - isec->addr = addr; - addr += isec->getSize(); - fileOff += isec->getFileSize(); - } + OutputSection *section = p.second; + addr = alignTo(addr, section->align); + // We must align the file offsets too to avoid misaligned writes of + // structs. + fileOff = alignTo(fileOff, section->align); + section->addr = addr; + section->fileOff = fileOff; + section->finalize(); + + addr += section->getSize(); + fileOff += section->getFileSize(); } } @@ -446,26 +364,25 @@ void Writer::writeSections() { uint8_t *buf = buffer->getBufferStart(); for (OutputSegment *seg : outputSegments) { - uint64_t fileOff = seg->fileOff; - for (auto &sect : seg->getSections()) { - for (InputSection *isec : sect.second) { - fileOff = alignTo(fileOff, isec->align); - isec->writeTo(buf + fileOff); - fileOff += isec->getFileSize(); - } + for (auto &p : seg->getSections()) { + OutputSection *section = p.second; + section->writeTo(buf + section->fileOff); } } } void Writer::run() { - scanRelocations(); - createHiddenSections(); - // Sort and assign sections to their respective segments. No more sections can - // be created after this method runs. - sortSections(); // dyld requires __LINKEDIT segment to always exist (even if empty). - getOrCreateOutputSegment(segment_names::linkEdit); - // No more segments can be created after this method runs. 
+ OutputSegment *linkEditSegment = + getOrCreateOutputSegment(segment_names::linkEdit); + + scanRelocations(); + + // Sort and assign sections to their respective segments. No more sections nor + // segments may be created after this method runs. + createOutputSections(); + sortOutputSegmentsAndSections(); + createLoadCommands(); // Ensure that segments (and the sections they contain) are allocated @@ -475,7 +392,8 @@ // determine addresses of other segments/sections before generating its // contents. for (OutputSegment *seg : outputSegments) - assignAddresses(seg); + if (seg != linkEditSegment) + assignAddresses(seg); // Fill __LINKEDIT contents. bindingSection->finalizeContents(); @@ -483,9 +401,8 @@ symtabSection->finalizeContents(); // Now that __LINKEDIT is filled out, do a proper calculation of its - // addresses and offsets. We don't have to recalculate the other segments - // since sortSections() ensures that __LINKEDIT is the last segment. - assignAddresses(getOutputSegment(segment_names::linkEdit)); + // addresses and offsets. 
+ assignAddresses(linkEditSegment); openFile(); if (errorCount()) @@ -499,6 +416,4 @@ void macho::writeResult() { Writer().run(); } -void macho::createSyntheticSections() { - in.got = createInputSection(); -} +void macho::createSyntheticSections() { in.got = make(); } diff --git a/lld/test/MachO/Inputs/libfunction.s b/lld/test/MachO/Inputs/libfunction.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/Inputs/libfunction.s @@ -0,0 +1,6 @@ +.section __TEXT,__text +.globl _some_function + +_some_function: + mov $1, %rax + ret diff --git a/lld/test/MachO/section-merge.s b/lld/test/MachO/section-merge.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/section-merge.s @@ -0,0 +1,35 @@ +# REQUIRES: x86 +# RUN: mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \ +# RUN: -o %t/libhello.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libgoodbye.s \ +# RUN: -o %t/libgoodbye.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libfunction.s \ +# RUN: -o %t/libfunction.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s \ +# RUN: -o %t/main.o +# RUN: lld -flavor darwinnew -o %t/output %t/libfunction.o %t/libgoodbye.o %t/libhello.o %t/main.o + +# RUN: llvm-objdump --syms %t/output | FileCheck %s +# CHECK: SYMBOL TABLE: +# CHECK-DAG: {{[0-9a-z]+}} g O __TEXT,__cstring _goodbye_world +# CHECK-DAG: {{[0-9a-z]+}} g O __TEXT,__cstring _hello_its_me +# CHECK-DAG: {{[0-9a-z]+}} g O __TEXT,__cstring _hello_world +# CHECK-DAG: {{[0-9a-z]+}} g F __TEXT,__text _main +# CHECK-DAG: {{[0-9a-z]+}} g F __TEXT,__text _some_function + +# RUN: llvm-objdump -d %t/output | FileCheck %s --check-prefix DATA +# DATA: Disassembly of section __TEXT,__text: +# DATA: {{0*}}[[#%x,BASE:]] <_some_function>: +# DATA-NEXT: [[#BASE]]: 48 c7 c0 01 00 00 00 movq $1, %rax +# DATA-NEXT: [[#BASE + 0x7]]: c3 retq +# DATA: {{0*}}[[#BASE + 0x8]] <_main>: +# DATA-NEXT: [[#BASE + 0x8]]: 48 c7 c0 00 00 00 00 movq $0, 
%rax +# DATA-NEXT: [[#BASE + 0xf]]: c3 retq + +.section __TEXT,__text +.global _main + +_main: + mov $0, %rax + ret