diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt --- a/lld/MachO/CMakeLists.txt +++ b/lld/MachO/CMakeLists.txt @@ -8,6 +8,7 @@ ExportTrie.cpp InputFiles.cpp InputSection.cpp + OutputSection.cpp OutputSegment.cpp SymbolTable.cpp Symbols.cpp diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -9,6 +9,7 @@ #include "Driver.h" #include "Config.h" #include "InputFiles.h" +#include "OutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -43,7 +43,7 @@ #include "InputFiles.h" #include "InputSection.h" -#include "OutputSegment.h" +#include "OutputSection.h" #include "SymbolTable.h" #include "Symbols.h" #include "Target.h" diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -19,7 +19,7 @@ class InputFile; class InputSection; -class OutputSegment; +class OutputSection; class Symbol; struct Reloc { @@ -35,6 +35,8 @@ virtual size_t getSize() const { return data.size(); } virtual uint64_t getFileSize() const { return getSize(); } uint64_t getFileOffset() const; + uint64_t getVA() const; + // Don't emit section_64 headers for hidden sections. virtual bool isHidden() const { return false; } // Unneeded sections are omitted entirely (header and body). @@ -42,19 +44,16 @@ virtual void writeTo(uint8_t *buf); InputFile *file = nullptr; - OutputSegment *parent = nullptr; StringRef name; StringRef segname; - ArrayRef data; + OutputSection *parent = nullptr; + uint64_t outSecOff = 0; - // TODO these properties ought to live in an OutputSection class. - // Move them once available. - uint64_t addr = 0; uint32_t align = 1; - uint32_t sectionIndex = 0; uint32_t flags = 0; + ArrayRef data; std::vector relocs; }; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -22,9 +22,11 @@ std::vector macho::inputSections; uint64_t InputSection::getFileOffset() const { - return parent->fileOff + addr - parent->firstSection()->addr; + return parent->getFileOffset() + outSecOff; } +uint64_t InputSection::getVA() const { return parent->addr + outSecOff; } + void InputSection::writeTo(uint8_t *buf) { memcpy(buf, data.data(), data.size()); @@ -32,18 +34,18 @@ uint64_t va = 0; if (auto *s = r.target.dyn_cast()) { if (auto *dylibSymbol = dyn_cast(s)) { - va = in.got->addr - ImageBase + dylibSymbol->gotIndex * WordSize; + va = in.got->getVA() - ImageBase + dylibSymbol->gotIndex * WordSize; } else { va = s->getVA(); } } else if (auto *isec = r.target.dyn_cast()) - va = isec->addr; + va = isec->getVA(); else llvm_unreachable("Unknown relocation target"); uint64_t val = va + r.addend; if (1) // TODO: handle non-pcrel relocations - val -= addr - ImageBase + r.offset; + val -= getVA() - ImageBase + r.offset; target->relocateOne(buf + r.offset, r.type, val); } } diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h new file mode 100644 --- /dev/null +++ b/lld/MachO/OutputSection.h @@ -0,0 +1,59 @@ +//===- OutputSection.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_OUTPUT_SECTION_H +#define LLD_MACHO_OUTPUT_SECTION_H + +#include "InputSection.h" +#include "lld/Common/LLVM.h" +#include "llvm/ADT/MapVector.h" + +namespace lld { +namespace macho { + +class InputSection; +class OutputSegment; + +class OutputSection { +public: + const InputSection *firstSection() const { return inputs.front(); } + const InputSection *lastSection() const { return inputs.back(); } + + // These accessors will only be valid after finalizing the section + uint64_t getFileOffset() const; + size_t getSize() const { return size; } + size_t getFileSize() const { return fileSize; } + bool isHidden() const { return hidden; } + + void addInput(InputSection *input); + void finalize(); + + void writeTo(uint8_t *buf) const; + + StringRef name; + OutputSegment *parent = nullptr; + std::vector inputs; + + uint32_t index = 0; + + uint64_t addr = 0; + uint32_t align = 1; + uint32_t flags = 0; + +private: + void mergeFlags(uint32_t inputFlags); + + bool hidden = false; + size_t size = 0; + size_t fileSize = 0; +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/OutputSection.cpp b/lld/MachO/OutputSection.cpp new file mode 100644 --- /dev/null +++ b/lld/MachO/OutputSection.cpp @@ -0,0 +1,80 @@ +//===- OutputSection.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "OutputSection.h" +#include "OutputSegment.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" +#include "llvm/BinaryFormat/MachO.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace lld; +using namespace lld::macho; + +uint64_t OutputSection::getFileOffset() const { + return parent->fileOff + addr - parent->firstSection()->addr; +} + +void OutputSection::addInput(InputSection *input) { + if (this->inputs.empty()) { + this->align = input->align; + this->flags = input->flags; + } else { + this->mergeFlags(input->flags); + this->align = std::max(this->align, input->align); + } + + // TODO: reconsider how hidden inputs are merged (or figure out + // if they need merging at all) + this->hidden |= input->isHidden(); + this->inputs.push_back(input); + input->parent = this; +} + +void OutputSection::finalize() { + uint64_t addr = this->addr; + this->fileSize = 0; + + for (InputSection *i : inputs) { + i->outSecOff = alignTo(addr, i->align) - this->addr; + addr += i->getSize(); + this->fileSize += i->getFileSize(); + } + this->size = addr - this->addr; +} + +void OutputSection::writeTo(uint8_t *buf) const { + for (InputSection *i : inputs) { + i->writeTo(buf); + buf += i->getFileSize(); + } +} + +// TODO: this is most likely wrong; reconsider how section flags +// are actually merged. +void OutputSection::mergeFlags(uint32_t inputFlags) { + uint8_t sectionFlag = MachO::SECTION_TYPE & inputFlags; + if (sectionFlag != (MachO::SECTION_TYPE & this->flags)) + error("Cannot add merge section; inconsistent type flags " + + Twine(sectionFlag)); + + uint32_t inconsistentFlags = + MachO::S_ATTR_DEBUG | MachO::S_ATTR_STRIP_STATIC_SYMS | + MachO::S_ATTR_NO_DEAD_STRIP | MachO::S_ATTR_LIVE_SUPPORT; + if ((inputFlags ^ this->flags) & inconsistentFlags) + error("Cannot add merge section; cannot merge inconsistent flags"); + + // Negate pure instruction presence if any segment isn't pure. + uint32_t pureMask = + ~(MachO::S_ATTR_PURE_INSTRUCTIONS & inputFlags & this->flags); + + // Merge the rest + this->flags |= inputFlags; + this->flags &= pureMask; +} diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -9,6 +9,7 @@ #ifndef LLD_MACHO_OUTPUT_SEGMENT_H #define LLD_MACHO_OUTPUT_SEGMENT_H +#include "OutputSection.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/MapVector.h" @@ -23,34 +24,32 @@ } // namespace segment_names +class OutputSection; class InputSection; class OutputSegment { public: - InputSection *firstSection() const { return sections.front().second.at(0); } + typedef llvm::MapVector SectionMap; - InputSection *lastSection() const { return sections.back().second.back(); } + const OutputSection *firstSection() const { return sections.front().second; } + + const OutputSection *lastSection() const { return sections.back().second; } bool isNeeded() const { return !sections.empty() || name == segment_names::linkEdit; } - void addSection(InputSection *); + OutputSection *addSection(InputSection *); - const llvm::MapVector> & - getSections() const { - return sections; - } + const SectionMap &getSections() const { return sections; } + uint32_t numNonHiddenSections = 0; uint64_t fileOff = 0; StringRef name; - uint32_t numNonHiddenSections = 0; uint32_t maxProt = 0; uint32_t initProt = 0; uint8_t index; - -private: - llvm::MapVector> sections; + SectionMap sections; }; extern std::vector outputSegments; diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -33,13 +33,25 @@ return VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; } -void OutputSegment::addSection(InputSection *isec) { - isec->parent = this; - std::vector &vec = sections[isec->name]; - if (vec.empty() && !isec->isHidden()) { - ++numNonHiddenSections; +OutputSection *OutputSegment::addSection(InputSection *input) { + OutputSegment::SectionMap::iterator i = this->sections.find(input->name); + if (i != this->sections.end()) { + auto os = i->second; + os->addInput(input); + return os; } - vec.push_back(isec); + + auto *os = make(); + os->name = input->name; + os->parent = this; + os->addInput(input); + this->sections[os->name] = os; + + if (!os->isHidden()) { + this->numNonHiddenSections++; + } + + return os; } static llvm::DenseMap nameToOutputSegment; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -81,7 +81,7 @@ inline uint64_t Symbol::getVA() const { if (auto *d = dyn_cast(this)) - return d->isec->addr + d->value - ImageBase; + return d->isec->getVA() + d->value - ImageBase; return 0; } diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -107,7 +107,7 @@ raw_svector_ostream os{contents}; os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | in.got->parent->index); - encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os); + encodeULEB128(in.got->outSecOff, os); for (const DylibSymbol *sym : in.got->getEntries()) { // TODO: Implement compact encoding -- we only need to encode the // differences between consecutive symbol entries. @@ -171,9 +171,9 @@ // TODO populate n_desc if (auto defined = dyn_cast(entry.sym)) { nList->n_type = N_EXT | N_SECT; - nList->n_sect = defined->isec->sectionIndex; + nList->n_sect = defined->isec->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol - nList->n_value = defined->value + defined->isec->addr; + nList->n_value = defined->value + defined->isec->getVA(); } ++nList; } diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -38,7 +38,7 @@ void scanRelocations(); void createHiddenSections(); - void sortSections(); + void createOutputSections(); void createLoadCommands(); void assignAddresses(OutputSegment *); void createSymtabContents(); @@ -125,10 +125,9 @@ for (auto &p : seg->getSections()) { StringRef s = p.first; - ArrayRef sections = p.second; - for (InputSection *isec : sections) - c->filesize += isec->getFileSize(); - if (sections[0]->isHidden()) + OutputSection *section = p.second; + c->filesize += section->getFileSize(); + if (section->isHidden()) continue; auto *sectHdr = reinterpret_cast(buf); @@ -137,16 +136,11 @@ memcpy(sectHdr->sectname, s.data(), s.size()); memcpy(sectHdr->segname, name.data(), name.size()); - sectHdr->addr = sections[0]->addr; - sectHdr->offset = sections[0]->getFileOffset(); - sectHdr->align = sections[0]->align; - uint32_t maxAlign = 0; - for (const InputSection *section : sections) - maxAlign = std::max(maxAlign, section->align); - sectHdr->align = Log2_32(maxAlign); - sectHdr->flags = sections[0]->flags; - sectHdr->size = sections.back()->addr + sections.back()->getSize() - - sections[0]->addr; + sectHdr->addr = section->addr; + sectHdr->offset = section->getFileOffset(); + sectHdr->align = Log2_32(section->align); + sectHdr->flags = section->flags; + sectHdr->size = section->getSize(); } } @@ -398,20 +392,27 @@ } } -void Writer::sortSections() { +void Writer::createOutputSections() { llvm::stable_sort(inputSections, SectionComparator()); - // TODO This is wrong; input sections ought to be grouped into - // output sections, which are then organized like this. - uint32_t sectionIndex = 0; - // Add input sections to output segments. + // Add input sections to output sections/segments. for (InputSection *isec : inputSections) { if (isec->isNeeded()) { - if (!isec->isHidden()) - isec->sectionIndex = ++sectionIndex; getOrCreateOutputSegment(isec->segname)->addSection(isec); } } + + // Now that the input sections are sorted, assign the final + // output section indices. + uint32_t sectionIndex = 0; + for (OutputSegment *seg : outputSegments) { + for (auto &p : seg->getSections()) { + OutputSection *section = p.second; + if (!section->isHidden()) { + section->index = ++sectionIndex; + } + } + } } void Writer::assignAddresses(OutputSegment *seg) { @@ -420,13 +421,13 @@ seg->fileOff = fileOff; for (auto &p : seg->getSections()) { - ArrayRef sections = p.second; - for (InputSection *isec : sections) { - addr = alignTo(addr, isec->align); - isec->addr = addr; - addr += isec->getSize(); - fileOff += isec->getFileSize(); - } + OutputSection *section = p.second; + addr = alignTo(addr, section->align); + section->addr = addr; + section->finalize(); + + addr += section->getSize(); + fileOff += section->getFileSize(); } } @@ -446,11 +447,10 @@ uint8_t *buf = buffer->getBufferStart(); for (OutputSegment *seg : outputSegments) { uint64_t fileOff = seg->fileOff; - for (auto § : seg->getSections()) { - for (InputSection *isec : sect.second) { - isec->writeTo(buf + fileOff); - fileOff += isec->getFileSize(); - } + for (auto &p : seg->getSections()) { + OutputSection *section = p.second; + section->writeTo(buf + fileOff); + fileOff += section->getFileSize(); } } } @@ -460,9 +460,9 @@ createHiddenSections(); // Sort and assign sections to their respective segments. No more sections can // be created after this method runs. - sortSections(); + createOutputSections(); // dyld requires __LINKEDIT segment to always exist (even if empty). - getOrCreateOutputSegment(segment_names::linkEdit); + auto *linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); // No more segments can be created after this method runs. createLoadCommands(); @@ -482,8 +482,8 @@ // Now that __LINKEDIT is filled out, do a proper calculation of its // addresses and offsets. We don't have to recalculate the other segments - // since sortSections() ensures that __LINKEDIT is the last segment. - assignAddresses(getOutputSegment(segment_names::linkEdit)); + // since createOutputSections() ensures that __LINKEDIT is the last segment. + assignAddresses(linkEditSegment); openFile(); if (errorCount()) diff --git a/lld/test/MachO/section-merge.s b/lld/test/MachO/section-merge.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/section-merge.s @@ -0,0 +1,37 @@ +# REQUIRES: x86 +# RUN: mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \ +# RUN: -o %t/libhello.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libgoodbye.s \ +# RUN: -o %t/libgoodbye.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s \ +# RUN: -o %t/main.o +# RUN: lld -flavor darwinnew -o %t/output %t/libgoodbye.o %t/libhello.o %t/main.o +# RUN: llvm-readobj -symbols %t/output | FileCheck %s + +# CHECK: Name: _goodbye_world +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __cstring (0x2) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x[[#%X,BASE:]] + +# CHECK: Name: _hello_world +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __cstring (0x2) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x[[#BASE + 0x10]] + +.section __TEXT,__text +.global _goodbye_world +.global _hello_world +.global _main + +_main: + mov $0, %rax + ret