diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1021,10 +1021,10 @@ TimeTraceScope timeScope("Gathering input sections"); int inputOrder = 0; for (const InputFile *file : inputFiles) { - for (const SubsectionMap &map : file->subsections) { + for (const ParsedSection &section : file->sections) { ConcatOutputSection *osec = nullptr; - for (const SubsectionEntry &entry : map) { - if (auto *isec = dyn_cast(entry.isec)) { + for (const ParsedSubsection &subsection : section.subsections) { + if (auto *isec = dyn_cast(subsection.isec)) { if (isec->isCoalescedWeak()) continue; if (isec->getSegName() == segment_names::ld) { @@ -1036,11 +1036,13 @@ osec = ConcatOutputSection::getOrCreateForInput(isec); isec->parent = osec; inputSections.push_back(isec); - } else if (auto *isec = dyn_cast(entry.isec)) { + } else if (auto *isec = + dyn_cast(subsection.isec)) { if (in.cStringSection->inputOrder == UnspecifiedInputOrder) in.cStringSection->inputOrder = inputOrder++; in.cStringSection->addInput(isec); - } else if (auto *isec = dyn_cast(entry.isec)) { + } else if (auto *isec = + dyn_cast(subsection.isec)) { if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) in.wordLiteralSection->inputOrder = inputOrder++; in.wordLiteralSection->addInput(isec); diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -51,11 +51,17 @@ // If .subsections_via_symbols is set, each InputSection will be split along // symbol boundaries. The field offset represents the offset of the subsection // from the start of the original pre-split InputSection. 
-struct SubsectionEntry { - uint64_t offset; - InputSection *isec; +struct ParsedSubsection { + uint64_t offset = 0; + InputSection *isec = nullptr; +}; + +using ParsedSubsecVec = std::vector; + +struct ParsedSection { + // uint64_t offset = 0; // TODO(gkm): this will debut with __eh_frame hacking + ParsedSubsecVec subsections; }; -using SubsectionMap = std::vector; class InputFile { public: @@ -75,7 +81,7 @@ MemoryBufferRef mb; std::vector symbols; - std::vector subsections; + std::vector sections; // Provides an easy way to sort InputFiles deterministically. const int id; @@ -109,16 +115,16 @@ private: template void parse(); - template void parseSections(ArrayRef
); + template void parseSections(ArrayRef); template void parseSymbols(ArrayRef sectionHeaders, ArrayRef nList, const char *strtab, bool subsectionsViaSymbols); template Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); - template - void parseRelocations(ArrayRef
sectionHeaders, const Section &, - SubsectionMap &); + template + void parseRelocations(ArrayRef sectionHeaders, + const SectionHeader &, ParsedSubsecVec &); void parseDebugInfo(); void parseDataInCode(); void registerCompactUnwind(); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -257,12 +257,14 @@ return {}; } -template -void ObjFile::parseSections(ArrayRef
sections) { - subsections.reserve(sections.size()); +// Parse the sequence of sections within a single LC_SEGMENT(_64). +// Split each section into subsections. +template +void ObjFile::parseSections(ArrayRef sectionHeaders) { + sections.reserve(sectionHeaders.size()); auto *buf = reinterpret_cast(mb.getBufferStart()); - for (const Section &sec : sections) { + for (const SectionHeader &sec : sectionHeaders) { StringRef name = StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname))); StringRef segname = @@ -273,27 +275,27 @@ if (sec.align >= 32) { error("alignment " + std::to_string(sec.align) + " of section " + name + " is too large"); - subsections.push_back({}); + sections.push_back({}); continue; } uint32_t align = 1 << sec.align; uint32_t flags = sec.flags; auto splitRecords = [&](int recordSize) -> void { - subsections.push_back({}); + sections.push_back({}); if (data.empty()) return; - SubsectionMap &subsecMap = subsections.back(); - subsecMap.reserve(data.size() / recordSize); + ParsedSubsecVec &subsections = sections.back().subsections; + subsections.reserve(data.size() / recordSize); auto *isec = make( segname, name, this, data.slice(0, recordSize), align, flags); - subsecMap.push_back({0, isec}); + subsections.push_back({0, isec}); for (uint64_t off = recordSize; off < data.size(); off += recordSize) { // Copying requires less memory than constructing a fresh InputSection. auto *copy = make(*isec); copy->data = data.slice(off, recordSize); - subsecMap.push_back({off, copy}); + subsections.push_back({off, copy}); } }; @@ -315,7 +317,8 @@ isec = make(segname, name, this, data, align, flags); } - subsections.push_back({{0, isec}}); + sections.push_back({}); + sections.back().subsections.push_back({0, isec}); } else if (auto recordSize = getRecordSize(segname, name)) { splitRecords(*recordSize); } else if (segname == segment_names::llvm) { @@ -323,9 +326,9 @@ // segment. 
Symbols within those sections point to bitcode metadata // instead of actual symbols. Global symbols within those sections could // have the same name without causing duplicate symbol errors. Push an - // empty map to ensure indices line up for the remaining sections. + // empty entry to ensure indices line up for the remaining sections. // TODO: Evaluate whether the bitcode metadata is needed. - subsections.push_back({}); + sections.push_back({}); } else { auto *isec = make(segname, name, this, data, align, flags); @@ -334,11 +337,12 @@ // Instead of emitting DWARF sections, we emit STABS symbols to the // object files that contain them. We filter them out early to avoid // parsing their relocations unnecessarily. But we must still push an - // empty map to ensure the indices line up for the remaining sections. - subsections.push_back({}); + // empty entry to ensure the indices line up for the remaining sections. + sections.push_back({}); debugSections.push_back(isec); } else { - subsections.push_back({{0, isec}}); + sections.push_back({}); + sections.back().subsections.push_back({0, isec}); } } } @@ -351,18 +355,18 @@ // any subsection splitting has occurred). It will be updated to represent the // same location as an offset relative to the start of the containing // subsection. 
-static InputSection *findContainingSubsection(SubsectionMap &map, +static InputSection *findContainingSubsection(ParsedSubsecVec &subsections, uint64_t *offset) { auto it = std::prev(llvm::upper_bound( - map, *offset, [](uint64_t value, SubsectionEntry subsecEntry) { - return value < subsecEntry.offset; + subsections, *offset, [](uint64_t value, ParsedSubsection subsec) { + return value < subsec.offset; })); *offset -= it->offset; return it->isec; } -template -static bool validateRelocationInfo(InputFile *file, const Section &sec, +template +static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec, relocation_info rel) { const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type); bool valid = true; @@ -393,14 +397,15 @@ return valid; } -template -void ObjFile::parseRelocations(ArrayRef
sectionHeaders, - const Section &sec, SubsectionMap &subsecMap) { +template +void ObjFile::parseRelocations(ArrayRef sectionHeaders, + const SectionHeader &sec, + ParsedSubsecVec &subsections) { auto *buf = reinterpret_cast(mb.getBufferStart()); ArrayRef relInfos( reinterpret_cast(buf + sec.reloff), sec.nreloc); - auto subsecIt = subsecMap.rbegin(); + auto subsecIt = subsections.rbegin(); for (size_t i = 0; i < relInfos.size(); i++) { // Paired relocations serve as Mach-O's method for attaching a // supplemental datum to a primary relocation record. ELF does not @@ -454,7 +459,8 @@ r.addend = isSubtrahend ? 0 : totalAddend; } else { assert(!isSubtrahend); - const Section &referentSec = sectionHeaders[relInfo.r_symbolnum - 1]; + const SectionHeader &referentSecHead = + sectionHeaders[relInfo.r_symbolnum - 1]; uint64_t referentOffset; if (relInfo.r_pcrel) { // The implicit addend for pcrel section relocations is the pcrel offset @@ -464,14 +470,15 @@ // have pcrel section relocations. We may want to factor this out into // the arch-specific .cpp file. assert(target->hasAttr(r.type, RelocAttrBits::BYTE4)); - referentOffset = - sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr; + referentOffset = sec.addr + relInfo.r_address + 4 + totalAddend - + referentSecHead.addr; } else { // The addend for a non-pcrel relocation is its absolute address. - referentOffset = totalAddend - referentSec.addr; + referentOffset = totalAddend - referentSecHead.addr; } - SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1]; - r.referent = findContainingSubsection(referentSubsecMap, &referentOffset); + ParsedSubsecVec &referentSubsecVec = + sections[relInfo.r_symbolnum - 1].subsections; + r.referent = findContainingSubsection(referentSubsecVec, &referentOffset); r.addend = referentOffset; } @@ -481,14 +488,14 @@ // unsorted relocations (in `-r` mode), so we have a fallback for that // uncommon case. 
InputSection *subsec; - while (subsecIt != subsecMap.rend() && subsecIt->offset > r.offset) + while (subsecIt != subsections.rend() && subsecIt->offset > r.offset) ++subsecIt; - if (subsecIt == subsecMap.rend() || + if (subsecIt == subsections.rend() || subsecIt->offset + subsecIt->isec->getSize() <= r.offset) { - subsec = findContainingSubsection(subsecMap, &r.offset); + subsec = findContainingSubsection(subsections, &r.offset); // Now that we know the relocs are unsorted, avoid trying the 'fast path' // for the other relocations. - subsecIt = subsecMap.rend(); + subsecIt = subsections.rend(); } else { subsec = subsecIt->isec; r.offset -= subsecIt->offset; @@ -509,10 +516,10 @@ } else { uint64_t referentOffset = totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr; - SubsectionMap &referentSubsecMap = - subsections[minuendInfo.r_symbolnum - 1]; + ParsedSubsecVec &referentSubsectVec = + sections[minuendInfo.r_symbolnum - 1].subsections; p.referent = - findContainingSubsection(referentSubsecMap, &referentOffset); + findContainingSubsection(referentSubsectVec, &referentOffset); p.addend = referentOffset; } subsec->relocs.push_back(p); @@ -642,7 +649,7 @@ using NList = typename LP::nlist; // Groups indices of the symbols by the sections that contain them. - std::vector> symbolsBySection(subsections.size()); + std::vector> symbolsBySection(sections.size()); symbols.resize(nList.size()); SmallVector undefineds; for (uint32_t i = 0; i < nList.size(); ++i) { @@ -655,9 +662,9 @@ StringRef name = strtab + sym.n_strx; if ((sym.n_type & N_TYPE) == N_SECT) { - SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; + ParsedSubsecVec &subsections = sections[sym.n_sect - 1].subsections; // parseSections() may have chosen not to parse this section. 
- if (subsecMap.empty()) + if (subsections.empty()) continue; symbolsBySection[sym.n_sect - 1].push_back(i); } else if (isUndef(sym)) { @@ -667,16 +674,16 @@ } } - for (size_t i = 0; i < subsections.size(); ++i) { - SubsectionMap &subsecMap = subsections[i]; - if (subsecMap.empty()) + for (size_t i = 0; i < sections.size(); ++i) { + ParsedSubsecVec &subsections = sections[i].subsections; + if (subsections.empty()) continue; std::vector &symbolIndices = symbolsBySection[i]; uint64_t sectionAddr = sectionHeaders[i].addr; uint32_t sectionAlign = 1u << sectionHeaders[i].align; - InputSection *lastIsec = subsecMap.back().isec; + InputSection *lastIsec = subsections.back().isec; // Record-based sections have already been split into subsections during // parseSections(), so we simply need to match Symbols to the corresponding // subsection here. @@ -686,7 +693,8 @@ const NList &sym = nList[symIndex]; StringRef name = strtab + sym.n_strx; uint64_t symbolOffset = sym.n_value - sectionAddr; - InputSection *isec = findContainingSubsection(subsecMap, &symbolOffset); + InputSection *isec = + findContainingSubsection(subsections, &symbolOffset); if (symbolOffset != 0) { error(toString(lastIsec) + ": symbol " + name + " at misaligned offset"); @@ -699,19 +707,19 @@ // Calculate symbol sizes and create subsections by splitting the sections // along symbol boundaries. - // We populate subsecMap by repeatedly splitting the last (highest address) - // subsection. + // We populate subsections by repeatedly splitting the last (highest + // address) subsection. 
llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { return nList[lhs].n_value < nList[rhs].n_value; }); - SubsectionEntry subsecEntry = subsecMap.back(); + ParsedSubsection subsec = subsections.back(); for (size_t j = 0; j < symbolIndices.size(); ++j) { uint32_t symIndex = symbolIndices[j]; const NList &sym = nList[symIndex]; StringRef name = strtab + sym.n_strx; - InputSection *isec = subsecEntry.isec; + InputSection *isec = subsec.isec; - uint64_t subsecAddr = sectionAddr + subsecEntry.offset; + uint64_t subsecAddr = sectionAddr + subsec.offset; size_t symbolOffset = sym.n_value - subsecAddr; uint64_t symbolSize = j + 1 < symbolIndices.size() @@ -751,8 +759,8 @@ // subsection's offset from the last aligned address. We should consider // emulating that behavior. nextIsec->align = MinAlign(sectionAlign, sym.n_value); - subsecMap.push_back({sym.n_value - sectionAddr, nextIsec}); - subsecEntry = subsecMap.back(); + subsections.push_back({sym.n_value - sectionAddr, nextIsec}); + subsec = subsections.back(); } } @@ -778,7 +786,8 @@ make(segName.take_front(16), sectName.take_front(16), /*file=*/this, data); isec->live = true; - subsections.push_back({{0, isec}}); + sections.push_back({}); + sections.back().subsections.push_back({0, isec}); } ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName) @@ -793,7 +802,7 @@ template void ObjFile::parse() { using Header = typename LP::mach_header; using SegmentCommand = typename LP::segment_command; - using Section = typename LP::section; + using SectionHeader = typename LP::section; using NList = typename LP::nlist; auto *buf = reinterpret_cast(mb.getBufferStart()); @@ -819,11 +828,11 @@ parseLCLinkerOption(this, cmd->count, data); } - ArrayRef
sectionHeaders; + ArrayRef sectionHeaders; if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) { auto *c = reinterpret_cast(cmd); - sectionHeaders = - ArrayRef
{reinterpret_cast(c + 1), c->nsects}; + sectionHeaders = ArrayRef{ + reinterpret_cast(c + 1), c->nsects}; parseSections(sectionHeaders); } @@ -839,9 +848,10 @@ // The relocations may refer to the symbols, so we parse them after we have // parsed all the symbols. - for (size_t i = 0, n = subsections.size(); i < n; ++i) - if (!subsections[i].empty()) - parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]); + for (size_t i = 0, n = sections.size(); i < n; ++i) + if (!sections[i].subsections.empty()) + parseRelocations(sectionHeaders, sectionHeaders[i], + sections[i].subsections); parseDebugInfo(); if (config->emitDataInCodeInfo) @@ -890,20 +900,20 @@ // Create pointers from symbols to their associated compact unwind entries. void ObjFile::registerCompactUnwind() { // First, locate the __compact_unwind section. - SubsectionMap *cuSubsecMap = nullptr; - for (SubsectionMap &map : subsections) { - if (map.empty()) + ParsedSection *cuSection = nullptr; + for (ParsedSection &section : sections) { + if (section.subsections.empty()) continue; - if (map[0].isec->getSegName() != segment_names::ld) + if (section.subsections[0].isec->getSegName() != segment_names::ld) continue; - cuSubsecMap = &map; + cuSection = &section; break; } - if (!cuSubsecMap) + if (!cuSection) return; - for (SubsectionEntry &entry : *cuSubsecMap) { - ConcatInputSection *isec = cast(entry.isec); + for (ParsedSubsection &subsection : cuSection->subsections) { + ConcatInputSection *isec = cast(subsection.isec); ConcatInputSection *referentIsec; for (const Reloc &r : isec->relocs) { if (r.offset != 0) diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -77,7 +77,7 @@ case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS: return false; default: - llvm_unreachable("Section type"); + llvm_unreachable("Section type"); } } diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ 
-21,7 +21,7 @@ using namespace lld::macho; template static bool objectHasObjCSection(MemoryBufferRef mb) { - using Section = typename LP::section; + using SectionHeader = typename LP::section; auto *hdr = reinterpret_cast(mb.getBufferStart()); @@ -30,12 +30,13 @@ if (const auto *c = findCommand(hdr, LP::segmentLCType)) { - auto sectionHeaders = - ArrayRef
{reinterpret_cast(c + 1), c->nsects}; - for (const Section &sec : sectionHeaders) { - StringRef sectname(sec.sectname, - strnlen(sec.sectname, sizeof(sec.sectname))); - StringRef segname(sec.segname, strnlen(sec.segname, sizeof(sec.segname))); + auto sectionHeaders = ArrayRef{ + reinterpret_cast(c + 1), c->nsects}; + for (const SectionHeader &secHead : sectionHeaders) { + StringRef sectname(secHead.sectname, + strnlen(secHead.sectname, sizeof(secHead.sectname))); + StringRef segname(secHead.segname, + strnlen(secHead.segname, sizeof(secHead.segname))); if ((segname == segment_names::data && sectname == section_names::objcCatList) || (segname == segment_names::text && diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -734,7 +734,7 @@ template static std::vector collectDataInCodeEntries() { using SegmentCommand = typename LP::segment_command; - using Section = typename LP::section; + using SectionHeader = typename LP::section; std::vector dataInCodeEntries; for (const InputFile *inputFile : inputFiles) { @@ -745,8 +745,8 @@ findCommand(objFile->mb.getBufferStart(), LP::segmentLCType)); if (!c) continue; - ArrayRef
sections{reinterpret_cast(c + 1), - c->nsects}; + ArrayRef sectionHeaders{ + reinterpret_cast(c + 1), c->nsects}; ArrayRef entries = objFile->dataInCodeEntries; if (entries.empty()) @@ -754,15 +754,14 @@ // For each code subsection find 'data in code' entries residing in it. // Compute the new offset values as // + - <__TEXT address>. - for (size_t i = 0, n = sections.size(); i < n; ++i) { - const SubsectionMap &subsecMap = objFile->subsections[i]; - for (const SubsectionEntry &subsecEntry : subsecMap) { - const InputSection *isec = subsecEntry.isec; + for (size_t i = 0, n = sectionHeaders.size(); i < n; ++i) { + for (const ParsedSubsection &subsec : objFile->sections[i].subsections) { + const InputSection *isec = subsec.isec; if (!isCodeSection(isec)) continue; if (cast(isec)->shouldOmitFromOutput()) continue; - const uint64_t beginAddr = sections[i].addr + subsecEntry.offset; + const uint64_t beginAddr = sectionHeaders[i].addr + subsec.offset; auto it = llvm::lower_bound( entries, beginAddr, [](const MachO::data_in_code_entry &entry, uint64_t addr) { diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -227,7 +227,7 @@ void writeTo(uint8_t *buf) const override { using SegmentCommand = typename LP::segment_command; - using Section = typename LP::section; + using SectionHeader = typename LP::section; auto *c = reinterpret_cast(buf); buf += sizeof(SegmentCommand); @@ -248,8 +248,8 @@ if (osec->isHidden()) continue; - auto *sectHdr = reinterpret_cast
(buf); - buf += sizeof(Section); + auto *sectHdr = reinterpret_cast(buf); + buf += sizeof(SectionHeader); memcpy(sectHdr->sectname, osec->name.data(), osec->name.size()); memcpy(sectHdr->segname, name.data(), name.size()); @@ -503,7 +503,7 @@ // this is not a time-based and not a random hash. MD5 seems like the least // bad lie we can put here. assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3"); - assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2"); + assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2"); } mutable uint8_t *uuidBuf;