diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1022,15 +1022,17 @@ int inputOrder = 0; for (const InputFile *file : inputFiles) { for (const Section §ion : file->sections) { + const Subsections &subsections = section.subsections; + if (subsections.empty()) + continue; + if (subsections[0].isec->getName() == section_names::compactUnwind) + // Compact unwind entries require special handling elsewhere. + continue; ConcatOutputSection *osec = nullptr; - for (const Subsection &subsection : section.subsections) { + for (const Subsection &subsection : subsections) { if (auto *isec = dyn_cast(subsection.isec)) { if (isec->isCoalescedWeak()) continue; - if (isec->getSegName() == segment_names::ld) { - assert(isec->getName() == section_names::compactUnwind); - continue; - } isec->outSecOff = inputOrder++; if (!osec) osec = ConcatOutputSection::getOrCreateForInput(isec); diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -187,7 +187,7 @@ // info matches. For simplicity, we only handle the case where there are only // symbols at offset zero within the section (which is typically the case with // .subsections_via_symbols.) - auto hasCU = [](Defined *d) { return d->compactUnwind; }; + auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; }; auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU); auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU); if (itA == ia->symbols.end()) @@ -196,8 +196,8 @@ return false; const Defined *da = *itA; const Defined *db = *itB; - if (da->compactUnwind->icfEqClass[icfPass % 2] != - db->compactUnwind->icfEqClass[icfPass % 2] || + if (da->unwindEntry->icfEqClass[icfPass % 2] != + db->unwindEntry->icfEqClass[icfPass % 2] || da->value != 0 || db->value != 0) return false; auto isZero = [](Defined *d) { return d->value == 0; }; @@ -363,8 +363,8 @@ if (isHashable) { hashable.push_back(isec); for (Defined *d : isec->symbols) - if (d->compactUnwind) - hashable.push_back(d->compactUnwind); + if (d->unwindEntry) + hashable.push_back(d->unwindEntry); } else { isec->icfEqClass[0] = ++icfUniqueID; } diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -41,6 +41,7 @@ struct PlatformInfo; class ConcatInputSection; class Symbol; +class Defined; struct Reloc; enum class RefState : uint8_t; @@ -59,8 +60,9 @@ using Subsections = std::vector; struct Section { - // uint64_t address = 0; // TODO(gkm): this will debut with __eh_frame handler + uint64_t address = 0; Subsections subsections; + Section(uint64_t addr) : address(addr){}; }; class InputFile { @@ -114,6 +116,8 @@ ArrayRef dataInCodeEntries; private: + Section *compactUnwindSection = nullptr; + template void parse(); template void parseSections(ArrayRef); template diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -277,17 +277,16 @@ if (sec.align >= 32) { error("alignment " + std::to_string(sec.align) + " of section " + name + " is too large"); - sections.push_back({}); + sections.push_back(sec.addr); continue; } uint32_t align = 1 << sec.align; uint32_t flags = sec.flags; auto splitRecords = [&](int recordSize) -> void { - sections.push_back({}); + sections.push_back(sec.addr); if (data.empty()) return; - Subsections &subsections = sections.back().subsections; subsections.reserve(data.size() / recordSize); auto *isec = make( @@ -319,10 +318,12 @@ isec = make(segname, name, this, data, align, flags); } - sections.push_back({}); + sections.push_back(sec.addr); sections.back().subsections.push_back({0, isec}); } else if (auto recordSize = getRecordSize(segname, name)) { splitRecords(*recordSize); + if (name == section_names::compactUnwind) + compactUnwindSection = §ions.back(); } else if (segname == segment_names::llvm) { // ld64 does not appear to emit contents from sections within the __LLVM // segment. Symbols within those sections point to bitcode metadata @@ -330,7 +331,7 @@ // have the same name without causing duplicate symbol errors. Push an // empty entry to ensure indices line up for the remaining sections. // TODO: Evaluate whether the bitcode metadata is needed. - sections.push_back({}); + sections.push_back(sec.addr); } else { auto *isec = make(segname, name, this, data, align, flags); @@ -340,10 +341,10 @@ // object files that contain them. We filter them out early to avoid // parsing their relocations unnecessarily. But we must still push an // empty entry to ensure the indices line up for the remaining sections. - sections.push_back({}); + sections.push_back(sec.addr); debugSections.push_back(isec); } else { - sections.push_back({}); + sections.push_back(sec.addr); sections.back().subsections.push_back({0, isec}); } } @@ -358,7 +359,7 @@ // same location as an offset relative to the start of the containing // subsection. template -static InputSection *findContainingSubsection(Subsections &subsections, +static InputSection *findContainingSubsection(const Subsections &subsections, T *offset) { static_assert(std::is_same::value || std::is_same::value, @@ -437,8 +438,17 @@ // and insert them. Storing addends in the instruction stream is // possible, but inconvenient and more costly at link time. - int64_t pairedAddend = 0; relocation_info relInfo = relInfos[i]; + bool isSubtrahend = + target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND); + if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) { + // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r + // adds local "EH_Frame1" and "func.eh". Ignore them because they have + // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009. + ++i; + continue; + } + int64_t pairedAddend = 0; if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) { pairedAddend = SignExtend64<24>(relInfo.r_symbolnum); relInfo = relInfos[++i]; @@ -449,8 +459,6 @@ if (relInfo.r_address & R_SCATTERED) fatal("TODO: Scattered relocations not supported"); - bool isSubtrahend = - target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND); int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo); assert(!(embeddedAddend && pairedAddend)); int64_t totalAddend = pairedAddend + embeddedAddend; @@ -691,12 +699,17 @@ Subsections &subsections = sections[i].subsections; if (subsections.empty()) continue; - + InputSection *lastIsec = subsections.back().isec; + if (lastIsec->getName() == section_names::ehFrame) { + // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r + // adds local "EH_Frame1" and "func.eh". Ignore them because they have + // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009. + continue; + } std::vector &symbolIndices = symbolsBySection[i]; uint64_t sectionAddr = sectionHeaders[i].addr; uint32_t sectionAlign = 1u << sectionHeaders[i].align; - InputSection *lastIsec = subsections.back().isec; // Record-based sections have already been split into subsections during // parseSections(), so we simply need to match Symbols to the corresponding // subsection here. @@ -725,11 +738,11 @@ llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { return nList[lhs].n_value < nList[rhs].n_value; }); - Subsection subsec = subsections.back(); for (size_t j = 0; j < symbolIndices.size(); ++j) { uint32_t symIndex = symbolIndices[j]; const NList &sym = nList[symIndex]; StringRef name = strtab + sym.n_strx; + Subsection &subsec = subsections.back(); InputSection *isec = subsec.isec; uint64_t subsecAddr = sectionAddr + subsec.offset; @@ -773,7 +786,6 @@ // emulating that behavior. nextIsec->align = MinAlign(sectionAlign, sym.n_value); subsections.push_back({sym.n_value - sectionAddr, nextIsec}); - subsec = subsections.back(); } } @@ -799,7 +811,7 @@ make(segName.take_front(16), sectName.take_front(16), /*file=*/this, data); isec->live = true; - sections.push_back({}); + sections.push_back(0); sections.back().subsections.push_back({0, isec}); } @@ -869,7 +881,8 @@ parseDebugInfo(); if (config->emitDataInCodeInfo) parseDataInCode(); - registerCompactUnwind(); + if (compactUnwindSection) + registerCompactUnwind(); } void ObjFile::parseDebugInfo() { @@ -912,20 +925,7 @@ // Create pointers from symbols to their associated compact unwind entries. void ObjFile::registerCompactUnwind() { - // First, locate the __compact_unwind section. - Section *cuSection = nullptr; - for (Section §ion : sections) { - if (section.subsections.empty()) - continue; - if (section.subsections[0].isec->getSegName() != segment_names::ld) - continue; - cuSection = §ion; - break; - } - if (!cuSection) - return; - - for (Subsection &subsection : cuSection->subsections) { + for (const Subsection &subsection : compactUnwindSection->subsections) { ConcatInputSection *isec = cast(subsection.isec); // Hack!! Since each CUE contains a different function address, if ICF // operated naively and compared the entire contents of each CUE, entries @@ -939,7 +939,7 @@ ConcatInputSection *referentIsec; for (auto it = isec->relocs.begin(); it != isec->relocs.end();) { Reloc &r = *it; - // We only wish to handle the relocation for CUE::functionAddress. + // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs. if (r.offset != 0) { ++it; continue; @@ -974,7 +974,7 @@ ++it; continue; } - (*symIt)->compactUnwind = isec; + (*symIt)->unwindEntry = isec; // Since we've sliced away the functionAddress, we should remove the // corresponding relocation too. Given that clang emits relocations in // reverse order of address, this relocation should be at the end of the diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -298,6 +298,7 @@ constexpr const char debugInfo[] = "__debug_info"; constexpr const char debugStr[] = "__debug_str"; constexpr const char ehFrame[] = "__eh_frame"; +constexpr const char gccExceptTab[] = "__gcc_except_tab"; constexpr const char export_[] = "__export"; constexpr const char dataInCode[] = "__data_in_code"; constexpr const char functionStarts[] = "__func_starts"; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -120,10 +120,11 @@ it = symbols.begin(); uint64_t v = (*it)->value; for (++it; it != symbols.end(); ++it) { - if ((*it)->value == v) - (*it)->compactUnwind = nullptr; + Defined *d = *it; + if (d->value == v) + d->unwindEntry = nullptr; else - v = (*it)->value; + v = d->value; } } diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp --- a/lld/MachO/MarkLive.cpp +++ b/lld/MachO/MarkLive.cpp @@ -51,8 +51,8 @@ if (auto *d = dyn_cast(s)) { if (d->isec) enqueue(d->isec, d->value); - if (d->compactUnwind) - enqueue(d->compactUnwind, 0); + if (d->unwindEntry) + enqueue(d->unwindEntry, 0); } }; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -168,7 +168,7 @@ uint64_t value; // size is only calculated for regular (non-bitcode) symbols. uint64_t size; - ConcatInputSection *compactUnwind = nullptr; + ConcatInputSection *unwindEntry = nullptr; }; // This enum does double-duty: as a symbol property, it indicates whether & how diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -95,8 +95,8 @@ } void Defined::canonicalize() { - if (compactUnwind) - compactUnwind = compactUnwind->canonical(); + if (unwindEntry) + unwindEntry = unwindEntry->canonical(); if (isec) isec = isec->canonical(); } diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -154,8 +154,8 @@ // entries to the GOT. Hence the use of a MapVector for // UnwindInfoSection::symbols. for (const Defined *d : make_second_range(symbols)) - if (d->compactUnwind) - prepareRelocations(d->compactUnwind); + if (d->unwindEntry) + prepareRelocations(d->unwindEntry); } // Record function symbols that may need entries emitted in __unwind_info, which @@ -169,7 +169,7 @@ // symbols for each unique address regardless of whether they have associated // unwind info. void UnwindInfoSection::addSymbol(const Defined *d) { - if (d->compactUnwind) + if (d->unwindEntry) allEntriesAreOmitted = false; // We don't yet know the final output address of this symbol, but we know that // they are uniquely determined by a combination of the isec and value, so @@ -177,8 +177,8 @@ auto p = symbols.insert({{d->isec, d->value}, d}); // If we have multiple symbols at the same address, only one of them can have // an associated CUE. - if (!p.second && d->compactUnwind) { - assert(!p.first->second->compactUnwind); + if (!p.second && d->unwindEntry) { + assert(!p.first->second->unwindEntry); p.first->second = d; } } @@ -202,11 +202,19 @@ Reloc &r = isec->relocs[i]; assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); + // Functions and LSDA entries always reside in the same object file as the + // compact unwind entries that references them, and thus appear as section + // relocs. There is no need to prepare them. We only prepare relocs for + // personality functions. if (r.offset % sizeof(CompactUnwindEntry) != offsetof(CompactUnwindEntry, personality)) continue; if (auto *s = r.referent.dyn_cast()) { + // Personality functions are nearly always system-defined (e.g., + // ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an + // application provides its own personality function, it might be + // referenced by an extern Defined symbol reloc, or a local section reloc. if (auto *defined = dyn_cast(s)) { // XXX(vyng) This is a a special case for handling duplicate personality // symbols. Note that LD64's behavior is a bit different and it is @@ -291,13 +299,13 @@ const Defined *d = symbolsVec[i].second; // Write the functionAddress. writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2); - if (!d->compactUnwind) + if (!d->unwindEntry) return; // Write the rest of the CUE. - memcpy(buf + sizeof(Ptr), d->compactUnwind->data.data(), - d->compactUnwind->data.size()); - for (const Reloc &r : d->compactUnwind->relocs) { + memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(), + d->unwindEntry->data.size()); + for (const Reloc &r : d->unwindEntry->relocs) { uint64_t referentVA = 0; if (auto *referentSym = r.referent.dyn_cast()) { if (!isa(referentSym)) { @@ -432,9 +440,8 @@ // So we check their relocations instead. // FIXME: should we account for an LSDA at an absolute address? ld64 seems // to support it, but it seems unlikely to be used in practice. - Reloc *lsda1 = - findLsdaReloc(symbolsVec[*foldBegin].second->compactUnwind); - Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->compactUnwind); + Reloc *lsda1 = findLsdaReloc(symbolsVec[*foldBegin].second->unwindEntry); + Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->unwindEntry); if (lsda1 == nullptr && lsda2 == nullptr) continue; if (lsda1 == nullptr || lsda2 == nullptr) @@ -536,7 +543,8 @@ for (size_t idx : cuIndices) { lsdaIndex[idx] = entriesWithLsda.size(); - if (findLsdaReloc(symbolsVec[idx].second->compactUnwind)) + const Defined *d = symbolsVec[idx].second; + if (findLsdaReloc(d->unwindEntry)) entriesWithLsda.push_back(idx); } @@ -614,7 +622,7 @@ for (size_t idx : entriesWithLsda) { const CompactUnwindEntry &cu = cuEntries[idx]; const Defined *d = symbolsVec[idx].second; - if (Reloc *r = findLsdaReloc(d->compactUnwind)) { + if (Reloc *r = findLsdaReloc(d->unwindEntry)) { uint64_t va; if (auto *isec = r->referent.dyn_cast()) { va = isec->getVA(r->addend);