diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1022,15 +1022,17 @@ int inputOrder = 0; for (const InputFile *file : inputFiles) { for (const ParsedSection &section : file->sections) { + const ParsedSubsecVec &subsections = section.subsections; + if (subsections.empty()) + continue; + if (subsections[0].isec->getName() == section_names::compactUnwind) + // Compact unwind entries require special handling elsewhere. + continue; ConcatOutputSection *osec = nullptr; - for (const ParsedSubsection &subsection : section.subsections) { + for (const ParsedSubsection &subsection : subsections) { if (auto *isec = dyn_cast(subsection.isec)) { if (isec->isCoalescedWeak()) continue; - if (isec->getSegName() == segment_names::ld) { - assert(isec->getName() == section_names::compactUnwind); - continue; - } isec->outSecOff = inputOrder++; if (!osec) osec = ConcatOutputSection::getOrCreateForInput(isec); diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -340,7 +340,7 @@ // ICF can't fold functions with unwind info if (isHashable) for (Defined *d : isec->symbols) - if (d->compactUnwind) { + if (d->unwindEntry) { isHashable = false; break; } diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -41,6 +41,7 @@ struct PlatformInfo; class ConcatInputSection; class Symbol; +class Defined; struct Reloc; enum class RefState : uint8_t; @@ -59,8 +60,9 @@ using ParsedSubsecVec = std::vector; struct ParsedSection { - // uint64_t offset = 0; // TODO(gkm): this will debut with __eh_frame hacking + uint64_t address = 0; ParsedSubsecVec subsections; + ParsedSection(uint64_t addr) : address(addr){}; }; class InputFile { @@ -114,6 +116,12 @@ ArrayRef dataInCodeEntries; private: + ParsedSection *textSection = nullptr; + ParsedSection *staticInitSection = nullptr; + ParsedSection *gccExceptTabSection = nullptr; + 
ParsedSection *compactUnwindSection = nullptr; + ParsedSection *ehFrameSection = nullptr; + template void parse(); template void parseSections(ArrayRef); template @@ -127,7 +135,12 @@ const SectionHeader &, ParsedSubsecVec &); void parseDebugInfo(); void parseDataInCode(); - void registerCompactUnwind(); + void splitEhFrame(); + void registerCompactUnwindEntries(); + void registerEhFrameUnwindEntries(); + Defined *findFunctionSymbol(const Reloc &r); // for __compact_unwind + Defined *findFunctionSymbol(uint64_t value); // for __eh_frame + Defined *findLSDASymbol(uint64_t value); // for __eh_frame }; // command-line -sectcreate file diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -61,6 +61,7 @@ #include "lld/Common/Memory.h" #include "lld/Common/Reproduce.h" #include "llvm/ADT/iterator.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/LTO/LTO.h" #include "llvm/Support/Endian.h" @@ -71,6 +72,7 @@ #include "llvm/TextAPI/InterfaceFile.h" using namespace llvm; +using namespace llvm::dwarf; using namespace llvm::MachO; using namespace llvm::support::endian; using namespace llvm::sys; @@ -275,17 +277,16 @@ if (sec.align >= 32) { error("alignment " + std::to_string(sec.align) + " of section " + name + " is too large"); - sections.push_back({}); + sections.push_back(sec.addr); continue; } uint32_t align = 1 << sec.align; uint32_t flags = sec.flags; auto splitRecords = [&](int recordSize) -> void { - sections.push_back({}); + sections.push_back(sec.addr); if (data.empty()) return; - ParsedSubsecVec &subsections = sections.back().subsections; subsections.reserve(data.size() / recordSize); auto *isec = make( @@ -317,10 +318,21 @@ isec = make(segname, name, this, data, align, flags); } - sections.push_back({}); + sections.push_back(sec.addr); sections.back().subsections.push_back({0, isec}); } else if (auto recordSize = getRecordSize(segname, name)) { 
splitRecords(*recordSize); + if (name == section_names::compactUnwind) + compactUnwindSection = &sections.back(); + } else if (name == section_names::ehFrame) { + sections.push_back(sec.addr); + if (data.size() == 0) + return; + auto *isec = + make(segname, name, this, data, align, flags); + ehFrameSection = &sections.back(); + sections.back().subsections.push_back({0, isec}); + splitEhFrame(); } else if (segname == segment_names::llvm) { // ld64 does not appear to emit contents from sections within the __LLVM // segment. Symbols within those sections point to bitcode metadata @@ -328,7 +340,7 @@ // have the same name without causing duplicate symbol errors. Push an // empty entry to ensure indices line up for the remaining sections. // TODO: Evaluate whether the bitcode metadata is needed. - sections.push_back({}); + sections.push_back(sec.addr); } else { auto *isec = make(segname, name, this, data, align, flags); @@ -338,16 +350,64 @@ // object files that contain them. We filter them out early to avoid // parsing their relocations unnecessarily. But we must still push an // empty entry to ensure the indices line up for the remaining sections. - sections.push_back({}); + sections.push_back(sec.addr); debugSections.push_back(isec); } else { - sections.push_back({}); + sections.push_back(sec.addr); sections.back().subsections.push_back({0, isec}); + if (name == section_names::text) + textSection = &sections.back(); + else if (name == section_names::staticInit) + staticInitSection = &sections.back(); + else if (name == section_names::gccExceptTab) + gccExceptTabSection = &sections.back(); } } } } +static void fatalAt(const InputSection *isec, const ArrayRef &data, + const Twine &msg) { + uint64_t offset = data.data() - isec->data.data(); + fatal("corrupted __eh_frame at offset=" + utohexstr(offset) + ": " + msg + + "\n>>> defined in " + toString(isec->getFile())); +} + +// Parse DWARF CIE (Common Information Entry) & FDEs (Frame Description +// Entries). 
There is one CIE per object file at subsection[0], and one FDE per +// function at subsection[1..N]. +void ObjFile::splitEhFrame() { + ParsedSubsecVec &subsections = sections.back().subsections; + ConcatInputSection *isec = cast(subsections.back().isec); + ArrayRef data = isec->data; + uint64_t off = 0; + while (true) { + if (data.size() < 4) + fatalAt(isec, data, "CIE/FDE too small"); + const uint8_t *buf0 = data.data(); + const uint8_t *buf = buf0; + uint64_t length = read32le(buf); + buf += 4; + if (length == UINT32_MAX) { + length = read64le(buf); + buf += 8; + } + length += (buf - buf0); + if (length > data.size()) + fatalAt(isec, data, "CIE/FDE ends past the end of the section"); + uint32_t id = read32le(buf); + if (off == 0 && id > 0) + fatalAt(isec, data, "First record must be a CIE"); + isec->data = data.take_front(length); + data = data.drop_front(length); + if (data.size() == 0) + break; + isec = make(*isec); + off += length; + subsections.push_back({off, isec}); + } +} + // Find the subsection corresponding to the greatest section offset that is <= // that of the given offset. // @@ -355,8 +415,8 @@ // any subsection splitting has occurred). It will be updated to represent the // same location as an offset relative to the start of the containing // subsection. 
-static InputSection *findContainingSubsection(ParsedSubsecVec &subsections, - uint64_t *offset) { +static InputSection * +findContainingSubsection(const ParsedSubsecVec &subsections, uint64_t *offset) { auto it = std::prev(llvm::upper_bound( subsections, *offset, [](uint64_t value, ParsedSubsection subsec) { return value < subsec.offset; @@ -786,7 +846,7 @@ make(segName.take_front(16), sectName.take_front(16), /*file=*/this, data); isec->live = true; - sections.push_back({}); + sections.push_back(0); sections.back().subsections.push_back({0, isec}); } @@ -856,7 +916,10 @@ parseDebugInfo(); if (config->emitDataInCodeInfo) parseDataInCode(); - registerCompactUnwind(); + if (compactUnwindSection) + registerCompactUnwindEntries(); + if (false && ehFrameSection) + registerEhFrameUnwindEntries(); } void ObjFile::parseDebugInfo() { @@ -897,51 +960,276 @@ })); } -// Create pointers from symbols to their associated compact unwind entries. -void ObjFile::registerCompactUnwind() { - // First, locate the __compact_unwind section. 
- ParsedSection *cuSection = nullptr; - for (ParsedSection &section : sections) { - if (section.subsections.empty()) - continue; - if (section.subsections[0].isec->getSegName() != segment_names::ld) - continue; - cuSection = &section; - break; +// * The following blog entries provide an overview of DWARF +// formats for exception-unwind data: +// https://www.airs.com/blog/archives/460 about .eh_frame +// https://www.airs.com/blog/archives/464 about .gcc_except_table +// +// * Though it is irrelevant to MachO, the complete blog series on DWARF +// unwinding formats includes this post on a Linux-specific section: +// https://www.airs.com/blog/archives/462 about .eh_frame_hdr +// +// * Here are more reference documents for exception handling: +// https://llvm.org/docs/ExceptionHandling.html +// https://refspecs.linuxfoundation.org/abi-eh-1.22.html +// https://github.com/itanium-cxx-abi/cxx-abi/blob/main/exceptions.pdf +// http://www.dwarfstd.org/doc/DWARF5.pdf +// +// * Processing unwind info involves a complex mesh of four input sections: +// (1) __LD,__compact_unwind: vector of CUE (Compact Unwind Entry) +// (2) __TEXT,__eh_frame: sequence of DWARF records (see below) +// (3) __TEXT,__gcc_except_tab: LSDA (Language Specific Data Area) +// (4) __TEXT,__text: Functions associated with unwind info +// +// * The __eh_frame section contains two types of record: CIE (Common +// Information Entry - one per object file) and FDE (Frame Description Entry - +// one per function). CIEs are simple to deduplicate, but since an FDE's +// content is logically tied to its associated function body, we must +// deduplicate function+FDE during ICF. +// +// * CUE contains relocation_info for the __text entrypoint of its associated +// function. 
It can also contain relocation_info for the optional personality +// function and LSDA +// +// * CIE can contain relocation_info to an optional personality function +// +// * FDE references the __text entrypoint of its associated function, but the +// compiler generates no relocation_info +// +// * FDE can reference __gcc_except_tab for LSDA, but again, there is +// no compiler-generated relocation_info +// +// * The compiler will produce unwind information for a function according to +// one of four cases, and the linker must respond accordingly: +// (1) no unwind info: linker must create a degenerate CUE +// (2) CUE only: no action necessary from linker +// (3) CUE and FDE: linker must discard the FDE +// (4) FDE only: linker must create a special CUE that references the FDE +// +// Note: case #3 should disappear over time. There is no reason for the +// compiler to produce both CUE and FDE, since CUE has been the default format +// for many years, and there is no reason to accommodate ancient runtimes that +// can only handle FDEs. Even so, in mid-2021 most LLVM MachO compiler targets +// still produce both CUEs & FDEs. Unfortunately, all of the complexity +// in transforming input to output for section __TEXT,__eh_frame pertains to +// case #3, which shouldn't exist anymore. Ugh. +// +// * When complete, the linker output will contain CUEs for every function, +// including those that are merely references to DWARF FDEs. The output will +// contain FDEs only for functions whose unwind info cannot be represented as +// a full CUE. The output will have a __TEXT,__eh_frame section only if there +// are one or more FDEs. + +// Create pointers from symbols to their associated unwind entries, either +// compact (CUEs) or DWARF (FDEs). + +static Defined *findSymbol(const ConcatInputSection *isec, uint64_t value) { + // The functionAddress relocations are typically section relocations. 
+ // However, unwind info operates on a per-symbol basis, so we search for + // the function symbol here. + auto it = + llvm::lower_bound(isec->symbols, value, [](Defined *d, uint64_t val) { + return d->value < val; + }); + // The relocation should point at the exact address of a symbol (with no + // addend). + if (it != isec->symbols.end() && (*it)->value == value) + return *it; + return nullptr; +} + +static Defined *findSymbol(const ParsedSection &section, uint64_t value) { + value -= section.address; + auto it = llvm::lower_bound(section.subsections, value, + [](const ParsedSubsection &subsec, uint64_t val) { + return subsec.offset < val; + }); + if (it != section.subsections.end()) { + ConcatInputSection *isec = cast(it->isec); + if (Defined *d = findSymbol(isec, it->offset - value)) + return d; + assert(isec->wasCoalesced); } - if (!cuSection) - return; + return nullptr; +} + +Defined *ObjFile::findFunctionSymbol(uint64_t value) { + if (textSection) + if (Defined *d = findSymbol(*textSection, value)) + return d; + if (staticInitSection) + if (Defined *d = findSymbol(*staticInitSection, value)) + return d; + return nullptr; +} - for (ParsedSubsection &subsection : cuSection->subsections) { +Defined *ObjFile::findLSDASymbol(uint64_t value) { + if (Defined *d = findSymbol(*textSection, value)) + return d; + if (staticInitSection) + if (Defined *d = findSymbol(*staticInitSection, value)) + return d; + return nullptr; +} + +Defined *ObjFile::findFunctionSymbol(const macho::Reloc &r) { + ConcatInputSection *referentIsec; + uint64_t value = r.addend; + if (auto *sym = + cast_or_null(r.referent.dyn_cast())) { + value += sym->value; + referentIsec = cast(sym->isec); + } else { + referentIsec = + cast(r.referent.dyn_cast()); + } + if (referentIsec->getSegName() != segment_names::text) + error("compact unwind references address in " + toString(referentIsec) + + " which is not in segment __TEXT"); + Defined *d = findSymbol(referentIsec, value); + assert(d || 
referentIsec->wasCoalesced); + return d; +} + +void ObjFile::registerCompactUnwindEntries() { + assert(textSection); + for (const ParsedSubsection &subsection : compactUnwindSection->subsections) { ConcatInputSection *isec = cast(subsection.isec); - ConcatInputSection *referentIsec; - for (const Reloc &r : isec->relocs) { + for (const macho::Reloc &r : isec->relocs) { + // Skip the personality and LSDA relocs, which have non-zero offset. if (r.offset != 0) continue; - uint64_t add = r.addend; - if (auto *sym = cast_or_null(r.referent.dyn_cast())) { - add += sym->value; - referentIsec = cast(sym->isec); - } else { - referentIsec = - cast(r.referent.dyn_cast()); + if (Defined *d = findFunctionSymbol(r)) { + d->unwindEntry = isec; + d->hasCompactUnwind = true; } - if (referentIsec->getSegName() != segment_names::text) - error("compact unwind references address in " + toString(referentIsec) + - " which is not in segment __TEXT"); - // The functionAddress relocations are typically section relocations. - // However, unwind info operates on a per-symbol basis, so we search for - // the function symbol here. - auto it = llvm::lower_bound( - referentIsec->symbols, add, - [](Defined *d, uint64_t add) { return d->value < add; }); - // The relocation should point at the exact address of a symbol (with no - // addend). - if (it == referentIsec->symbols.end() || (*it)->value != add) { - assert(referentIsec->wasCoalesced); - continue; + } + } +} + +static uint64_t readEhFrameEncodedAddress(const uint8_t *buf, uint64_t offset) { + return target->wordSize == 8 ? read64le(buf) + offset + : static_cast(read32le(buf) + offset); +} + +// Object files only contain relocation records for personality function +// addresses in the CIE. We must synthesize Reloc entries in FDEs for the LSDA +// and function entrypoint addresses. We have already registered the CUEs, so +// function symbols with CUEs have Defined::hasCompactUnwind == true. When +// a function has a CUE, we ignore the FDE. 
+ +// TODO(gkm): need ... +// * VMA of __eh_frame section +// * assume VMA of __text is 0 +// * sectionEntry of __text for lower_bound() on funcAddr + +void ObjFile::registerEhFrameUnwindEntries() { + assert(textSection); + assert(textSection->address == 0); + const uint8_t PersEncode = + DW_EH_PE_sdata4 | DW_EH_PE_pcrel | DW_EH_PE_indirect; + const uint8_t FuncEncode = DW_EH_PE_pcrel | DW_EH_PE_absptr; + const uint8_t LsdaEncode = DW_EH_PE_pcrel | DW_EH_PE_absptr; + uint8_t persEncode; + uint8_t lsdaEncode; + uint8_t funcEncode; + for (const ParsedSubsection &subsection : ehFrameSection->subsections) { + ConcatInputSection *isec = cast(subsection.isec); + const uint8_t *buf = isec->data.data(); + // uint8_t length = read32le(buf); + buf += 4; // skip length + uint32_t id = read32le(buf); + buf += 4; // skip ID + if (id == 0) { + // CIE: + // Augmentation has two pieces: format string--which only appears in + // CIEs--and data--which appears in both CIEs and FDEs. The string contains the + // format, and the data follows the format. The augmentation data for 'P' + // (personality) appears in the CIE, while data for 'L' (LSDA) and 'R' + // (function address & size) appear in the FDE. When we parse a CIE, + // we must save info extracted from the augmentation string in order + // to properly parse the FDEs that immediately follow it. + buf++; // version + persEncode = lsdaEncode = funcEncode = DW_EH_PE_omit; + auto *augmentString = reinterpret_cast(buf); + buf += strlen(augmentString) + 1; // skip augmentation string + buf += 3; // skip code align, data align, return addr reg + while (*augmentString) { + // Numbers below are encoded as ULEB128, but expected quantities + // fit within a single byte, so we can skip the parsing machinery. 
+ switch (*augmentString++) { + case 'z': + assert(*buf < 128); + buf++; // skip augmentation data length + break; + case 'P': + persEncode = *buf++; + assert(persEncode == PersEncode); + buf += 4; + break; + case 'L': + lsdaEncode = *buf++; + assert(lsdaEncode == LsdaEncode); + break; + case 'R': + funcEncode = *buf++; + assert(funcEncode == FuncEncode); + break; + case 'S': // signal trampoline + case 'B': // signing b-key + break; + default: + llvm_unreachable("unknown DWARF EH augmentation char"); + } + } + } else { + // FDE: + uint8_t width = target->wordSize == 8 ? 3 : 2; + uint64_t offset = + ehFrameSection->address + subsection.offset + buf - isec->data.data(); + // TODO(gkm): decode pointer at *buf and resolve to the + // ConcatInputSection for the function. + uint64_t funcAddr = readEhFrameEncodedAddress(buf, offset); + buf += target->wordSize; // skip function address + buf += target->wordSize; // skip function length + if (Defined *d = findFunctionSymbol(funcAddr)) { + if (d->hasCompactUnwind) + continue; + if (read64le(buf) == 0) { + continue; + // If the next 8 bytes are 0, i.e., 0 (aug-data-length) followed by + // seven 0 (DW_CFA_nop), then this is a degenerate FDE for a function + // that has no unwind info. It is a bug in MC that this object file + // has no degenerate CUE for this function, so we must synthesize one + // here. 
+ ConcatInputSection *cuIsec = + nullptr; // FIXME: create within CU section + d->hasCompactUnwind = true; + d->unwindEntry = cuIsec; + cuIsec->relocs.push_back({/*type=*/GENERIC_RELOC_VANILLA, + /*pcrel=*/false, width, offset, + /*addend=*/0, + /*referent=*/d}); + continue; + } + isec->relocs.push_back({/*type=*/GENERIC_RELOC_VANILLA, + /*pcrel=*/true, width, offset, + /*addend=*/0, /*referent=*/d}); + assert(d->unwindEntry == nullptr); + d->unwindEntry = isec; + d->hasDwarfFDE = true; + if (lsdaEncode != DW_EH_PE_omit) { + uint64_t offset = ehFrameSection->address + subsection.offset + buf - + isec->data.data(); + uint64_t lsdaAddr = readEhFrameEncodedAddress(buf, offset); + if (Defined *d = findLSDASymbol(lsdaAddr)) + isec->relocs.push_back({/*type=*/GENERIC_RELOC_VANILLA, + /*pcrel=*/true, width, offset, + /*addend=*/0, + /*referent=*/d}); + buf += target->wordSize; // lsda address + } } - (*it)->compactUnwind = isec; } } } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -303,6 +303,7 @@ constexpr const char debugInfo[] = "__debug_info"; constexpr const char debugStr[] = "__debug_str"; constexpr const char ehFrame[] = "__eh_frame"; +constexpr const char gccExceptTab[] = "__gcc_except_tab"; constexpr const char export_[] = "__export"; constexpr const char dataInCode[] = "__data_in_code"; constexpr const char functionStarts[] = "__func_starts"; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -116,7 +116,7 @@ uint64_t v = (*it)->value; for (++it; it != symbols.end(); ++it) { if ((*it)->value == v) - (*it)->compactUnwind = nullptr; + (*it)->unwindEntry = nullptr; else v = (*it)->value; } diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp --- a/lld/MachO/MarkLive.cpp +++ b/lld/MachO/MarkLive.cpp @@ -51,8 +51,8 @@ if (auto *d = dyn_cast(s)) { if (d->isec) enqueue(d->isec, d->value); - if 
(d->compactUnwind) - enqueue(d->compactUnwind, 0); + if (d->unwindEntry) + enqueue(d->unwindEntry, 0); } }; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -138,7 +138,9 @@ uint64_t value; // size is only calculated for regular (non-bitcode) symbols. uint64_t size; - ConcatInputSection *compactUnwind = nullptr; + // hasCompactUnwind -> unwindEntry points to a CUE + // !hasCompactUnwind && hasDwarfFDE -> unwindEntry points to an FDE + ConcatInputSection *unwindEntry = nullptr; bool overridesWeakDef : 1; // Whether this symbol should appear in the output binary's export trie. @@ -159,6 +161,17 @@ // metadata. This is information only for the static linker and not written // to the output. bool noDeadStrip : 1; + // unwindEntry can be compact (CUE) and/or DWARF (FDE). When both are present, + // the CUE takes precedence. Older MC emitted both, and expected the linker to + // discard the redundant FDEs. Newer MC does the right thing, which is that + // when a frame can be represented by a CUE, emit only the CUE. When a frame + // is too complex for a CUE, then emit only an FDE. When a frame has no unwind + // info, older MC doesn't emit the trivial no-op CUE, but always emits the + // trivial no-op FDE. In this case, LD64 preserves the FDE since there is no + // CUE to override it. LLD does the right thing, which is to synthesize the + // no-op CUE. 
+ bool hasCompactUnwind : 1; + bool hasDwarfFDE : 1; private: const bool weakDef : 1; diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -81,8 +81,8 @@ } void Defined::canonicalize() { - if (compactUnwind) - compactUnwind = compactUnwind->canonical(); + if (unwindEntry) + unwindEntry = unwindEntry->canonical(); if (isec) isec = isec->canonical(); } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -38,6 +38,7 @@ class LoadCommand; class ObjFile; class UnwindInfoSection; +class EhFrameSection; class SyntheticSection : public OutputSection { public: @@ -606,6 +607,7 @@ StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; UnwindInfoSection *unwindInfo = nullptr; + EhFrameSection *ehFrame = nullptr; ConcatInputSection *imageLoaderCache = nullptr; }; diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h --- a/lld/MachO/UnwindInfoSection.h +++ b/lld/MachO/UnwindInfoSection.h @@ -40,6 +40,25 @@ UnwindInfoSection *makeUnwindInfoSection(); +class EhFrameSection : public SyntheticSection { +public: + EhFrameSection(); + + bool isNeeded() const override { return !allEntriesAreOmitted; } + uint64_t getSize() const override { return ehFrameSize; } + void finalize() override; + void writeTo(uint8_t *buf) const override; + + llvm::MapVector, + const Defined *> + symbols; + ConcatOutputSection *ehFrameSection; + uint64_t ehFrameSize = 0; + bool allEntriesAreOmitted = true; +}; + +EhFrameSection *makeEhFrameSection(); + } // namespace macho } // namespace lld diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -116,7 +116,7 @@ class UnwindInfoSectionImpl final : public UnwindInfoSection { public: void prepareRelocations(ConcatInputSection *) override; - void 
relocateCompactUnwind(std::vector> &); + void relocateCompactUnwindEntries(std::vector> &); void finalize() override; void writeTo(uint8_t *buf) const override; @@ -147,8 +147,8 @@ // entries to the GOT. Hence the use of a MapVector for // UnwindInfoSection::symbols. for (const Defined *d : make_second_range(symbols)) - if (d->compactUnwind) - prepareRelocations(d->compactUnwind); + if (d->unwindEntry) + prepareRelocations(d->unwindEntry); } // Record function symbols that may need entries emitted in __unwind_info, which @@ -162,7 +162,7 @@ // symbols for each unique address regardless of whether they have associated // unwind info. void UnwindInfoSection::addSymbol(const Defined *d) { - if (d->compactUnwind) + if (d->unwindEntry) allEntriesAreOmitted = false; // We don't yet know the final output address of this symbol, but we know that // they are uniquely determined by a combination of the isec and value, so @@ -170,8 +170,8 @@ auto p = symbols.insert({{d->isec, d->value}, d}); // If we have multiple symbols at the same address, only one of them can have // an associated CUE. - if (!p.second && d->compactUnwind) { - assert(!p.first->second->compactUnwind); + if (!p.second && d->unwindEntry) { + assert(!p.first->second->unwindEntry); p.first->second = d; } } @@ -179,8 +179,8 @@ // Compact unwind relocations have different semantics, so we handle them in a // separate code path from regular relocations. First, we do not wish to add // rebase opcodes for __LD,__compact_unwind, because that section doesn't -// actually end up in the final binary. Second, personality pointers always -// reside in the GOT and must be treated specially. +// actually end up in the final binary. Second, personality-function pointers +// usually reside in the GOT and must be treated specially. 
template void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { assert(!isec->shouldOmitFromOutput() && @@ -195,11 +195,19 @@ Reloc &r = isec->relocs[i]; assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); + // Functions and LSDA entries always reside in the same object file as the + // compact unwind entries that reference them, and thus appear as section + // relocs. There is no need to prepare them. We only prepare relocs for + // personality functions. if (r.offset % sizeof(CompactUnwindEntry) != offsetof(CompactUnwindEntry, personality)) continue; if (auto *s = r.referent.dyn_cast()) { + // Personality functions are nearly always system-defined (e.g., + // ___gxx_personality_v0 for C++) and relocated as DyLib symbols. + // When an application provides its own personality function, + // it might be an extern Defined symbol reloc, or a local section reloc. if (auto *defined = dyn_cast(s)) { // XXX(vyng) This is a a special case for handling duplicate personality // symbols. Note that LD64's behavior is a bit different and it is @@ -276,7 +284,7 @@ // relocations here: since we are not emitting the pre-link CU section, there // is no source address to make a relative location meaningful. template -void UnwindInfoSectionImpl::relocateCompactUnwind( +void UnwindInfoSectionImpl::relocateCompactUnwindEntries( std::vector> &cuVector) { auto symbolsVec = symbols.takeVector(); parallelForEachN(0, symbolsVec.size(), [&](size_t i) { @@ -285,12 +293,12 @@ const Defined *d = symbolsVec[i].second; // Write the functionAddress. writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2); - if (!d->compactUnwind) + if (!d->unwindEntry) return; // Write the rest of the CUE. 
- memcpy(buf, d->compactUnwind->data.data(), d->compactUnwind->data.size()); - for (const Reloc &r : d->compactUnwind->relocs) { + memcpy(buf, d->unwindEntry->data.data(), d->unwindEntry->data.size()); + for (const Reloc &r : d->unwindEntry->relocs) { uint64_t referentVA = 0; if (auto *referentSym = r.referent.dyn_cast()) { if (!isa(referentSym)) { @@ -378,7 +386,7 @@ // encoding+personality+lsda. Folding is necessary because it reduces // the number of CU entries by as much as 3 orders of magnitude! cuVector.resize(symbols.size()); - relocateCompactUnwind(cuVector); + relocateCompactUnwindEntries(cuVector); // Rather than sort & fold the 32-byte entries directly, we create a // vector of pointers to entries and sort & fold that instead. @@ -629,3 +637,17 @@ else return make>(); } + +EhFrameSection::EhFrameSection() + : SyntheticSection(segment_names::text, section_names::ehFrame) { + align = 4; + ehFrameSection = make(section_names::ehFrame); +} + +void EhFrameSection::finalize() { + // TODO(gkm): write me +} + +void EhFrameSection::writeTo(uint8_t *buf) const { + // TODO(gkm): write me +} diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1176,6 +1176,7 @@ in.stubs = make(); in.stubHelper = make(); in.unwindInfo = makeUnwindInfoSection(); + in.ehFrame = make(); // This section contains space for just a single word, and will be used by // dyld to cache an address to the image loader it uses.