diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1025,8 +1025,10 @@ const Subsections &subsections = section.subsections; if (subsections.empty()) continue; - if (subsections[0].isec->getName() == section_names::compactUnwind) - // Compact unwind entries require special handling elsewhere. + StringRef name = subsections[0].isec->getName(); + // Unwind entries require special handling. + if (name == section_names::compactUnwind || + name == section_names::ehFrame) continue; ConcatOutputSection *osec = nullptr; for (const Subsection &subsection : subsections) { diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -116,6 +116,10 @@ ArrayRef dataInCodeEntries; private: + Section *textSection = nullptr; + Section *initSection = nullptr; + Section *lsdaSection = nullptr; + Section *ehFrameSection = nullptr; Section *compactUnwindSection = nullptr; template void parse(); @@ -131,7 +135,12 @@ const SectionHeader &, Subsections &); void parseDebugInfo(); void parseDataInCode(); + void splitEhFrame(ConcatInputSection *isec, uint64_t addr); void registerCompactUnwind(); + void registerEhFrameUnwind(); + Defined *findFunctionSymbol(const Reloc &r); // for __compact_unwind + Defined *findFunctionSymbol(uint64_t value); // for __eh_frame + Defined *findLsdaSymbol(uint64_t value); // for __eh_frame }; // command-line -sectcreate file diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -61,6 +61,7 @@ #include "lld/Common/Memory.h" #include "lld/Common/Reproduce.h" #include "llvm/ADT/iterator.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/LTO/LTO.h" #include "llvm/Support/Endian.h" @@ -73,6 +74,7 @@ #include using namespace llvm; +using namespace llvm::dwarf; using namespace llvm::MachO; using namespace llvm::support::endian; 
using namespace llvm::sys; @@ -291,6 +293,10 @@ subsections.reserve(data.size() / recordSize); auto *isec = make( segname, name, this, data.slice(0, recordSize), align, flags); + if (name == section_names::compactUnwind) { + compactUnwindSection = §ions.back(); + isec->unwindType = UnwindType::compact; + } subsections.push_back({0, isec}); for (uint64_t off = recordSize; off < data.size(); off += recordSize) { // Copying requires less memory than constructing a fresh InputSection. @@ -322,8 +328,11 @@ sections.back().subsections.push_back({0, isec}); } else if (auto recordSize = getRecordSize(segname, name)) { splitRecords(*recordSize); - if (name == section_names::compactUnwind) - compactUnwindSection = §ions.back(); + } else if (name == section_names::ehFrame) { + auto *isec = + make(segname, name, this, data, align, flags); + splitEhFrame(isec, sec.addr); + ehFrameSection = §ions.back(); } else if (segname == segment_names::llvm) { // ld64 does not appear to emit contents from sections within the __LLVM // segment. Symbols within those sections point to bitcode metadata @@ -346,11 +355,65 @@ } else { sections.push_back(sec.addr); sections.back().subsections.push_back({0, isec}); + if (name == section_names::text) + textSection = §ions.back(); + else if (name == section_names::staticInit) + initSection = §ions.back(); + else if (name == section_names::gccExceptTab) + lsdaSection = §ions.back(); } } } } +static void fatalAt(const InputSection *isec, const ArrayRef &data, + const Twine &msg) { + uint64_t offset = data.data() - isec->data.data(); + fatal("corrupted __eh_frame at offset=" + utohexstr(offset) + ": " + msg + + "\n>>> defined in " + toString(isec->getFile())); +} + +// Parse DWARF CIE (Common Information Entry) & FDEs (Frame Description +// Entries). There is one CIE per object file at subsection[0], and one FDE per +// function at subsection[1..N]. 
+void ObjFile::splitEhFrame(ConcatInputSection *isec, uint64_t addr) { + sections.push_back(addr); + Subsections &subsections = sections.back().subsections; + subsections.push_back({0, isec}); + ArrayRef data = isec->data; + uint64_t off = 0; + while (true) { + if (data.size() < 4) + fatalAt(isec, data, "CIE/FDE too small"); + const uint8_t *buf0 = data.data(); + const uint8_t *buf = buf0; + uint64_t length = read32le(buf); + buf += 4; + if (length == UINT32_MAX) { + length = read64le(buf); + buf += 8; + } + // The length written to an entry does not include the length field itself! NOTE(review): only data.size() >= 4 is checked above, so the read64le() of an extended length and the read32le() of the ID below may run past the data (confirm). + length += (buf - buf0); + if (length > data.size()) + fatalAt(isec, data, "CIE/FDE ends past the end of the section"); + uint32_t id = read32le(buf); + if (off == 0 && id > 0) + fatalAt(isec, data, "First record must be a CIE"); + isec->data = data.take_front(length); + isec->unwindType = id == 0 ? UnwindType::dwarfCIE : UnwindType::dwarfFDE; + data = data.drop_front(length); + if (data.size() == 0) + break; + // The first isec instance comes in as an argument. Subsequent isecs + // are copy-constructed because everything except the data member is + // identical, and copying is cheaper than constructing from scratch. + isec = make(*isec); + off += length; + subsections.push_back({off, isec}); + } +} + // Find the subsection corresponding to the greatest section offset that is <= // that of the given offset. 
// @@ -746,6 +809,7 @@ InputSection *isec = subsec.isec; uint64_t subsecAddr = sectionAddr + subsec.offset; + assert(sym.n_value >= subsecAddr); size_t symbolOffset = sym.n_value - subsecAddr; uint64_t symbolSize = j + 1 < symbolIndices.size() @@ -883,6 +947,8 @@ parseDataInCode(); if (compactUnwindSection) registerCompactUnwind(); + if (ehFrameSection) + registerEhFrameUnwind(); } void ObjFile::parseDebugInfo() { @@ -923,8 +989,145 @@ })); } +// * The following blog entries provide an overview of DWARF +// formats for exception-unwind data: +// https://www.airs.com/blog/archives/460 about .eh_frame +// https://www.airs.com/blog/archives/464 about .gcc_except_table +// +// * Though it is irrelevant to MachO, the complete blog series on DWARF +// unwinding formats includes this post on a Linux-specific section: +// https://www.airs.com/blog/archives/462 about .eh_frame_hdr +// +// * Here are more reference documents for exception handling: +// https://llvm.org/docs/ExceptionHandling.html +// https://refspecs.linuxfoundation.org/abi-eh-1.22.html +// https://github.com/itanium-cxx-abi/cxx-abi/blob/main/exceptions.pdf +// http://www.dwarfstd.org/doc/DWARF5.pdf +// +// * Processing unwind info involves a complex mesh of four input sections: +// (1) __LD,__compact_unwind: vector of CUE (Compact Unwind Entry) +// (2) __TEXT,__eh_frame: sequence of DWARF records (see below) +// (3) __TEXT,__gcc_except_tab: LSDA (Language Specific Data Area) +// (4) __TEXT,__text: Functions associated with unwind info +// +// * The __eh_frame section contains two types of record: CIE (Common +// Information Entry - one per object file) and FDE (Frame Description Entry - +// one per function). CIEs are simple to deduplicate, but since an FDE's +// content is logically tied to its associated function body, we must +// deduplicate function+FDE during ICF. +// +// * CUE contains relocation_info for the __text entrypoint of its associated +// function. 
It can also contain relocation_info for the optional personality +// function and LSDA +// +// * CIE can contain relocation_info to an optional personality function +// +// * FDE references the __text entrypoint of its associated function, but the +// compiler generates no relocation_info +// +// * FDE can reference __gcc_except_tab for LSDA, but again, there is +// no compiler-generated relocation_info +// +// * The compiler will produce unwind information for a function according to +// one of four cases, and the linker must respond accordingly: +// (1) no unwind info: linker must create a degenerate CUE +// (2) CUE only: no action necessary from linker +// (3) CUE and FDE: linker must discard the FDE +// (4) FDE only: linker must create a special CUE that references the FDE +// +// Note: case #3 should disappear over time. There is no reason for the +// compiler to produce both CUE and FDE, since CUE has been the default format +// for many years, and there is no reason to accommodate ancient runtimes that +// can only handle FDEs. Even so, in mid-2021 most LLVM MachO compiler targets +// still produce both CUEs & FDEs. Unfortunately, all of the complexity +// in transforming input to output for section __TEXT,__eh_frame pertains to +// case #3, which shouldn't exist anymore. Ugh. +// +// * When complete, the linker output will contain CUEs for every function, +// including those that are merely references to DWARF FDEs. The output will +// contain FDEs only for functions whose unwind info cannot be represented as +// a full CUE. The output will have a __TEXT,__eh_frame section only if there +// are one or more FDEs. + +// Create pointers from symbols to their associated unwind entries, either +// compact (CUEs) or DWARF (FDEs). + +static Defined *findSymbol(const ConcatInputSection *isec, uint64_t value) { + // The functionAddress relocations are typically section relocations. 
+ // However, unwind info operates on a per-symbol basis, so we search for + // the function symbol here. + auto it = + llvm::lower_bound(isec->symbols, value, [](Defined *d, uint64_t val) { + return d->value < val; + }); + // The relocation should point at the exact address of a symbol (with no + // addend). + if (it != isec->symbols.end() && (*it)->value == value) + return *it; + // assert(isec->wasCoalesced); + return nullptr; +} + +static Defined *findSymbol(const Section §ion, uint64_t value) { + value -= section.address; + auto it = llvm::lower_bound(section.subsections, value, + [](const Subsection subsec, uint64_t val) { + return subsec.offset < val; + }); + if (it != section.subsections.end()) { + ConcatInputSection *isec = cast(it->isec); + if (Defined *d = findSymbol(isec, it->offset - value)) + return d; + // assert(isec->wasCoalesced); NOTE(review): lower_bound picks the first subsection whose offset is >= value, so `it->offset - value` is zero only on an exact match (confirm that `value` minus the containing subsection's offset was not intended). + } + return nullptr; +} + +Defined *ObjFile::findFunctionSymbol(uint64_t value) { + uint64_t textOffset = textSection ? value - textSection->address : ~0ull; + uint64_t initOffset = initSection ? 
value - initSection->address : ~0ull; + + if (textSection && textOffset < initOffset) { + if (Defined *d = findSymbol(*textSection, value)) + return d; + } else if (initSection && initOffset < textOffset) { + if (Defined *d = findSymbol(*initSection, value)) + return d; + } + return nullptr; +} + +Defined *ObjFile::findLsdaSymbol(uint64_t value) { + if (lsdaSection) + if (Defined *d = findSymbol(*lsdaSection, value)) + return d; + warn("LSDA symbol not found at " + to_hexString(value)); + return nullptr; +} + +Defined *ObjFile::findFunctionSymbol(const Reloc &r) { + ConcatInputSection *referentIsec; + uint64_t value = r.addend; + if (auto *sym = + cast_or_null(r.referent.dyn_cast())) { + value += sym->value; + referentIsec = cast(sym->isec); + } else { + referentIsec = + cast(r.referent.dyn_cast()); + } + if (referentIsec->getSegName() != segment_names::text) + error("compact unwind references address in " + toString(referentIsec) + + " which is not in segment __TEXT"); + if (Defined *d = findSymbol(referentIsec, value)) + return d; + assert(referentIsec->wasCoalesced); + return nullptr; +} + // Create pointers from symbols to their associated compact unwind entries. void ObjFile::registerCompactUnwind() { + assert(textSection); for (const Subsection &subsection : compactUnwindSection->subsections) { ConcatInputSection *isec = cast(subsection.isec); // Hack!! Since each CUE contains a different function address, if ICF @@ -935,52 +1138,152 @@ // of the corresponding relocations.) We rely on `relocateCompactUnwind()` // to correctly handle these truncated input sections. isec->data = isec->data.slice(target->wordSize); - - ConcatInputSection *referentIsec; for (auto it = isec->relocs.begin(); it != isec->relocs.end();) { Reloc &r = *it; // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs. 
if (r.offset != 0) { ++it; - continue; + } else if (Defined *d = findFunctionSymbol(r)) { + d->unwindEntry = isec; + // Since we've sliced away the functionAddress, we should remove the + // corresponding relocation too. Given that clang emits relocations in + // reverse order of address, this relocation should be at the end of the + // vector for most of our input object files, so this is typically an + // O(1) operation. + it = isec->relocs.erase(it); + } else { + ++it; } - uint64_t add = r.addend; - if (auto *sym = cast_or_null(r.referent.dyn_cast())) { - // Check whether the symbol defined in this file is the prevailing one. - // Skip if it is e.g. a weak def that didn't prevail. - if (sym->getFile() != this) { - ++it; + } + } +} + +static uint64_t readEhFrameEncodedAddress(const uint8_t *buf, uint64_t offset) { + return target->wordSize == 8 ? read64le(buf) + offset + : static_cast(read32le(buf) + offset); +} + +// Object files only contain relocation records for personality function +// addresses in the CIE. We must synthesize Reloc entries in FDEs for the LSDA +// and function entrypoint addresses. We have already registered the CUEs, so +// function symbols with CUEs already have Defined::unwindEntry set. When +// a function has a CUE, we ignore the FDE. 
+ +void ObjFile::registerEhFrameUnwind() { + assert(textSection); + assert(textSection->address == 0); + uint8_t lsdaEncode; + ConcatInputSection *cieIsec = nullptr; + for (const Subsection &subsection : ehFrameSection->subsections) { + ConcatInputSection *isec = cast(subsection.isec); + const uint8_t *buf = isec->data.data(); + // uint8_t length = read32le(buf); + buf += 4; // skip length; NOTE(review): assumes the 4-byte length form, though splitEhFrame() also accepts the UINT32_MAX-escaped 64-bit form + uint32_t idOffset = buf - isec->data.data(); + uint32_t id = read32le(buf); + buf += 4; // skip ID + if (id == 0) { + // These are the DWARF encodings always used by MachO: + const uint8_t AbsoluteEncode = DW_EH_PE_pcrel | DW_EH_PE_absptr; + const uint8_t IndirectEncode = + DW_EH_PE_sdata4 | DW_EH_PE_pcrel | DW_EH_PE_indirect; + cieIsec = isec; // FDEs need to find their CIE + // CIE: + // Augmentation has two pieces: format string--which only appears in + // CIEs--and data--which appears in both CIEs and FDEs. The string contains the + // format, and the data follows the format. The augmentation data for 'P' + // (personality) appears in the CIE, while data for 'L' (LSDA) and 'R' + // (function address & size) appear in the FDE. When we parse a CIE, + // we must save info extracted from the augmentation string in order + // to properly parse the FDEs that immediately follow it. + buf++; // version + uint8_t persEncode = lsdaEncode = DW_EH_PE_omit; + auto *augmentString = reinterpret_cast(buf); + buf += strlen(augmentString) + 1; // skip augmentation string + buf += 3; // skip code align, data align, return addr reg + while (*augmentString) { + // Numbers below are encoded as ULEB128, but expected quantities + // fit within a single byte, so we can skip the parsing machinery. 
+ switch (*augmentString++) { + case 'z': + assert(*buf < 128); + buf++; // skip augmentation data length + break; + case 'P': + persEncode = *buf++; + assert(persEncode == IndirectEncode); + buf += 4; + break; + case 'L': + lsdaEncode = *buf++; + assert(lsdaEncode == AbsoluteEncode); + break; + case 'R': + assert(*buf == AbsoluteEncode); + buf++; + break; + case 'S': // signal trampoline + case 'B': // signing b-key + break; + default: + llvm_unreachable("unknown DWARF EH augmentation char"); + } + } + if (persEncode != DW_EH_PE_omit) { + // The compiler already generates a reloc for the personality-function + // pointer. I believe there is nothing for us to do here. + } + } else { + // FDE: + uint8_t wordWidth = target->wordSize == 8 ? 3 : 2; + uint32_t funcAddrOffset = buf - isec->data.data(); + uint64_t funcAddr = readEhFrameEncodedAddress(buf, funcAddrOffset); + buf += target->wordSize; // skip function address + buf += target->wordSize; // skip function length + if (Defined *d = findFunctionSymbol(funcAddr)) { + if (d->unwindEntry) { + // We already have a CUE for this function, and CUE takes precedence + assert(d->unwindEntry->unwindType == UnwindType::compact); + continue; + } + // If the next 8 bytes are 0, i.e., 0 (aug-data-length) followed by + // seven 0 (DW_CFA_nop), then this is a degenerate FDE that means "no + // unwind info". MC is buggy here: it omits the CUE, and generates the + // degenerate FDE, so that the linked output has FDEs for functions + // without unwind info. We prefer the degenerate CUE over the FDE, so + // we correct course here in LLD, which will synthesize one later. + if (read64le(buf) == 0) continue; + assert(isec->unwindType == UnwindType::dwarfFDE); + d->unwindEntry = isec; + // The ID field of an FDE is a positive integer that is subtracted + // from the current position (pcrel) to indicate the start of its + // associated CIE. This is an arch-independent reloc type that + // is absent from all architectures. 
We commandeer + isec->relocs.push_back({/*type=*/EHFRAME_RELOC_FDE_TO_CIE, + /*pcrel=*/true, 2, idOffset, + /*addend=*/0, /*referent=*/cieIsec}); + + isec->relocs.push_back({/*type=*/GENERIC_RELOC_VANILLA, + /*pcrel=*/true, wordWidth, funcAddrOffset, + /*addend=*/0, /*referent=*/d}); + if (lsdaEncode != DW_EH_PE_omit) { + uint32_t lsdaAddrOffset = buf - isec->data.data(); + uint64_t lsdaAddr = readEhFrameEncodedAddress(buf, lsdaAddrOffset); + buf += target->wordSize; // skip LSDA address + if (Defined *d = findLsdaSymbol(lsdaAddr)) + isec->relocs.push_back({/*type=*/EHFRAME_RELOC_FDE_TO_LSDA, + /*pcrel=*/true, wordWidth, lsdaAddrOffset, + /*addend=*/0, + /*referent=*/d}); + else + ; // warn(toString(this) + ": no symbol for FDE LSDA at address " + + // to_hexString(funcAddr)); } - add += sym->value; - referentIsec = cast(sym->isec); } else { - referentIsec = - cast(r.referent.dyn_cast()); - } - if (referentIsec->getSegName() != segment_names::text) - error("compact unwind references address in " + toString(referentIsec) + - " which is not in segment __TEXT"); - // The functionAddress relocations are typically section relocations. - // However, unwind info operates on a per-symbol basis, so we search for - // the function symbol here. - auto symIt = llvm::lower_bound( - referentIsec->symbols, add, - [](Defined *d, uint64_t add) { return d->value < add; }); - // The relocation should point at the exact address of a symbol (with no - // addend). - if (symIt == referentIsec->symbols.end() || (*symIt)->value != add) { - assert(referentIsec->wasCoalesced); - ++it; - continue; + ; // warn(toString(this) + ": no symbol for FDE func at address " + + // to_hexString(funcAddr)); } - (*symIt)->unwindEntry = isec; - // Since we've sliced away the functionAddress, we should remove the - // corresponding relocation too. 
Given that clang emits relocations in - // reverse order of address, this relocation should be at the end of the - // vector for most of our input object files, so this is typically an O(1) - // operation. - it = isec->relocs.erase(it); } } } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -96,6 +96,25 @@ const Shared *const shared; }; +// unwindEntry can be compact (CUE) and/or DWARF (FDE). When both are present, +// the CUE takes precedence. Older MC emitted both, and expected the linker to +// discard the redundant FDEs. Newer MC does the right thing, which is that when +// a frame can be represented by a CUE, emit only the CUE. When a frame is too +// complex for a CUE, then emit only an FDE. When a frame has no unwind info, +// older MC doesn't emit the trivial no-op CUE, but always emits the trivial +// no-op FDE. In this case, LD64 preserves the FDE since there is no CUE to +// override it. LLD does the right thing, which is to synthesize the no-op CUE. +enum class UnwindType : uint8_t { + notUnwind, // default: ordinary InputSection, not unwind info + compact, // compact unwind entry + dwarfCIE, // DWARF Common Information Entry + dwarfFDE, // DWARF Frame Description Entry +}; + +inline bool isUnwindTypeDwarf(UnwindType type) { + return type == UnwindType::dwarfCIE || type == UnwindType::dwarfFDE; +} + // ConcatInputSections are combined into (Concat)OutputSections through simple // concatenation, in contrast with literal sections which may have their // contents merged before output. @@ -143,6 +162,7 @@ // first and not copied to the output. bool wasCoalesced = false; bool live = !config->deadStrip; + UnwindType unwindType = UnwindType::notUnwind; // This variable has two usages. Initially, it represents the input order. // After assignAddresses is called, it represents the offset from the // beginning of the output section this section was assigned to. 
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -38,6 +38,7 @@ class LoadCommand; class ObjFile; class UnwindInfoSection; +class EhFrameSection; class SyntheticSection : public OutputSection { public: @@ -606,6 +607,7 @@ StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; UnwindInfoSection *unwindInfo = nullptr; + EhFrameSection *ehFrame = nullptr; ConcatInputSection *imageLoaderCache = nullptr; }; diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h --- a/lld/MachO/UnwindInfoSection.h +++ b/lld/MachO/UnwindInfoSection.h @@ -19,10 +19,18 @@ namespace macho { class UnwindInfoSection : public SyntheticSection { + // In some ways, __TEXT,__eh_frame is a distinct and independent section. In + // other ways, it is subordinate-to and dependent-upon __TEXT,__unwind_info to + // hold the common symbols MapVector, and to handle some common processing. + // We express its independence by giving it a class and singleton object of + // its own. We express its dependence by allowing it friend access to symbols, + // and by handling FDEs alongside CUEs in prepareRelocations(). + friend class EhFrameSection; + public: // If all functions are free of unwind info, we can omit the unwind info // section entirely. 
- bool isNeeded() const override { return !allEntriesAreOmitted; } + bool isNeeded() const override { return hasCUEs; } uint64_t getSize() const override { return unwindInfoSize; } void addSymbol(const Defined *); void prepareRelocations(); @@ -36,11 +44,24 @@ symbols; std::vector symbolsVec; uint64_t unwindInfoSize = 0; - bool allEntriesAreOmitted = true; + bool hasCUEs = false; + bool hasFDEs = false; }; UnwindInfoSection *makeUnwindInfoSection(); +class EhFrameSection : public SyntheticSection { +public: + EhFrameSection(); + + bool isNeeded() const override { return in.unwindInfo->hasFDEs; } + uint64_t getSize() const override { return size; } + void finalize() override; + void writeTo(uint8_t *buf) const override; + + uint64_t size = 0; +}; + } // namespace macho } // namespace lld diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -92,9 +92,6 @@ // compact_unwind_encoding.h for an overview of the format we are encoding // here. -// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410 -// TODO(gkm): how do we align the 2nd-level pages? - template struct CompactUnwindEntry { Ptr functionAddress; uint32_t functionLength; @@ -169,8 +166,14 @@ // symbols for each unique address regardless of whether they have associated // unwind info. void UnwindInfoSection::addSymbol(const Defined *d) { - if (d->unwindEntry) - allEntriesAreOmitted = false; + if (d->unwindEntry) { + if (d->unwindEntry->unwindType == UnwindType::compact) + hasCUEs = true; +#if 0 // FIXME(gkm) + else if (d->unwindEntry->unwindType == UnwindType::dwarfFDE) + hasFDEs = true; +#endif + } // We don't yet know the final output address of this symbol, but we know that // they are uniquely determined by a combination of the isec and value, so // we use that as the key here. 
@@ -183,9 +186,9 @@ } } -// Compact unwind relocations have different semantics, so we handle them in a -// separate code path from regular relocations. First, we do not wish to add -// rebase opcodes for __LD,__compact_unwind, because that section doesn't +// Unwind relocations (both CUE and FDE) have different semantics, so we handle +// them in a separate code path from regular relocations. First, we do not wish +// to add rebase opcodes for __LD,__compact_unwind, because that section doesn't // actually end up in the final binary. Second, personality pointers always // reside in the GOT and must be treated specially. template @@ -200,16 +203,25 @@ // live, it wouldn't reduce number of got entries. for (size_t i = 0; i < isec->relocs.size(); ++i) { Reloc &r = isec->relocs[i]; + if (isec->unwindType == UnwindType::compact) { + // Functions and LSDA entries always reside in the same object file as the + // compact unwind entries that reference them, and thus appear as section + // relocs. There is no need to prepare them. We only prepare relocs for + // personality functions. + if (r.offset % sizeof(CompactUnwindEntry) != + offsetof(CompactUnwindEntry, personality)) + continue; + } else if (isUnwindTypeDwarf(isec->unwindType)) { + // Do not handle arch-independent EHFRAME_RELOC_* types here + continue; // FIXME(gkm): unconditional, so the isValidEhFrameRelocType() check below is unreachable + if (isValidEhFrameRelocType(r.type)) + continue; + } else { + llvm_unreachable("UnwindInfoSectionImpl::prepareRelocations()"); + } + // This is a personality reloc in either a CUE or FDE assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); - // Functions and LSDA entries always reside in the same object file as the - // compact unwind entries that references them, and thus appear as section - // relocs. There is no need to prepare them. We only prepare relocs for - // personality functions. 
- if (r.offset % sizeof(CompactUnwindEntry) != - offsetof(CompactUnwindEntry, personality)) - continue; - if (auto *s = r.referent.dyn_cast()) { // Personality functions are nearly always system-defined (e.g., // ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an @@ -302,6 +314,10 @@ writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2); if (!d->unwindEntry) return; + if (d->unwindEntry->unwindType == UnwindType::dwarfFDE) { + return; // TODO(gkm): write CUE that refers to FDE + } + assert(d->unwindEntry->unwindType == UnwindType::compact); // Write the rest of the CUE. memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(), @@ -691,3 +707,37 @@ else return make>(); } + +EhFrameSection::EhFrameSection() + : SyntheticSection(segment_names::text, section_names::ehFrame) { + align = 4; +} + +void EhFrameSection::finalize() { + uint64_t isecAddr = addr; + for (const Defined *d : make_second_range(in.unwindInfo->symbols)) { + if (d->unwindEntry == nullptr || + !isUnwindTypeDwarf(d->unwindEntry->unwindType)) + continue; + ConcatInputSection *isec = cast(d->isec); // NOTE(review): this lays out the symbol's own section (d->isec), not d->unwindEntry; same in writeTo() (confirm) + if (isec->shouldOmitFromOutput()) + continue; + isecAddr = alignTo(isecAddr, isec->align); + isec->outSecOff = isecAddr - addr; + isec->isFinal = true; + isecAddr += isec->getSize(); + }; + size = isecAddr - addr; +} + +void EhFrameSection::writeTo(uint8_t *buf) const { + for (const Defined *d : make_second_range(in.unwindInfo->symbols)) { + if (d->unwindEntry == nullptr || + !isUnwindTypeDwarf(d->unwindEntry->unwindType)) + continue; + ConcatInputSection *isec = cast(d->isec); + if (isec->shouldOmitFromOutput()) + continue; + isec->writeTo(buf + isec->outSecOff); + } +} diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1176,6 +1176,7 @@ in.stubs = make(); in.stubHelper = make(); in.unwindInfo = 
makeUnwindInfoSection(); + in.ehFrame = make(); // This section contains space for just a single word, and will be used by // dyld 
to cache an address to the image loader it uses.