diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1049,8 +1049,7 @@ if (auto *isec = dyn_cast(entry.isec)) { if (isec->isCoalescedWeak()) continue; - if (isec->getSegName() == segment_names::ld) { - assert(isec->getName() == section_names::compactUnwind); + if (isec->getName() == section_names::compactUnwind) { in.unwindInfo->addInput(isec); continue; } @@ -1059,6 +1058,9 @@ osec = ConcatOutputSection::getOrCreateForInput(isec); isec->parent = osec; inputSections.push_back(isec); + } else if (auto *isec = dyn_cast(entry.isec)) { + in.ehFrameSection->addInput(isec); + continue; } else if (auto *isec = dyn_cast(entry.isec)) { if (in.cStringSection->inputOrder == UnspecifiedInputOrder) in.cStringSection->inputOrder = inputOrder++; diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -268,9 +268,10 @@ flags); } subsections.push_back({{0, isec}}); - } else if (config->icfLevel != ICFLevel::none && - (name == section_names::cfString && - segname == segment_names::data)) { + continue; + } + if (config->icfLevel != ICFLevel::none && + (name == section_names::cfString && segname == segment_names::data)) { uint64_t literalSize = target->wordSize == 8 ? 32 : 16; subsections.push_back({}); SubsectionMap &subsecMap = subsections.back(); @@ -279,21 +280,27 @@ {off, make(segname, name, this, data.slice(off, literalSize), align, flags)}); - } else { + continue; + } + if (name == section_names::ehFrame) { auto *isec = - make(segname, name, this, data, align, flags); - if (!(isDebugSection(isec->getFlags()) && - isec->getSegName() == segment_names::dwarf)) { - subsections.push_back({{0, isec}}); - } else { - // Instead of emitting DWARF sections, we emit STABS symbols to the - // object files that contain them. We filter them out early to avoid - // parsing their relocations unnecessarily. But we must still push an - // empty map to ensure the indices line up for the remaining sections. - subsections.push_back({}); - debugSections.push_back(isec); - } + make(segname, name, this, data, align, flags); + subsections.push_back({{0, isec}}); + continue; } + auto *isec = + make(segname, name, this, data, align, flags); + if (isDebugSection(isec->getFlags()) && + isec->getSegName() == segment_names::dwarf) { + // Instead of emitting DWARF sections, we emit STABS symbols to the + // object files that contain them. We filter them out early to avoid + // parsing their relocations unnecessarily. But we must still push an + // empty map to ensure the indices line up for the remaining sections. + subsections.push_back({}); + debugSections.push_back(isec); + continue; + } + subsections.push_back({{0, isec}}); } } @@ -746,6 +753,104 @@ parse(); } +// (See https://www.airs.com/blog/archives/460 for an overview of DWARF +// __eh_frame format) +// +// * Processing unwind info involves a complex mesh of four input sections: +// (1) __LD,__compact_unwind: vector of CUE (Compact Unwind Entry) +// (2) __TEXT,__eh_frame: sequence of DWARF records (see below) +// (3) __TEXT,__gcc_except_tab: LSDA (Language Specific Data Area) +// (4) __TEXT,__text: Functions associated with unwind info +// +// * The __eh_frame section contains two types of record: CIE (Common +// Information Entry) and FDE (Frame Description Entry). CIEs are simple +// to deduplicate, but since an FDE's content is logically tied to its +// associated function body, we must deduplicate function+FDE during ICF. +// +// * CUE contains relocation_info for the __text entrypoint of its associated +// function. It can also contain relocation_info for the optional personality +// function and LSDA +// +// * CIE can contain relocation_info to an optional personality function +// +// * FDE references the __text entrypoint of its associated function, but the +// compiler generates no relocation_info +// +// * FDE can reference __gcc_except_tab for LSDA, but again, there is +// no compiler-generated relocation_info +// +// * The compiler will produce one of four configurations for each function: +// (1) no unwind info: we must create a degenerate CUE +// (2) CUE only: no action necessary +// (3) CUE and FDE: discard the FDE +// (4) FDE only: create a special CUE that references the FDE + +using EhFrameFdeMap = DenseMap; +using ExceptTabMap = DenseMap; +using CompactUnwindMap = DenseMap; + +static void populateEhFrameFDEs(EhFrameFdeMap &ehFrameFDEs, + SubsectionMap &subsecMap) { + InputSection *isec = get(subsecMap.back().isec); + isec->splitIntoRecords(); + for (EhFrameRecord &record : isec->records) { + if (record.id == UINT32_MAX) + continue; + // FIXME: relocate record.functionAddress + ehFrameFDEs[*record.functionAddress] = &record; + } +} + +static void populateExceptTabs(ExceptTabMap &exceptTabs, + SubsectionMap &subsecMap) { + uint64_t off = 0; + for (const SubsectionEntry &entry : subsecMap) { + ConcatInputSection *isec = get(entry.isec); + exceptTabs[off] = isec; + off += isec->getSize(); + } +} + +static void populateCompactUnwinds(CompactUnwindMap &compactUnwinds, + SubsectionMap &subsecMap) { + ConcatInputSection *isec = get(subsecMap.back().isec); + CompactUnwindEntry<> isec->data.data() for () {} +} + +ObjFile::parseUnwindInfo() { + EhFrameFdeMap ehFrameFDEs; + ExceptTabMap exceptTabs; + CompactUnwindMap compactUnwinds; + for (const SubsectionMap &map : subsections) + if (InputSection *isec = map.back().isec) { + if (isec->name == section_names::gccExceptTab) + populateExceptTabs(exceptTabs, map); + else if (isec->name == section_names::compactUnwind) + populateCompactUnwinds(compactUnwinds, map); + else if (isec->name == section_names::ehFrame) + populateEhFrameFDEs(ehFrameFDEs, map); + } + + for (const SubsectionMap &map : subsections) { + if (!isCodeSection(isec)) + continue; + for (const SubsectionEntry &entry : map) { + ConcatInputSection *isec = get(entry.isec); + } + } + + // iterate over CUE, make map for function addresses + // parse DWARF into a vector of CIEs & FDEs, make map for function addresses + // make map for __gcc_except_tab subsection offsets + // for __text subsections: + // if (CUE[s]) + // keep the CUE + // else if (FDE[s]) + // mark FDE as live, create CUE to reference FDE + // else + // create degenerate CUE +} + template void ObjFile::parse() { using Header = typename LP::mach_header; using SegmentCommand = typename LP::segment_command; @@ -790,6 +895,8 @@ parseSymbols(sectionHeaders, nList, strtab, subsectionsViaSymbols); } + parseUnwindInfo(); + // The relocations may refer to the symbols, so we parse them after we have // parsed all the symbols. for (size_t i = 0, n = subsections.size(); i < n; ++i) diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -32,6 +32,7 @@ ConcatKind, CStringLiteralKind, WordLiteralKind, + EhFrameKind, }; Kind kind() const { return shared->sectionKind; } @@ -280,6 +281,67 @@ bool isCfStringSection(const InputSection *); +template struct EhFrameRecord { + EhFrameRecord(ArrayRef data) : data(data) {} + + void parse(); + + ArrayRef data; + + // In the object file, according to DWARF ... + // CIE: id == 0 + // FDE: id is the backward offset to its CIE + // While parsing records, id holds the value read from the record. + // + // After parsing ... + // CIE: id == ~0 + // FDE: id is the index for its CIE within EhFrameInputSection::records[] + uint32_t id; + uint8_t personalityEncoding; + uint8_t lsdaEncoding; + uint8_t fdeEncocding; + + // The CIE optionally contains a pointer to the personality function, and + // MachO has a relocation_info for it. + // + // The FDE contains the function address, and optionally the LSDA address, but + // MachO does not have relocation_info for either, so we need to write code to + // handle relocation fixups. The values for the fixups are the addresses + // assigned to the ConcatInputSection for the function and LSDA respectively. + // + // These two offsets within the FDE identify the location(s) to fixup within + // the FDE data. + uint8_t personalityOffset; + uint8_t funcAddrOffset; + uint8_t lsdaOffset; +}; + +// This corresponds to a __eh_frame section of an input file. +template class EhFrameInputSection : public InputSection { +public: + EhFrameInputSection(StringRef segname, StringRef name) + : InputSection(EhFrameKind, segname, name) {} + + EhFrameInputSection(StringRef segname, StringRef name, InputFile *file, + ArrayRef data, uint32_t align = 1, + uint32_t flags = 0) + : InputSection(EhFrameKind, segname, name, file, data, align, flags) {} + + // FIXME: stubbed for now + uint64_t getOffset(uint64_t off) const override { return off; } + bool isLive(uint64_t off) const override { return true; } + void markLive(uint64_t off) override {} + + ArrayRef getEhRecordData(uint64_t off); + void splitIntoRecords(); + + static bool classof(const InputSection *isec) { + return isec->kind() == EhFrameKind; + } + + std::vector> records; +}; + extern std::vector inputSections; namespace section_names { @@ -298,6 +360,7 @@ constexpr const char debugInfo[] = "__debug_info"; constexpr const char debugStr[] = "__debug_str"; constexpr const char ehFrame[] = "__eh_frame"; +constexpr const char gccExceptTab[] = "__gcc_except_tab"; constexpr const char export_[] = "__export"; constexpr const char dataInCode[] = "__data_in_code"; constexpr const char functionStarts[] = "__func_starts"; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -17,12 +17,14 @@ #include "UnwindInfoSection.h" #include "Writer.h" #include "lld/Common/Memory.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/xxhash.h" using namespace llvm; using namespace llvm::MachO; using namespace llvm::support; +using namespace llvm::support::endian; using namespace lld; using namespace lld::macho; @@ -216,6 +218,92 @@ } } +static void fatalAt(const InputSection *isec, const ArrayRef &data, + const Twine &msg) { + uint64_t offset = data.data() - isec->data.data(); + fatal("corrupted __eh_frame at offset=" + utohexstr(offset) + ": " + msg + + "\n>>> defined in " + toString(isec->getFile())); +} + +uint32_t EhFrameRecord::parse(EhFrameInputSection *isec, + ArrayRef front) { + data = front; + if (data.size() < 4) + fatalAt(isec, data, "CIE/FDE too small"); + uint8_t *buf = data.data(); + uint64_t length = read32le(buf); + buf += 4; + if (length == UINT32_MAX) + fatalAt(isec, data, "CIE/FDE too large"); + length += 4; + if (length > data.size()) + fatalAt(isec, data, "CIE/FDE ends past the end of the section"); + data = data.take_front(length); + + id = read32le(buf); + buf += 4; + if (id == 0) { + // CIE + uint8_t version = *buf++; + assert(version == 1); + auto *augmentation = reinterpret_cast(buf); + buf += strlen(augmentation) + 1; + ++buf; // skip code alignment factor + ++buf; // skip data alignment factor + ++buf; // skip return address register + while (*augmentation++) + switch (*augmentation) { + case 'z': + ++buf; // skip augmentation length + break; + case 'P': + personalityEncoding = *buf++; + assert(personalityEncoding == + (DW_EH_PE_sdata4 | DW_EH_PE_pcrel | DW_EH_PE_indirect)); + personalityOffset = buf - data.data(); + buf += 4; + break; + case 'L': + lsdaEncoding = *buf++; + assert(lsdaEncoding == DW_EH_PE_pcrel); + break; + case 'R': + fdeEncocding = *buf++; + assert(fdeEncocding == DW_EH_PE_pcrel); + break; + case 'S': + case 'B': + break; + default: + llvm_unreachable("unknown DWARF EH augmentation char"); + } + } else { + // FDE + funcAddrOffset = buf - data.data(); + buf += target->wordSize; // skip function address + buf += target->wordSize; // skip function length + ++buf; // skip augmentation length + lsdaOffset = buf - data.data(); + buf += target->wordSize; // skip LSDA address + } +} + +void EhFrameInputSection::splitIntoRecords() { + size_t off = 0; + size_t idx = 0; + DenseMap recordIndex; + while (off < data.size()) { + records.emplace_back(); + EhFrameRecord &record = records.back(); + record.parse(this, data.drop_front(off)); + record.id = record.id ? recordIndex[off - record.id] : UINT32_MAX; + if (record.id > idx) + fatalAt(this, data, "malformed ID in EhFrame record"); + off += record.data.size(); + ++idx; + } +} + bool macho::isCodeSection(const InputSection *isec) { uint32_t type = sectionType(isec->getFlags()); if (type != S_REGULAR && type != S_COALESCED) diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -38,6 +38,7 @@ class LoadCommand; class ObjFile; class UnwindInfoSection; +class EhFrameSection; class SyntheticSection : public OutputSection { public: @@ -603,6 +604,7 @@ StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; UnwindInfoSection *unwindInfo = nullptr; + EhFrameSection *ehFrame = nullptr; ConcatInputSection *imageLoaderCache = nullptr; }; diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h --- a/lld/MachO/UnwindInfoSection.h +++ b/lld/MachO/UnwindInfoSection.h @@ -48,6 +48,29 @@ UnwindInfoSection *makeUnwindInfoSection(); +class EhFrameSection : public SyntheticSection { +public: + bool isNeeded() const override { + return !ehFrameInputSection->records.empty() && !allEntriesAreOmitted; + } + uint64_t getSize() const override { return ehFrameSize; } + virtual void addInput(EhFrameInputSection *) = 0; + std::vector getPieces() { + return ehFrameInputSection->records; + } + void prepareRelocations(); + +protected: + EhFrameSection(); + virtual void prepareRelocations(EhFrameInputSection *) = 0; + + EhFrameInputSection *ehFrameInputSection; + uint64_t ehFrameSize = 0; + bool allEntriesAreOmitted = true; +}; + +EhFrameSection *makeEhFrameSection(); + } // namespace macho } // namespace lld diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -122,6 +122,7 @@ // Map of function offset (from the image base) to an index within the LSDA // array. DenseMap functionToLsdaIndex; + // std::vector> ehVector; std::vector> cuVector; std::vector *> cuPtrVector; std::vector secondLevelPages; @@ -387,7 +388,7 @@ compactUnwindSection->getSize() / sizeof(CompactUnwindEntry); cuVector.resize(cuCount); relocateCompactUnwind(compactUnwindSection, cuVector); - + // removeRedundantEhFrameEntries(ehVector, cuVector); addEntriesForFunctionsWithoutUnwindInfo(cuVector); // Rather than sort & fold the 32-byte entries directly, we create a @@ -662,3 +663,22 @@ else return make>(); } + +EhFrameSection::EhFrameSection() + : SyntheticSection(segment_names::text, section_names::ehFrame) { + align = 4; + ehFrameSection = make(section_names::ehFrameSection); +} + +template +void UnwindInfoSectionImpl::addInput(EhFrameInputSection *isec) { + assert(isec->getName() == section_names::ehFrame); + ehFrameSection->addInput(isec); +} + +EhFrameSection *macho::makeEhFrameSection() { + if (target->wordSize == 8) + return make>(); + else + return make>(); +}