// Size in bytes of one second-level page of the unwind-info section.
#define UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE 4096

// Number of entries that fit in a regular second-level page once its header
// has been subtracted from the page size.
#define UNWIND_INFO_REGULAR_SECOND_LEVEL_ENTRIES_MAX                           \
  ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE -                                       \
    sizeof(unwind_info_regular_second_level_page_header)) /                    \
   sizeof(unwind_info_regular_second_level_entry))

// Number of 32-bit entries that fit in a compressed second-level page once
// its header has been subtracted from the page size.
#define UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX                        \
  ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE -                                       \
    sizeof(unwind_info_compressed_second_level_page_header)) /                 \
   sizeof(uint32_t))

// A compressed entry is one 32-bit word: the low FUNC_OFFSET_BITS bits hold
// the function offset and the remaining high bits hold the encoding index.
// The masks are built from an unsigned 1 so that the shift stays well defined
// even if the field widths are ever changed to reach the sign bit.
#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK                          \
  ((1u << UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS) - 1u)

#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX_BITS                       \
  (32 - UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS)
#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX_MASK                       \
  ((1u << UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX_BITS) - 1u)

// Extracts the function-offset field (low bits) of a compressed entry.
#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry)                        \
  ((entry) & UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK)

// Extracts the encoding-index field (high bits) of a compressed entry.
#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry)                     \
  (((entry) >> UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS) &                \
   UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX_MASK)

// One (function offset, LSDA offset) pair of the LSDA array.
struct unwind_info_section_header_lsda_entry {
  uint32_t functionOffset;
  uint32_t lsdaOffset;
};
+include_directories(${LLVM_MAIN_SRC_DIR}/../libunwind/include) + add_lld_library(lldMachO2 Arch/X86_64.cpp + UnwindInfo.cpp Driver.cpp DriverUtils.cpp ExportTrie.cpp diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -22,6 +22,7 @@ constexpr const char data[] = "__DATA"; constexpr const char linkEdit[] = "__LINKEDIT"; constexpr const char dataConst[] = "__DATA_CONST"; +constexpr const char ld[] = "__LD"; // really a non-output segment } // namespace segment_names @@ -40,6 +41,8 @@ } const std::vector &getSections() const { return sections; } + const OutputSection *getSection(StringRef name) const; + size_t numNonHiddenSections() const; uint64_t fileOff = 0; @@ -55,6 +58,7 @@ extern std::vector outputSegments; OutputSegment *getOrCreateOutputSegment(StringRef name); +const OutputSection *getOutputSection(StringRef segName, StringRef secName); } // namespace macho } // namespace lld diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -49,6 +49,14 @@ sections.push_back(osec); } +const OutputSection *OutputSegment::getSection(StringRef name) const { + for (const OutputSection *osec : sections) { + if (name == osec->name) + return osec; + } + return nullptr; +} + static llvm::DenseMap nameToOutputSegment; std::vector macho::outputSegments; @@ -65,3 +73,11 @@ outputSegments.push_back(segRef); return segRef; } + +const OutputSection *macho::getOutputSection(StringRef segName, + StringRef secName) { + OutputSegment *oseg = nameToOutputSegment.lookup(segName); + if (!oseg) + return nullptr; + return oseg->getSection(secName); +} diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -19,6 +19,7 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/raw_ostream.h" +#include 
namespace lld { namespace macho { @@ -35,6 +36,10 @@ constexpr const char stringTable[] = "__string_table"; constexpr const char got[] = "__got"; constexpr const char threadPtrs[] = "__thread_ptrs"; +constexpr const char unwindInfo[] = "__unwind_info"; +// these are not synthetic, but in service of synthetic __unwind_info +constexpr const char compactUnwind[] = "__compact_unwind"; +constexpr const char ehFrame[] = "__eh_frame"; } // namespace section_names @@ -386,6 +391,8 @@ std::vector symbols; }; +class UnwindInfoSection; + struct InStruct { MachHeaderSection *header = nullptr; BindingSection *binding = nullptr; @@ -398,6 +405,7 @@ StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; ImageLoaderCacheSection *imageLoaderCache = nullptr; + UnwindInfoSection *unwindInfo = nullptr; }; extern InStruct in; diff --git a/lld/MachO/UnwindInfo.h b/lld/MachO/UnwindInfo.h new file mode 100644 --- /dev/null +++ b/lld/MachO/UnwindInfo.h @@ -0,0 +1,68 @@ +//===- UnwindInfo.h -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_UNWIND_INFO_H +#define LLD_MACHO_UNWIND_INFO_H + +#include "MergedOutputSection.h" +#include "SyntheticSections.h" +#include +#include +#include + +#include "mach-o/compact_unwind_encoding.h" + +// In 2020, we mostly care about 64-bit targets: x86_64 and arm64 +struct CompactUnwindEntry64 { + uint64_t functionAddress; + uint32_t functionLength; + uint32_t encoding; + uint64_t personality; + uint64_t lsda; +}; + +// FIXME(gkm): someday we might care about 32-bit targets: x86 & arm +struct CompactUnwindEntry32 { + uint32_t functionAddress; + uint32_t functionLength; + uint32_t encoding; + uint32_t personality; + uint32_t lsda; +}; + +namespace lld { +namespace macho { + +class UnwindInfoSection : public SyntheticSection { +public: + UnwindInfoSection(); + uint64_t getSize() const override; + bool isNeeded() const override; + void writeTo(uint8_t *buf) const override; + +private: + mutable std::map commonEncodingIndexes; + mutable std::vector> + commonEncodings; + mutable std::vector personalities; + mutable std::vector lsdaEntries; + mutable std::vector cuVector; + mutable std::vector cuPtrVector; + mutable std::vector::const_iterator> + pageBounds; + mutable const MergedOutputSection *cuSection = nullptr; + mutable uint64_t level2PagesOffset = 0; + mutable uint64_t unwindInfoSize = std::numeric_limits::max(); + mutable bool hasCompactUnwindInputs = false; + mutable bool checkedCompactUnwindInputs = false; +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/UnwindInfo.cpp b/lld/MachO/UnwindInfo.cpp new file mode 100644 --- /dev/null +++ b/lld/MachO/UnwindInfo.cpp @@ -0,0 +1,284 @@ +//===- UnwindInfo.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UnwindInfo.h" +#include "Config.h" +#include "InputSection.h" +#include "MergedOutputSection.h" +#include "OutputSection.h" +#include "OutputSegment.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" + +#include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/MachO.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace lld; +using namespace lld::macho; + +// Compact Unwind format is a Mach-O evolution of DWARF Unwind that +// optimizes space and exception-time lookup. Most DWARF unwind +// entries can be replaced with Compact Unwind entries, but the ones +// that cannot are retained in DWARF form. +// +// This comment will address macro-level organization of the pre-link +// and post-link compact unwind tables. For micro-level organization +// pertaining to the bitfield layout of the 32-bit compact unwind +// entries, see libunwind/include/mach-o/compact_unwind_encoding.h +// +// Important clarifying factoids: +// +// * __LD,__compact_unwind is the compact unwind format for compiler +// output and linker input. It is never a final output. +// +// * __TEXT,__unwind_info is the compact unwind format for final +// linker output. It is never an input. +// +// * __TEXT,__eh_frame is the DWARF format for both linker input and output. +// +// __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd +// level) by ascending address, and the pages are referenced by an +// index (1st level) in the section header. +// +// TODO(gkm): ... continue block comment ... + +// TODO(gkm): prune __eh_frame entries superseded by __unwind_info +// TODO(gkm): how do we align the 2nd-level pages? 
+ +UnwindInfoSection::UnwindInfoSection() + : SyntheticSection(segment_names::text, section_names::unwindInfo) {} + +bool UnwindInfoSection::isNeeded() const { + if (checkedCompactUnwindInputs) + return hasCompactUnwindInputs; + for (InputSection *isec : inputSections) { + if (isec->segname == segment_names::ld && + isec->name == section_names::compactUnwind) + hasCompactUnwindInputs = true; + } + checkedCompactUnwindInputs = true; + return hasCompactUnwindInputs; +} + +// Scan the __LD,__compact_unwind entries and compute the space needs of +// __TEXT,__unwind_info and __TEXT,__eh_frame + +uint64_t UnwindInfoSection::getSize() const { + if (unwindInfoSize != std::numeric_limits::max()) + return unwindInfoSize; + unwindInfoSize = 0; + cuSection = dyn_cast_or_null( + getOutputSection(segment_names::ld, section_names::compactUnwind)); + if (cuSection == nullptr) + return unwindInfoSize; + + // At this point, the address space for __TEXT,__text has been + // assigned, so we ought to be able to relocate the + // __LD,__compact_unwind entries into a temporary buffer. + // First, count the entries, and ensure there are no + // relocations against floating sections. 
+ size_t cuCount = 0; + for (const InputSection *isec : cuSection->inputs) { + size_t size = isec->getSize(); + assert(size % sizeof(CompactUnwindEntry64) == 0); + cuCount += (size / sizeof(CompactUnwindEntry64)); + for (const Reloc &r : isec->relocs) { + if (const auto *targetIsec = r.target.dyn_cast()) { + if (targetIsec->getVA() == 0) { + error(Twine("__compact_unwind relocation is unresolved against " + + targetIsec->segname + "," + targetIsec->name)); + } + } else { + error(Twine("__compact_unwind relocation is not section based: " + + targetIsec->segname + "," + targetIsec->name)); + } + } + } + // Relocate all __LD,__compact_unwind entries + cuVector.resize(cuCount); + cuSection->writeTo(reinterpret_cast(cuVector.data())); + + // Fold adjacent entries with matching encoding+personality+lsda + // Retain the original cuVector + cuPtrVector.reserve(cuCount); + std::map encodingFrequencies; + for (size_t i = 0; i < cuCount;) { + const CompactUnwindEntry64 &prev = cuVector[i]; + cuPtrVector.emplace_back(&prev); + encodingFrequencies[prev.encoding]++; + while (++i < cuCount && prev.encoding == cuVector[i].encoding && + prev.personality == cuVector[i].personality && + prev.lsda == cuVector[i].lsda) { + ; + } + } + + // make a table of common encodings, and sort by descending frequency + for (const auto &frequency : encodingFrequencies) { + commonEncodings.emplace_back(frequency); + } + std::sort(commonEncodings.begin(), commonEncodings.end(), + [](const std::pair &a, + const std::pair &b) { + if (a.second == b.second) + // When freqnecies match, secondarily sort on encoding + // to maintain parity with validate-unwind-info.py + return a.first > b.first; + return a.second > b.second; + }); + + // create map from encoding to common-encoding-table index compact + // encoding entries use 7 bits to index the common-encoding table + if (commonEncodings.size() >= 128) { + error("TODO(gkm): handle common encodings table overflow"); + } + size_t i = 0; + for (const auto 
&encoding : commonEncodings) { + commonEncodingIndexes[encoding.first] = i++; + } + + // Split into pages, limited by capacity of a page and the 24-bit + // range of function offset + // + // Record the page splits as a vector of iterators on cuPtrVector + // such that succesive elements form a semi-open interval. E.g., + // page X's bounds are thus: [ pageBounds[X] .. pageBounds[X+1] ) + // + // Note that pageBouinds.size() is one greater than the number of + // pages, where pageBounds.cend() holds the sentinel cuPtrVector.cend() + pageBounds.push_back(cuPtrVector.cbegin()); + // TODO(gkm): cut 1st page short to accommodate section headers + for (size_t i = 0;;) { + // Limit the search to entries that can fit witin a 4 KiB page. + std::vector::const_iterator it0 = + pageBounds[0] + i; + std::vector::const_iterator itN = + pageBounds[0] + + std::min(i + UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX, + cuPtrVector.size()); + // exclude entries with functionOffset that would overflow 24 bits + uint64_t functionAddressMax = + (*it0)->functionAddress + UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK; + std::vector::const_iterator it = + std::lower_bound(it0, itN, nullptr, + [functionAddressMax](const CompactUnwindEntry64 *a, + const CompactUnwindEntry64 *b) { + return a->functionAddress < functionAddressMax; + }); + pageBounds.push_back(it); + if (it == cuPtrVector.cend()) + break; + i = it - cuPtrVector.cbegin(); + } + + // compute size of __TEXT,__unwind_info section + level2PagesOffset = + sizeof(unwind_info_section_header) + + commonEncodings.size() * sizeof(uint32_t) + + personalities.size() * sizeof(uint32_t) + + pageBounds.size() * sizeof(unwind_info_section_header_index_entry) + + lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_entry); + unwindInfoSize = level2PagesOffset + + (pageBounds.size() - 1) * + sizeof(unwind_info_compressed_second_level_page_header) + + cuPtrVector.size() * sizeof(uint32_t); + + return unwindInfoSize; +} + +// All inputs 
are relocated and output adddress known, so write! + +void UnwindInfoSection::writeTo(uint8_t *buf) const { + // relocate __compact_unwind entries into cuBuf + if (getSize() == 0) + return; + + // section header + auto *uip = reinterpret_cast(buf); + uip->version = 1; + uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header); + uip->commonEncodingsArrayCount = commonEncodings.size(); + uip->personalityArraySectionOffset = + uip->commonEncodingsArraySectionOffset + + (uip->commonEncodingsArrayCount * sizeof(uint32_t)); + uip->personalityArrayCount = personalities.size(); + uip->indexSectionOffset = uip->personalityArraySectionOffset + + (uip->personalityArrayCount * sizeof(uint32_t)); + uip->indexCount = pageBounds.size(); + + // Common encodings + auto *i32p = reinterpret_cast(&uip[1]); + for (const auto &encoding : commonEncodings) { + *i32p++ = encoding.first; + } + + // Personalities + for (const auto &personality : personalities) { + *i32p++ = personality; + } + + // Level-1 index + uint32_t lsdaOffset = + uip->indexSectionOffset + + uip->indexCount * sizeof(unwind_info_section_header_index_entry); + auto *iep = reinterpret_cast(i32p); + for (size_t i = 0; i < pageBounds.size() - 1; i++) { + iep->functionOffset = (*pageBounds[i])->functionAddress; + iep->secondLevelPagesSectionOffset = level2PagesOffset; + iep->lsdaIndexArraySectionOffset = lsdaOffset; + iep++; + size_t entryCount = pageBounds[i + 1] - pageBounds[i]; + uint64_t pageSize = sizeof(unwind_info_section_header_index_entry) + + entryCount * sizeof(uint32_t); + level2PagesOffset += pageSize; + } + // Level-1 sentinel + const CompactUnwindEntry64 &cuEnd = cuVector.back(); + iep->functionOffset = cuEnd.functionAddress + cuEnd.functionLength; + iep->secondLevelPagesSectionOffset = 0; + iep->lsdaIndexArraySectionOffset = lsdaOffset; + iep++; + + // LSDAs + auto *lep = reinterpret_cast(iep); + for (const auto &lsda : lsdaEntries) { + lep->functionOffset = lsda.functionOffset; + 
lep->lsdaOffset = lsda.lsdaOffset; + } + + // Level-2 pages + auto *p2p = + reinterpret_cast(lep); + for (size_t i = 0; i < pageBounds.size() - 1; i++) { + p2p->kind = 3; + p2p->entryPageOffset = + sizeof(unwind_info_compressed_second_level_page_header); + p2p->entryCount = pageBounds[i + 1] - pageBounds[i]; + p2p->encodingsPageOffset = + p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t); + p2p->encodingsCount = 0; + auto *ep = reinterpret_cast(&p2p[1]); + std::vector::const_iterator it = + pageBounds[i]; + uintptr_t functionAddressBase = (*it)->functionAddress; + while (it < pageBounds[i + 1]) { + const CompactUnwindEntry64 *cuep = *it++; + *ep++ = ((commonEncodingIndexes[cuep->encoding] + << UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS) | + (cuep->functionAddress - functionAddressBase)); + } + p2p = + reinterpret_cast(ep); + } + auto *bufN = reinterpret_cast(p2p); + assert(getSize() == (size_t)(bufN - buf)); +} diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -17,6 +17,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "UnwindInfo.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" @@ -348,6 +349,8 @@ uint8_t segIndex = 0; for (OutputSegment *seg : outputSegments) { + if (seg->name == segment_names::ld) + break; in.header->addLoadCommand(make(seg->name, seg)); seg->index = segIndex++; } @@ -406,7 +409,10 @@ .Case(segment_names::text, -1) // Make sure __LINKEDIT is the last segment (i.e. all its hidden // sections must be ordered after other sections). 
- .Case(segment_names::linkEdit, std::numeric_limits::max()) + .Case(segment_names::linkEdit, 100) + // FIXME(gkm): __LD,__compact_unwind is a normal I/O section with -r + // __LD,__compact_unwind isn't an output segment, so shove it past the end + .Case(segment_names::ld, 101) .Default(0); } @@ -414,8 +420,11 @@ StringRef segname = osec->parent->name; // Sections are uniquely identified by their segment + section name. if (segname == segment_names::text) { - if (osec->name == section_names::header) - return -1; + return StringSwitch(osec->name) + .Case(section_names::header, -1) + .Case(section_names::unwindInfo, 10) + .Case(section_names::ehFrame, 11) + .Default(0); } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) .Case(section_names::binding, -6) @@ -429,7 +438,7 @@ // ZeroFill sections must always be the at the end of their segments, // otherwise subsequent sections may get overwritten with zeroes at runtime. if (isZeroFill(osec->flags)) - return std::numeric_limits::max(); + return 100; return 0; } @@ -547,9 +556,12 @@ void Writer::writeSections() { uint8_t *buf = buffer->getBufferStart(); - for (OutputSegment *seg : outputSegments) + for (OutputSegment *seg : outputSegments) { + if (seg->name == segment_names::ld) + break; for (OutputSection *osec : seg->getSections()) osec->writeTo(buf + osec->fileOff); + } } void Writer::run() { @@ -579,9 +591,11 @@ // Note that at this point, __LINKEDIT sections are empty, but we need to // determine addresses of other segments/sections before generating its // contents. - for (OutputSegment *seg : outputSegments) - if (seg != linkEditSegment) - assignAddresses(seg); + for (OutputSegment *seg : outputSegments) { + if (seg == linkEditSegment) + break; + assignAddresses(seg); + } // Fill __LINKEDIT contents. 
in.binding->finalizeContents(); @@ -618,4 +632,5 @@ in.stubs = make(); in.stubHelper = make(); in.imageLoaderCache = make(); + in.unwindInfo = make(); } diff --git a/lld/test/MachO/compact-unwind.test b/lld/test/MachO/compact-unwind.test new file mode 100644 --- /dev/null +++ b/lld/test/MachO/compact-unwind.test @@ -0,0 +1,24 @@ +# REQUIRES: x86 +# FIXME(gkm): I want "requires python", but that disables the test +# even on Linux, which definitely has Python. Why?! + +# FIXME(gkm): This test is fast on a Release tree, and slow (~10s) on +# a Debug tree mostly because of llvm-mc. Is there a way to prefer the +# fast installed llvm-mc rather than the slow one in our Debug tree? + +# If headers and offsets are proper, then ... +# +# 1) llvm-objdump will not crash, and exit with good status +# +# 2) Summary encodings from the input object will match +# those from the linked output +# +# 3) Encodings & symbols from the input object will match +# those from the linked output + +# RUN: %S/tools/generate-cfi-funcs.py --seed=facebook >%t.s +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 -o %t.o %t.s +# RUN: lld -flavor darwinnew -Z -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t %t.o +# RUN: llvm-objdump --unwind-info %t.o >%t.o.dump +# RUN: llvm-objdump --unwind-info --syms %t >%t.dump +# RUN: %S/tools/validate-unwind-info.py --object-dump=%t.o.dump --program-dump=%t.dump diff --git a/lld/test/MachO/tools/generate-cfi-funcs.py b/lld/test/MachO/tools/generate-cfi-funcs.py new file mode 100755 --- /dev/null +++ b/lld/test/MachO/tools/generate-cfi-funcs.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python + +"""Generate skeletal functions with a variety .cfi_ directives. +The purpose is to produce object-file test inputs to lld with a +variety of compact unwind encodings. 
import random
import argparse

# Number of LSDA labels emitted so far; makes each Lexception label unique.
lsda_n = 0
# Probability (0.0 to 1.0) that a function gets a personality and an LSDA.
# main() sets it from --lsda.
lsda_odds = 0.0
# Inclusive bounds, in units of 0x10 bytes, for the random function size.
func_size_low = 0x10
func_size_high = 0x100


def print_function(name: str):
  """Print the assembly for one function called `name`; return its size.

  The function carries a random selection of .cfi_ directives and, with
  probability `lsda_odds`, a personality routine plus an LSDA stub. The
  returned size is the byte count used for the function body.
  """
  global lsda_n, func_size_low, func_size_high
  print_lsda = (random.random() < lsda_odds)
  ### FIXME(gkm): frame size should alter the compact encoding, but it does not
  frame_size = random.randint(4, 64) * 16
  reg_count = random.randint(0, 6)

  # Function sizes begin small, then monotonically increase in size.
  # The goal is to produce early pages that are full and later pages
  # that are less than full, in order to test handling for both cases.
  # Full pages contain the maximum of 1021 compact unwind entries for
  # a total page size = 4 KiB.
  func_size = random.randint(func_size_low, func_size_high) * 0x10
  func_size_high += 1
  if func_size_high % 0x10 == 0:
    func_size_low += 1

  print(f"""\
### {name} regs={reg_count} frame={frame_size} lsda={1 if print_lsda else 0} size = {func_size}
    .section __TEXT,__text,regular,pure_instructions
    .p2align 4, 0x90
    .globl {name}
{name}:
    .cfi_startproc""")
  if print_lsda:
    lsda_n += 1
    print(f"""\
    .cfi_personality 155, ___gxx_personality_v0
    .cfi_lsda 16, Lexception{lsda_n}""")
  print(f"""\
    pushq %rbp
    .cfi_def_cfa_offset {frame_size}
    .cfi_offset %rbp, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register %rbp""")
  # Each threshold adds one more saved-register record.
  if reg_count >= 5:
    print(".cfi_offset %rbx, -56")
  if reg_count >= 4:
    print(".cfi_offset %r12, -48")
  if reg_count >= 3:
    print(".cfi_offset %r13, -40")
  if reg_count >= 2:
    print(".cfi_offset %r14, -32")
  if reg_count >= 1:
    print(".cfi_offset %r15, -24")
  print(f"""\
    .fill {func_size - 6}
    popq %rbp
    retq
    .cfi_endproc
""")

  if print_lsda:
    print(f"""\
    .section __TEXT,__gcc_except_tab
    .p2align 2
Lexception{lsda_n}:
    .space 0x10
""")
  return func_size


def main(argv=None):
  """Parse the command line and print the generated assembly to stdout.

  `argv` is the list of arguments to parse; None (the default) means
  sys.argv[1:], which is what running the script does.
  """
  # print_function() reads the module-level lsda_odds, so the value parsed
  # from --lsda has to be stored there rather than in a local.
  global lsda_odds
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('--seed',
                      help='Seed the random number generator')
  parser.add_argument('--pages', type=int, default=2,
                      help='Number of compact-unwind pages')
  # NOTE(review): --encodings is parsed but not used by the visible code.
  parser.add_argument('--encodings', type=int, default=127,
                      help='Maximum number of unique unwind encodings (default = 127)')
  parser.add_argument('--lsda', type=int, default=0,
                      help='Percentage of functions with personality & LSDA (default = 0)')
  args = parser.parse_args(argv)
  if args.seed:
    random.seed(args.seed)

  lsda_odds = args.lsda / 100.0
  size = 0
  base = (1 << 12)
  print("""
    .section __TEXT,__text,regular,pure_instructions
    .p2align 12, 0x90
""")

  # Keep emitting functions until their combined size reaches 16 MiB per
  # requested page.
  while size < args.pages << 24:
    func_size = print_function(f"x{size+base:08x}")
    size += func_size

  print("""\
    .section __TEXT,__text,regular,pure_instructions
    .globl _main
    .p2align 4, 0x90
_main:
    retq

    .p2align 4, 0x90
___gxx_personality_v0:
    retq
""")


if __name__ == '__main__':
  main()
import sys
import argparse
import re
from pprint import pprint


def _debug_dump(label, value):
  """Print `label` on its own line followed by a pretty-printed `value`."""
  print(label)
  pprint(value)


def main(argv=None):
  """Cross-check two llvm-objdump reports and exit.

  Reads the dump of the input object file and the dump of the linked
  program, then checks that (1) the per-symbol encodings agree once adjacent
  duplicates in the object file are folded, and (2) the program's common
  encodings table is ordered by descending frequency of those folded
  encodings. Exits through sys.exit(): with a message on mismatch, with no
  argument on success.

  `argv` is the list of arguments to parse; None (the default) means
  sys.argv[1:].
  """
  # Raw string: "\d" is a regex class here, not a string escape.
  hex_digit = r"[a-f\d]"
  hex8 = hex_digit + "{8}"

  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('--object-dump', required=True,
                      help='Object file')
  parser.add_argument('--program-dump', required=True,
                      help='Linked file')
  parser.add_argument('--debug', action='store_true',
                      help='Print the compared tables when they differ')
  args = parser.parse_args(argv)

  with open(args.object_dump, "r") as object_dump:
    object_string = object_dump.read()
  with open(args.program_dump, "r") as program_dump:
    program_string = program_dump.read()

  # symbol -> encoding, from the object file's entries
  object_encodings_map = {
    symbol:encoding for symbol, encoding in
    re.findall(rf"start:\s+0x{hex_digit}+\s+(\w+).+?encoding:\s+0x({hex_digit}+)",
               object_string, re.DOTALL)}

  # low 8 hex digits of the address -> symbol, from the program's symbols
  program_symbols_map = {
    address:symbol for address, symbol in
    re.findall(rf"^{hex8}({hex8}) g\s+F __TEXT,__text (\w+)$",
               program_string, re.MULTILINE)}

  # the program's common encodings, in table order
  program_common_encodings = (
    re.findall(rf"^\s+encoding\[\d+\]: 0x({hex_digit}+)$",
               program_string, re.MULTILINE))

  # symbol -> encoding, from the program's unwind entries
  program_encodings_map = {
    program_symbols_map[address]:encoding for address, encoding in
    re.findall(rf"^\s+\[\d+\]: function offset=0x({hex_digit}+)," +
               rf" encoding\[\d+\]=0x({hex_digit}+)$",
               program_string, re.MULTILINE)}

  # Fold adjacent entries from the object file that have matching encodings
  # TODO(gkm) add check for personality+lsda
  previous_encoding = None
  # sorted() makes a list up front, so deleting from the dict is safe here.
  for symbol in sorted(object_encodings_map):
    encoding = object_encodings_map[symbol]
    if encoding == previous_encoding:
      del object_encodings_map[symbol]
    previous_encoding = encoding

  if program_encodings_map != object_encodings_map:
    if args.debug:
      _debug_dump("program encodings map:", program_encodings_map)
      _debug_dump("object encodings map:", object_encodings_map)
    sys.exit("encoding maps differ")

  # Count frequency of object-file folded encodings
  # and compare with the program-file common encodings table
  encoding_frequency_map = {}
  for encoding in object_encodings_map.values():
    encoding_frequency_map[encoding] = 1 + encoding_frequency_map.get(encoding, 0)
  # Descending frequency; ties broken by descending encoding.
  encoding_frequencies = sorted(
      encoding_frequency_map,
      key=lambda x: (encoding_frequency_map.get(x), x),
      reverse=True)

  if program_common_encodings != encoding_frequencies:
    if args.debug:
      _debug_dump("program common encodings:", program_common_encodings)
      _debug_dump("object encoding frequencies:", encoding_frequencies)
    sys.exit("encoding frequencies differ")

  sys.exit()


if __name__ == '__main__':
  main()