diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt --- a/lld/MachO/CMakeLists.txt +++ b/lld/MachO/CMakeLists.txt @@ -2,8 +2,11 @@ tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(MachOOptionsTableGen) +include_directories(${LLVM_MAIN_SRC_DIR}/../libunwind/include) + add_lld_library(lldMachO2 Arch/X86_64.cpp + UnwindInfoSection.cpp Driver.cpp DriverUtils.cpp ExportTrie.cpp diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -22,6 +22,7 @@ constexpr const char data[] = "__DATA"; constexpr const char linkEdit[] = "__LINKEDIT"; constexpr const char dataConst[] = "__DATA_CONST"; +constexpr const char ld[] = "__LD"; // really a non-output segment } // namespace segment_names diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -62,6 +62,7 @@ segRef->maxProt = maxProt(name); segRef->initProt = initProt(name); - outputSegments.push_back(segRef); + if (name != segment_names::ld) // TODO(gkm): only when no -r + outputSegments.push_back(segRef); return segRef; } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -35,6 +35,10 @@ constexpr const char stringTable[] = "__string_table"; constexpr const char got[] = "__got"; constexpr const char threadPtrs[] = "__thread_ptrs"; +constexpr const char unwindInfo[] = "__unwind_info"; +// these are not synthetic, but in service of synthetic __unwind_info +constexpr const char compactUnwind[] = "__compact_unwind"; +constexpr const char ehFrame[] = "__eh_frame"; } // namespace section_names diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h new file mode 100644 --- /dev/null +++ b/lld/MachO/UnwindInfoSection.h @@ -0,0 +1,84 @@ +//===- UnwindInfoSection.h ------------------------------------------------===// +// 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_UNWIND_INFO_H
+#define LLD_MACHO_UNWIND_INFO_H
+
+#include "MergedOutputSection.h"
+#include "SyntheticSections.h"
+
+#include "mach-o/compact_unwind_encoding.h"
+#include "llvm/ADT/DenseMap.h"
+
+#include <vector>
+
+// In 2020, we mostly care about 64-bit targets: x86_64 and arm64
+// One input record of __LD,__compact_unwind for a 64-bit target.
+struct CompactUnwindEntry64 {
+  uint64_t functionAddress;
+  uint32_t functionLength;
+  compact_unwind_encoding_t encoding;
+  uint64_t personality;
+  uint64_t lsda;
+};
+
+// FIXME(gkm): someday we might care about 32-bit targets: x86 & arm
+struct CompactUnwindEntry32 {
+  uint32_t functionAddress;
+  uint32_t functionLength;
+  compact_unwind_encoding_t encoding;
+  uint32_t personality;
+  uint32_t lsda;
+};
+
+namespace lld {
+namespace macho {
+
+// Synthetic section that is built from the input compact-unwind section
+// handed over via setCompactUnwindSection(). finalize() computes the layout
+// and size; writeTo() emits the bytes.
+class UnwindInfoSection : public SyntheticSection {
+public:
+  UnwindInfoSection();
+  // Size in bytes as computed by finalize(); 0 before finalize() has run.
+  uint64_t getSize() const override { return unwindInfoSize; }
+  bool isNeeded() const override;
+  void finalize() override;
+  void writeTo(uint8_t *buf) const override;
+  // Registers the merged input section whose entries feed this section.
+  void setCompactUnwindSection(MergedOutputSection *cuSection) {
+    compactUnwindSection = cuSection;
+  }
+
+private:
+  // (encoding, frequency) pairs, sorted by descending frequency in finalize().
+  std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
+  // Written out by writeTo(); nothing visible in this patch populates these
+  // two tables yet.
+  std::vector<uint32_t> personalities;
+  std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
+  // Relocated copy of the input entries, in input order.
+  std::vector<CompactUnwindEntry64> cuVector;
+  // Pointers into cuVector, sorted by function address and then folded.
+  std::vector<const CompactUnwindEntry64 *> cuPtrVector;
+  // Page X covers [ pageBounds[X] .. pageBounds[X+1] ) of cuPtrVector.
+  std::vector<std::vector<const CompactUnwindEntry64 *>::const_iterator>
+      pageBounds;
+  MergedOutputSection *compactUnwindSection = nullptr;
+  uint64_t level2PagesOffset = 0;
+  uint64_t unwindInfoSize = 0;
+};
+
+#define UNWIND_INFO_COMMON_ENCODINGS_MAX 127
+
+#define UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE 4096
+#define UNWIND_INFO_REGULAR_SECOND_LEVEL_ENTRIES_MAX                           \
+  ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE -                                       \
+    sizeof(unwind_info_regular_second_level_page_header)) /                    \
+   sizeof(unwind_info_regular_second_level_entry))
+#define UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX                        \
+  ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE -                                       \
+    sizeof(unwind_info_compressed_second_level_page_header)) /                 \
+   sizeof(uint32_t))
+
+#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
+#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK                          \
+  UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
new file mode 100644
--- /dev/null
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -0,0 +1,284 @@
+//===- UnwindInfoSection.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "UnwindInfoSection.h"
+#include "Config.h"
+#include "InputSection.h"
+#include "MergedOutputSection.h"
+#include "OutputSection.h"
+#include "OutputSegment.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+using namespace lld;
+using namespace lld::macho;
+
+// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
+// optimizes space and exception-time lookup. Most DWARF unwind
+// entries can be replaced with Compact Unwind entries, but the ones
+// that cannot are retained in DWARF form.
+//
+// This comment will address macro-level organization of the pre-link
+// and post-link compact unwind tables. For micro-level organization
+// pertaining to the bitfield layout of the 32-bit compact unwind
+// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
+//
+// Important clarifying factoids:
+//
+// * __LD,__compact_unwind is the compact unwind format for compiler
+//   output and linker input. It is never a final output. It could be
+//   an intermediate output with the `-r` option which retains relocs.
+//
+// * __TEXT,__unwind_info is the compact unwind format for final
+//   linker output. It is never an input.
+//
+// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
+//
+// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
+//   level) by ascending address, and the pages are referenced by an
+//   index (1st level) in the section header.
+//
+// * Following the headers in __TEXT,__unwind_info, the bulk of the
+//   section contains a vector of compact unwind entries
+//   `{functionOffset, encoding}` sorted by ascending `functionOffset`.
+//   Adjacent entries with the same encoding can be folded to great
+//   advantage, achieving a 3-order-of-magnitude reduction in the
+//   number of entries.
+//
+// * The __TEXT,__unwind_info format can accommodate up to 127 unique
+//   encodings for the space-efficient compressed format. In practice,
+//   fewer than a dozen unique encodings are used by C++ programs of
+//   all sizes. Therefore, we don't even bother implementing the regular
+//   non-compressed format. Time will tell if anyone in the field ever
+//   overflows the 127-encodings limit.
+
+// TODO(gkm): prune __eh_frame entries superseded by __unwind_info
+// TODO(gkm): how do we align the 2nd-level pages?
+
+UnwindInfoSection::UnwindInfoSection()
+    : SyntheticSection(segment_names::text, section_names::unwindInfo) {}
+
+// The section is emitted only when an input compact-unwind section has been
+// registered through setCompactUnwindSection().
+bool UnwindInfoSection::isNeeded() const {
+  return (compactUnwindSection != nullptr);
+}
+
+// Scan the __LD,__compact_unwind entries and compute the space needs of
+// __TEXT,__unwind_info and __TEXT,__eh_frame
+//
+// On return, cuPtrVector holds the address-sorted, folded entries,
+// commonEncodings holds the encodings table, pageBounds holds the
+// second-level page splits, and level2PagesOffset / unwindInfoSize describe
+// the layout that writeTo() emits. An input section with zero entries
+// yields zero pages (pageBounds holds only the sentinel).
+
+void UnwindInfoSection::finalize() {
+  if (compactUnwindSection == nullptr)
+    return;
+
+  // At this point, the address space for __TEXT,__text has been
+  // assigned, so we can relocate the __LD,__compact_unwind entries
+  // into a temporary buffer. Relocation is necessary in order to sort
+  // the CU entries by function address. Sorting is necessary so that
+  // we can fold adjacent CU entries with identical
+  // encoding+personality+lsda. Folding is necessary because it reduces
+  // the number of CU entries by as much as 3 orders of magnitude!
+  compactUnwindSection->finalize();
+  assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry64) == 0);
+  size_t cuCount =
+      compactUnwindSection->getSize() / sizeof(CompactUnwindEntry64);
+  cuVector.resize(cuCount);
+  // Relocate all __LD,__compact_unwind entries
+  compactUnwindSection->writeTo(reinterpret_cast<uint8_t *>(cuVector.data()));
+
+  // Rather than sort & fold the 32-byte entries directly, we create a
+  // vector of pointers to entries and sort & fold that instead.
+  // cuVector is not resized again, so the pointers stay valid.
+  cuPtrVector.reserve(cuCount);
+  for (const CompactUnwindEntry64 &cuEntry : cuVector)
+    cuPtrVector.emplace_back(&cuEntry);
+  std::sort(cuPtrVector.begin(), cuPtrVector.end(),
+            [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
+              return a->functionAddress < b->functionAddress;
+            });
+
+  // Fold adjacent entries with matching encoding+personality+lsda.
+  // std::unique keeps the first (lowest-address) entry of every run of
+  // matching neighbours, which is exactly the entry that represents the run.
+  cuPtrVector.erase(
+      std::unique(cuPtrVector.begin(), cuPtrVector.end(),
+                  [](const CompactUnwindEntry64 *a,
+                     const CompactUnwindEntry64 *b) {
+                    return a->encoding == b->encoding &&
+                           a->personality == b->personality &&
+                           a->lsda == b->lsda;
+                  }),
+      cuPtrVector.end());
+
+  // Count frequencies of the folded encodings
+  llvm::DenseMap<compact_unwind_encoding_t, size_t> encodingFrequencies;
+  for (const CompactUnwindEntry64 *cuPtrEntry : cuPtrVector)
+    encodingFrequencies[cuPtrEntry->encoding]++;
+  if (encodingFrequencies.size() > UNWIND_INFO_COMMON_ENCODINGS_MAX)
+    error("TODO(gkm): handle common encodings table overflow");
+
+  // Make a table of encodings, sorted by descending frequency
+  for (const auto &frequency : encodingFrequencies)
+    commonEncodings.emplace_back(frequency);
+  std::sort(commonEncodings.begin(), commonEncodings.end(),
+            [](const std::pair<compact_unwind_encoding_t, size_t> &a,
+               const std::pair<compact_unwind_encoding_t, size_t> &b) {
+              if (a.second == b.second)
+                // When frequencies match, secondarily sort on encoding
+                // to maintain parity with validate-unwind-info.py
+                return a.first > b.first;
+              return a.second > b.second;
+            });
+
+  // Split folded encodings into pages, limited by capacity of a page
+  // and the 24-bit range of function offset
+  //
+  // Record the page splits as a vector of iterators on cuPtrVector
+  // such that successive elements form a semi-open interval. E.g.,
+  // page X's bounds are thus: [ pageBounds[X] .. pageBounds[X+1] )
+  //
+  // Note that pageBounds.size() is one greater than the number of
+  // pages, and pageBounds.back() holds the sentinel cuPtrVector.cend()
+  pageBounds.push_back(cuPtrVector.cbegin());
+  // TODO(gkm): cut 1st page entries short to accommodate section headers ???
+  CompactUnwindEntry64 cuEntryKey{};
+  // Bounding the loop by the entry count (instead of breaking only after a
+  // page has been emitted) keeps an empty input from dereferencing cend().
+  for (size_t i = 0; i < cuPtrVector.size();) {
+    // Limit the search to entries that can fit within a 4 KiB page.
+    const auto pageBegin = cuPtrVector.cbegin() + i;
+    const auto pageMax =
+        cuPtrVector.cbegin() +
+        std::min(i + UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX,
+                 cuPtrVector.size());
+    // Exclude entries with functionOffset that would overflow 24 bits
+    cuEntryKey.functionAddress = (*pageBegin)->functionAddress +
+                                 UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
+    const auto pageBreak = std::lower_bound(
+        pageBegin, pageMax, &cuEntryKey,
+        [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
+          return a->functionAddress < b->functionAddress;
+        });
+    pageBounds.push_back(pageBreak);
+    // pageBreak is always past pageBegin, so every iteration makes progress.
+    i = pageBreak - cuPtrVector.cbegin();
+  }
+
+  // compute size of __TEXT,__unwind_info section
+  level2PagesOffset =
+      sizeof(unwind_info_section_header) +
+      commonEncodings.size() * sizeof(uint32_t) +
+      personalities.size() * sizeof(uint32_t) +
+      pageBounds.size() * sizeof(unwind_info_section_header_index_entry) +
+      lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry);
+  unwindInfoSize = level2PagesOffset +
+                   (pageBounds.size() - 1) *
+                       sizeof(unwind_info_compressed_second_level_page_header) +
+                   cuPtrVector.size() * sizeof(uint32_t);
+}
+
+// All inputs are relocated and output addresses are known, so write!
+
+// Emit the section into `buf`, which must hold at least getSize() bytes.
+// Layout, in order: section header, common encodings, personalities,
+// level-1 index (one entry per page plus a sentinel), LSDA index, and the
+// compressed level-2 pages. Relies on the tables computed by finalize().
+void UnwindInfoSection::writeTo(uint8_t *buf) const {
+  // section header
+  auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
+  uip->version = 1;
+  uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header);
+  uip->commonEncodingsArrayCount = commonEncodings.size();
+  uip->personalityArraySectionOffset =
+      uip->commonEncodingsArraySectionOffset +
+      (uip->commonEncodingsArrayCount * sizeof(uint32_t));
+  uip->personalityArrayCount = personalities.size();
+  uip->indexSectionOffset = uip->personalityArraySectionOffset +
+                            (uip->personalityArrayCount * sizeof(uint32_t));
+  uip->indexCount = pageBounds.size();
+
+  // Common encodings
+  auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
+  for (const auto &encoding : commonEncodings)
+    *i32p++ = encoding.first;
+
+  // Personalities
+  for (const auto &personality : personalities)
+    *i32p++ = personality;
+
+  // Level-1 index
+  uint32_t lsdaOffset =
+      uip->indexSectionOffset +
+      uip->indexCount * sizeof(unwind_info_section_header_index_entry);
+  uint64_t l2PagesOffset = level2PagesOffset;
+  auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
+  // `i + 1 < size()` instead of `i < size() - 1`: no unsigned wrap-around
+  // should pageBounds ever be empty.
+  for (size_t i = 0; i + 1 < pageBounds.size(); i++) {
+    // NOTE(review): this stores an absolute 64-bit address into a 32-bit
+    // "offset" field; presumably it should be relative to the image base.
+    // Confirm against the consumer of this table.
+    iep->functionOffset = (*pageBounds[i])->functionAddress;
+    iep->secondLevelPagesSectionOffset = l2PagesOffset;
+    iep->lsdaIndexArraySectionOffset = lsdaOffset;
+    iep++;
+    // TODO(gkm): pad to 4 KiB page boundary ???
+    size_t entryCount = pageBounds[i + 1] - pageBounds[i];
+    // Must match what the level-2 loop below emits and what finalize()
+    // budgets for: one compressed page header plus one word per entry.
+    uint64_t pageSize =
+        sizeof(unwind_info_compressed_second_level_page_header) +
+        entryCount * sizeof(uint32_t);
+    l2PagesOffset += pageSize;
+  }
+  // Level-1 sentinel: one past the end of the highest-addressed function.
+  // cuVector is in input order, which is not guaranteed to be address order
+  // (that is why finalize() sorts), so scan for the maximum end address
+  // rather than trusting the last element. An empty cuVector yields 0.
+  uint64_t functionsEnd = 0;
+  for (const CompactUnwindEntry64 &cu : cuVector) {
+    const uint64_t cuEnd = cu.functionAddress + cu.functionLength;
+    if (cuEnd > functionsEnd)
+      functionsEnd = cuEnd;
+  }
+  iep->functionOffset = functionsEnd;
+  iep->secondLevelPagesSectionOffset = 0;
+  iep->lsdaIndexArraySectionOffset = lsdaOffset;
+  iep++;
+
+  // LSDAs
+  auto *lep =
+      reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
+  for (const auto &lsda : lsdaEntries) {
+    lep->functionOffset = lsda.functionOffset;
+    lep->lsdaOffset = lsda.lsdaOffset;
+    // Advance, so that entries do not overwrite each other and the level-2
+    // pages start after the LSDA table.
+    lep++;
+  }
+
+  // create map from encoding to common-encoding-table index compact
+  // encoding entries use 7 bits to index the common-encoding table
+  size_t encodingIndex = 0;
+  llvm::DenseMap<compact_unwind_encoding_t, size_t> commonEncodingIndexes;
+  for (const auto &encoding : commonEncodings)
+    commonEncodingIndexes[encoding.first] = encodingIndex++;
+
+  // Level-2 pages
+  auto *p2p =
+      reinterpret_cast<unwind_info_compressed_second_level_page_header *>(lep);
+  for (size_t i = 0; i + 1 < pageBounds.size(); i++) {
+    p2p->kind = UNWIND_SECOND_LEVEL_COMPRESSED;
+    p2p->entryPageOffset =
+        sizeof(unwind_info_compressed_second_level_page_header);
+    p2p->entryCount = pageBounds[i + 1] - pageBounds[i];
+    p2p->encodingsPageOffset =
+        p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
+    p2p->encodingsCount = 0;
+    auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
+    auto cuPtrVectorIt = pageBounds[i];
+    // Target addresses are 64-bit regardless of the host's pointer width.
+    uint64_t functionAddressBase = (*cuPtrVectorIt)->functionAddress;
+    while (cuPtrVectorIt < pageBounds[i + 1]) {
+      const CompactUnwindEntry64 *cuep = *cuPtrVectorIt++;
+      size_t cueIndex = commonEncodingIndexes.lookup(cuep->encoding);
+      // Entry word: encoding index in the bits above the function-offset
+      // field, function offset relative to the page's first function below.
+      *ep++ = ((cueIndex << UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
+               (cuep->functionAddress - functionAddressBase));
+    }
+    p2p =
+        reinterpret_cast<unwind_info_compressed_second_level_page_header *>(ep);
+  }
+  assert(getSize() ==
+         static_cast<size_t>((reinterpret_cast<uint8_t *>(p2p) - buf)));
+}
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -17,6
+17,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "UnwindInfoSection.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" @@ -57,6 +58,7 @@ MachHeaderSection *header = nullptr; StringTableSection *stringTableSection = nullptr; SymtabSection *symtabSection = nullptr; + UnwindInfoSection *unwindInfoSection = nullptr; }; // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. @@ -414,8 +416,11 @@ StringRef segname = osec->parent->name; // Sections are uniquely identified by their segment + section name. if (segname == segment_names::text) { - if (osec->name == section_names::header) - return -1; + return StringSwitch(osec->name) + .Case(section_names::header, -1) + .Case(section_names::unwindInfo, std::numeric_limits::max() - 1) + .Case(section_names::ehFrame, std::numeric_limits::max()) + .Default(0); } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) .Case(section_names::binding, -6) @@ -472,6 +477,7 @@ void Writer::createOutputSections() { // First, create hidden sections stringTableSection = make(); + unwindInfoSection = make(); // TODO(gkm): only when no -r symtabSection = make(*stringTableSection); switch (config->outputType) { @@ -498,7 +504,11 @@ for (const auto &it : mergedOutputSections) { StringRef segname = it.first.first; MergedOutputSection *osec = it.second; - getOrCreateOutputSegment(segname)->addOutputSection(osec); + if (unwindInfoSection && segname == segment_names::ld) { + assert(osec->name == section_names::compactUnwind); + unwindInfoSection->setCompactUnwindSection(osec); + } else + getOrCreateOutputSegment(segname)->addOutputSection(osec); } for (SyntheticSection *ssec : syntheticSections) { diff --git a/lld/test/MachO/compact-unwind.test b/lld/test/MachO/compact-unwind.test new file mode 100644 --- /dev/null +++ b/lld/test/MachO/compact-unwind.test @@ -0,0 +1,21 @@ +# REQUIRES: x86 + +# FIXME(gkm): This test is fast on a Release tree, 
and slow (~10s) on
+# a Debug tree mostly because of llvm-mc. Is there a way to prefer the
+# fast installed llvm-mc rather than the slow one in our Debug tree?
+
+# If headers and offsets are proper, then ...
+#
+# 1) llvm-objdump will not crash, and exit with good status
+#
+# 2) Summary encodings from the input object will match
+#    those from the linked output
+#
+# 3) Encodings & symbols from the input object will match
+#    those from the linked output
+
+# RUN: %python %S/tools/generate-cfi-funcs.py --seed=johnnyapple >%t.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 -o %t.o %t.s
+# RUN: lld -flavor darwinnew -Z -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t %t.o
+# RUN: llvm-objdump --unwind-info --syms %t %t.o >%t.dump
+# RUN: %python %S/tools/validate-unwind-info.py %t.dump
diff --git a/lld/test/MachO/tools/generate-cfi-funcs.py b/lld/test/MachO/tools/generate-cfi-funcs.py
new file mode 100755
--- /dev/null
+++ b/lld/test/MachO/tools/generate-cfi-funcs.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+
+"""Generate skeletal functions with a variety .cfi_ directives.
+The purpose is to produce object-file test inputs to lld with a
+variety of compact unwind encodings.
+"""
+import random
+import argparse
+import string
+from math import factorial
+from itertools import permutations
+
+# Running count of emitted LSDA labels (Lexception1, Lexception2, ...).
+lsda_n = 0
+# Probability in [0, 1] that a function gets a personality + LSDA.
+lsda_odds = 0.0
+# Bounds for the random function size, in units of 0x10 bytes. Both creep
+# upward as functions are emitted, so later functions tend to be larger.
+func_size_low = 0x10
+func_size_high = 0x100
+saved_regs = ["%r15", "%r14", "%r13", "%r12", "%rbx"]
+# saved_regs_combined[k] lists every ordered pick of k callee-saved registers.
+saved_regs_combined = list(list(permutations(saved_regs, i))
+                           for i in range(0,6))
+
+def print_function(name: str):
+  """Print assembly for one function `name` with randomly chosen frame
+  size, saved registers and (optionally) personality + LSDA directives.
+  Returns the function size that was drawn, in bytes."""
+  global lsda_odds
+  have_lsda = (random.random() < lsda_odds)
+  frame_size = random.randint(4, 64) * 16
+  # Floor division keeps the bound an int: random.randint() rejects float
+  # arguments on Python 3.12+ (and warned about them since 3.10).
+  frame_offset = -random.randint(0, (frame_size // 16 - 4)) * 16
+  reg_count = random.randint(0, 4)
+  reg_combo = random.randint(0, factorial(reg_count) - 1)
+  regs_saved = saved_regs_combined[reg_count][reg_combo]
+  global func_size_low, func_size_high
+  func_size = random.randint(func_size_low, func_size_high) * 0x10
+  func_size_high += 1
+  if func_size_high % 0x10 == 0:
+    func_size_low += 1
+
+  print(f"""\
+### {name} regs={reg_count} frame={frame_size} lsda={have_lsda} size={func_size}
+  .section __TEXT,__text,regular,pure_instructions
+  .p2align 4, 0x90
+  .globl {name}
+{name}:
+  .cfi_startproc""")
+  if have_lsda:
+    global lsda_n
+    lsda_n += 1
+    print(f"""\
+  .cfi_personality 155, ___gxx_personality_v0
+  .cfi_lsda 16, Lexception{lsda_n}""")
+  print(f"""\
+  pushq %rbp
+  .cfi_def_cfa_offset {frame_size}
+  .cfi_offset %rbp, {frame_offset+(6*8)}
+  movq %rsp, %rbp
+  .cfi_def_cfa_register %rbp""")
+  for i in range(reg_count):
+    print(f".cfi_offset {regs_saved[i]}, {frame_offset+(i*8)}")
+  print(f"""\
+  .fill {func_size - 6}
+  popq %rbp
+  retq
+  .cfi_endproc
+""")
+
+  if have_lsda:
+    print(f"""\
+  .section __TEXT,__gcc_except_tab
+  .p2align 2
+Lexception{lsda_n}:
+  .space 0x10
+""")
+  return func_size
+
+def random_seed():
+  """Generate a seed that can easily be passed back in via --seed=STRING"""
+  return ''.join(random.choice(string.ascii_lowercase) for i in range(10))
+
+def main():
+  parser = argparse.ArgumentParser(
+    description=__doc__,
+    epilog="""\
+Function sizes begin small then monotonically increase. The goal is
+to produce early pages that are full and later pages that are less
+than full, in order to test handling for both cases. Full pages
+contain the maximum of 1021 compact unwind entries for a total page
+size = 4 KiB.
+
+Use --pages=N or --functions=N to control the size of the output.
+Default is --pages=2, meaning produce at least two full pages of
+compact unwind entries, plus some more. The calculatation is sloppy.
+""")
+  parser.add_argument('--seed', type=str, default=random_seed(),
+                      help='Seed the random number generator')
+  parser.add_argument('--pages', type=int, default=2,
+                      help='Number of compact-unwind pages')
+  parser.add_argument('--functions', type=int, default=None,
+                      help='Number of functions to generate')
+  # NOTE(review): --encodings is parsed but not read anywhere in this script.
+  parser.add_argument('--encodings', type=int, default=127,
+                      help='Maximum number of unique unwind encodings (default = 127)')
+  parser.add_argument('--lsda', type=int, default=0,
+                      help='Percentage of functions with personality & LSDA (default = 0)')
+  args = parser.parse_args()
+  random.seed(args.seed)
+  p2align = 14
+  global lsda_odds
+  lsda_odds = args.lsda / 100.0
+
+  print(f"""\
+### seed={args.seed} lsda={lsda_odds} p2align={p2align}
+  .section __TEXT,__text,regular,pure_instructions
+  .p2align {p2align}, 0x90
+""")
+
+  # Each symbol is named after base plus the running sum of drawn sizes;
+  # validate-unwind-info.py matches symbol names against their addresses.
+  size = 0
+  base = (1 << p2align)
+  if args.functions:
+    for n in range(args.functions):
+      size += print_function(f"x{size+base:08x}")
+  else:
+    # One page per 24-bit span of function offsets.
+    while size < (args.pages << 24):
+      size += print_function(f"x{size+base:08x}")
+
+  print(f"""\
+  .section __TEXT,__text,regular,pure_instructions
+  .globl _main
+  .p2align 4, 0x90
+_main:
+  retq
+
+  .p2align 4, 0x90
+___gxx_personality_v0:
+  retq
+""")
+
+
+if __name__ == '__main__':
+  main()
diff --git a/lld/test/MachO/tools/validate-unwind-info.py b/lld/test/MachO/tools/validate-unwind-info.py
new file mode 100755
--- /dev/null
+++ b/lld/test/MachO/tools/validate-unwind-info.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+"""Validate compact unwind info by
cross checking the llvm-objdump
+reports of the input object file vs final linked output.
+"""
+import sys
+import argparse
+import re
+from pprint import pprint
+
+def main():
+  """Parse llvm-objdump output (files given as arguments, or stdin) and
+  exit with an error message unless the linked program's unwind encodings
+  and common-encodings table agree with the object file's."""
+  # Raw string: "\d" is not a valid escape in an ordinary string literal.
+  hex = r"[a-f\d]"
+  hex8 = hex + "{8}"
+
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('files', metavar='FILES', nargs='*',
+                      help='output of (llvm-objdump --unwind-info --syms) for object file(s) plus final linker output')
+  parser.add_argument('--debug', action='store_true')
+  args = parser.parse_args()
+
+  if args.files:
+    objdump_string = ''.join([open(f).read() for f in args.files])
+  else:
+    objdump_string = sys.stdin.read()
+
+  # Per-function records of the object file's compact unwind section.
+  object_encodings_list = [(symbol, encoding, personality, lsda)
+    for symbol, encoding, personality, lsda in
+    re.findall(rf"start:\s+0x{hex}+\s+(\w+)\s+" +
+               rf"length:\s+0x{hex}+\s+" +
+               rf"compact encoding:\s+0x({hex}+)(?:\s+" +
+               rf"personality function:\s+0x({hex}+)\s+\w+\s+" +
+               rf"LSDA:\s+0x({hex}+)\s+\w+(?: \+ 0x{hex}+)?)?",
+               objdump_string, re.DOTALL)]
+  object_encodings_map = {symbol:encoding
+    for symbol, encoding, _, _ in object_encodings_list}
+  if not object_encodings_map:
+    sys.exit("no object encodings found in input")
+
+  # Only symbols named "x" + their own address are matched (the \1
+  # back-reference).
+  program_symbols_map = {address:symbol
+    for address, symbol in
+    re.findall(rf"^{hex8}({hex8}) g\s+F __TEXT,__text (x\1)$",
+               objdump_string, re.MULTILINE)}
+  if not program_symbols_map:
+    sys.exit("no program symbols found in input")
+
+  program_common_encodings = (
+    re.findall(rf"^\s+encoding\[\d+\]: 0x({hex}+)$",
+               objdump_string, re.MULTILINE))
+  if not program_common_encodings:
+    sys.exit("no common encodings found in input")
+
+  program_encodings_map = {program_symbols_map[address]:encoding
+    for address, encoding in
+    re.findall(rf"^\s+\[\d+\]: function offset=0x({hex}+), " +
+               rf"encoding\[\d+\]=0x({hex}+)$",
+               objdump_string, re.MULTILINE)}
+  # Check the map that was just built; testing object_encodings_map again
+  # could never report a program with no second-level entries.
+  if not program_encodings_map:
+    sys.exit("no program encodings found in input")
+
+  # Fold adjacent entries from the object file that have matching encodings
+  # TODO(gkm) add check for personality+lsda
+  # sorted() snapshots the keys, so deleting from the dict below is safe.
+  encoding0 = 0
+  for symbol in sorted(object_encodings_map):
+    encoding = object_encodings_map[symbol]
+    fold = (encoding == encoding0)
+    if fold:
+      del object_encodings_map[symbol]
+    if args.debug:
+      print(f"{'delete' if fold else 'retain'} {symbol} with {encoding}")
+    encoding0 = encoding
+
+  if program_encodings_map != object_encodings_map:
+    if args.debug:
+      print("program encodings map:")
+      pprint(program_encodings_map)
+      print("object encodings map:")
+      pprint(object_encodings_map)
+    sys.exit("encoding maps differ")
+
+  # Count frequency of object-file folded encodings
+  # and compare with the program-file common encodings table
+  encoding_frequency_map = {}
+  for _, encoding in object_encodings_map.items():
+    encoding_frequency_map[encoding] = 1 + encoding_frequency_map.get(encoding, 0)
+  # Descending frequency, ties broken by descending encoding string.
+  encoding_frequencies = sorted(
+    encoding_frequency_map,
+    key=lambda x: (encoding_frequency_map.get(x), x),
+    reverse=True)
+
+  if program_common_encodings != encoding_frequencies:
+    if args.debug:
+      print("program common encodings:")
+      pprint(program_common_encodings)
+      print("object encoding frequencies:")
+      pprint(encoding_frequencies)
+    sys.exit("encoding frequencies differ")
+
+
+if __name__ == '__main__':
+  main()