diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -825,18 +825,25 @@
   in.stubs->reserved1 = in.lazyPointers->reserved1 = off;
 }
 
+// Value recorded in the indirect symbol table for `sym`: its symtabIndex, or
+// INDIRECT_SYMBOL_LOCAL when symtabIndex is UINT32_MAX.
+static uint32_t indirectValue(const Symbol *sym) {
+  if (sym->symtabIndex == UINT32_MAX)
+    return MachO::INDIRECT_SYMBOL_LOCAL;
+  return sym->symtabIndex;
+}
+
 void IndirectSymtabSection::writeTo(uint8_t *buf) const {
   uint32_t off = 0;
-  for (const Symbol *sym : in.got->getEntries()) {
-    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
-    ++off;
-  }
-  for (const Symbol *sym : in.tlvPointers->getEntries()) {
-    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
-    ++off;
-  }
-  for (const Symbol *sym : in.stubs->getEntries()) {
-    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
-    ++off;
-  }
+  // Write one little-endian 32-bit slot per entry, advancing `off` as we go:
+  // GOT entries first, then TLV pointers, then stubs.
+  auto writeEntries = [&](const auto &entries) {
+    for (const Symbol *sym : entries) {
+      write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
+      ++off;
+    }
+  };
+  writeEntries(in.got->getEntries());
+  writeEntries(in.tlvPointers->getEntries());
+  writeEntries(in.stubs->getEntries());
 }
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -63,16 +63,19 @@
 private:
   std::vector> commonEncodings;
   EncodingMap commonEncodingIndexes;
+  // Indices of personality functions within the GOT.
std::vector personalities; std::vector lsdaEntries; std::vector cuVector; - std::vector cuPtrVector; + std::vector cuPtrVector; std::vector secondLevelPages; MergedOutputSection *compactUnwindSection = nullptr; uint64_t level2PagesOffset = 0; uint64_t unwindInfoSize = 0; }; +void prepareCompactUnwind(InputSection *isec); + } // namespace macho } // namespace lld diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -12,11 +12,13 @@ #include "MergedOutputSection.h" #include "OutputSection.h" #include "OutputSegment.h" +#include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" #include "llvm/ADT/SmallVector.h" #include "llvm/BinaryFormat/MachO.h" @@ -81,6 +83,10 @@ // all sizes. Therefore, we don't even bother implementing the regular // non-compressed format. Time will tell if anyone in the field ever // overflows the 127-encodings limit. +// +// Refer to the definition of unwind_info_section_header in +// compact_unwind_encoding.h for an overview of the format we are encoding +// here. // TODO(gkm): prune __eh_frame entries superseded by __unwind_info // TODO(gkm): how do we align the 2nd-level pages? @@ -94,9 +100,115 @@ return (compactUnwindSection != nullptr); } +// Compact unwind relocations have different semantics, so we handle them in a +// separate code path from regular relocations. First, we do not wish to add +// rebase opcodes for __LD,__compact_unwind, because that section doesn't +// actually end up in the final binary. Second, personality pointers always +// reside in the GOT and must be treated specially. 
+void macho::prepareCompactUnwind(InputSection *isec) {
+  assert(isec->segname == segment_names::ld &&
+         isec->name == section_names::compactUnwind);
+
+  DenseMap<std::pair<InputSection *, uint64_t>, macho::Symbol *> anonSymbols;
+  for (Reloc &r : isec->relocs) {
+    // TODO: generalize for other archs
+    assert(r.type == X86_64_RELOC_UNSIGNED);
+    const auto field = r.offset % sizeof(CompactUnwindEntry64);
+    if (field != offsetof(struct CompactUnwindEntry64, personality))
+      continue; // only personality references need preparation
+    if (auto *sym = r.referent.dyn_cast<macho::Symbol *>()) {
+      if (auto *undefined = dyn_cast<Undefined>(sym))
+        treatUndefinedSymbol(*undefined);
+      else
+        in.got->addEntry(sym);
+      continue;
+    }
+    auto *referentIsec = r.referent.dyn_cast<InputSection *>();
+    if (referentIsec == nullptr)
+      continue;
+    // A personality defined in an object file (not a dylib) may be referenced
+    // by a section relocation; route it through a placeholder GOT symbol.
+    macho::Symbol *&slot = anonSymbols[{referentIsec, r.addend}];
+    if (slot == nullptr) {
+      slot = make<Defined>("", nullptr, referentIsec, r.addend, false, false,
+                           false);
+      in.got->addEntry(slot);
+    }
+    r.referent = slot;
+    r.addend = 0;
+  }
+}
+
+// Finalizing unwind info requires the exact addresses it references, and only
+// __TEXT is finalized early enough for that. Report an error for any referent
+// that lives in another segment.
+static void checkTextSegment(InputSection *isec) {
+  if (isec->segname == segment_names::text)
+    return;
+  error("compact unwind references address in " + toString(isec) +
+        " which is not in segment __TEXT");
+}
+
+// We need to apply the relocations to the pre-link compact unwind section
+// before converting it to post-link form. There should only be absolute
+// relocations here: since we are not emitting the pre-link CU section, there
+// is no source address to make a relative location meaningful.
+static void relocateCompactUnwind(MergedOutputSection *compactUnwindSection,
+                                  std::vector<CompactUnwindEntry64> &cuVector) {
+  for (InputSection *isec : compactUnwindSection->inputs) {
+    uint8_t *buf =
+        reinterpret_cast<uint8_t *>(cuVector.data()) + isec->outSecFileOff;
+    memcpy(buf, isec->data.data(), isec->data.size());
+
+    for (Reloc &r : isec->relocs) {
+      uint64_t referentVA = 0;
+      if (auto *referentSym = r.referent.dyn_cast<macho::Symbol *>()) {
+        if (!isa<Undefined>(referentSym)) {
+          assert(referentSym->isInGot());
+          if (auto *defined = dyn_cast<Defined>(referentSym))
+            checkTextSegment(defined->isec);
+          // At this point in the link, we may not yet know the final address of
+          // the GOT, so we just encode the index. We make it a 1-based index so
+          // that we can distinguish the null pointer case.
+          referentVA = referentSym->gotIndex + 1;
+        }
+      } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
+        checkTextSegment(referentIsec);
+        referentVA = referentIsec->getVA() + r.addend;
+      }
+      support::endian::write64le(buf + r.offset, referentVA);
+    }
+  }
+}
+
+// Dedups the personalities referenced by cuPtrVector into `personalities` and
+// stores each entry's 1-based slot in the 2-bit personality encoding field.
+void encodePersonalities(const std::vector<CompactUnwindEntry64 *> &cuPtrVector,
+                         std::vector<uint32_t> &personalities) {
+  for (CompactUnwindEntry64 *cu : cuPtrVector) {
+    if (cu->personality == 0)
+      continue;
+    // cu->personality is a 1-based GOT index (see relocateCompactUnwind), so
+    // match on the stored value itself. Linear search suits a small array.
+    auto it = find(personalities, cu->personality);
+    uint32_t personalityIndex; // 1-based index
+    if (it != personalities.end()) {
+      personalityIndex = std::distance(personalities.begin(), it) + 1;
+    } else {
+      personalities.push_back(cu->personality);
+      personalityIndex = personalities.size();
+    }
+    cu->encoding |=
+        personalityIndex << countTrailingZeros(
+            static_cast<uint32_t>(UNWIND_PERSONALITY_MASK));
+  }
+  if (personalities.size() > 3)
+    error("too many personalities (" + std::to_string(personalities.size()) +
+          ") for compact unwind to encode");
+}
+
 // Scan the __LD,__compact_unwind entries and compute the space needs of
 // __TEXT,__unwind_info and __TEXT,__eh_frame
-
 void UnwindInfoSection::finalize() {
   if (compactUnwindSection == nullptr)
     return;
@@ -114,12 +226,12 @@
       compactUnwindSection->getSize() / sizeof(CompactUnwindEntry64);
   cuVector.resize(cuCount);
   // Relocate all __LD,__compact_unwind entries
-  compactUnwindSection->writeTo(reinterpret_cast<uint8_t *>(cuVector.data()));
+  relocateCompactUnwind(compactUnwindSection, cuVector);
 
   // Rather than sort & fold the 32-byte entries directly, we create a
   // vector of pointers to entries and sort & fold that instead.
cuPtrVector.reserve(cuCount); - for (const CompactUnwindEntry64 &cuEntry : cuVector) + for (CompactUnwindEntry64 &cuEntry : cuVector) cuPtrVector.emplace_back(&cuEntry); std::sort(cuPtrVector.begin(), cuPtrVector.end(), [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) { @@ -146,6 +258,8 @@ } cuPtrVector.erase(foldWrite, cuPtrVector.end()); + encodePersonalities(cuPtrVector, personalities); + // Count frequencies of the folded encodings EncodingMap encodingFrequencies; for (auto cuPtrEntry : cuPtrVector) @@ -263,7 +377,7 @@ // Personalities for (const uint32_t &personality : personalities) - *i32p++ = personality; + *i32p++ = in.got->addr + (personality - 1) * WordSize; // Level-1 index uint32_t lsdaOffset = diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -427,11 +427,10 @@ void Writer::scanRelocations() { for (InputSection *isec : inputSections) { - // We do not wish to add rebase opcodes for __LD,__compact_unwind, because - // it doesn't actually end up in the final binary. TODO: filtering it out - // before Writer runs might be cleaner... 
- if (isec->segname == segment_names::ld) + if (isec->segname == segment_names::ld) { + prepareCompactUnwind(isec); continue; + } for (Reloc &r : isec->relocs) { if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) diff --git a/lld/test/MachO/compact-unwind.test b/lld/test/MachO/compact-unwind-generated.test rename from lld/test/MachO/compact-unwind.test rename to lld/test/MachO/compact-unwind-generated.test diff --git a/lld/test/MachO/compact-unwind-pie.s b/lld/test/MachO/compact-unwind-pie.s deleted file mode 100644 --- a/lld/test/MachO/compact-unwind-pie.s +++ /dev/null @@ -1,21 +0,0 @@ -# REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o -# RUN: %lld -pie -lSystem %t.o -o %t -# RUN: llvm-objdump --macho --unwind-info --rebase %t | FileCheck %s - -## Check that we do not add rebase opcodes to the compact unwind section. -# CHECK: Contents of __unwind_info section: -# CHECK-NEXT: Version: 0x1 -# CHECK-NEXT: Common encodings array section offset: -# CHECK-NEXT: Number of common encodings in array: 0x1 -# CHECK: Rebase table: -# CHECK-NEXT: segment section address type -# CHECK-EMPTY: - -.globl _main -.text -_main: - .cfi_startproc - .cfi_def_cfa_offset 16 - retq - .cfi_endproc diff --git a/lld/test/MachO/compact-unwind.s b/lld/test/MachO/compact-unwind.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/compact-unwind.s @@ -0,0 +1,49 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o +# RUN: %lld -pie -lSystem -lc++ %t.o -o %t +# RUN: llvm-objdump --macho --unwind-info --indirect-symbols --rebase %t | FileCheck %s + +# CHECK: Indirect symbols for (__DATA_CONST,__got) +# CHECK-NEXT: address index name +# CHECK-DAG: 0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0 +# CHECK-DAG: 0x[[#%x,MY_PERSONALITY:]] LOCAL + +# CHECK: Contents of __unwind_info section: +# CHECK: Personality functions: (count = 2) +# CHECK-NEXT: personality[1]: 0x{{0*}}[[#MY_PERSONALITY-0x100000000]] 
+# CHECK-NEXT: personality[2]: 0x{{0*}}[[#GXX_PERSONALITY-0x100000000]] + +## Check that we do not add rebase opcodes to the compact unwind section. +# CHECK: Rebase table: +# CHECK-NEXT: segment section address type +# CHECK-NEXT: __DATA_CONST __got 0x{{[0-9a-f]*}} pointer +# CHECK-NEXT: __DATA_CONST __got 0x{{[0-9a-f]*}} pointer +# CHECK-EMPTY: + +.globl _main, _foo, _my_personality, _bar + +.text +_foo: + .cfi_startproc + .cfi_personality 155, _my_personality + .cfi_def_cfa_offset 16 + retq + .cfi_endproc + +_bar: + .cfi_startproc +## Check that we dedup references to the same statically-linked personality. + .cfi_personality 155, _my_personality + .cfi_def_cfa_offset 16 + retq + .cfi_endproc + +_main: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_def_cfa_offset 16 + retq + .cfi_endproc + +_my_personality: + retq diff --git a/lld/test/MachO/invalid/compact-unwind-bad-reloc.s b/lld/test/MachO/invalid/compact-unwind-bad-reloc.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/invalid/compact-unwind-bad-reloc.s @@ -0,0 +1,17 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o +# RUN: not %lld -pie -lSystem -lc++ %t.o -o %t 2>&1 | FileCheck %s -DFILE=%t.o +# CHECK: error: compact unwind references address in [[FILE]]:(__data) which is not in segment __TEXT + +.globl _main, _not_a_function +.text +_main: + retq + +.data +_not_a_function: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_def_cfa_offset 16 + retq + .cfi_endproc diff --git a/lld/test/MachO/invalid/compact-unwind-personalities.s b/lld/test/MachO/invalid/compact-unwind-personalities.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/invalid/compact-unwind-personalities.s @@ -0,0 +1,45 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o +# RUN: not %lld -pie -lSystem -lc++ %t.o -o %t 2>&1 | FileCheck %s --check-prefix=TOO-MANY +# RUN: not %lld -pie -lSystem %t.o -o 
%t 2>&1 | FileCheck %s --check-prefix=UNDEF +# TOO-MANY: error: too many personalities (4) for compact unwind to encode +# UNDEF: error: undefined symbol: ___gxx_personality_v0 + +.globl _main, _personality_1, _personality_2, _personality_3 + +.text + +_foo: + .cfi_startproc + .cfi_personality 155, _personality_1 + .cfi_def_cfa_offset 16 + retq + .cfi_endproc + +_bar: + .cfi_startproc + .cfi_personality 155, _personality_2 + .cfi_def_cfa_offset 16 + retq + .cfi_endproc + +_baz: + .cfi_startproc + .cfi_personality 155, _personality_3 + .cfi_def_cfa_offset 16 + retq + .cfi_endproc + +_main: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_def_cfa_offset 16 + retq + .cfi_endproc + +_personality_1: + retq +_personality_2: + retq +_personality_3: + retq diff --git a/lld/test/MachO/tools/validate-unwind-info.py b/lld/test/MachO/tools/validate-unwind-info.py --- a/lld/test/MachO/tools/validate-unwind-info.py +++ b/lld/test/MachO/tools/validate-unwind-info.py @@ -73,8 +73,10 @@ if program_encodings_map != object_encodings_map: if args.debug: - pprint("program encodings map:\n" + str(program_encodings_map)) - pprint("object encodings map:\n" + str(object_encodings_map)) + print("program encodings map:") + pprint(program_encodings_map) + print("object encodings map:") + pprint(object_encodings_map) sys.exit("encoding maps differ") # Count frequency of object-file folded encodings