diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -415,10 +415,11 @@ uint64_t icfUniqueID = inputSections.size(); for (ConcatInputSection *isec : inputSections) { // FIXME: consider non-code __text sections as hashable? - bool isHashable = (isCodeSection(isec) || isCfStringSection(isec) || - isClassRefsSection(isec)) && - !isec->keepUnique && !isec->shouldOmitFromOutput() && - sectionType(isec->getFlags()) == MachO::S_REGULAR; + bool isHashable = + (isCodeSection(isec) || isCfStringSection(isec) || + isClassRefsSection(isec) || isGccExceptTabSection(isec)) && + !isec->keepUnique && !isec->shouldOmitFromOutput() && + sectionType(isec->getFlags()) == MachO::S_REGULAR; if (isHashable) { hashable.push_back(isec); for (Defined *d : isec->symbols) diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -1186,14 +1186,27 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) { for (const Subsection &subsection : compactUnwindSection.subsections) { ConcatInputSection *isec = cast(subsection.isec); - // Hack!! Since each CUE contains a different function address, if ICF - // operated naively and compared the entire contents of each CUE, entries - // with identical unwind info but belonging to different functions would - // never be considered equivalent. To work around this problem, we slice - // away the function address here. (Note that we do not adjust the offsets - // of the corresponding relocations.) We rely on `relocateCompactUnwind()` - // to correctly handle these truncated input sections. - isec->data = isec->data.slice(target->wordSize); + // Hack!! Each CUE has its UNSIGNED relocations embed their addends in its + // data. Thus if ICF operated naively and compared the entire contents of + // each CUE, entries with identical unwind info but e.g. belonging to + // different functions would never be considered equivalent. 
To work around + // this problem, we remove some parts of the data containing the embedded + // addends. In particular, we remove the function address and LSDA pointers. + // Since these locations are all at the start and end of the entry, we can + // do this using a simple, efficient slice rather than performing a copy. + // We are not losing any information here because the embedded addends have + // already been parsed in the corresponding Reloc structs. + // + // Removing these pointers would not be safe if they were pointers to + // absolute symbols. In that case, there would be no corresponding + // relocation. However, (AFAIK) MC cannot emit refs to absolute symbols for + // either the function address or the LSDA. However, it *can* do so for the + // personality pointer, so we are not slicing that field away. + // + // Note that we do not adjust the offsets of the corresponding relocations; + // instead, we rely on `relocateCompactUnwind()` to correctly handle these + // truncated input sections. + isec->data = isec->data.slice(target->wordSize, 8 + target->wordSize); uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t)); // llvm-mc omits CU entries for functions that need DWARF encoding, but // `ld -r` doesn't. 
We can ignore them because we will re-synthesize these diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -281,6 +281,7 @@ bool isCfStringSection(const InputSection *); bool isClassRefsSection(const InputSection *); bool isEhFrameSection(const InputSection *); +bool isGccExceptTabSection(const InputSection *); extern std::vector inputSections; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -345,6 +345,11 @@ isec->getSegName() == segment_names::text; } +bool macho::isGccExceptTabSection(const InputSection *isec) { + return isec->getName() == section_names:: gccExceptTab && + isec->getSegName() == segment_names::text; +} + std::string lld::toString(const InputSection *isec) { return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); } diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s --- a/lld/test/MachO/icf.s +++ b/lld/test/MachO/icf.s @@ -37,6 +37,9 @@ # CHECK: [[#%x,HAS_UNWIND_2:]] l F __TEXT,__text _has_unwind_1 # CHECK: [[#%x,HAS_UNWIND_2]] l F __TEXT,__text _has_unwind_2 # CHECK: [[#%x,HAS_UNWIND_3:]] l F __TEXT,__text _has_unwind_3 +# CHECK: [[#%x,HAS_UNWIND_4:]] l F __TEXT,__text _has_unwind_4 +# CHECK: [[#%x,HAS_ABS_PERSONALITY_1:]] l F __TEXT,__text _has_abs_personality_1 +# CHECK: [[#%x,HAS_ABS_PERSONALITY_2:]] l F __TEXT,__text _has_abs_personality_2 # CHECK: [[#%x,MUTALLY_RECURSIVE_2:]] l F __TEXT,__text _mutually_recursive_1 # CHECK: [[#%x,MUTALLY_RECURSIVE_2]] l F __TEXT,__text _mutually_recursive_2 # CHECK: [[#%x,INIT_2:]] l F __TEXT,__text _init_1 @@ -45,6 +48,9 @@ ### FIXME: Mutually-recursive functions with identical bodies (see below) # COM: [[#%x,ASYMMETRIC_RECURSIVE_2:]] l F __TEXT,__text _asymmetric_recursive_1 # COM: [[#%x,ASYMMETRIC_RECURSIVE_2]] l F __TEXT,__text _asymmetric_recursive_2 +# CHECK: [[#%x,GCC_EXCEPT_0:]] l O __TEXT,__gcc_except_tab 
GCC_except_table0 +# CHECK: [[#%x,GCC_EXCEPT_0]] l O __TEXT,__gcc_except_tab GCC_except_table1 +# CHECK: [[#%x,GCC_EXCEPT_2:]] l O __TEXT,__gcc_except_tab GCC_except_table2 # CHECK-LABEL: Disassembly of section __TEXT,__text: # CHECK: <_main>: @@ -72,6 +78,9 @@ # CHECK: callq 0x[[#%x,HAS_UNWIND_2]] <_has_unwind_2> # CHECK: callq 0x[[#%x,HAS_UNWIND_2]] <_has_unwind_2> # CHECK: callq 0x[[#%x,HAS_UNWIND_3]] <_has_unwind_3> +# CHECK: callq 0x[[#%x,HAS_UNWIND_4]] <_has_unwind_4> +# CHECK: callq 0x[[#%x,HAS_ABS_PERSONALITY_1]] <_has_abs_personality_1> +# CHECK: callq 0x[[#%x,HAS_ABS_PERSONALITY_2]] <_has_abs_personality_2> # CHECK: callq 0x[[#%x,MUTALLY_RECURSIVE_2]] <_mutually_recursive_2> # CHECK: callq 0x[[#%x,MUTALLY_RECURSIVE_2]] <_mutually_recursive_2> ## FIXME Mutually-recursive functions with identical bodies (see below) @@ -189,6 +198,7 @@ _has_unwind_1: .cfi_startproc .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception0 .cfi_def_cfa_offset 16 ret .cfi_endproc @@ -196,19 +206,51 @@ _has_unwind_2: .cfi_startproc .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception1 .cfi_def_cfa_offset 16 ret .cfi_endproc -## This function has different unwind info from the preceding two, and therefore +## This function has a different cfa_offset from the first two, and therefore ## should not be folded. _has_unwind_3: .cfi_startproc .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception1 .cfi_def_cfa_offset 8 ret .cfi_endproc +## This function has a different LSDA from the first two, and therefore should +## not be folded. +_has_unwind_4: + .cfi_startproc + .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception2 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +## The next two functions should not be folded as they refer to personalities +## at different absolute addresses. This verifies that we are doing the right +## thing in our "data slicing hack" for compact unwind. 
+_has_abs_personality_1: + .cfi_startproc + .cfi_personality 155, _abs_personality_1 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +_has_abs_personality_2: + .cfi_startproc + .cfi_personality 155, _abs_personality_2 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +_abs_personality_1 = 0x1 +_abs_personality_2 = 0x2 + ## Fold: Mutually-recursive functions with symmetric bodies _mutually_recursive_1: callq _mutually_recursive_1 # call myself @@ -279,6 +321,9 @@ callq _has_unwind_1 callq _has_unwind_2 callq _has_unwind_3 + callq _has_unwind_4 + callq _has_abs_personality_1 + callq _has_abs_personality_2 callq _mutually_recursive_1 callq _mutually_recursive_2 callq _asymmetric_recursive_1 @@ -286,3 +331,16 @@ callq _init_1 callq _init_2 callq _init_3 + +.section __TEXT,__gcc_except_tab +GCC_except_table0: +Lexception0: + .byte 255 + +GCC_except_table1: +Lexception1: + .byte 255 + +GCC_except_table2: +Lexception2: + .byte 254