diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -420,10 +420,11 @@ uint64_t icfUniqueID = inputSections.size(); for (ConcatInputSection *isec : inputSections) { // FIXME: consider non-code __text sections as hashable? - bool isHashable = (isCodeSection(isec) || isCfStringSection(isec) || - isClassRefsSection(isec)) && - !isec->keepUnique && !isec->shouldOmitFromOutput() && - sectionType(isec->getFlags()) == MachO::S_REGULAR; + bool isHashable = + (isCodeSection(isec) || isCfStringSection(isec) || + isClassRefsSection(isec) || isGccExceptTabSection(isec)) && + !isec->keepUnique && !isec->shouldOmitFromOutput() && + sectionType(isec->getFlags()) == MachO::S_REGULAR; if (isHashable) { hashable.push_back(isec); for (Defined *d : isec->symbols) diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -1186,14 +1186,27 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) { for (const Subsection &subsection : compactUnwindSection.subsections) { ConcatInputSection *isec = cast(subsection.isec); - // Hack!! Since each CUE contains a different function address, if ICF - // operated naively and compared the entire contents of each CUE, entries - // with identical unwind info but belonging to different functions would - // never be considered equivalent. To work around this problem, we slice - // away the function address here. (Note that we do not adjust the offsets - // of the corresponding relocations.) We rely on `relocateCompactUnwind()` - // to correctly handle these truncated input sections. - isec->data = isec->data.slice(target->wordSize); + // Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed + // their addends in its data. Thus if ICF operated naively and compared the + // entire contents of each CUE, entries with identical unwind info but e.g. 
+ // belonging to different functions would never be considered equivalent. To + // work around this problem, we remove some parts of the data containing the + // embedded addends. In particular, we remove the function address and LSDA + // pointers. Since these locations are at the start and end of the entry, + // we can do this using a simple, efficient slice rather than performing a + // copy. We are not losing any information here because the embedded + // addends have already been parsed into the corresponding Reloc structs. + // + // Removing these pointers would not be safe if they were pointers to + // absolute symbols. In that case, there would be no corresponding + // relocation. However, (AFAIK) MC cannot emit references to absolute + // symbols for either the function address or the LSDA. It *can* do so for + // the personality pointer, though, so we are not slicing that field away. + // + // Note that we do not adjust the offsets of the corresponding relocations; + // instead, we rely on `relocateCompactUnwind()` to correctly handle these + // truncated input sections. + isec->data = isec->data.slice(target->wordSize, 8 + target->wordSize); uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t)); // llvm-mc omits CU entries for functions that need DWARF encoding, but // `ld -r` doesn't. 
We can ignore them because we will re-synthesize these diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -281,6 +281,7 @@ bool isCfStringSection(const InputSection *); bool isClassRefsSection(const InputSection *); bool isEhFrameSection(const InputSection *); +bool isGccExceptTabSection(const InputSection *); extern std::vector inputSections; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -345,6 +345,11 @@ isec->getSegName() == segment_names::text; } +bool macho::isGccExceptTabSection(const InputSection *isec) { + return isec->getName() == section_names::gccExceptTab && + isec->getSegName() == segment_names::text; +} + std::string lld::toString(const InputSection *isec) { return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); } diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s --- a/lld/test/MachO/icf.s +++ b/lld/test/MachO/icf.s @@ -37,6 +37,9 @@ # CHECK: [[#%x,HAS_UNWIND_2:]] l F __TEXT,__text _has_unwind_1 # CHECK: [[#%x,HAS_UNWIND_2]] l F __TEXT,__text _has_unwind_2 # CHECK: [[#%x,HAS_UNWIND_3:]] l F __TEXT,__text _has_unwind_3 +# CHECK: [[#%x,HAS_UNWIND_4:]] l F __TEXT,__text _has_unwind_4 +# CHECK: [[#%x,HAS_ABS_PERSONALITY_1:]] l F __TEXT,__text _has_abs_personality_1 +# CHECK: [[#%x,HAS_ABS_PERSONALITY_2:]] l F __TEXT,__text _has_abs_personality_2 # CHECK: [[#%x,HAS_EH_FRAME_1:]] l F __TEXT,__text _has_eh_frame_1 # CHECK: [[#%x,HAS_EH_FRAME_2:]] l F __TEXT,__text _has_eh_frame_2 # CHECK: [[#%x,HAS_EH_FRAME_3:]] l F __TEXT,__text _has_eh_frame_3 @@ -48,6 +51,9 @@ ### FIXME: Mutually-recursive functions with identical bodies (see below) # COM: [[#%x,ASYMMETRIC_RECURSIVE_2:]] l F __TEXT,__text _asymmetric_recursive_1 # COM: [[#%x,ASYMMETRIC_RECURSIVE_2]] l F __TEXT,__text _asymmetric_recursive_2 +# CHECK: [[#%x,GCC_EXCEPT_0:]] l O __TEXT,__gcc_except_tab 
GCC_except_table0 +# CHECK: [[#%x,GCC_EXCEPT_0]] l O __TEXT,__gcc_except_tab GCC_except_table1 +# CHECK: [[#%x,GCC_EXCEPT_2:]] l O __TEXT,__gcc_except_tab GCC_except_table2 ## Check that we don't accidentally dedup distinct EH frames. # CHECK: FDE {{.*}} pc=[[#%x,HAS_EH_FRAME_1]] @@ -80,6 +86,9 @@ # CHECK: callq 0x[[#%x,HAS_UNWIND_2]] <_has_unwind_2> # CHECK: callq 0x[[#%x,HAS_UNWIND_2]] <_has_unwind_2> # CHECK: callq 0x[[#%x,HAS_UNWIND_3]] <_has_unwind_3> +# CHECK: callq 0x[[#%x,HAS_UNWIND_4]] <_has_unwind_4> +# CHECK: callq 0x[[#%x,HAS_ABS_PERSONALITY_1]] <_has_abs_personality_1> +# CHECK: callq 0x[[#%x,HAS_ABS_PERSONALITY_2]] <_has_abs_personality_2> # CHECK: callq 0x[[#%x,HAS_EH_FRAME_1]] <_has_eh_frame_1> # CHECK: callq 0x[[#%x,HAS_EH_FRAME_2]] <_has_eh_frame_2> # CHECK: callq 0x[[#%x,HAS_EH_FRAME_3]] <_has_eh_frame_3> @@ -200,6 +209,7 @@ _has_unwind_1: .cfi_startproc .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception0 .cfi_def_cfa_offset 16 ret .cfi_endproc @@ -207,19 +217,51 @@ _has_unwind_2: .cfi_startproc .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception1 .cfi_def_cfa_offset 16 ret .cfi_endproc -## This function has different unwind info from the preceding two, and therefore +## This function has a different cfa_offset from the first two, and therefore ## should not be folded. _has_unwind_3: .cfi_startproc .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception1 .cfi_def_cfa_offset 8 ret .cfi_endproc +## This function has a different LSDA from the first two, and therefore should +## not be folded. +_has_unwind_4: + .cfi_startproc + .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception2 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +## The next two functions should not be folded as they refer to personalities +## at different absolute addresses. This verifies that we are doing the right +## thing in our "data slicing hack" for compact unwind. 
+_has_abs_personality_1: + .cfi_startproc + .cfi_personality 155, _abs_personality_1 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +_has_abs_personality_2: + .cfi_startproc + .cfi_personality 155, _abs_personality_2 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +_abs_personality_1 = 0x1 +_abs_personality_2 = 0x2 + ## In theory _has_eh_frame_{1, 2} can be dedup'ed, but we don't support this ## yet. _has_eh_frame_1: @@ -319,6 +361,9 @@ callq _has_unwind_1 callq _has_unwind_2 callq _has_unwind_3 + callq _has_unwind_4 + callq _has_abs_personality_1 + callq _has_abs_personality_2 callq _has_eh_frame_1 callq _has_eh_frame_2 callq _has_eh_frame_3 @@ -329,3 +374,16 @@ callq _init_1 callq _init_2 callq _init_3 + +.section __TEXT,__gcc_except_tab +GCC_except_table0: +Lexception0: + .byte 255 + +GCC_except_table1: +Lexception1: + .byte 255 + +GCC_except_table2: +Lexception2: + .byte 254