diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -621,6 +621,10 @@ } } +static bool isIgnoredName(StringRef name) { + return name.startswith("l") || name.startswith("L"); +} + template static macho::Symbol *createDefined(const NList &sym, StringRef name, InputSection *isec, uint64_t value, @@ -690,8 +694,7 @@ } assert(!isWeakDefCanBeHidden && "weak_def_can_be_hidden on already-hidden symbol?"); - bool includeInSymtab = - !name.startswith("l") && !name.startswith("L") && !isEhFrameSection(isec); + bool includeInSymtab = !isIgnoredName(name) && !isEhFrameSection(isec); return make( name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF, /*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab, @@ -824,17 +827,25 @@ } sections[i]->doneSplitting = true; + auto getSymName = [strtab](const NList& sym) -> StringRef { + return StringRef(strtab + sym.n_strx); + }; + // Calculate symbol sizes and create subsections by splitting the sections // along symbol boundaries. // We populate subsections by repeatedly splitting the last (highest // address) subsection. llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { + // Put ignored symbols after other symbols at the same address. + if (nList[lhs].n_value == nList[rhs].n_value) + return !isIgnoredName(getSymName(nList[lhs])) && + isIgnoredName(getSymName(nList[rhs])); return nList[lhs].n_value < nList[rhs].n_value; }); for (size_t j = 0; j < symbolIndices.size(); ++j) { const uint32_t symIndex = symbolIndices[j]; const NList &sym = nList[symIndex]; - StringRef name = strtab + sym.n_strx; + StringRef name = getSymName(sym); Subsection &subsec = subsections.back(); InputSection *isec = subsec.isec; @@ -854,8 +865,15 @@ if (!subsectionsViaSymbols || symbolOffset == 0 || sym.n_desc & N_ALT_ENTRY || !isa(isec)) { isec->hasAltEntry = symbolOffset != 0; - symbols[symIndex] = createDefined(sym, name, isec, symbolOffset, - symbolSize, forceHidden); + // If we have an ignored symbol that's an alias, just reuse the aliased + // symbol. Our sorting step above ensures that any ignored symbols will + // appear after the non-ignored ones. See weak-def-alias-ignored.s for + // the motivation behind this. + if (symbolOffset == 0 && isIgnoredName(name) && j != 0) + symbols[symIndex] = symbols[symbolIndices[j - 1]]; + else + symbols[symIndex] = createDefined(sym, name, isec, symbolOffset, + symbolSize, forceHidden); continue; } auto *concatIsec = cast(isec); diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -206,7 +206,7 @@ // If we have multiple symbols at the same address, only one of them can have // an associated unwind entry. if (!p.second && d->unwindEntry) { - assert(!p.first->second->unwindEntry); + assert(p.first->second == d || !p.first->second->unwindEntry); p.first->second = d; } } diff --git a/lld/test/MachO/weak-def-alias-ignored.s b/lld/test/MachO/weak-def-alias-ignored.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/weak-def-alias-ignored.s @@ -0,0 +1,75 @@ +# REQUIRES: x86 +## This test checks that when we coalesce weak definitions, any ignored-name +## aliases to those weak defs don't cause the coalesced data to be retained. +## This test explicitly creates those ignored-name symbols, but this test was +## actually motivated by MC's aarch64 backend which automatically creates them +## when emitting object files. I've chosen to explicitly create them here since +## we can then reference those symbols for a more complete test. +## +## Not retaining the data matters for more than just size -- we have a use case +## that depends on proper data coalescing to emit a valid file format. +## +## ld64 actually treats all local symbol aliases (not just the ignored ones) the +## same way. But implementing this is harder -- we would have to create those +## symbols first (so we can emit their names later), but we would have to +## ensure the linker correctly shuffles them around when their aliasees get +## coalesced.Emulating the behavior of weak binds for non-ignored symbols would +## be even trickier. Let's just deal with ignored symbols for now until we find +## a use case for more general local symbols. +## +## Finally, ld64 does all this regardless of whether .subsections_via_symbols is +## specified. We don't. But again, given how rare the lack of that directive is +## (I've only seen it from hand-written assembly inputs), I don't think we need +## to worry about it. + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-then-ignored.s -o %t/weak-then-ignored.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/ignored-then-weak.s -o %t/ignored-then-weak.o +# RUN: %lld -dylib %t/weak-then-ignored.o %t/ignored-then-weak.o -o %t/test1 +# RUN: %lld -dylib %t/ignored-then-weak.o %t/weak-then-ignored.o -o %t/test2 +# RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test1 +# RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test2 + +## Check that we only have one copy of 0x123 in the data, not two. +# CHECK: Contents of (__DATA,__data) section +# CHECK-NEXT: 0000000000001000 23 01 00 00 00 00 00 00 00 10 00 00 00 00 00 00 {{$}} +# CHECK-NEXT: 0000000000001010 00 10 00 00 00 00 00 00 {{$}} +# CHECK-EMPTY: +# CHECK-NEXT: SYMBOL TABLE: +# CHECK-NEXT: 0000000000001008 l O __DATA,__data _ref +# CHECK-NEXT: 0000000000001010 l O __DATA,__data _ref +# CHECK-NEXT: 0000000000001000 w O __DATA,__data _weak +# CHECK-NEXT: 0000000000000000 *UND* dyld_stub_binder +# CHECK-EMPTY: +## Even though the references were to the non-weak `l_ignored` aliases, we +## should still emit weak binds as if they were the `_weak` symbol itself. +# CHECK-NEXT: Weak bind table: +# CHECK-NEXT: segment section address type addend symbol +# CHECK-NEXT: __DATA __data 0x00001008 pointer 0 _weak +# CHECK-NEXT: __DATA __data 0x00001010 pointer 0 _weak + +#--- weak-then-ignored.s +.globl _weak +.weak_definition _weak +.data +_weak: +l_ignored: + .quad 0x123 + +_ref: + .quad l_ignored + +.subsections_via_symbols + +#--- ignored-then-weak.s +.globl _weak +.weak_definition _weak +.data +l_ignored: +_weak: + .quad 0x123 + +_ref: + .quad l_ignored + +.subsections_via_symbols