diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -848,15 +848,10 @@ // We populate subsections by repeatedly splitting the last (highest // address) subsection. llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { - // Put private-label symbols that have no flags after other symbols at the - // same address. - StringRef lhsName = getSymName(nList[lhs]); - StringRef rhsName = getSymName(nList[rhs]); - if (nList[lhs].n_value == nList[rhs].n_value) { - if (isPrivateLabel(lhsName) && isPrivateLabel(rhsName)) - return nList[lhs].n_desc > nList[rhs].n_desc; - return !isPrivateLabel(lhsName) && isPrivateLabel(rhsName); - } + // Put extern weak symbols after other symbols at the same address. + if (nList[lhs].n_value == nList[rhs].n_value && + nList[lhs].n_type & N_EXT && nList[rhs].n_type & N_EXT) + return !(nList[lhs].n_desc & N_WEAK_DEF) && (nList[rhs].n_desc & N_WEAK_DEF); return nList[lhs].n_value < nList[rhs].n_value; }); for (size_t j = 0; j < symbolIndices.size(); ++j) { @@ -882,17 +877,8 @@ if (!subsectionsViaSymbols || symbolOffset == 0 || sym.n_desc & N_ALT_ENTRY || !isa(isec)) { isec->hasAltEntry = symbolOffset != 0; - // If we have an private-label symbol that's an alias, and that alias - // doesn't have any flags of its own, then we can just reuse the aliased - // symbol. Our sorting step above ensures that any such symbols will - // appear after the non-private-label ones. See weak-def-alias-ignored.s - // for the motivation behind this. - if (symbolOffset == 0 && isPrivateLabel(name) && j != 0 && - sym.n_desc == 0) - symbols[symIndex] = symbols[symbolIndices[j - 1]]; - else - symbols[symIndex] = createDefined(sym, name, isec, symbolOffset, - symbolSize, forceHidden); + symbols[symIndex] = createDefined(sym, name, isec, symbolOffset, + symbolSize, forceHidden); continue; } auto *concatIsec = cast(isec); diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -61,6 +61,32 @@ SmallVector dupSymDiags; } // namespace +// Move symbols at \p fromOff in \p fromIsec into \p toIsec, unless that symbol +// is \p skip. +static void transplantSymbolsAtOffset(InputSection *fromIsec, + InputSection *toIsec, Defined *skip, + uint64_t fromOff, uint64_t toOff) { + // Ensure the symbols will still be in address order after our insertions. + auto insertIt = llvm::upper_bound(toIsec->symbols, toOff, + [](uint64_t off, const Symbol *s) { + return cast(s)->value < off; + }); + llvm::erase_if(fromIsec->symbols, [&](Symbol *s) { + auto *d = cast(s); + if (d->value != fromOff) + return false; + if (d != skip) { + // This repeated insertion will be quadratic unless insertId is the end + // iterator. However, that is typically the case for files that have + // .subsections_via_symbols set. + insertIt = toIsec->symbols.insert(insertIt, d); + d->isec = toIsec; + d->value = toOff; + } + return true; + }); +} + Defined *SymbolTable::addDefined(StringRef name, InputFile *file, InputSection *isec, uint64_t value, uint64_t size, bool isWeakDef, @@ -82,18 +108,22 @@ defined->referencedDynamically |= isReferencedDynamically; defined->noDeadStrip |= noDeadStrip; } - // FIXME: Handle this for bitcode files. - if (auto concatIsec = dyn_cast_or_null(isec)) + if (auto concatIsec = dyn_cast_or_null(isec)) { concatIsec->wasCoalesced = true; + if (defined->isec) + transplantSymbolsAtOffset(concatIsec, defined->isec, + /*skip=*/nullptr, value, defined->value); + } return defined; } if (defined->isWeakDef()) { - // FIXME: Handle this for bitcode files. if (auto concatIsec = dyn_cast_or_null(defined->isec)) { concatIsec->wasCoalesced = true; - concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined)); + if (isec) + transplantSymbolsAtOffset(concatIsec, isec, defined, defined->value, + value); } } else { std::string srcLoc1 = defined->getSourceLocation(); @@ -382,7 +412,7 @@ }; MapVector undefs; -} +} // namespace void macho::reportPendingDuplicateSymbols() { for (const auto &duplicate : dupSymDiags) { diff --git a/lld/test/MachO/private-label-alias.s b/lld/test/MachO/local-alias-to-weak.s rename from lld/test/MachO/private-label-alias.s rename to lld/test/MachO/local-alias-to-weak.s --- a/lld/test/MachO/private-label-alias.s +++ b/lld/test/MachO/local-alias-to-weak.s @@ -1,21 +1,16 @@ # REQUIRES: x86 -## This test checks that when we coalesce weak definitions, any flag-less -## private-label aliases to those weak defs don't cause the coalesced data to be -## retained. This test explicitly creates those private-label symbols, but it -## was actually motivated by MC's aarch64 backend which automatically creates -## them when emitting object files. I've chosen to explicitly create them here -## since we can then reference those symbols for a more complete test. +## This test checks that when we coalesce weak definitions, their local symbol +## aliases defs don't cause the coalesced data to be retained. This was +## motivated by MC's aarch64 backend which automatically creates `ltmp` +## symbols at the start of each .text section. These symbols are frequently +## aliases of other symbols created by clang or other inputs to MC. I've chosen +## to explicitly create them here since we can then reference those symbols for +## a more complete test. ## ## Not retaining the data matters for more than just size -- we have a use case -## that depends on proper data coalescing to emit a valid file format. -## -## ld64 actually treats all local symbol aliases (not just the private ones) the -## same way. But implementing this is harder -- we would have to create those -## symbols first (so we can emit their names later), but we would have to -## ensure the linker correctly shuffles them around when their aliasees get -## coalesced. Emulating the behavior of weak binds for non-private symbols would -## be even trickier. Let's just deal with private-label symbols for now until we -## find a use case for more general local symbols. +## that depends on proper data coalescing to emit a valid file format. We also +## need this behavior to properly deduplicate the __objc_protolist section; +## failure to do this can result in dyld crashing on iOS 13. ## ## Finally, ld64 does all this regardless of whether .subsections_via_symbols is ## specified. We don't. But again, given how rare the lack of that directive is @@ -23,11 +18,11 @@ ## to worry about it. # RUN: rm -rf %t; split-file %s %t -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-then-private.s -o %t/weak-then-private.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/private-then-weak.s -o %t/private-then-weak.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-then-local.s -o %t/weak-then-local.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/local-then-weak.s -o %t/local-then-weak.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-dead-strip.s -o %t/no-dead-strip.o -# RUN: %lld -dylib %t/weak-then-private.o %t/private-then-weak.o -o %t/test1 -# RUN: %lld -dylib %t/private-then-weak.o %t/weak-then-private.o -o %t/test2 +# RUN: %lld -dylib %t/weak-then-local.o %t/local-then-weak.o -o %t/test1 +# RUN: %lld -dylib %t/local-then-weak.o %t/weak-then-local.o -o %t/test2 # RUN: %lld -dead_strip %t/no-dead-strip.o -o %t/no-dead-strip # RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test1 | FileCheck %s @@ -39,19 +34,22 @@ # CHECK-NEXT: 0000000000001010 00 10 00 00 00 00 00 00 {{$}} # CHECK-EMPTY: # CHECK-NEXT: SYMBOL TABLE: +# CHECK-NEXT: 0000000000001000 l O __DATA,__data _alias # CHECK-NEXT: 0000000000001008 l O __DATA,__data _ref +# CHECK-NEXT: 0000000000001000 l O __DATA,__data _alias # CHECK-NEXT: 0000000000001010 l O __DATA,__data _ref # CHECK-NEXT: 0000000000001000 w O __DATA,__data _weak # CHECK-NEXT: 0000000000000000 *UND* dyld_stub_binder # CHECK-EMPTY: -## Even though the references were to the non-weak `l_ignored` aliases, we -## should still emit weak binds as if they were the `_weak` symbol itself. +## Even though the references were to the non-weak `_alias` symbols, ld64 still +## emits weak binds as if they were the `_weak` symbol itself. We do not. I +## don't know of any programs that rely on this behavior, so I'm just +## documenting it here. # CHECK-NEXT: Weak bind table: # CHECK-NEXT: segment section address type addend symbol -# CHECK-NEXT: __DATA __data 0x00001008 pointer 0 _weak -# CHECK-NEXT: __DATA __data 0x00001010 pointer 0 _weak +# CHECK-EMPTY: -## Verify that we don't drop any flags that private-label aliases have (such as +## Verify that we don't drop any flags that the aliases have (such as ## .no_dead_strip). This is a regression test. We previously had subsections ## that were mistakenly stripped. @@ -59,29 +57,29 @@ # RUN: --check-prefix=NO-DEAD-STRIP # NO-DEAD-STRIP: __data 00000010 -#--- weak-then-private.s +#--- weak-then-local.s .globl _weak .weak_definition _weak .data _weak: -l_ignored: +_alias: .quad 0x123 _ref: - .quad l_ignored + .quad _alias .subsections_via_symbols -#--- private-then-weak.s +#--- local-then-weak.s .globl _weak .weak_definition _weak .data -l_ignored: +_alias: _weak: .quad 0x123 _ref: - .quad l_ignored + .quad _alias .subsections_via_symbols