diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -182,6 +182,7 @@ SymbolPatterns exportedSymbols; SymbolPatterns unexportedSymbols; + SymbolPatterns whyLive; bool zeroModTime = false; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1370,6 +1370,11 @@ symtab->addUndefined(cachedName.val(), /*file=*/nullptr, /*isWeakRef=*/false); + for (const Arg *arg : args.filtered(OPT_why_live)) + config->whyLive.insert(arg->getValue()); + if (!config->whyLive.empty() && !config->deadStrip) + warn("-why_live has no effect without -dead_strip, ignoring"); + config->saveTemps = args.hasArg(OPT_save_temps); config->adhocCodesign = args.hasFlag( diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp --- a/lld/MachO/MarkLive.cpp +++ b/lld/MachO/MarkLive.cpp @@ -21,59 +21,153 @@ using namespace llvm; using namespace llvm::MachO; +struct WhyLiveEntry { + InputSection *isec; + // Keep track of the entry that caused us to mark `isec` as live. + const WhyLiveEntry *prev; + + WhyLiveEntry(InputSection *isec, const WhyLiveEntry *prev) + : isec(isec), prev(prev) {} +}; + +// Type-erased interface to MarkLiveImpl. Used for adding roots to the liveness +// graph. class MarkLive { public: - void enqueue(InputSection *isec, uint64_t off); - void addSym(Symbol *s); - void markTransitively(); + virtual void enqueue(InputSection *isec, uint64_t off) = 0; + virtual void addSym(Symbol *s) = 0; + virtual void markTransitively() = 0; + virtual ~MarkLive() = default; +}; + +template class MarkLiveImpl : public MarkLive { +public: + // -why_live is a rarely used option, so we don't want support for that flag + // to slow down the main -dead_strip code path. As such, we employ templates + // to avoid the usage of WhyLiveEntry in the main code path. This saves us + // from needless allocations and pointer indirections. 
+ using WorklistEntry = + std::conditional_t; + + void enqueue(InputSection *isec, uint64_t off) override { + enqueue(isec, off, nullptr); + } + void addSym(Symbol *s) override { addSym(s, nullptr); } + void markTransitively() override; private: + void enqueue(InputSection *isec, uint64_t off, const WorklistEntry *prev); + void addSym(Symbol *s, const WorklistEntry *prev); + void printWhyLive(Symbol *s, const WorklistEntry *prev); + const InputSection *getInputSection(const WorklistEntry *) const; + WorklistEntry *makeEntry(InputSection *, const WorklistEntry *prev) const; + // We build up a worklist of sections which have been marked as live. We // only push into the worklist when we discover an unmarked section, and we // mark as we push, so sections never appear twice in the list. Literal // sections cannot contain references to other sections, so we only store // ConcatInputSections in our worklist. - SmallVector worklist; + SmallVector worklist; }; -void MarkLive::enqueue(InputSection *isec, uint64_t off) { +template +void MarkLiveImpl::enqueue( + InputSection *isec, uint64_t off, + const typename MarkLiveImpl::WorklistEntry *prev) { if (isec->isLive(off)) return; isec->markLive(off); if (auto s = dyn_cast(isec)) { assert(!s->isCoalescedWeak()); - worklist.push_back(s); + worklist.push_back(makeEntry(s, prev)); } } -void MarkLive::addSym(Symbol *s) { +template +void MarkLiveImpl::addSym( + Symbol *s, + const typename MarkLiveImpl::WorklistEntry *prev) { if (s->used) return; s->used = true; + printWhyLive(s, prev); if (auto *d = dyn_cast(s)) { if (d->isec) - enqueue(d->isec, d->value); + enqueue(d->isec, d->value, prev); if (d->unwindEntry) - enqueue(d->unwindEntry, 0); + enqueue(d->unwindEntry, 0, prev); + } +} + +static void printWhyLiveImpl(const Symbol *s, const WhyLiveEntry *prev) { + lld::errs() << toString(*s) << " from " << toString(s->getFile()) << "\n"; + int indent = 2; + for (const WhyLiveEntry *entry = prev; entry; + entry = entry->prev, indent += 
2) { + const TinyPtrVector &symbols = entry->isec->symbols; + // With .subsections_via_symbols set, most isecs will have exactly one + // entry in their symbols vector, so we just print the first one. + if (!symbols.empty()) + lld::errs() << std::string(indent, ' ') << toString(*symbols.front()) + << " from " << toString(symbols.front()->getFile()) << "\n"; } + lld::errs().flush(); +} + +// NOTE: if/when `if constexpr` becomes available, we can simplify a lot of +// the explicit template specializations below. + +template <> +void MarkLiveImpl::printWhyLive(Symbol *s, const WhyLiveEntry *prev) { + if (!config->whyLive.empty() && config->whyLive.match(s->getName())) + printWhyLiveImpl(s, prev); +} + +template <> +void MarkLiveImpl::printWhyLive(Symbol *s, const InputSection *prev) {} + +template <> +const InputSection * +MarkLiveImpl::getInputSection(const WhyLiveEntry *entry) const { + return entry->isec; +} + +template <> +const InputSection * +MarkLiveImpl::getInputSection(const InputSection *isec) const { + return isec; } -void MarkLive::markTransitively() { +template <> +typename MarkLiveImpl::WorklistEntry *MarkLiveImpl::makeEntry( + InputSection *isec, const MarkLiveImpl::WorklistEntry *prev) const { + return make(isec, prev); +} + +template <> +typename MarkLiveImpl::WorklistEntry *MarkLiveImpl::makeEntry( + InputSection *isec, const MarkLiveImpl::WorklistEntry *prev) const { + return isec; +} + +template +void MarkLiveImpl::markTransitively() { do { // Mark things reachable from GC roots as live. while (!worklist.empty()) { - ConcatInputSection *s = worklist.pop_back_val(); - assert(s->live && "We mark as live when pushing onto the worklist!"); + WorklistEntry *entry = worklist.pop_back_val(); + assert(getInputSection(entry)->live && + "We mark as live when pushing onto the worklist!"); // Mark all symbols listed in the relocation table for this section. 
- for (const Reloc &r : s->relocs) { + for (const Reloc &r : getInputSection(entry)->relocs) { if (auto *s = r.referent.dyn_cast()) - addSym(s); + addSym(s, entry); else - enqueue(r.referent.get(), r.addend); + enqueue(r.referent.get(), r.addend, entry); } - for (Defined *d : s->symbols) - addSym(d); + for (Defined *d : getInputSection(entry)->symbols) + addSym(d, entry); } // S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live @@ -85,13 +179,18 @@ continue; for (const Reloc &r : isec->relocs) { - bool referentLive; - if (auto *s = r.referent.dyn_cast()) - referentLive = s->isLive(); - else - referentLive = r.referent.get()->isLive(r.addend); - if (referentLive) - enqueue(isec, 0); + if (auto *s = r.referent.dyn_cast()) { + if (s->isLive()) { + InputSection *referentIsec = nullptr; + if (auto *d = dyn_cast(s)) + referentIsec = d->isec; + enqueue(isec, 0, makeEntry(referentIsec, nullptr)); + } + } else { + auto *referentIsec = r.referent.get(); + if (referentIsec->isLive(r.addend)) + enqueue(isec, 0, makeEntry(referentIsec, nullptr)); + } } } @@ -107,10 +206,14 @@ // from the final output. void markLive() { TimeTraceScope timeScope("markLive"); - MarkLive marker; + MarkLive *marker; + if (config->whyLive.empty()) + marker = make>(); + else + marker = make>(); // Add GC roots. if (config->entry) - marker.addSym(config->entry); + marker->addSym(config->entry); for (Symbol *sym : symtab->getSymbols()) { if (auto *defined = dyn_cast(sym)) { // -exported_symbol(s_list) @@ -121,13 +224,13 @@ // explicitUndefineds code below would handle this automatically. 
assert(!defined->privateExtern && "should have been rejected by driver"); - marker.addSym(defined); + marker->addSym(defined); continue; } // public symbols explicitly marked .no_dead_strip if (defined->referencedDynamically || defined->noDeadStrip) { - marker.addSym(defined); + marker->addSym(defined); continue; } @@ -142,40 +245,40 @@ bool externsAreRoots = config->outputType != MH_EXECUTE || config->exportDynamic; if (externsAreRoots && !defined->privateExtern) { - marker.addSym(defined); + marker->addSym(defined); continue; } } } // -u symbols for (Symbol *sym : config->explicitUndefineds) - marker.addSym(sym); + marker->addSym(sym); // local symbols explicitly marked .no_dead_strip for (const InputFile *file : inputFiles) if (auto *objFile = dyn_cast(file)) for (Symbol *sym : objFile->symbols) if (auto *defined = dyn_cast_or_null(sym)) if (!defined->isExternal() && defined->noDeadStrip) - marker.addSym(defined); + marker->addSym(defined); if (auto *stubBinder = dyn_cast_or_null(symtab->find("dyld_stub_binder"))) - marker.addSym(stubBinder); + marker->addSym(stubBinder); for (ConcatInputSection *isec : inputSections) { // Sections marked no_dead_strip if (isec->getFlags() & S_ATTR_NO_DEAD_STRIP) { - marker.enqueue(isec, 0); + marker->enqueue(isec, 0); continue; } // mod_init_funcs, mod_term_funcs sections if (sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS || sectionType(isec->getFlags()) == S_MOD_TERM_FUNC_POINTERS) { - marker.enqueue(isec, 0); + marker->enqueue(isec, 0); continue; } } - marker.markTransitively(); + marker->markTransitively(); } } // namespace macho diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -549,7 +549,6 @@ def why_live : Separate<["-"], "why_live">, MetaVarName<"">, HelpText<"Log a chain of references to , for use with -dead_strip">, - Flags<[HelpHidden]>, Group; def print_statistics : Flag<["-"], "print_statistics">, HelpText<"Log the linker's memory and CPU 
usage">, diff --git a/lld/test/MachO/why-live.s b/lld/test/MachO/why-live.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/why-live.s @@ -0,0 +1,53 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o +# RUN: %lld -lSystem -dead_strip -why_live _foo -why_live _undef -U _undef \ +# RUN: -why_live _support -why_live _abs %t.o -o /dev/null 2>&1 | FileCheck %s + +## Due to an implementation detail, LLD is not able to report -why_live info for +## absolute symbols. (ld64 has the same shortcoming.) +# CHECK-NOT: _abs +# CHECK: _foo from {{.*}}why-live.s.tmp.o +# CHECK-NEXT: _quux from {{.*}}why-live.s.tmp.o +# CHECK-NEXT: _undef from {{.*}}why-live.s.tmp.o +# CHECK-NEXT: _main from {{.*}}why-live.s.tmp.o +## Our handling of live_support sections can be improved... we shouldn't be +## printing _support twice. (ld64 seems to have the same issue.) +# CHECK-NEXT: _support from {{.*}}why-live.s.tmp.o +# CHECK-NEXT: _support from {{.*}}why-live.s.tmp.o +# CHECK-NEXT: _foo from {{.*}}why-live.s.tmp.o +# CHECK-EMPTY: + +.text +_foo: + retq + +_bar: + retq + +_baz: + callq _foo + retq + +.no_dead_strip _quux +_quux: + callq _foo + retq + +.globl _main +_main: + callq _foo + callq _baz + callq _undef + retq + +.globl _abs +_abs = 0x1000 + +.section __TEXT,support,regular,live_support +_support: + callq _foo + callq _abs + retq + +.subsections_via_symbols