diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -127,6 +127,7 @@ llvm::StringRef sysroot; llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; + llvm::StringRef whyExtract; llvm::StringRef ltoBasicBlockSections; std::pair<llvm::StringRef, llvm::StringRef> thinLTOObjectSuffixReplace; std::pair<llvm::StringRef, llvm::StringRef> thinLTOPrefixReplace; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -94,6 +94,7 @@ objectFiles.clear(); sharedFiles.clear(); backwardReferences.clear(); + whyExtract.clear(); tar = nullptr; memset(&in, 0, sizeof(in)); @@ -1171,6 +1172,7 @@ config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false); config->warnSymbolOrdering = args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true); + config->whyExtract = args.getLastArgValue(OPT_why_extract); config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); config->zForceBti = hasZOption(args, "force-bti"); @@ -1696,13 +1698,16 @@ } // Force Sym to be entered in the output. -static void handleUndefined(Symbol *sym) { +static void handleUndefined(Symbol *sym, const char *option) { // Since a symbol may not be used inside the program, LTO may // eliminate it. Mark the symbol as "used" to prevent it. 
sym->isUsedInRegularObj = true; - if (sym->isLazy()) - sym->fetch(); + if (!sym->isLazy()) + return; + sym->fetch(); + if (!config->whyExtract.empty()) + whyExtract.emplace_back(option, sym->file, *sym); } // As an extension to GNU linkers, lld supports a variant of `-u` @@ -1725,7 +1730,7 @@ } for (Symbol *sym : syms) - handleUndefined(sym); + handleUndefined(sym, "--undefined-glob"); } static void handleLibcall(StringRef name) { @@ -2192,6 +2197,9 @@ e.message()); if (auto e = tryCreateFile(config->mapFile)) error("cannot open map file " + config->mapFile + ": " + e.message()); + if (auto e = tryCreateFile(config->whyExtract)) + error("cannot open --why-extract= file " + config->whyExtract + ": " + + e.message()); } if (errorCount()) return; @@ -2246,7 +2254,7 @@ // If an entry symbol is in a static archive, pull out that file now. if (Symbol *sym = symtab->find(config->entry)) - handleUndefined(sym); + handleUndefined(sym, "--entry"); // Handle the `--undefined-glob <pattern>` options. for (StringRef pat : args::getStrings(args, OPT_undefined_glob)) diff --git a/lld/ELF/MapFile.h b/lld/ELF/MapFile.h --- a/lld/ELF/MapFile.h +++ b/lld/ELF/MapFile.h @@ -12,6 +12,7 @@ namespace lld { namespace elf { void writeMapFile(); +void writeWhyExtract(); void writeCrossReferenceTable(); void writeArchiveStats(); } // namespace elf diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -215,6 +215,25 @@ } } +void elf::writeWhyExtract() { + if (config->whyExtract.empty()) + return; + + std::error_code ec; + raw_fd_ostream os(config->whyExtract, ec, sys::fs::OF_None); + if (ec) { + error("cannot open --why-extract= file " + config->whyExtract + ": " + + ec.message()); + return; + } + + os << "reference\textracted\tsymbol\n"; + for (auto &entry : whyExtract) { + os << std::get<0>(entry) << '\t' << toString(std::get<1>(entry)) << '\t' + << toString(std::get<2>(entry)) << '\n'; + } +} + static void print(StringRef a, StringRef b) { 
lld::outs() << left_justify(a, 49) << " " << b << "\n"; } diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -492,6 +492,8 @@ "Force load of all members in a static library", "Do not force load of all members in a static library (default)">; +def why_extract: JJ<"why-extract=">, HelpText<"Print to a file about why archive members are extracted">; + defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and " "__real_symbol references to symbol">, MetaVarName<"<symbol>">; diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" +#include <tuple> namespace lld { // Returns a string representation for a symbol for diagnostics. @@ -582,6 +583,11 @@ std::pair<const InputFile *, const InputFile *>> backwardReferences; +// A tuple of (reference, extractedFile, sym). Used by --why-extract=. +extern SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>, + 0> + whyExtract; + } // namespace elf } // namespace lld diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -64,6 +64,8 @@ Defined *ElfSym::tlsModuleBase; DenseMap<const Symbol *, std::pair<const InputFile *, const InputFile *>> elf::backwardReferences; +SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>, 0> + elf::whyExtract; static uint64_t getSymVA(const Symbol &sym, int64_t &addend) { switch (sym.kind()) { @@ -321,6 +323,11 @@ message(toString(sym->file) + s + sym->getName()); } +static void recordWhyExtract(const InputFile *reference, + const InputFile &extracted, const Symbol &sym) { + whyExtract.emplace_back(toString(reference), &extracted, sym); +} + void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { if (!config->warnSymbolOrdering) return; @@ -533,6 +540,9 @@ file->groupId < other.file->groupId; fetch(); + if (!config->whyExtract.empty()) + recordWhyExtract(other.file, *file, *this); + // We don't report backward references to weak symbols as they can be // overridden later. 
// @@ -742,7 +752,10 @@ return; } + const InputFile *oldFile = file; other.fetch(); + if (!config->whyExtract.empty()) + recordWhyExtract(oldFile, *file, *this); } void Symbol::resolveShared(const SharedSymbol &other) { diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -622,11 +622,12 @@ for (OutputSection *sec : outputSections) sec->addr = 0; - // Handle --print-map(-M)/--Map, --cref and --print-archive-stats=. Dump them - // before checkSections() because the files may be useful in case - // checkSections() or openFile() fails, for example, due to an erroneous file - // size. + // Handle --print-map(-M)/--Map, --why-extract=, --cref and + // --print-archive-stats=. Dump them before checkSections() because the files + // may be useful in case checkSections() or openFile() fails, for example, due + // to an erroneous file size. writeMapFile(); + writeWhyExtract(); writeCrossReferenceTable(); writeArchiveStats(); diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -26,6 +26,8 @@ * ``--export-dynamic-symbol-list`` has been added. (`D107317 <https://reviews.llvm.org/D107317>`_) +* ``--why-extract`` has been added to query why archive members/lazy object files are extracted. + (`D109572 <https://reviews.llvm.org/D109572>`_) * ``e_entry`` no longer falls back to the address of ``.text`` if the entry symbol does not exist. Instead, a value of 0 will be written. (`D110014 <https://reviews.llvm.org/D110014>`_) diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -658,6 +658,8 @@ Report unresolved symbols as warnings. .It Fl -whole-archive Force load of all members in a static library. +.It Fl -why-extract Ns = Ns Ar file +Print to a file about why archive members are extracted. 
.It Fl -wrap Ns = Ns Ar symbol Redirect .Ar symbol diff --git a/lld/test/ELF/why-extract.s b/lld/test/ELF/why-extract.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/why-extract.s @@ -0,0 +1,86 @@ +# REQUIRES: x86 + +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/main.s -o %t/main.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a_b.s -o %t/a_b.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/b.s -o %t/b.o +# RUN: llvm-ar rc %t/a.a %t/a.o +# RUN: llvm-ar rc %t/a_b.a %t/a_b.o +# RUN: llvm-ar rc %t/b.a %t/b.o +# RUN: cd %t + +## Nothing is extracted from an archive. The file is created with just a header. +# RUN: ld.lld main.o a.o b.a -o /dev/null --why-extract=why1.txt +# RUN: FileCheck %s --input-file=why1.txt --check-prefix=CHECK1 --match-full-lines --strict-whitespace + +# CHECK1:reference extracted symbol +# CHECK1-NOT:{{.}} + +## Some archive members are extracted. +# RUN: ld.lld main.o a_b.a b.a -o /dev/null --why-extract=why2.txt +# RUN: FileCheck %s --input-file=why2.txt --check-prefix=CHECK2 --match-full-lines --strict-whitespace + +# CHECK2:reference extracted symbol +# CHECK2-NEXT:main.o a_b.a(a_b.o) a +# CHECK2-NEXT:a_b.a(a_b.o) b.a(b.o) b() + +## Check that backward references are supported. +## - means stdout. +# RUN: ld.lld b.a a_b.a main.o -o /dev/null --why-extract=- | FileCheck %s --check-prefix=CHECK3 + +# CHECK3:reference extracted symbol +# CHECK3-NEXT:a_b.a(a_b.o) b.a(b.o) b() +# CHECK3-NEXT:main.o a_b.a(a_b.o) a + +# RUN: ld.lld main.o a_b.a b.a -o /dev/null --no-demangle --why-extract=- | FileCheck %s --check-prefix=MANGLED + +# MANGLED: a_b.a(a_b.o) b.a(b.o) _Z1bv + +# RUN: ld.lld main.o a.a b.a -o /dev/null -u _Z1bv --why-extract=- | FileCheck %s --check-prefix=UNDEFINED + +## We insert -u symbol before processing other files, so its name is <internal>. +## This is not ideal. 
+# UNDEFINED: <internal> b.a(b.o) b() + +# RUN: ld.lld main.o a.a b.a -o /dev/null --undefined-glob '_Z1b*' --why-extract=- | FileCheck %s --check-prefix=UNDEFINED_GLOB + +# UNDEFINED_GLOB: --undefined-glob b.a(b.o) b() + +# RUN: ld.lld main.o a.a b.a -o /dev/null -e _Z1bv --why-extract=- | FileCheck %s --check-prefix=ENTRY + +# ENTRY: --entry b.a(b.o) b() + +# RUN: ld.lld main.o b.a -o /dev/null -T a.lds --why-extract=- | FileCheck %s --check-prefix=SCRIPT + +# SCRIPT: <internal> b.a(b.o) b() + +# RUN: ld.lld main.o --start-lib a_b.o b.o --end-lib -o /dev/null --why-extract=- | FileCheck %s --check-prefix=LAZY + +# LAZY: main.o a_b.o a +# LAZY: a_b.o b.o b() + +# RUN: not ld.lld -shared main.o -o /dev/null --why-extract=/ 2>&1 | FileCheck %s --check-prefix=ERR + +# ERR: error: cannot open --why-extract= file /: {{.*}} + +#--- main.s +.globl _start +_start: + call a + +#--- a.s +.globl a +a: + +#--- a_b.s +.globl a +a: + call _Z1bv + +#--- b.s +.globl _Z1bv +_Z1bv: + +#--- a.lds +a = _Z1bv;