diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -28,6 +28,7 @@ Driver.cpp DriverUtils.cpp EhFrame.cpp + Explain.cpp ICF.cpp InputFiles.cpp InputSection.cpp diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -24,6 +24,7 @@ #include "Driver.h" #include "Config.h" +#include "Explain.h" #include "ICF.h" #include "InputFiles.h" #include "InputSection.h" @@ -1986,6 +1987,10 @@ readCallGraphsFromObjectFiles(); } + // Handle --explain. + for (auto *arg : args.filtered(OPT_explain)) + explain(arg->getValue()); + // Write the result to the file. writeResult(); } diff --git a/lld/ELF/Explain.h b/lld/ELF/Explain.h new file mode 100644 --- /dev/null +++ b/lld/ELF/Explain.h @@ -0,0 +1,22 @@ +//===- Explain.h ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_EXPLAIN_H +#define LLD_ELF_EXPLAIN_H + +#include "lld/Common/LLVM.h" + +namespace lld { +namespace elf { + +template void explain(StringRef fileOrSym); + +} // namespace elf +} // namespace lld + +#endif diff --git a/lld/ELF/Explain.cpp b/lld/ELF/Explain.cpp new file mode 100644 --- /dev/null +++ b/lld/ELF/Explain.cpp @@ -0,0 +1,272 @@ +//===- Explain.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements an optional analysis pass that doesn't change the +// linker's internal state. If you are reading this file to understand how the +// linker works, you can skip this file now. +// +// When doing release work, it is common to find binary bloat between +// releases, nail down a file that wasn't linked in a previous release, and +// then try to figure out why that file gets linked into the current one. +// There was no elegant solution for the last step; you would make a guess +// and remove some function calls, hoping that doing so eliminates the +// dependency on the file. +// +// The --explain option is intended to solve the above situation. You +// can specify a filename as an argument for the option, and lld prints out the +// shortest path from a root to a given file. Below is an example output of lld +// when `--explain=lib/libLLVMSupport.a(APInt.cpp.o)` is given (shortened to fit +// the screen): +// +// This is why 'lib/libLLVMSupport.a(APInt.cpp.o)' is linked: +// +// '(--entry option)' uses '_start' defined in '/usr/lib/x86_64-linux-gnu/crt1.o' +// which uses 'main' defined in 'tools/lld/tools/lld/CMakeFiles/lld.dir/lld.cpp.o' +// which uses 'StringRef::endswith_lower()' defined in 'lib/libLLVMSupport.a(StringRef.cpp.o)' +// which uses 'APInt::zext()' defined in 'lib/libLLVMSupport.a(APInt.cpp.o)' +// +// You can also pass a symbol name instead of a filename to the option, and lld +// will try to figure out why the file that defines that symbol is linked. +// +// What we are doing in this file is a basic breadth-first search over the +// dependency graph. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "LinkerScript.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include <deque>
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+
+namespace lld {
+namespace elf {
+
+namespace {
+// Implements a single --explain query. run() locates the requested file and
+// searches the dependency graph for a path from a root object to that file.
+template <class ELFT> class Explain {
+public:
+  void run(StringRef fileOrSym);
+
+private:
+  InputFile *findFile(StringRef fileOrSym);
+  template <class RelTy> void enqueue(const InputSectionBase *sec, RelTy &rel);
+  void enqueueSpecial(StringRef cause, StringRef symName);
+  void printPath(StringRef fileOrSym);
+
+  // A file object to which we are searching for a path.
+  InputFile *target = nullptr;
+
+  // This map represents how we reach a file from root. Each file maps to the
+  // file that referenced it and the symbol through which it was referenced.
+  DenseMap<InputFile *, std::pair<InputFile *, Symbol *>> edges;
+
+  // Some root objects are not really object files but command line arguments
+  // (e.g. --entry) or linker scripts. This map manages edges to such vertices.
+  // Each file maps to a (cause, symbol name) pair.
+  DenseMap<InputFile *, std::pair<StringRef, StringRef>> specialEdges;
+
+  // We want to visit each object file at most once.
+  DenseSet<InputFile *> seen;
+
+  // A queue for breadth-first search.
+  std::deque<InputFile *> queue;
+};
+} // namespace
+
+// This is the main function of the --explain feature. It resolves fileOrSym
+// to an input file, collects the root objects, and searches breadth-first
+// from them until the target file is dequeued. The explanation is written to
+// outs(); an error is reported if the target cannot be found or reached.
+template <class ELFT> void Explain<ELFT>::run(StringRef fileOrSym) {
+  // Find a file object for a given filename or a symbol.
+  target = findFile(fileOrSym);
+  if (!target)
+    return;
+
+  // Collect root objects. Object files are usually root objects (i.e. always
+  // included to the result), but if -gc-sections is passed, they need to be
+  // (directly or indirectly) referenced by a root symbol.
+  if (!config->gcSections) {
+    for (InputFile *file : objectFiles)
+      if (!file->loadedLazily)
+        queue.push_back(file);
+
+    for (InputFile *file : bitcodeFiles)
+      if (!file->loadedLazily)
+        queue.push_back(file);
+
+    // If the target file is a root object, we don't need to run BFS.
+    for (InputFile *file : queue) {
+      if (file != target)
+        continue;
+      outs() << "Explain: File '" << toString(target) << "' is linked because "
+             << "it is passed as a command line argument.\n";
+      return;
+    }
+  }
+
+  // Object files referenced by the following symbols are also root objects.
+  enqueueSpecial("(--entry option)", config->entry);
+  for (StringRef s : config->undefined)
+    enqueueSpecial("(--undefined option)", s);
+  for (StringRef s : script->referencedSymbols)
+    enqueueSpecial("(linker script)", s);
+
+  // Mark every root as visited before searching. The files queued directly
+  // above are deliberately not recorded in `seen` until now, so that
+  // enqueueSpecial() can still attach a special edge to them. If roots were
+  // left unmarked, enqueue() could later give a root an incoming edge (for
+  // example when a root refers to one of its own symbols), and printPath()
+  // would then follow the resulting cycle in `edges` forever.
+  seen.insert(queue.begin(), queue.end());
+
+  // Now that we have a complete set of root objects, run BFS.
+  while (!queue.empty()) {
+    InputFile *file = queue.front();
+    queue.pop_front();
+
+    if (file == target) {
+      printPath(fileOrSym);
+      return;
+    }
+
+    // Files of any other kind are not scanned for relocations.
+    // NOTE(review): the template arguments of these two isa<> checks were
+    // missing from the patch text as received; ObjFile<ELFT> and BinaryFile
+    // are assumed here -- confirm against the original change.
+    if (!isa<ObjFile<ELFT>>(file) && !isa<BinaryFile>(file))
+      continue;
+
+    for (const InputSectionBase *sec : file->getSections()) {
+      if (!sec || !sec->isLive())
+        continue;
+
+      if (sec->areRelocsRela) {
+        for (const typename ELFT::Rela &rel : sec->template relas<ELFT>())
+          enqueue(sec, rel);
+      } else {
+        for (const typename ELFT::Rel &rel : sec->template rels<ELFT>())
+          enqueue(sec, rel);
+      }
+    }
+  }
+
+  error("--explain: unreachable file: " + toString(target));
+}
+
+// Returns the shortest absolute path for a given path, so that
+// it is easy to compare two paths.
+static std::string canonicalize(StringRef path) {
+  SmallVector<char, 256> buf(path.begin(), path.end());
+  sys::fs::make_absolute(buf);
+  sys::path::remove_dots(buf, /*remove_dot_dot=*/true);
+  return {buf.begin(), buf.end()};
+}
+
+// Returns true if a given file has a given path. You can specify
+// "foo.a(bar.o)"-style string as a path.
+static bool hasPath(InputFile *file, StringRef path) {
+  // If a given path doesn't seem to represent an object in an archive,
+  // we can just compare the two paths.
+  size_t pos = path.find('(');
+  if (pos == StringRef::npos || !path.endswith(")"))
+    return canonicalize(file->mb.getBufferIdentifier()) == canonicalize(path);
+
+  if (file->archiveName.empty())
+    return false;
+
+  // Split "foo.a(bar.o)" into "foo.a" and "bar.o".
+  StringRef archivePath = path.substr(0, pos);
+  StringRef subPath = path.substr(pos + 1, path.size() - pos - 2);
+
+  // Only the archive part is canonicalized; the member name is compared
+  // verbatim against the buffer identifier.
+  return canonicalize(file->archiveName) == canonicalize(archivePath) &&
+         file->mb.getBufferIdentifier() == subPath;
+}
+
+// Resolves the --explain argument to an input file. The argument is first
+// matched as a path against object files and shared files; if nothing
+// matches, it is looked up as a symbol name and the defining file is
+// returned. Reports an error and returns nullptr if both attempts fail.
+template <class ELFT> InputFile *Explain<ELFT>::findFile(StringRef fileOrSym) {
+  for (InputFile *file : objectFiles)
+    if (hasPath(file, fileOrSym))
+      return file;
+
+  for (SharedFile *file : sharedFiles)
+    if (hasPath(file, fileOrSym))
+      return file;
+
+  if (auto *sym = dyn_cast_or_null<Defined>(symtab->find(fileOrSym))) {
+    outs() << "Explain: Symbol '" << fileOrSym << "' is defined in file '"
+           << toString(sym->file) << "'.\n";
+    return sym->file;
+  }
+
+  error("--explain: no such file or symbol: '" + fileOrSym +
+        "'. Use --trace option to see a list of input files.");
+  return nullptr;
+}
+
+// Add a new vertex for BFS. The vertex is the file of the symbol that `rel`
+// refers to. Symbols without a file, weak symbols, and files that have
+// already been seen are ignored.
+template <class ELFT>
+template <class RelTy>
+void Explain<ELFT>::enqueue(const InputSectionBase *sec, RelTy &rel) {
+  Symbol &sym = sec->getFile<ELFT>()->getRelocTargetSym(rel);
+  if (!sym.file || sym.isWeak() || !seen.insert(sym.file).second)
+    return;
+  queue.push_back(sym.file);
+  edges[sym.file] = {sec->file, &sym};
+}
+
+// Add a new root vertex for BFS. The vertex is the file containing the
+// section in which `symName` is defined; `cause` is the text printed as the
+// origin of the reference. Nothing is added if the symbol is not found, is
+// not defined in an input section, or if its file has already been seen.
+template <class ELFT>
+void Explain<ELFT>::enqueueSpecial(StringRef cause, StringRef symName) {
+  auto *sym = dyn_cast_or_null<Defined>(symtab->find(symName));
+  if (!sym)
+    return;
+
+  auto *sec = dyn_cast_or_null<InputSectionBase>(sym->section);
+  if (!sec || !seen.insert(sec->file).second)
+    return;
+
+  queue.push_back(sec->file);
+  specialEdges[sec->file] = {cause, symName};
+}
+
+// Print out a path from a root to the target file.
+template <class ELFT> void Explain<ELFT>::printPath(StringRef fileOrSym) {
+  // files[0] is the target and files.back() is the root end of the chain.
+  // syms[i] is the symbol through which files[i] was reached from
+  // files[i + 1].
+  std::vector<std::string> files;
+  std::vector<std::string> syms;
+  InputFile *cur = target;
+
+  // Walk backwards from the target until we hit either a special root
+  // (command line option or linker script) or a file with no recorded edge.
+  for (;;) {
+    files.push_back(toString(cur));
+
+    // A single find() replaces the former count() + operator[] pair.
+    auto special = specialEdges.find(cur);
+    if (special != specialEdges.end()) {
+      files.push_back(special->second.first.str());
+      syms.push_back(special->second.second.str());
+      break;
+    }
+
+    auto edge = edges.find(cur);
+    if (edge == edges.end())
+      break;
+
+    syms.push_back(toString(*edge->second.second));
+    cur = edge->second.first;
+  }
+
+  // The code below assumes at least one edge was recorded, i.e. files has
+  // two or more entries and syms is non-empty.
+  outs() << "Explain: This is why '" << files[0] << "' is linked:\n"
+         << "Explain:\n"
+         << "Explain: '" << files.back() << "' uses '" << syms.back()
+         << "' defined in '" << files[files.size() - 2] << "'\n";
+
+  // Print the remaining links, from index files.size() - 3 down to 0. The
+  // unsigned countdown avoids narrowing size() into an int.
+  for (size_t i = files.size() - 2; i-- > 0;)
+    outs() << "Explain: which uses '" << syms[i] << "' defined in '" << files[i]
+           << "'\n";
+}
+
+// Runs one --explain query for the given file name or symbol name.
+template <class ELFT> void explain(StringRef fileOrSym) {
+  Explain<ELFT>().run(fileOrSym);
+}
+
+template void explain<ELF32LE>(StringRef);
+template void explain<ELF32BE>(StringRef);
+template void explain<ELF64LE>(StringRef);
+template void explain<ELF64BE>(StringRef);
+
+} // namespace elf
+} // namespace lld
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -114,6 +114,10 @@
   // True if this is an argument for --just-symbols. Usually false.
   bool justSymbols = false;
 
+  // True if this file loaded lazily from an archive or --start-lib/--end-lib.
+  // False if this file is directly specified on the command line.
+  bool loadedLazily = false;
+
   // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE
   // to compute offsets in PLT call stubs.
   uint32_t ppc32Got2OutSecOff = 0;
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1099,6 +1099,7 @@
   InputFile *file = createObjectFile(
       mb, getName(), c.getParent()->isThin() ?
0 : c.getChildOffset()); file->groupId = groupId; + file->loadedLazily = true; parseFile(file); } @@ -1495,6 +1496,7 @@ // insert the same defined symbols to the symbol table again. file->symbols = std::move(symbols); + file->loadedLazily = true; parseFile(file); } diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -42,6 +42,10 @@ defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"=">; +defm explain: + Eq<"explain", "Explain why a given file or a symbol gets linked to the final binary">, + MetaVarName<"">; + defm split_stack_adjust_size : Eq<"split-stack-adjust-size", "Specify adjustment to stack size when a split-stack function calls a " diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -177,6 +177,16 @@ This option is currently only supported on AArch64. .It Fl -exclude-libs Ns = Ns Ar value Exclude static libraries from automatic export. +.It Fl -explain Ns = Ns Ar value +Explain why an object file gets linked into the final binary. +.Ar value +may be either an object file pathname or a symbol. If a symbol is +specified, the reason why the object file defining that symbol is +linked is explained. Objects in archives are specified as +.Ql full/path/to/library(object) +such as +.Ql /usr/lib/libc.a(printf.o) +. .It Fl -export-dynamic , Fl E Put symbols in the dynamic symbol table. 
.It Fl -export-dynamic-symbol Ns = Ns Ar symbol diff --git a/lld/test/ELF/explain.s b/lld/test/ELF/explain.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/explain.s @@ -0,0 +1,63 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux -o %t-main.o %s +# RUN: echo '.globl foo; foo:' | llvm-mc -filetype=obj -triple=x86_64-unknown-linux -o %t-foo.o - +# RUN: echo '.globl bar; bar:' | llvm-mc -filetype=obj -triple=x86_64-unknown-linux -o %t-bar.o - + +# RUN: mkdir -p %t.dir/dir +# RUN: rm -f %t.dir/foo.a %t.dir/bar.a +# RUN: cp %t-foo.o %t.dir/foo.o +# RUN: cp %t-bar.o %t.dir/bar.o +# RUN: cd %t.dir +# RUN: llvm-ar rcs foo.a foo.o +# RUN: llvm-ar rcs bar.a bar.o + +# RUN: ld.lld -o /dev/null %t-main.o %t-foo.o --explain=%t-main.o \ +# RUN: | FileCheck -check-prefix=TEST1 %s + +# TEST1: main.o' is linked because it is passed as a command line argument. + +# RUN: ld.lld -o /dev/null %t-main.o %t-foo.o --explain=%t-foo.o \ +# RUN: | FileCheck -check-prefix=TEST2 %s + +# TEST2: foo.o' is linked because it is passed as a command line argument. + +# RUN: ld.lld -o /dev/null %t-main.o %t-foo.o --explain=%t-foo.o -gc-sections \ +# RUN: | FileCheck -check-prefix=TEST3 %s + +# TEST3: This is why '{{.*}}tmp-foo.o' is linked: +# TEST3: '(--entry option)' uses '_start' defined in '{{.*}}main.o' +# TEST3: Explain: which uses 'foo' defined in '{{.*}}foo.o' + +# RUN: ld.lld -o /dev/null %t-main.o %t-foo.o --explain=_start \ +# RUN: | FileCheck -check-prefix=TEST4 %s + +# TEST4: Symbol '_start' is defined in file '{{.*}}main.o'. +# TEST4: File '{{.*}}main.o' is linked because it is passed as a command line argument. 
+ +# RUN: ld.lld -o /dev/null %t-main.o foo.a --explain="foo.a(foo.o)" \ +# RUN: | FileCheck -check-prefix=TEST5 %s + +# RUN: cd %t.dir/ +# TEST5: This is why 'foo.a(foo.o)' is linked: +# TEST5: '(--entry option)' uses '_start' defined in '{{.*}}-main.o' +# TEST5: which uses 'foo' defined in 'foo.a(foo.o)' + +# RUN: ld.lld -o /dev/null %t-main.o %t.dir/foo.a --explain="%t.dir/dir/../foo.a(foo.o)" \ +# RUN: | FileCheck -check-prefix=TEST6 %s + +# TEST6: This is why '{{.*}}.dir/foo.a(foo.o)' is linked: +# TEST6: '(--entry option)' uses '_start' defined in '{{.*}}-main.o' +# TEST6: which uses 'foo' defined in '{{.*}}.dir/foo.a(foo.o)' + +# RUN: ld.lld -o /dev/null %t-main.o %t-foo.o %t.dir/bar.a -u bar --explain=bar \ +# RUN: | FileCheck -check-prefix=TEST7 %s + +# TEST7: Symbol 'bar' is defined in file '{{.*}}bar.a(bar.o)'. +# TEST7: This is why '{{.*}}bar.a(bar.o)' is linked: +# TEST7: '(--undefined option)' uses 'bar' defined in '{{.*}}bar.a(bar.o)' + +.globl _start +_start: + call foo + ret