diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -108,7 +108,7 @@ // load the ObjFile emitted by LTO compilation. if (r.Prevailing) replaceSymbol(sym, sym->getName(), sym->getFile(), - RefState::Strong); + RefState::Strong, /*wasBitcodeSymbol=*/true); // TODO: set the other resolution configs properly } diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -45,32 +45,41 @@ using namespace lld; using namespace lld::macho; -using Symbols = std::vector; -// Returns a pair where the left element is a container of all live Symbols and -// the right element is a container of all dead symbols. -static std::pair getSymbols() { - Symbols liveSymbols, deadSymbols; +struct MapInfo { + SmallVector files; + SmallVector liveSymbols; + SmallVector deadSymbols; +}; + +static MapInfo gatherMapInfo() { + MapInfo info; for (InputFile *file : inputFiles) - if (isa(file)) - for (Symbol *sym : file->symbols) + if (isa(file) || isa(file)) { + bool hasEmittedSymbol = false; + for (Symbol *sym : file->symbols) { if (auto *d = dyn_cast_or_null(sym)) if (d->isec && d->getFile() == file) { if (d->isLive()) { assert(!shouldOmitFromOutput(d->isec)); - liveSymbols.push_back(d); + info.liveSymbols.push_back(d); } else { - deadSymbols.push_back(d); + info.deadSymbols.push_back(d); } + hasEmittedSymbol = true; } - parallelSort(liveSymbols.begin(), liveSymbols.end(), + } + if (hasEmittedSymbol) + info.files.push_back(file); + } + parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(), [](Defined *a, Defined *b) { return a->getVA() != b->getVA() ? a->getVA() < b->getVA() : a->getName() < b->getName(); }); parallelSort( - deadSymbols.begin(), deadSymbols.end(), + info.deadSymbols.begin(), info.deadSymbols.end(), [](Defined *a, Defined *b) { return a->getName() < b->getName(); }); - return {std::move(liveSymbols), std::move(deadSymbols)}; + return info; } // Construct a map from symbols to their stringified representations. @@ -128,16 +137,16 @@ os << format("# Arch: %s\n", getArchitectureName(config->arch()).str().c_str()); + MapInfo info = gatherMapInfo(); + // Dump table of object files. os << "# Object files:\n"; os << format("[%3u] %s\n", 0, (const char *)"linker synthesized"); uint32_t fileIndex = 1; DenseMap readerToFileOrdinal; - for (InputFile *file : inputFiles) { - if (isa(file)) { - os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str()); - readerToFileOrdinal[file] = fileIndex++; - } + for (InputFile *file : info.files) { + os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str()); + readerToFileOrdinal[file] = fileIndex++; } // Dump table of sections @@ -153,13 +162,11 @@ } // Dump table of symbols - auto [liveSymbols, deadSymbols] = getSymbols(); - DenseMap liveSymbolStrings = - getSymbolStrings(liveSymbols); + getSymbolStrings(info.liveSymbols); os << "# Symbols:\n"; os << "# Address\tSize \tFile Name\n"; - for (Defined *sym : liveSymbols) { + for (Defined *sym : info.liveSymbols) { assert(sym->isLive()); os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size, readerToFileOrdinal[sym->getFile()], @@ -168,10 +175,10 @@ if (config->deadStrip) { DenseMap deadSymbolStrings = - getSymbolStrings(deadSymbols); + getSymbolStrings(info.deadSymbols); os << "# Dead Stripped Symbols:\n"; os << "# \tSize \tFile Name\n"; - for (Defined *sym : deadSymbols) { + for (Defined *sym : info.deadSymbols) { assert(!sym->isLive()); os << format("<>\t0x%08llX\t[%3u] %s\n", sym->size, readerToFileOrdinal[sym->getFile()], diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -108,6 +108,11 @@ } else if (auto *dysym = dyn_cast(s)) { overridesWeakDef = !isWeakDef && dysym->isWeakDef(); dysym->unreference(); + } else if (auto *undef = dyn_cast(s)) { + // Preserve the original bitcode file name (instead of using the object + // file name). + if (undef->wasBitcodeSymbol) + file = undef->getFile(); } // Defined symbols take priority over other types of symbols, so in case // of a name conflict, we fall through to the replaceSymbol() call below. @@ -142,7 +147,8 @@ RefState refState = isWeakRef ? RefState::Weak : RefState::Strong; if (wasInserted) - replaceSymbol(s, name, file, refState); + replaceSymbol(s, name, file, refState, + /*wasBitcodeSymbol=*/false); else if (auto *lazy = dyn_cast(s)) lazy->fetchArchiveMember(); else if (isa(s)) diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -133,6 +133,10 @@ uint64_t getVA() const override; + // Returns the object file that this symbol was defined in. This value differs + // from `getFile()` if the symbol originated from a bitcode file. + ObjFile *getObjectFile() const; + std::string getSourceLocation(); // Ensure this symbol's pointers to InputSections point to their canonical @@ -199,8 +203,10 @@ class Undefined : public Symbol { public: - Undefined(StringRefZ name, InputFile *file, RefState refState) - : Symbol(UndefinedKind, name, file), refState(refState) { + Undefined(StringRefZ name, InputFile *file, RefState refState, + bool wasBitcodeSymbol) + : Symbol(UndefinedKind, name, file), refState(refState), + wasBitcodeSymbol(wasBitcodeSymbol) { assert(refState != RefState::Unreferenced); } @@ -209,6 +215,7 @@ static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } RefState refState : 2; + bool wasBitcodeSymbol; }; // On Unix, it is traditionally allowed to write variable definitions without diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -105,6 +105,10 @@ return isec->getVA(value); } +ObjFile *Defined::getObjectFile() const { + return isec ? dyn_cast_or_null(isec->getFile()) : nullptr; +} + void Defined::canonicalize() { if (unwindEntry) unwindEntry = unwindEntry->canonical(); diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1157,8 +1157,7 @@ if (defined->wasIdenticalCodeFolded) continue; - InputSection *isec = defined->isec; - ObjFile *file = dyn_cast_or_null(isec->getFile()); + ObjFile *file = defined->getObjectFile(); if (!file || !file->compileUnit) continue; diff --git a/lld/test/MachO/map-file.ll b/lld/test/MachO/map-file.ll new file mode 100644 --- /dev/null +++ b/lld/test/MachO/map-file.ll @@ -0,0 +1,90 @@ +; REQUIRES: x86 +; RUN: rm -rf %t; split-file %s %t + +;; Verify that we map symbols to their original bitcode input files, not the +;; intermediate object files. Also verify that we don't emit these intermediate +;; object files if no symbol needs to reference them. (Intermediate object files +;; may still be referenced if they contain compiler-synthesized symbols, such +;; as outlined functions. TODO: Test this case.) + +; RUN: opt -module-summary %t/foo.ll -o %t/foo.thinlto.o +; RUN: opt -module-summary %t/bar.ll -o %t/bar.thinlto.o + +; RUN: %lld -dylib %t/foo.thinlto.o %t/bar.thinlto.o -o %t/foobar.thinlto -map \ +; RUN: %t/foobar.thinlto.map +; RUN: FileCheck %s --check-prefix=FOOBAR < %t/foobar.thinlto.map + +; RUN: %lld -dylib %t/bar.thinlto.o %t/foo.thinlto.o -o %t/barfoo.thinlto -map \ +; RUN: %t/barfoo.thinlto.map +; RUN: FileCheck %s --check-prefix=BARFOO < %t/barfoo.thinlto.map + +; RUN: llvm-as %t/foo.ll -o %t/foo.o +; RUN: llvm-as %t/bar.ll -o %t/bar.o +; RUN: %lld -dylib %t/foo.o %t/bar.o -o %t/foobar -map %t/foobar.map +; RUN: FileCheck %s --check-prefix=LTO < %t/foobar.map + +; FOOBAR: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/foobar.thinlto +; FOOBAR-NEXT: # Arch: x86_64 +; FOOBAR-NEXT: # Object files: +; FOOBAR-NEXT: [ 0] linker synthesized +; FOOBAR-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/foo.thinlto.o +; FOOBAR-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/bar.thinlto.o +; FOOBAR-NEXT: # Sections: +; FOOBAR: # Symbols: +; FOOBAR-NEXT: # Address Size File Name +; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _foo +; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _bar +; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _maybe_weak + +; BARFOO: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/barfoo.thinlto +; BARFOO-NEXT: # Arch: x86_64 +; BARFOO-NEXT: # Object files: +; BARFOO-NEXT: [ 0] linker synthesized +; BARFOO-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/bar.thinlto.o +; BARFOO-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/foo.thinlto.o +; BARFOO-NEXT: # Sections: +; BARFOO: # Symbols: +; BARFOO-NEXT: # Address Size File Name +; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _bar +; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _maybe_weak +; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _foo + +; LTO: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/foobar +; LTO-NEXT: # Arch: x86_64 +; LTO-NEXT: # Object files: +; LTO-NEXT: [ 0] linker synthesized +; LTO-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/foo.o +; LTO-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/bar.o +; LTO-NEXT: # Sections: +; LTO: # Symbols: +; LTO-NEXT: # Address Size File Name +; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _foo +; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _bar +; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _maybe_weak + +;--- foo.ll +target triple = "x86_64-apple-darwin" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo() { + ret void +} + +;; This is weak in foo.ll but non-weak in bar.ll, so the definition in bar.ll +;; will always prevail. Check that the map file maps `maybe_weak` to bar.ll +;; regardless of the object file input order. +define weak_odr void @maybe_weak() { + ret void +} + +;--- bar.ll +target triple = "x86_64-apple-darwin" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @bar() { + ret void +} + +define void @maybe_weak() { + ret void +}