diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file implements the -map option. It shows lists in order and -// hierarchically the outputFile, arch, input files, output sections and -// symbols: +// This file implements the -map option, which maps address ranges to their +// respective contents, plus the input file these contents were originally from. +// The contents (typically symbols) are listed in address order. Dead-stripped +// contents are included as well. // // # Path: test // # Arch: x86_84 @@ -28,15 +29,16 @@ //===----------------------------------------------------------------------===// #include "MapFile.h" +#include "ConcatOutputSection.h" #include "Config.h" #include "InputFiles.h" #include "InputSection.h" -#include "OutputSection.h" #include "OutputSegment.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/TimeProfiler.h" @@ -45,69 +47,75 @@ using namespace lld; using namespace lld::macho; +struct CStringInfo { + uint32_t fileIndex; + StringRef str; +}; + struct MapInfo { SmallVector files; - SmallVector liveSymbols; SmallVector deadSymbols; + DenseMap>> + liveCStringsForSection; + SmallVector deadCStrings; }; static MapInfo gatherMapInfo() { MapInfo info; for (InputFile *file : inputFiles) if (isa(file) || isa(file)) { - bool hasEmittedSymbol = false; + uint32_t fileIndex = info.files.size() + 1; + bool isReferencedFile = false; + + // Gather the dead symbols. We don't have to bother with the live ones + // because we will pick them up as we iterate over the OutputSections + // later. for (Symbol *sym : file->symbols) { if (auto *d = dyn_cast_or_null(sym)) - if (d->isec && d->getFile() == file) { - if (d->isLive()) { - assert(!shouldOmitFromOutput(d->isec)); - info.liveSymbols.push_back(d); - } else { + // Only emit the prevailing definition of a symbol. Also, don't emit + // the symbol if it is part of a cstring section (we use the literal + // value instead, similar to ld64) + if (d->isec && d->getFile() == file && + !isa(d->isec)) { + isReferencedFile = true; + if (!d->isLive()) info.deadSymbols.push_back(d); + } + } + + // Gather all the cstrings (both live and dead). A CString(Output)Section + // doesn't provide us a way of figuring out which InputSections its + // cstring contents came from, so we need to build up that mapping here. + for (const Section *sec : file->sections) { + for (const Subsection &subsec : sec->subsections) { + if (auto isec = dyn_cast(subsec.isec)) { + auto &liveCStrings = info.liveCStringsForSection[isec->parent]; + for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { + if (piece.live) + liveCStrings.push_back({isec->parent->addr + piece.outSecOff, + {fileIndex, isec->getStringRef(i)}}); + else + info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)}); + isReferencedFile = true; } - hasEmittedSymbol = true; + } else { + break; } + } } - if (hasEmittedSymbol) - info.files.push_back(file); - } - parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(), - [](Defined *a, Defined *b) { return a->getVA() < b->getVA(); }); - return info; -} -// Construct a map from symbols to their stringified representations. -// Demangling symbols (which is what toString() does) is slow, so -// we do that in batch using parallel-for. -static DenseMap -getSymbolStrings(ArrayRef syms) { - std::vector str(syms.size()); - parallelFor(0, syms.size(), [&](size_t i) { - raw_string_ostream os(str[i]); - Defined *sym = syms[i]; - - switch (sym->isec->kind()) { - case InputSection::CStringLiteralKind: { - // Output "literal string: " - const auto *isec = cast(sym->isec); - const StringPiece &piece = isec->getStringPiece(sym->value); - assert( - sym->value == piece.inSecOff && - "We expect symbols to always point to the start of a StringPiece."); - StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin())); - (os << "literal string: ").write_escaped(str); - break; - } - case InputSection::ConcatKind: - case InputSection::WordLiteralKind: - os << toString(*sym); + if (isReferencedFile) + info.files.push_back(file); } - }); - DenseMap ret; - for (size_t i = 0, e = syms.size(); i < e; ++i) - ret[syms[i]] = std::move(str[i]); - return ret; + // cstrings are not stored in sorted order in their OutputSections, so we sort + // them here. + for (auto &liveCStrings : info.liveCStringsForSection) + parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) { + return p1.first < p2.first; + }); + return info; } void macho::writeMapFile() { @@ -124,16 +132,12 @@ return; } - // Dump output path. os << format("# Path: %s\n", config->outputFile.str().c_str()); - - // Dump output architecture. os << format("# Arch: %s\n", getArchitectureName(config->arch()).str().c_str()); MapInfo info = gatherMapInfo(); - // Dump table of object files. os << "# Object files:\n"; os << format("[%3u] %s\n", 0, (const char *)"linker synthesized"); uint32_t fileIndex = 1; @@ -143,7 +147,6 @@ readerToFileOrdinal[file] = fileIndex++; } - // Dump table of sections os << "# Sections:\n"; os << "# Address\tSize \tSegment\tSection\n"; for (OutputSegment *seg : outputSegments) @@ -155,28 +158,48 @@ seg->name.str().c_str(), osec->name.str().c_str()); } - // Dump table of symbols - DenseMap liveSymbolStrings = - getSymbolStrings(info.liveSymbols); os << "# Symbols:\n"; os << "# Address\tSize \tFile Name\n"; - for (Defined *sym : info.liveSymbols) { - assert(sym->isLive()); - os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size, - readerToFileOrdinal[sym->getFile()], - liveSymbolStrings[sym].c_str()); + for (const OutputSegment *seg : outputSegments) { + for (const OutputSection *osec : seg->getSections()) { + if (auto *concatOsec = dyn_cast(osec)) { + for (const InputSection *isec : concatOsec->inputs) { + for (Defined *sym : isec->symbols) + os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), + sym->size, readerToFileOrdinal[sym->getFile()], + sym->getName().str().data()); + } + } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { + const auto &liveCStrings = info.liveCStringsForSection.lookup(osec); + uint64_t lastAddr = 0; // strings will never start at address 0, so this + // is a sentinel value + for (const auto &[addr, info] : liveCStrings) { + uint64_t size = 0; + if (addr != lastAddr) + size = info.str.size() + 1; // include null terminator + lastAddr = addr; + os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size, + info.fileIndex); + os.write_escaped(info.str) << "\n"; + } + } + // TODO print other synthetic sections + } } if (config->deadStrip) { - DenseMap deadSymbolStrings = - getSymbolStrings(info.deadSymbols); os << "# Dead Stripped Symbols:\n"; os << "# \tSize \tFile Name\n"; for (Defined *sym : info.deadSymbols) { assert(!sym->isLive()); os << format("<>\t0x%08llX\t[%3u] %s\n", sym->size, readerToFileOrdinal[sym->getFile()], - deadSymbolStrings[sym].c_str()); + sym->getName().str().data()); + } + for (CStringInfo &cstrInfo : info.deadCStrings) { + os << format("<>\t0x%08llX\t[%3u] literal string: ", + cstrInfo.str.size() + 1, cstrInfo.fileIndex); + os.write_escaped(cstrInfo.str) << "\n"; } } } diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s --- a/lld/test/MachO/map-file.s +++ b/lld/test/MachO/map-file.s @@ -4,23 +4,24 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/c-string-literal.s -o %t/c-string-literal.o -# RUN: %lld -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o --time-trace -o %t/test +# RUN: %lld -demangle -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o \ +# RUN: --time-trace -o %t/test # RUN: llvm-objdump --syms --section-headers %t/test > %t/objdump -# RUN: cat %t/objdump %t/map > %t/out -# RUN: FileCheck %s < %t/out +## Check that symbols in cstring sections aren't emitted +# RUN: cat %t/objdump %t/map | FileCheck %s --implicit-check-not _hello_world # RUN: FileCheck %s --check-prefix=MAPFILE < %t/test.time-trace # CHECK: Sections: -# CHECK-NEXT: Idx Name Size VMA Type -# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT -# CHECK-NEXT: 1 obj {{[0-9a-f]+}} [[#%x,DATA:]] TEXT -# CHECK-NEXT: 2 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA -# CHECK-NEXT: 3 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS +# CHECK-NEXT: Idx Name Size VMA Type +# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT +# CHECK-NEXT: 1 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA +# CHECK-NEXT: 2 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS # CHECK: SYMBOL TABLE: # CHECK-DAG: [[#%x,MAIN:]] g F __TEXT,__text _main # CHECK-DAG: [[#%x,NUMBER:]] g O __DATA,__common _number -# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,obj _foo +# CHECK-DAG: [[#%x,BAR:]] g F __TEXT,__text _bar +# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,__text __ZTIN3foo3bar4MethE # CHECK-DAG: [[#%x,HIWORLD:]] g O __TEXT,__cstring _hello_world # CHECK-DAG: [[#%x,HIITSME:]] g O __TEXT,__cstring _hello_its_me @@ -35,43 +36,50 @@ # CHECK-NEXT: # Sections: # CHECK-NEXT: # Address Size Segment Section # CHECK-NEXT: 0x[[#%X,TEXT]] 0x{{[0-9A-F]+}} __TEXT __text -# CHECK-NEXT: 0x[[#%X,DATA]] 0x{{[0-9A-F]+}} __TEXT obj # CHECK-NEXT: 0x[[#%X,CSTR]] 0x{{[0-9A-F]+}} __TEXT __cstring # CHECK-NEXT: 0x[[#%X,BSS]] 0x{{[0-9A-F]+}} __DATA __common # CHECK-NEXT: # Symbols: -# CHECK-NEXT: # Address Size File Name -# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main -# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] _foo -# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n -# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me -# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number +# CHECK-NEXT: # Address Size File Name +# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main +# CHECK-DAG: 0x[[#%X,BAR]] 0x00000001 [ 1] _bar +# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] __ZTIN3foo3bar4MethE +# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n +# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me +# CHECK-DAG: 0x[[#%X,HIITSME + 0xf]] 0x0000000E [ 3] literal string: Hello world!\n +# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number # MAPFILE: "name":"Total Write map file" -# RUN: %lld -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped +# RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped # RUN: FileCheck --check-prefix=STRIPPED %s < %t/stripped-map ## C-string literals should be printed as "literal string: " # STRIPPED-LABEL: Dead Stripped Symbols: -# STRIPPED-DAG: <> 0x00000001 [ 2] _foo -# STRIPPED-DAG: <> 0x0000000E [ 3] literal string: Hello world!\n -# STRIPPED-DAG: <> 0x0000000F [ 3] literal string: Hello, it's me -# STRIPPED-DAG: <> 0x00000001 [ 1] _number +# STRIPPED-DAG: <> 0x00000001 [ 1] _bar +# STRIPPED-DAG: <> 0x00000001 [ 1] _number +# STRIPPED-DAG: <> 0x00000001 [ 2] __ZTIN3foo3bar4MethE +# STRIPPED-DAG: <> 0x0000000E [ 3] literal string: Hello world!\n +# STRIPPED-DAG: <> 0x0000000F [ 3] literal string: Hello, it's me +# STRIPPED-DAG: <> 0x0000000E [ 3] literal string: Hello world!\n # RUN: %lld --icf=all -map %t/icf-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/icf # RUN: FileCheck --check-prefix=ICF %s < %t/icf-map +## Verify that folded symbols and cstrings have size zero. Note that ld64 prints +## folded symbols but not folded cstrings; we print both. + # ICF: Symbols: -# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] _foo -# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar +# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] __ZTIN3foo3bar4MethE +# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar +# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E [ 3] literal string: Hello world!\n +# ICF-DAG: 0x[[#%X,HIWORLD]] 0x00000000 [ 3] literal string: Hello world!\n #--- foo.s -## ICF will only fold sections marked as pure_instructions -.section __TEXT,obj,regular,pure_instructions -.globl _foo -.alt_entry _alt_foo -_foo: +.globl __ZTIN3foo3bar4MethE +## This C++ symbol makes it clear that we do not print the demangled name in +## the map file, even if `-demangle` is passed. +__ZTIN3foo3bar4MethE: nop .subsections_via_symbols @@ -79,12 +87,10 @@ #--- test.s .comm _number, 1 .globl _main, _bar -.alt_entry _alt_bar _main: ret -.section __TEXT,obj,regular,pure_instructions _bar: nop @@ -101,4 +107,6 @@ _hello_its_me: .asciz "Hello, it's me" +.asciz "Hello world!\n" + .subsections_via_symbols