Index: lld/MachO/Dwarf.h =================================================================== --- lld/MachO/Dwarf.h +++ lld/MachO/Dwarf.h @@ -37,12 +37,17 @@ llvm::StringRef getAbbrevSection() const override { return abbrevSection; } llvm::StringRef getStrSection() const override { return strSection; } + llvm::DWARFSection const &getLineSection() const override { + return lineSection; + } + // Returns an instance of DwarfObject if the given object file has the // relevant DWARF debug sections. static std::unique_ptr create(ObjFile *); private: llvm::DWARFSection infoSection; + llvm::DWARFSection lineSection; llvm::StringRef abbrevSection; llvm::StringRef strSection; }; Index: lld/MachO/Dwarf.cpp =================================================================== --- lld/MachO/Dwarf.cpp +++ lld/MachO/Dwarf.cpp @@ -20,15 +20,16 @@ std::unique_ptr DwarfObject::create(ObjFile *obj) { auto dObj = std::make_unique(); bool hasDwarfInfo = false; - // LLD only needs to extract the source file path from the debug info, so we - // initialize DwarfObject with just the sections necessary to get that path. - // The debugger will locate the debug info via the object file paths that we - // emit in our STABS symbols, so we don't need to process & emit them - // ourselves. + // LLD only needs to extract the source file path and line numbers from the + // debug info, so we initialize DwarfObject with just the sections necessary + // to get that path. The debugger will locate the debug info via the object + // file paths that we emit in our STABS symbols, so we don't need to process & + // emit them ourselves. 
for (const InputSection *isec : obj->debugSections) { if (StringRef *s = StringSwitch(isec->getName()) .Case(section_names::debugInfo, &dObj->infoSection.Data) + .Case(section_names::debugLine, &dObj->lineSection.Data) .Case(section_names::debugAbbrev, &dObj->abbrevSection) .Case(section_names::debugStr, &dObj->strSection) .Default(nullptr)) { Index: lld/MachO/InputFiles.h =================================================================== --- lld/MachO/InputFiles.h +++ lld/MachO/InputFiles.h @@ -12,6 +12,7 @@ #include "MachOStructs.h" #include "Target.h" +#include "lld/Common/DWARF.h" #include "lld/Common/LLVM.h" #include "lld/Common/Memory.h" #include "llvm/ADT/CachedHashString.h" @@ -21,6 +22,7 @@ #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/Archive.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Threading.h" #include "llvm/TextAPI/TextAPIReader.h" #include @@ -159,7 +161,13 @@ static bool classof(const InputFile *f) { return f->kind() == ObjKind; } + std::string sourceFile() const; + // Parses line table information for diagnostics. compileUnit should be used + // for other purposes. + lld::DWARFCache *getDwarf(); + llvm::DWARFUnit *compileUnit = nullptr; + std::unique_ptr dwarfCache; Section *addrSigSection = nullptr; const uint32_t modTime; std::vector debugSections; @@ -167,6 +175,7 @@ llvm::DenseMap fdes; private: + llvm::once_flag initDwarf; template void parseLazy(); template void parseSections(ArrayRef); template Index: lld/MachO/InputFiles.cpp =================================================================== --- lld/MachO/InputFiles.cpp +++ lld/MachO/InputFiles.cpp @@ -998,6 +998,8 @@ if (!dObj) return; + // We do not re-use the context from getDwarf() here as that function + // constructs an expensive DWARFCache object. 
auto *ctx = make<DWARFContext>(
       std::move(dObj), "",
       [&](Error err) {
@@ -1373,6 +1375,39 @@
   }
 }
 
+// Returns the path of the source file this object was compiled from: the
+// compile unit's compilation directory joined with the unit DIE's short name.
+// Dereferences compileUnit, so callers must check it is non-null first.
+std::string ObjFile::sourceFile() const {
+  SmallString<261> dir(compileUnit->getCompilationDir());
+  StringRef sep = sys::path::get_separator();
+  // We don't use `path::append` here because we want an empty `dir` to result
+  // in an absolute path. `append` would give us a relative path for that case.
+  if (!dir.endswith(sep))
+    dir += sep;
+  return (dir + compileUnit->getUnitDIE().getShortName()).str();
+}
+
+// Lazily builds (at most once, guarded by initDwarf) the DWARFCache used to
+// look up source locations for diagnostics. Returns nullptr if
+// DwarfObject::create() returns null for this file.
+lld::DWARFCache *ObjFile::getDwarf() {
+  llvm::call_once(initDwarf, [this]() {
+    auto dwObj = DwarfObject::create(this);
+    if (!dwObj)
+      return;
+    // Errors and warnings are both reported as linker warnings prefixed with
+    // this file's name, so one handler serves both callbacks.
+    auto report = [this](Error err) {
+      warn(getName() + ": " + toString(std::move(err)));
+    };
+    dwarfCache = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
+        std::move(dwObj), "", report, report));
+  });
+
+  return dwarfCache.get();
+}
+
 // The path can point to either a dylib or a .tbd file.
 static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
   Optional<MemoryBufferRef> mbref = readFile(path);
Index: lld/MachO/InputSection.h
===================================================================
--- lld/MachO/InputSection.h
+++ lld/MachO/InputSection.h
@@ -50,7 +50,11 @@
   // The offset from the beginning of the file.
   uint64_t getVA(uint64_t off) const;
   // Return a user-friendly string for use in diagnostics.
+  // Format: /path/to/object.o:(symbol _func+0x123)
   std::string getLocation(uint64_t off) const;
+  // Return the source line corresponding to an address, or the empty string.
+  // Format: Source.cpp:123 (/path/to/Source.cpp:123)
+  std::string getSourceLocation(uint64_t off) const;
   // Whether the data at \p off in this InputSection is live.
virtual bool isLive(uint64_t off) const = 0;
   virtual void markLive(uint64_t off) = 0;
@@ -85,6 +89,10 @@
 
 protected:
   const Section &section;
+
+  // Return the last entry of `symbols` whose value is at or before \p off, or
+  // nullptr if there is no such symbol.
+  const Defined *getContainingSymbol(uint64_t off) const;
 };
 
 // ConcatInputSections are combined into (Concat)OutputSections through simple
@@ -292,6 +300,7 @@
 constexpr const char data[] = "__data";
 constexpr const char debugAbbrev[] = "__debug_abbrev";
 constexpr const char debugInfo[] = "__debug_info";
+constexpr const char debugLine[] = "__debug_line";
 constexpr const char debugStr[] = "__debug_str";
 constexpr const char ehFrame[] = "__eh_frame";
 constexpr const char gccExceptTab[] = "__gcc_except_tab";
Index: lld/MachO/InputSection.cpp
===================================================================
--- lld/MachO/InputSection.cpp
+++ lld/MachO/InputSection.cpp
@@ -55,17 +55,21 @@
   return sym->getVA();
 }
 
+const Defined *InputSection::getContainingSymbol(uint64_t off) const {
+  auto *nextSym = llvm::upper_bound(
+      symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; });
+  if (nextSym == symbols.begin())
+    return nullptr;
+  return *std::prev(nextSym);
+}
+
 std::string InputSection::getLocation(uint64_t off) const {
   // First, try to find a symbol that's near the offset. Use it as a reference
   // point.
-  auto *nextSym = llvm::upper_bound(
-      symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; });
-  if (nextSym != symbols.begin()) {
-    auto &sym = *std::prev(nextSym);
+  if (auto *sym = getContainingSymbol(off))
     return (toString(getFile()) + ":(symbol " + sym->getName() + "+0x" +
             Twine::utohexstr(off - sym->value) + ")")
         .str();
-  }
 
   // If that fails, use the section itself as a reference point.
for (const Subsection &subsec : section.subsections) {
@@ -74,11 +78,68 @@
       break;
     }
   }
+
   return (toString(getFile()) + ":(" + getName() + "+0x" +
           Twine::utohexstr(off) + ")")
       .str();
 }
 
+// Best-effort source position for the byte at isec-relative offset \p off.
+// Tries, in order: the DWARF line table, the declaration of the containing
+// symbol looked up as a variable, and finally the compile unit's source path.
+// Returns an empty string when none of these is available.
+std::string InputSection::getSourceLocation(uint64_t off) const {
+  auto *obj = dyn_cast_or_null<ObjFile>(getFile());
+  if (!obj)
+    return {};
+
+  DWARFCache *dwarf = obj->getDwarf();
+  if (!dwarf)
+    return {};
+
+  // getLocation() looks symbols up with the unadjusted, isec-relative offset,
+  // whereas the line-table query below needs a section-relative address. Keep
+  // the two offsets separate so the symbol lookup is not skewed.
+  uint64_t sectionOff = off;
+  for (const Subsection &subsec : section.subsections) {
+    if (subsec.isec == this) {
+      sectionOff += subsec.offset;
+      break;
+    }
+  }
+
+  auto createMsg = [&](StringRef path, unsigned line) {
+    std::string filename = sys::path::filename(path).str();
+    std::string lineStr = (":" + Twine(line)).str();
+    if (filename == path)
+      return filename + lineStr;
+    return (filename + lineStr + " (" + path + lineStr + ")").str();
+  };
+
+  // First, look up a function for the given address.
+  if (Optional<DILineInfo> li = dwarf->getDILineInfo(
+          section.addr + sectionOff, object::SectionedAddress::UndefSection))
+    return createMsg(li->FileName, li->Line);
+
+  // If that failed, look the containing symbol up as a variable.
+  if (const Defined *sym = getContainingSymbol(off)) {
+    // Mach-O symbol names carry a leading underscore that the names recorded
+    // in the debug info do not have (e.g. `_main` vs. DW_AT_name "main").
+    StringRef symName = sym->getName();
+    symName.consume_front("_");
+
+    if (Optional<std::pair<std::string, unsigned>> fileLine =
+            dwarf->getVariableLoc(symName))
+      return createMsg(fileLine->first, fileLine->second);
+  }
+
+  // Fall back to the source file's name from the DWARF information.
+  if (obj->compileUnit)
+    return obj->sourceFile();
+
+  return {};
+}
+
 void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
   align = std::max(align, copy->align);
   copy->live = false;
Index: lld/MachO/SymbolTable.cpp
===================================================================
--- lld/MachO/SymbolTable.cpp
+++ lld/MachO/SymbolTable.cpp
@@ -381,8 +381,11 @@
          locations.codeReferences) {
       if (i >= maxUndefinedReferences)
         break;
-      // TODO: Get source file/line from debug information.
-      message += "\n>>> referenced by " + loc.isec->getLocation(loc.offset);
+      message += "\n>>> referenced by ";
+      std::string src = loc.isec->getSourceLocation(loc.offset);
+      if (!src.empty())
+        message += src + "\n>>> ";
+      message += loc.isec->getLocation(loc.offset);
       ++i;
     }
 
Index: lld/MachO/SyntheticSections.h
===================================================================
--- lld/MachO/SyntheticSections.h
+++ lld/MachO/SyntheticSections.h
@@ -435,7 +435,7 @@
   uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
 
 private:
-  void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
+  void emitBeginSourceStab(llvm::StringRef);
   void emitEndSourceStab();
   void emitObjectFileStab(ObjFile *);
   void emitEndFunStab(Defined *);
Index: lld/MachO/SyntheticSections.cpp
===================================================================
--- lld/MachO/SyntheticSections.cpp
+++ lld/MachO/SyntheticSections.cpp
@@ -834,16 +834,10 @@
     : LinkEditSection(segment_names::linkEdit, section_names::symbolTable),
       stringTableSection(stringTableSection) {}
 
-void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
+// Emits an N_SO stab whose string is \p sourceFile (saved via saver()).
+void SymtabSection::emitBeginSourceStab(StringRef sourceFile) {
   StabsEntry stab(N_SO);
-  SmallString<261> dir(compileUnit->getCompilationDir());
-  StringRef sep = sys::path::get_separator();
-  // We don't use `path::append` here because we want an empty `dir` to result
-  // in an absolute path. `append` would give us a relative path for that case.
-  if (!dir.endswith(sep))
-    dir += sep;
-  stab.strx = stringTableSection.addString(
-      saver().save(dir + compileUnit->getUnitDIE().getShortName()));
+  stab.strx = stringTableSection.addString(saver().save(sourceFile));
   stabs.emplace_back(std::move(stab));
 }
 
@@ -938,7 +932,7 @@
           emitEndSourceStab();
         lastFile = file;
 
-        emitBeginSourceStab(file->compileUnit);
+        emitBeginSourceStab(file->sourceFile());
         emitObjectFileStab(file);
       }
 
Index: lld/test/MachO/invalid/undef-debug.s
===================================================================
--- /dev/null
+++ lld/test/MachO/invalid/undef-debug.s
@@ -0,0 +1,178 @@
+# REQUIRES: aarch64
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %s -o %t.o
+# RUN: not %lld -arch arm64 %t.o -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK: undefined symbol: _undef
+# CHECK-NEXT: >>> referenced by undef-debug.c:2
+# CHECK-NEXT: >>> {{.*}}.o:(symbol _main+0x0)
+
+# This is the output of `clang -g1 -O2 -fdebug-compilation-dir=. -fno-ident` called on the following file:
+# int undef();
+# int main() { return undef(); }
+	.section	__TEXT,__text,regular,pure_instructions
+	.build_version macos, 12, 0	sdk_version 12, 3
+	.globl	_main                           ; -- Begin function main
+	.p2align	2
+_main:                                  ; @main
+Lfunc_begin0:
+	.file	1 "."
"undef-debug.c" + .loc 1 2 0 ; undef-debug.c:2:0 + .cfi_startproc +; %bb.0: + .loc 1 2 21 prologue_end ; undef-debug.c:2:21 + b _undef +Ltmp0: +Lfunc_end0: + .cfi_endproc + ; -- End function + .section __DWARF,__debug_abbrev,regular,debug +Lsection_abbrev: + .byte 1 ; Abbreviation Code + .byte 17 ; DW_TAG_compile_unit + .byte 1 ; DW_CHILDREN_yes + .byte 37 ; DW_AT_producer + .byte 14 ; DW_FORM_strp + .byte 19 ; DW_AT_language + .byte 5 ; DW_FORM_data2 + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .ascii "\202|" ; DW_AT_LLVM_sysroot + .byte 14 ; DW_FORM_strp + .ascii "\357\177" ; DW_AT_APPLE_sdk + .byte 14 ; DW_FORM_strp + .byte 16 ; DW_AT_stmt_list + .byte 23 ; DW_FORM_sec_offset + .byte 27 ; DW_AT_comp_dir + .byte 14 ; DW_FORM_strp + .ascii "\341\177" ; DW_AT_APPLE_optimized + .byte 25 ; DW_FORM_flag_present + .byte 17 ; DW_AT_low_pc + .byte 1 ; DW_FORM_addr + .byte 18 ; DW_AT_high_pc + .byte 6 ; DW_FORM_data4 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 2 ; Abbreviation Code + .byte 46 ; DW_TAG_subprogram + .byte 0 ; DW_CHILDREN_no + .byte 17 ; DW_AT_low_pc + .byte 1 ; DW_FORM_addr + .byte 18 ; DW_AT_high_pc + .byte 6 ; DW_FORM_data4 + .ascii "\347\177" ; DW_AT_APPLE_omit_frame_ptr + .byte 25 ; DW_FORM_flag_present + .byte 122 ; DW_AT_call_all_calls + .byte 25 ; DW_FORM_flag_present + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 0 ; EOM(3) + .section __DWARF,__debug_info,regular,debug +Lsection_info: +Lcu_begin0: +.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ; Length of Unit + .long Lset0 +Ldebug_info_start0: + .short 4 ; DWARF version number +.set Lset1, Lsection_abbrev-Lsection_abbrev ; Offset Into Abbrev. 
Section + .long Lset1 + .byte 8 ; Address Size (in bytes) + .byte 1 ; Abbrev [1] 0xb:0x39 DW_TAG_compile_unit + .long 0 ; DW_AT_producer + .short 12 ; DW_AT_language + .long 1 ; DW_AT_name + .long 15 ; DW_AT_LLVM_sysroot + .long 110 ; DW_AT_APPLE_sdk +.set Lset2, Lline_table_start0-Lsection_line ; DW_AT_stmt_list + .long Lset2 + .long 121 ; DW_AT_comp_dir + ; DW_AT_APPLE_optimized + .quad Lfunc_begin0 ; DW_AT_low_pc +.set Lset3, Lfunc_end0-Lfunc_begin0 ; DW_AT_high_pc + .long Lset3 + .byte 2 ; Abbrev [2] 0x32:0x11 DW_TAG_subprogram + .quad Lfunc_begin0 ; DW_AT_low_pc +.set Lset4, Lfunc_end0-Lfunc_begin0 ; DW_AT_high_pc + .long Lset4 + ; DW_AT_APPLE_omit_frame_ptr + ; DW_AT_call_all_calls + .long 123 ; DW_AT_name + .byte 0 ; End Of Children Mark +Ldebug_info_end0: + .section __DWARF,__debug_str,regular,debug +Linfo_string: + .byte 0 ; string offset=0 + .asciz "undef-debug.c" ; string offset=1 + .asciz "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk" ; string offset=15 + .asciz "MacOSX.sdk" ; string offset=110 + .asciz "." 
; string offset=121 + .asciz "main" ; string offset=123 + .section __DWARF,__apple_names,regular,debug +Lnames_begin: + .long 1212240712 ; Header Magic + .short 1 ; Header Version + .short 0 ; Header Hash Function + .long 1 ; Header Bucket Count + .long 1 ; Header Hash Count + .long 12 ; Header Data Length + .long 0 ; HeaderData Die Offset Base + .long 1 ; HeaderData Atom Count + .short 1 ; DW_ATOM_die_offset + .short 6 ; DW_FORM_data4 + .long 0 ; Bucket 0 + .long 2090499946 ; Hash in Bucket 0 +.set Lset5, LNames0-Lnames_begin ; Offset in Bucket 0 + .long Lset5 +LNames0: + .long 123 ; main + .long 1 ; Num DIEs + .long 50 + .long 0 + .section __DWARF,__apple_objc,regular,debug +Lobjc_begin: + .long 1212240712 ; Header Magic + .short 1 ; Header Version + .short 0 ; Header Hash Function + .long 1 ; Header Bucket Count + .long 0 ; Header Hash Count + .long 12 ; Header Data Length + .long 0 ; HeaderData Die Offset Base + .long 1 ; HeaderData Atom Count + .short 1 ; DW_ATOM_die_offset + .short 6 ; DW_FORM_data4 + .long -1 ; Bucket 0 + .section __DWARF,__apple_namespac,regular,debug +Lnamespac_begin: + .long 1212240712 ; Header Magic + .short 1 ; Header Version + .short 0 ; Header Hash Function + .long 1 ; Header Bucket Count + .long 0 ; Header Hash Count + .long 12 ; Header Data Length + .long 0 ; HeaderData Die Offset Base + .long 1 ; HeaderData Atom Count + .short 1 ; DW_ATOM_die_offset + .short 6 ; DW_FORM_data4 + .long -1 ; Bucket 0 + .section __DWARF,__apple_types,regular,debug +Ltypes_begin: + .long 1212240712 ; Header Magic + .short 1 ; Header Version + .short 0 ; Header Hash Function + .long 1 ; Header Bucket Count + .long 0 ; Header Hash Count + .long 20 ; Header Data Length + .long 0 ; HeaderData Die Offset Base + .long 3 ; HeaderData Atom Count + .short 1 ; DW_ATOM_die_offset + .short 6 ; DW_FORM_data4 + .short 3 ; DW_ATOM_die_tag + .short 5 ; DW_FORM_data2 + .short 4 ; DW_ATOM_type_flags + .short 11 ; DW_FORM_data1 + .long -1 ; Bucket 0 
+.subsections_via_symbols + .section __DWARF,__debug_line,regular,debug +Lsection_line: +Lline_table_start0: