Index: lld/MachO/Dwarf.h =================================================================== --- lld/MachO/Dwarf.h +++ lld/MachO/Dwarf.h @@ -37,12 +37,17 @@ llvm::StringRef getAbbrevSection() const override { return abbrevSection; } llvm::StringRef getStrSection() const override { return strSection; } + llvm::DWARFSection const &getLineSection() const override { + return lineSection; + } + // Returns an instance of DwarfObject if the given object file has the // relevant DWARF debug sections. static std::unique_ptr create(ObjFile *); private: llvm::DWARFSection infoSection; + llvm::DWARFSection lineSection; llvm::StringRef abbrevSection; llvm::StringRef strSection; }; Index: lld/MachO/Dwarf.cpp =================================================================== --- lld/MachO/Dwarf.cpp +++ lld/MachO/Dwarf.cpp @@ -20,15 +20,16 @@ std::unique_ptr DwarfObject::create(ObjFile *obj) { auto dObj = std::make_unique(); bool hasDwarfInfo = false; - // LLD only needs to extract the source file path from the debug info, so we - // initialize DwarfObject with just the sections necessary to get that path. - // The debugger will locate the debug info via the object file paths that we - // emit in our STABS symbols, so we don't need to process & emit them - // ourselves. + // LLD only needs to extract the source file path and line numbers from the + // debug info, so we initialize DwarfObject with just the sections necessary + // to get them. The debugger will locate the debug info via the object + // file paths that we emit in our STABS symbols, so we don't need to process & + // emit them ourselves. 
for (const InputSection *isec : obj->debugSections) { if (StringRef *s = StringSwitch(isec->getName()) .Case(section_names::debugInfo, &dObj->infoSection.Data) + .Case(section_names::debugLine, &dObj->lineSection.Data) .Case(section_names::debugAbbrev, &dObj->abbrevSection) .Case(section_names::debugStr, &dObj->strSection) .Default(nullptr)) { Index: lld/MachO/InputFiles.h =================================================================== --- lld/MachO/InputFiles.h +++ lld/MachO/InputFiles.h @@ -12,6 +12,7 @@ #include "MachOStructs.h" #include "Target.h" +#include "lld/Common/DWARF.h" #include "lld/Common/LLVM.h" #include "lld/Common/Memory.h" #include "llvm/ADT/CachedHashString.h" @@ -21,6 +22,7 @@ #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/Archive.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Threading.h" #include "llvm/TextAPI/TextAPIReader.h" #include @@ -159,7 +161,13 @@ static bool classof(const InputFile *f) { return f->kind() == ObjKind; } + std::string sourceFile() const; + // Parses line table information for diagnostics. compileUnit should be used + // for other purposes. + lld::DWARFCache *getDwarf(); + llvm::DWARFUnit *compileUnit = nullptr; + std::unique_ptr dwarfCache; Section *addrSigSection = nullptr; const uint32_t modTime; std::vector debugSections; @@ -167,6 +175,7 @@ llvm::DenseMap fdes; private: + llvm::once_flag initDwarf; template void parseLazy(); template void parseSections(ArrayRef); template Index: lld/MachO/InputFiles.cpp =================================================================== --- lld/MachO/InputFiles.cpp +++ lld/MachO/InputFiles.cpp @@ -998,6 +998,8 @@ if (!dObj) return; + // We do not re-use the context from getDwarf() here as that function + // constructs an expensive DWARFCache object. auto *ctx = make( std::move(dObj), "", [&](Error err) { @@ -1013,7 +1015,7 @@ // FIXME: There can be more than one compile unit per object file. See // PR48637. 
auto it = units.begin(); - compileUnit = it->get(); + compileUnit = (it != units.end()) ? it->get() : nullptr; } ArrayRef ObjFile::getDataInCode() const { @@ -1373,6 +1375,33 @@ } } +std::string ObjFile::sourceFile() const { + if (!compileUnit) + return {}; + SmallString<261> dir(compileUnit->getCompilationDir()); + StringRef sep = sys::path::get_separator(); + // We don't use `path::append` here because we want an empty `dir` to result + // in an absolute path. `append` would give us a relative path for that case. + if (!dir.endswith(sep)) + dir += sep; + return (dir + compileUnit->getUnitDIE().getShortName()).str(); +} + +lld::DWARFCache *ObjFile::getDwarf() { + llvm::call_once(initDwarf, [this]() { + auto dwObj = DwarfObject::create(this); + if (!dwObj) + return; + dwarfCache = std::make_unique(std::make_unique( + std::move(dwObj), "", + [&](Error err) { warn(getName() + ": " + toString(std::move(err))); }, + [&](Error warning) { + warn(getName() + ": " + toString(std::move(warning))); + })); + }); + + return dwarfCache.get(); +} // The path can point to either a dylib or a .tbd file. static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) { Optional mbref = readFile(path); Index: lld/MachO/InputSection.h =================================================================== --- lld/MachO/InputSection.h +++ lld/MachO/InputSection.h @@ -50,7 +50,11 @@ // The offset from the beginning of the file. uint64_t getVA(uint64_t off) const; // Return a user-friendly string for use in diagnostics. + // Format: /path/to/object.o:(symbol _func+0x123) std::string getLocation(uint64_t off) const; + // Return the source line corresponding to an address, or the empty string. + // Format: Source.cpp:123 (/path/to/Source.cpp:123) + std::string getSourceLocation(uint64_t off) const; // Whether the data at \p off in this InputSection is live. 
virtual bool isLive(uint64_t off) const = 0; virtual void markLive(uint64_t off) = 0; @@ -85,6 +89,8 @@ protected: const Section &section; + + const Defined *getContainingSymbol(uint64_t off) const; }; // ConcatInputSections are combined into (Concat)OutputSections through simple @@ -292,6 +298,7 @@ constexpr const char data[] = "__data"; constexpr const char debugAbbrev[] = "__debug_abbrev"; constexpr const char debugInfo[] = "__debug_info"; +constexpr const char debugLine[] = "__debug_line"; constexpr const char debugStr[] = "__debug_str"; constexpr const char ehFrame[] = "__eh_frame"; constexpr const char gccExceptTab[] = "__gcc_except_tab"; Index: lld/MachO/InputSection.cpp =================================================================== --- lld/MachO/InputSection.cpp +++ lld/MachO/InputSection.cpp @@ -55,17 +55,21 @@ return sym->getVA(); } +const Defined *InputSection::getContainingSymbol(uint64_t off) const { + auto *nextSym = llvm::upper_bound( + symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; }); + if (nextSym == symbols.begin()) + return nullptr; + return *std::prev(nextSym); +} + std::string InputSection::getLocation(uint64_t off) const { // First, try to find a symbol that's near the offset. Use it as a reference // point. - auto *nextSym = llvm::upper_bound( - symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; }); - if (nextSym != symbols.begin()) { - auto &sym = *std::prev(nextSym); + if (auto *sym = getContainingSymbol(off)) return (toString(getFile()) + ":(symbol " + sym->getName() + "+0x" + Twine::utohexstr(off - sym->value) + ")") .str(); - } // If that fails, use the section itself as a reference point. 
for (const Subsection &subsec : section.subsections) { @@ -74,11 +78,61 @@ break; } } + return (toString(getFile()) + ":(" + getName() + "+0x" + Twine::utohexstr(off) + ")") .str(); } +std::string InputSection::getSourceLocation(uint64_t off) const { + auto *obj = dyn_cast(getFile()); + if (!obj) + return {}; + + DWARFCache *dwarf = obj->getDwarf(); + if (!dwarf) + return std::string(); + + for (const Subsection &subsec : section.subsections) { + if (subsec.isec == this) { + off += subsec.offset; + break; + } + } + + auto createMsg = [&](StringRef path, unsigned line) { + std::string filename = sys::path::filename(path).str(); + std::string lineStr = (":" + Twine(line)).str(); + if (filename == path) + return filename + lineStr; + return (filename + lineStr + " (" + path + lineStr + ")").str(); + }; + + // First, look up a function for a given offset. + if (Optional li = dwarf->getDILineInfo( + section.addr + off, object::SectionedAddress::UndefSection)) + return createMsg(li->FileName, li->Line); + + // If it failed, look up again as a variable. + if (const Defined *sym = getContainingSymbol(off)) { + // Symbols are generally prefixed with an underscore, which is not included + // in the debug information. + StringRef symName = sym->getName(); + if (!symName.empty() && symName[0] == '_') + symName = symName.substr(1); + + if (Optional> fileLine = + dwarf->getVariableLoc(symName)) + return createMsg(fileLine->first, fileLine->second); + } + + // Try to get the source file's name from the DWARF information. 
+ if (obj->compileUnit) + return obj->sourceFile(); + + return {}; +} + void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { align = std::max(align, copy->align); copy->live = false; Index: lld/MachO/SymbolTable.cpp =================================================================== --- lld/MachO/SymbolTable.cpp +++ lld/MachO/SymbolTable.cpp @@ -381,8 +381,11 @@ locations.codeReferences) { if (i >= maxUndefinedReferences) break; - // TODO: Get source file/line from debug information. - message += "\n>>> referenced by " + loc.isec->getLocation(loc.offset); + message += "\n>>> referenced by "; + std::string src = loc.isec->getSourceLocation(loc.offset); + if (!src.empty()) + message += src + "\n>>> "; + message += loc.isec->getLocation(loc.offset); ++i; } Index: lld/MachO/SyntheticSections.h =================================================================== --- lld/MachO/SyntheticSections.h +++ lld/MachO/SyntheticSections.h @@ -435,7 +435,7 @@ uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); } private: - void emitBeginSourceStab(llvm::DWARFUnit *compileUnit); + void emitBeginSourceStab(StringRef); void emitEndSourceStab(); void emitObjectFileStab(ObjFile *); void emitEndFunStab(Defined *); Index: lld/MachO/SyntheticSections.cpp =================================================================== --- lld/MachO/SyntheticSections.cpp +++ lld/MachO/SyntheticSections.cpp @@ -834,16 +834,9 @@ : LinkEditSection(segment_names::linkEdit, section_names::symbolTable), stringTableSection(stringTableSection) {} -void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) { +void SymtabSection::emitBeginSourceStab(StringRef sourceFile) { StabsEntry stab(N_SO); - SmallString<261> dir(compileUnit->getCompilationDir()); - StringRef sep = sys::path::get_separator(); - // We don't use `path::append` here because we want an empty `dir` to result - // in an absolute path. `append` would give us a relative path for that case. 
- if (!dir.endswith(sep)) - dir += sep; - stab.strx = stringTableSection.addString( - saver().save(dir + compileUnit->getUnitDIE().getShortName())); + stab.strx = stringTableSection.addString(saver().save(sourceFile)); stabs.emplace_back(std::move(stab)); } @@ -938,7 +931,7 @@ emitEndSourceStab(); lastFile = file; - emitBeginSourceStab(file->compileUnit); + emitBeginSourceStab(file->sourceFile()); emitObjectFileStab(file); } Index: lld/test/MachO/invalid/undef-debug.s =================================================================== --- /dev/null +++ lld/test/MachO/invalid/undef-debug.s @@ -0,0 +1,210 @@ +# REQUIRES: aarch64 +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %s -o %t.o +# RUN: not %lld -arch arm64 %t.o -o /dev/null 2>&1 | FileCheck %s + +# CHECK: undefined symbol: _undef +# CHECK-NEXT: >>> referenced by test.c:3 +# CHECK-NEXT: >>> {{.*}}.o:(symbol _main+0x0) +# CHECK-NEXT: >>> referenced by test.c:2 +# CHECK-NEXT: >>> {{.*}}.o:(symbol _ptr+0x0) + +## This is the output of `clang -g2 -O2 -fdebug-compilation-dir=. -fno-ident` called on the following file, with the +## Apple DWARF tables removed: +## +## int undef(); +## int (*ptr)() = &undef; +## int main() { return undef(); }; + + + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 12, 0 sdk_version 13, 0 + .file 1 "." 
"test.c" + .globl _main ; -- Begin function main + .p2align 2 +_main: ; @main +Lfunc_begin0: + .loc 1 3 0 ; test.c:3:0 + .cfi_startproc +; %bb.0: + .loc 1 3 21 prologue_end ; test.c:3:21 + b _undef +Ltmp0: +Lfunc_end0: + .cfi_endproc + ; -- End function + .section __DATA,__data + .globl _ptr ; @ptr + .p2align 3 +_ptr: + .quad _undef + + .section __DWARF,__debug_abbrev,regular,debug +Lsection_abbrev: + .byte 1 ; Abbreviation Code + .byte 17 ; DW_TAG_compile_unit + .byte 1 ; DW_CHILDREN_yes + .byte 37 ; DW_AT_producer + .byte 14 ; DW_FORM_strp + .byte 19 ; DW_AT_language + .byte 5 ; DW_FORM_data2 + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .ascii "\202|" ; DW_AT_LLVM_sysroot + .byte 14 ; DW_FORM_strp + .ascii "\357\177" ; DW_AT_APPLE_sdk + .byte 14 ; DW_FORM_strp + .byte 16 ; DW_AT_stmt_list + .byte 23 ; DW_FORM_sec_offset + .byte 27 ; DW_AT_comp_dir + .byte 14 ; DW_FORM_strp + .ascii "\341\177" ; DW_AT_APPLE_optimized + .byte 25 ; DW_FORM_flag_present + .byte 17 ; DW_AT_low_pc + .byte 1 ; DW_FORM_addr + .byte 18 ; DW_AT_high_pc + .byte 6 ; DW_FORM_data4 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 2 ; Abbreviation Code + .byte 52 ; DW_TAG_variable + .byte 0 ; DW_CHILDREN_no + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 73 ; DW_AT_type + .byte 19 ; DW_FORM_ref4 + .byte 63 ; DW_AT_external + .byte 25 ; DW_FORM_flag_present + .byte 58 ; DW_AT_decl_file + .byte 11 ; DW_FORM_data1 + .byte 59 ; DW_AT_decl_line + .byte 11 ; DW_FORM_data1 + .byte 2 ; DW_AT_location + .byte 24 ; DW_FORM_exprloc + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 3 ; Abbreviation Code + .byte 15 ; DW_TAG_pointer_type + .byte 0 ; DW_CHILDREN_no + .byte 73 ; DW_AT_type + .byte 19 ; DW_FORM_ref4 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 4 ; Abbreviation Code + .byte 21 ; DW_TAG_subroutine_type + .byte 1 ; DW_CHILDREN_yes + .byte 73 ; DW_AT_type + .byte 19 ; DW_FORM_ref4 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 5 ; Abbreviation Code + .byte 24 ; DW_TAG_unspecified_parameters 
+ .byte 0 ; DW_CHILDREN_no + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 6 ; Abbreviation Code + .byte 36 ; DW_TAG_base_type + .byte 0 ; DW_CHILDREN_no + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 62 ; DW_AT_encoding + .byte 11 ; DW_FORM_data1 + .byte 11 ; DW_AT_byte_size + .byte 11 ; DW_FORM_data1 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 7 ; Abbreviation Code + .byte 46 ; DW_TAG_subprogram + .byte 0 ; DW_CHILDREN_no + .byte 17 ; DW_AT_low_pc + .byte 1 ; DW_FORM_addr + .byte 18 ; DW_AT_high_pc + .byte 6 ; DW_FORM_data4 + .ascii "\347\177" ; DW_AT_APPLE_omit_frame_ptr + .byte 25 ; DW_FORM_flag_present + .byte 64 ; DW_AT_frame_base + .byte 24 ; DW_FORM_exprloc + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 58 ; DW_AT_decl_file + .byte 11 ; DW_FORM_data1 + .byte 59 ; DW_AT_decl_line + .byte 11 ; DW_FORM_data1 + .byte 73 ; DW_AT_type + .byte 19 ; DW_FORM_ref4 + .byte 63 ; DW_AT_external + .byte 25 ; DW_FORM_flag_present + .ascii "\341\177" ; DW_AT_APPLE_optimized + .byte 25 ; DW_FORM_flag_present + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 0 ; EOM(3) + .section __DWARF,__debug_info,regular,debug +Lsection_info: +Lcu_begin0: +.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ; Length of Unit + .long Lset0 +Ldebug_info_start0: + .short 4 ; DWARF version number +.set Lset1, Lsection_abbrev-Lsection_abbrev ; Offset Into Abbrev. 
Section + .long Lset1 + .byte 8 ; Address Size (in bytes) + .byte 1 ; Abbrev [1] 0xb:0x69 DW_TAG_compile_unit + .long 0 ; DW_AT_producer + .short 12 ; DW_AT_language + .long 1 ; DW_AT_name + .long 8 ; DW_AT_LLVM_sysroot + .long 60 ; DW_AT_APPLE_sdk +.set Lset2, Lline_table_start0-Lsection_line ; DW_AT_stmt_list + .long Lset2 + .long 71 ; DW_AT_comp_dir + ; DW_AT_APPLE_optimized + .quad Lfunc_begin0 ; DW_AT_low_pc +.set Lset3, Lfunc_end0-Lfunc_begin0 ; DW_AT_high_pc + .long Lset3 + .byte 2 ; Abbrev [2] 0x32:0x15 DW_TAG_variable + .long 73 ; DW_AT_name + .long 71 ; DW_AT_type + ; DW_AT_external + .byte 1 ; DW_AT_decl_file + .byte 2 ; DW_AT_decl_line + .byte 9 ; DW_AT_location + .byte 3 + .quad _ptr + .byte 3 ; Abbrev [3] 0x47:0x5 DW_TAG_pointer_type + .long 76 ; DW_AT_type + .byte 4 ; Abbrev [4] 0x4c:0x7 DW_TAG_subroutine_type + .long 83 ; DW_AT_type + .byte 5 ; Abbrev [5] 0x51:0x1 DW_TAG_unspecified_parameters + .byte 0 ; End Of Children Mark + .byte 6 ; Abbrev [6] 0x53:0x7 DW_TAG_base_type + .long 77 ; DW_AT_name + .byte 5 ; DW_AT_encoding + .byte 4 ; DW_AT_byte_size + .byte 7 ; Abbrev [7] 0x5a:0x19 DW_TAG_subprogram + .quad Lfunc_begin0 ; DW_AT_low_pc +.set Lset4, Lfunc_end0-Lfunc_begin0 ; DW_AT_high_pc + .long Lset4 + ; DW_AT_APPLE_omit_frame_ptr + .byte 1 ; DW_AT_frame_base + .byte 111 + .long 81 ; DW_AT_name + .byte 1 ; DW_AT_decl_file + .byte 3 ; DW_AT_decl_line + .long 83 ; DW_AT_type + ; DW_AT_external + ; DW_AT_APPLE_optimized + .byte 0 ; End Of Children Mark +Ldebug_info_end0: + .section __DWARF,__debug_str,regular,debug +Linfo_string: + .byte 0 ; string offset=0 + .asciz "test.c" ; string offset=1 + .asciz "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk" ; string offset=8 + .asciz "MacOSX.sdk" ; string offset=60 + .asciz "." ; string offset=71 + .asciz "ptr" ; string offset=73 + .asciz "int" ; string offset=77 + .asciz "main" ; string offset=81 + .section __DWARF,__debug_line,regular,debug +Lsection_line: +Lline_table_start0: