Index: ELF/InputFiles.h =================================================================== --- ELF/InputFiles.h +++ ELF/InputFiles.h @@ -185,6 +185,7 @@ // If no information is available, returns "". std::string getLineInfo(InputSectionBase *S, uint64_t Offset); llvm::Optional getDILineInfo(InputSectionBase *, uint64_t); + llvm::Optional> getDataLoc(StringRef Name); // MIPS GP0 value defined by this file. This value represents the gp value // used to create the relocatable object and required to support @@ -200,7 +201,7 @@ void initializeSections(llvm::DenseSet &ComdatGroups); void initializeSymbols(); - void initializeDwarfLine(); + void initializeDwarfData(); InputSectionBase *getRelocTarget(const Elf_Shdr &Sec); InputSectionBase *createInputSection(const Elf_Shdr &Sec); StringRef getSectionName(const Elf_Shdr &Sec); @@ -216,6 +217,7 @@ // single object file, so we cache debugging information in order to // parse it only once for each object file we link. std::unique_ptr DwarfLine; + llvm::DenseMap> DataObjectsLoc; llvm::once_flag InitDwarfLine; }; Index: ELF/InputFiles.cpp =================================================================== --- ELF/InputFiles.cpp +++ ELF/InputFiles.cpp @@ -67,7 +67,7 @@ return MBRef; } -template void ObjFile::initializeDwarfLine() { +template void ObjFile::initializeDwarfData() { DWARFContext Dwarf(make_unique>(this)); const DWARFObject &Obj = Dwarf.getDWARFObj(); DwarfLine.reset(new DWARFDebugLine); @@ -77,7 +77,67 @@ // The second parameter is offset in .debug_line section // for compilation unit (CU) of interest. We have only one // CU (object file), so offset is always 0. - DwarfLine->getOrParseLineTable(LineData, 0); + const DWARFDebugLine::LineTable *LT = + DwarfLine->getOrParseLineTable(LineData, 0); + + // Return if there is no debug information about CU available. + if (!Dwarf.getNumCompileUnits()) + return; + + // Get information about global variables location. + DWARFCompileUnit *CU = Dwarf.getCompileUnitAtIndex(0); + for (const auto &Entry : CU->dies()) { + DWARFDie Die = {CU, &Entry}; + // Skip all tags that are not variables. + if (Die.getTag() != dwarf::DW_TAG_variable) + continue; + + // Skip variables that are invisible outside the compilation unit. + if (!dwarf::toUnsigned(Die.find(dwarf::DW_AT_external), 0)) + continue; + + // Take file number in the line number information table. Skip if + // no source file is available. + unsigned File = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_file), 0); + if (!LT->hasFileAtIndex(File)) + continue; + + // Take source line number where declared object appears. + unsigned Line = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_line), 0); + + // Take the name of variable. Ignore it if no name available, it can + // happen if some debug information is missing or corrupted. + StringRef Name = dwarf::toString(Die.find(dwarf::DW_AT_name), ""); + if (!Name.empty()) + DataObjectsLoc.insert({Name, {File, Line}}); + } +} + +// Returns the pair of file name and line number describing location of data +// object (variable, array, etc) definition. +template +Optional> +ObjFile::getDataLoc(StringRef Name) { + llvm::call_once(InitDwarfLine, [this]() { initializeDwarfData(); }); + + // The offset to CU is 0. + const DWARFDebugLine::LineTable *LT = DwarfLine->getLineTable(0); + if (!LT) + return None; + + // Return if we have no debug information about data object. + auto It = DataObjectsLoc.find(Name); + if (It == DataObjectsLoc.end()) + return None; + + // Take file name string from line table. + std::string FileName; + if (!LT->getFileNameByIndex( + It->second.first /* File */, nullptr, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FileName)) + return None; + + return std::make_pair(FileName, It->second.second /*Line*/); } // Returns source line information for a given offset @@ -85,7 +145,7 @@ template Optional ObjFile::getDILineInfo(InputSectionBase *S, uint64_t Offset) { - llvm::call_once(InitDwarfLine, [this]() { initializeDwarfLine(); }); + llvm::call_once(InitDwarfLine, [this]() { initializeDwarfData(); }); // The offset to CU is 0. const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -183,7 +183,7 @@ // Returns a source location string. Used to construct an error message. template std::string getLocation(uint64_t Offset); - template std::string getSrcMsg(uint64_t Offset); + template std::string getSrcMsg(const SymbolBody &Sym, uint64_t Offset); template std::string getObjMsg(uint64_t Offset); // Each section knows how to relocate itself. These functions apply Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -258,31 +258,44 @@ return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); } -// Returns a source location string. This function is intended to be -// used for constructing an error message. The returned message looks -// like this: +// This function is intended to be used for constructing an error message. +// The returned message looks like this: // // foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42) // -// Returns an empty string if there's no way to get line info. -template std::string InputSectionBase::getSrcMsg(uint64_t Offset) { +static std::string createFileLineMsg(StringRef Path, unsigned Line) { + std::string Filename = path::filename(Path); + std::string Lineno = ":" + std::to_string(Line); + if (Filename == Path) + return Filename + Lineno; + return Filename + Lineno + " (" + Path.str() + Lineno + ")"; +} + +// Returns a source location string. It normally takes this information from +// debug sections and used for constructing an error message. Returns an empty +// string if there's no location information available. +template +std::string InputSectionBase::getSrcMsg(const SymbolBody &Sym, + uint64_t Offset) { // Synthetic sections don't have input files. ObjFile *File = getFile(); if (!File) return ""; - Optional Info = File->getDILineInfo(this, Offset); + // We want to get location string here. For functions we take information from + // .debug_line which contains association between location in source files and + // machine instuctions. But for data objects we have to scan DIEs in .debug_info + // which describes variables and their locations. + if (Sym.Type == STT_OBJECT) { + if (Optional> FileLine = + File->getDataLoc(Sym.getName())) + return createFileLineMsg(FileLine->first, FileLine->second); + } else if (Optional Info = File->getDILineInfo(this, Offset)) { + return createFileLineMsg(Info->FileName, Info->Line); + } // File->SourceFile contains STT_FILE symbol, and that is a last resort. - if (!Info) - return File->SourceFile; - - std::string Path = Info->FileName; - std::string Filename = path::filename(Path); - std::string Lineno = ":" + std::to_string(Info->Line); - if (Filename == Path) - return Filename + Lineno; - return Filename + Lineno + " (" + Path + Lineno + ")"; + return File->SourceFile; } // Returns a filename string along with an optional section name. This @@ -994,10 +1007,14 @@ template std::string InputSectionBase::getLocation(uint64_t); template std::string InputSectionBase::getLocation(uint64_t); -template std::string InputSectionBase::getSrcMsg(uint64_t); -template std::string InputSectionBase::getSrcMsg(uint64_t); -template std::string InputSectionBase::getSrcMsg(uint64_t); -template std::string InputSectionBase::getSrcMsg(uint64_t); +template std::string InputSectionBase::getSrcMsg(const SymbolBody &, + uint64_t); +template std::string InputSectionBase::getSrcMsg(const SymbolBody &, + uint64_t); +template std::string InputSectionBase::getSrcMsg(const SymbolBody &, + uint64_t); +template std::string InputSectionBase::getSrcMsg(const SymbolBody &, + uint64_t); template std::string InputSectionBase::getObjMsg(uint64_t); template std::string InputSectionBase::getObjMsg(uint64_t); Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -74,7 +74,7 @@ uint64_t Off) { std::string Msg = "\n>>> defined in " + toString(Sym.getFile()) + "\n>>> referenced by "; - std::string Src = S.getSrcMsg(Off); + std::string Src = S.getSrcMsg(Sym, Off); if (!Src.empty()) Msg += Src + "\n>>> "; return Msg + S.getObjMsg(Off); @@ -730,7 +730,7 @@ std::string Msg = "undefined symbol: " + toString(Sym) + "\n>>> referenced by "; - std::string Src = Sec.getSrcMsg(Offset); + std::string Src = Sec.getSrcMsg(Sym, Offset); if (!Src.empty()) Msg += Src + "\n>>> "; Msg += Sec.getObjMsg(Offset); Index: ELF/SymbolTable.cpp =================================================================== --- ELF/SymbolTable.cpp +++ ELF/SymbolTable.cpp @@ -462,9 +462,9 @@ // >>> defined at baz.c:563 // >>> baz.o in archive libbaz.a auto *Sec1 = cast(D->Section); - std::string Src1 = Sec1->getSrcMsg(D->Value); + std::string Src1 = Sec1->getSrcMsg(*Sym, D->Value); std::string Obj1 = Sec1->getObjMsg(D->Value); - std::string Src2 = ErrSec->getSrcMsg(ErrOffset); + std::string Src2 = ErrSec->getSrcMsg(*Sym, ErrOffset); std::string Obj2 = ErrSec->getObjMsg(ErrOffset); std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at "; Index: test/ELF/conflict-debug-variable.s =================================================================== --- test/ELF/conflict-debug-variable.s +++ test/ELF/conflict-debug-variable.s @@ -0,0 +1,141 @@ +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +# RUN: llvm-dwarfdump %t.o | FileCheck -check-prefix=INPUT %s +# RUN: not ld.lld %t.o %t.o -o %t 2>&1 | FileCheck %s + +# INPUT: .debug_info contents: +# INPUT: DW_TAG_variable +# INPUT-NEXT: DW_AT_name ("foo") +# INPUT-NEXT: DW_AT_decl_file ("1.c") +# INPUT-NEXT: DW_AT_decl_line (1) +# INPUT-NEXT: DW_AT_type (cu + 0x0032 "int") +# INPUT-NEXT: DW_AT_external (true) +# INPUT-NEXT: DW_AT_location (DW_OP_addr 0x0) +# INPUT: DW_TAG_variable +# INPUT-NEXT: DW_AT_name ("bar") +# INPUT-NEXT: DW_AT_decl_file ("1.c") +# INPUT-NEXT: DW_AT_decl_line (2) +# INPUT-NEXT: DW_AT_type (cu + 0x0032 "int") +# INPUT-NEXT: DW_AT_external (true) +# INPUT-NEXT: DW_AT_location (DW_OP_addr 0x0) + +## Check we use information from .debug_info in messages. +# CHECK: duplicate symbol: bar +# CHECK-NEXT: >>> defined at 1.c:2 +# CHECK-NEXT: >>> {{.*}}:(bar) +# CHECK-NEXT: >>> defined at 1.c:2 +# CHECK-NEXT: >>> {{.*}}:(.data+0x0) +# CHECK: duplicate symbol: foo +# CHECK-NEXT: >>> defined at 1.c:1 +# CHECK-NEXT: >>> {{.*}}:(foo) +# CHECK-NEXT: >>> defined at 1.c:1 +# CHECK-NEXT: >>> {{.*}}:(.bss+0x0) + +# Used reduced output from following code and gcc 7.1.0 +# to produce this input file: +# Source (1.c): +# int foo = 0; +# int bar = 1; +# Invocation: g++ -g -S 1.c + +.bss +.globl foo +.type foo, @object +.size foo, 4 +foo: + +.data +.globl bar +.type bar, @object +.size bar, 4 +bar: + +.text +.file 1 "1.c" + +.section .debug_info,"",@progbits + .long 0x4b # Compile Unit: length = 0x0000004b) + .value 0x4 # version = 0x0004 + .long 0 # abbr_offset = 0x0 + .byte 0x8 # addr_size = 0x08 + + .uleb128 0x1 # DW_TAG_compile_unit [1] * + .long 0 # DW_AT_producer [DW_FORM_strp] ( .debug_str[0x00000000] = ) + .byte 0x4 # DW_AT_language [DW_FORM_data1] (DW_LANG_C_plus_plus) + .string "1.c" # DW_AT_name [DW_FORM_string] ("1.c") + .long 0 # DW_AT_comp_dir [DW_FORM_strp] ( .debug_str[0x00000000] = ) + .long 0 # DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000) + + .uleb128 0x2 # DW_TAG_variable [2] + .string "foo" # DW_AT_name [DW_FORM_string] ("foo") + .byte 0x1 # DW_AT_decl_file [DW_FORM_data1] ("1.c") + .byte 0x1 # DW_AT_decl_line [DW_FORM_data1] (1) + .long 0x32 # DW_AT_type [DW_FORM_ref4] (cu + 0x0032 => {0x00000032}) + .uleb128 0x9 # DW_AT_external [DW_FORM_flag_present] (true) + .byte 0x3 + .quad foo # DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) + + .uleb128 0x3 # DW_TAG_base_type [3] + .byte 0x4 # DW_AT_byte_size [DW_FORM_data1] (0x04) + .byte 0x5 # DW_AT_encoding [DW_FORM_data1] (DW_ATE_signed) + .string "int" # DW_AT_name [DW_FORM_string] ("int" + + .uleb128 0x2 # DW_TAG_variable [2] + .string "bar" # DW_AT_name [DW_FORM_string] ("bar") + .byte 0x1 # DW_AT_decl_file [DW_FORM_data1] ("1.c") + .byte 0x2 # DW_AT_decl_line [DW_FORM_data1] (2) + .long 0x32 # DW_AT_type [DW_FORM_ref4] (cu + 0x0032 => {0x00000032}) + .uleb128 0x9 # DW_AT_external [DW_FORM_flag_present] (true) + .byte 0x3 + .quad bar # DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) + .byte 0 # END + + +.section .debug_abbrev,"",@progbits + .uleb128 0x1 # Abbreviation code. + .uleb128 0x11 # DW_TAG_compile_unit + + .byte 0x1 # ID + .uleb128 0x25 # DW_AT_producer, DW_FORM_strp + .uleb128 0xe + .uleb128 0x13 # DW_AT_language, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x3 # DW_AT_name, DW_FORM_string + .uleb128 0x8 + .uleb128 0x1b # DW_AT_comp_dir, DW_FORM_strp + .uleb128 0xe + .uleb128 0x10 # DW_AT_stmt_list, DW_FORM_sec_offset + .uleb128 0x17 + .byte 0 + .byte 0 + + .uleb128 0x2 # ID + .uleb128 0x34 # DW_TAG_variable, DW_CHILDREN_no + .byte 0 + .uleb128 0x3 # DW_AT_name, DW_FORM_string + .uleb128 0x8 + .uleb128 0x3a # DW_AT_decl_file, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x3b # DW_AT_decl_line, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x49 # DW_AT_type, DW_FORM_ref4 + .uleb128 0x13 + .uleb128 0x3f # DW_AT_external, DW_FORM_flag_present + .uleb128 0x19 + .uleb128 0x2 # DW_AT_location, DW_FORM_exprloc + .uleb128 0x18 + .byte 0 + .byte 0 + + .uleb128 0x3 # ID + .uleb128 0x24 # DW_TAG_base_type, DW_CHILDREN_no + .byte 0 + .uleb128 0xb # DW_AT_byte_size, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x3e # DW_AT_encoding, DW_FORM_data1 + .uleb128 0xb + .uleb128 0x3 # DW_AT_name, DW_FORM_string + .uleb128 0x8 + .byte 0 + .byte 0 + .byte 0 +