diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h --- a/llvm/include/llvm/DebugInfo/DIContext.h +++ b/llvm/include/llvm/DebugInfo/DIContext.h @@ -114,6 +114,8 @@ std::string Name; uint64_t Start = 0; uint64_t Size = 0; + std::string DeclFile; + uint64_t DeclLine = 0; DIGlobal() : Name(DILineInfo::BadString) {} }; @@ -239,6 +241,8 @@ virtual DILineInfo getLineInfoForAddress( object::SectionedAddress Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; + virtual DILineInfo + getLineInfoForDataAddress(object::SectionedAddress Address) = 0; virtual DILineInfoTable getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -360,6 +360,8 @@ DILineInfo getLineInfoForAddress( object::SectionedAddress Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; + DILineInfo + getLineInfoForDataAddress(object::SectionedAddress Address) override; DILineInfoTable getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -280,6 +280,13 @@ /// \returns an iterator range for the attributes of the current DIE. iterator_range attributes() const; + /// Gets the type size (in bytes) for this DIE. + /// + /// \param PointerSize the pointer size of the containing CU. + /// \returns if this is a type DIE, or this DIE contains a DW_AT_type, returns + /// the size of the type. 
+ Optional getTypeSize(uint64_t PointerSize); + class iterator; iterator begin() const; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -9,6 +9,7 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -27,6 +28,7 @@ #include #include #include +#include #include #include @@ -240,6 +242,11 @@ /// std::map::upper_bound for address range lookup. std::map> AddrDieMap; + /// Map from the location (interpreted DW_AT_location) of a DW_TAG_variable, + /// to the end address and the corresponding DIE. + std::map> VariableDieMap; + DenseSet RootsParsedForVariables; + using die_iterator_range = iterator_range::iterator>; @@ -322,6 +329,9 @@ /// Recursively update address to Die map. void updateAddressDieMap(DWARFDie Die); + /// Recursively update address to variable Die map. + void updateVariableDieMap(DWARFDie Die); + void setRangesSection(const DWARFSection *RS, uint64_t Base) { RangeSection = RS; RangeSectionBase = Base; @@ -436,6 +446,10 @@ /// cleared. DWARFDie getSubroutineForAddress(uint64_t Address); + /// Returns variable DIE for the address provided. The pointer is alive as + /// long as parsed compile unit DIEs are not cleared. + DWARFDie getVariableForAddress(uint64_t Address); + /// getInlinedChainForAddress - fetches inlined chain for a given address. /// Returns empty chain if there is no subprogram containing address. The /// chain is valid as long as parsed compile unit DIEs are not cleared. 
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h --- a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h @@ -45,6 +45,8 @@ DILineInfo getLineInfoForAddress( object::SectionedAddress Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; + DILineInfo + getLineInfoForDataAddress(object::SectionedAddress Address) override; DILineInfoTable getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1037,7 +1037,25 @@ // First, get the offset of the compile unit. uint64_t CUOffset = getDebugAranges()->findAddress(Address); // Retrieve the compile unit. - return getCompileUnitForOffset(CUOffset); + if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset)) + return OffsetCU; + + // Global variables are often not found by the above search, for one of two + // reasons: + // 1. .debug_aranges may not include global variables. On clang, it seems we + // put the globals in the aranges, but this isn't true for gcc. + // 2. Even if the global variable is in a .debug_arange, global variables + // may not be captured in the [start, end) addresses described by the + // parent compile unit. + // + // So, we walk the CU's and their child DI's manually, looking for the + // specific global variable. 
+ for (std::unique_ptr &CU : compile_units()) { + if (DWARFDie Die = CU->getVariableForAddress(Address)) { + return static_cast(CU.get()); + } + } + return nullptr; } DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { @@ -1107,64 +1125,6 @@ return FoundResult; } -static Optional getTypeSize(DWARFDie Type, uint64_t PointerSize) { - if (auto SizeAttr = Type.find(DW_AT_byte_size)) - if (Optional Size = SizeAttr->getAsUnsignedConstant()) - return Size; - - switch (Type.getTag()) { - case DW_TAG_pointer_type: - case DW_TAG_reference_type: - case DW_TAG_rvalue_reference_type: - return PointerSize; - case DW_TAG_ptr_to_member_type: { - if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) - if (BaseType.getTag() == DW_TAG_subroutine_type) - return 2 * PointerSize; - return PointerSize; - } - case DW_TAG_const_type: - case DW_TAG_immutable_type: - case DW_TAG_volatile_type: - case DW_TAG_restrict_type: - case DW_TAG_typedef: { - if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) - return getTypeSize(BaseType, PointerSize); - break; - } - case DW_TAG_array_type: { - DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type); - if (!BaseType) - return Optional(); - Optional BaseSize = getTypeSize(BaseType, PointerSize); - if (!BaseSize) - return Optional(); - uint64_t Size = *BaseSize; - for (DWARFDie Child : Type) { - if (Child.getTag() != DW_TAG_subrange_type) - continue; - - if (auto ElemCountAttr = Child.find(DW_AT_count)) - if (Optional ElemCount = - ElemCountAttr->getAsUnsignedConstant()) - Size *= *ElemCount; - if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) - if (Optional UpperBound = - UpperBoundAttr->getAsSignedConstant()) { - int64_t LowerBound = 0; - if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) - LowerBound = LowerBoundAttr->getAsSignedConstant().getValueOr(0); - Size *= *UpperBound - LowerBound + 1; - } - } - return Size; - } - default: - break; - } - 
return Optional(); -} - static Optional getExpressionFrameOffset(ArrayRef Expr, Optional FrameBaseReg) { @@ -1225,7 +1185,7 @@ if (Optional Name = dwarf::toString(*NameAttr)) Local.Name = *Name; if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type)) - Local.Size = getTypeSize(Type, getCUAddrSize()); + Local.Size = Type.getTypeSize(getCUAddrSize()); if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) { if (const auto *LT = CU->getContext().getLineTableForUnit(CU)) LT->getFileNameByIndex( @@ -1266,7 +1226,6 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, DILineInfoSpecifier Spec) { DILineInfo Result; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); if (!CU) return Result; @@ -1281,6 +1240,22 @@ Spec.FLIKind, Result); } } + + return Result; +} + +DILineInfo +DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { + DILineInfo Result; + DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; + + if (DWARFDie Die = CU->getVariableForAddress(Address.Address)) { + Result.FileName = Die.getDeclFile(FileLineInfoKind::AbsoluteFilePath); + Result.Line = Die.getDeclLine(); + } + return Result; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -1099,6 +1099,66 @@ CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); } +Optional DWARFDie::getTypeSize(uint64_t PointerSize) { + if (auto SizeAttr = find(DW_AT_byte_size)) + if (Optional Size = SizeAttr->getAsUnsignedConstant()) + return Size; + + switch (getTag()) { + case DW_TAG_pointer_type: + case DW_TAG_reference_type: + case DW_TAG_rvalue_reference_type: + return PointerSize; + case DW_TAG_ptr_to_member_type: { + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + if (BaseType.getTag() == DW_TAG_subroutine_type) + return 2 * PointerSize; + 
return PointerSize; + } + case DW_TAG_const_type: + case DW_TAG_immutable_type: + case DW_TAG_volatile_type: + case DW_TAG_restrict_type: + case DW_TAG_typedef: { + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + return BaseType.getTypeSize(PointerSize); + break; + } + case DW_TAG_array_type: { + DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type); + if (!BaseType) + return None; + Optional BaseSize = BaseType.getTypeSize(PointerSize); + if (!BaseSize) + return None; + uint64_t Size = *BaseSize; + for (DWARFDie Child : *this) { + if (Child.getTag() != DW_TAG_subrange_type) + continue; + + if (auto ElemCountAttr = Child.find(DW_AT_count)) + if (Optional ElemCount = + ElemCountAttr->getAsUnsignedConstant()) + Size *= *ElemCount; + if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) + if (Optional UpperBound = + UpperBoundAttr->getAsSignedConstant()) { + int64_t LowerBound = 0; + if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) + LowerBound = LowerBoundAttr->getAsSignedConstant().getValueOr(0); + Size *= *UpperBound - LowerBound + 1; + } + } + return Size; + } + default: + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + return BaseType.getTypeSize(PointerSize); + break; + } + return None; +} + /// Helper to dump a DIE with all of its parents, but no siblings. 
static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent, DIDumpOptions DumpOpts, unsigned Depth = 0) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -9,6 +9,7 @@
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -18,11 +19,13 @@
 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFListTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFObject.h"
 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
 #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Path.h"
@@ -746,6 +749,109 @@
   return R->second.second;
 }
 
+/// Recursively records in VariableDieMap every DW_TAG_variable under \p Die
+/// (type DIEs are not descended into) whose DW_AT_location matches
+/// `DW_OP_addr[x] [DW_OP_plus_uconst]`, keyed by the resolved start address.
+void DWARFUnit::updateVariableDieMap(DWARFDie Die) {
+  for (DWARFDie Child : Die) {
+    if (isType(Child.getTag()))
+      continue;
+    updateVariableDieMap(Child);
+  }
+
+  if (Die.getTag() != DW_TAG_variable)
+    return;
+
+  Expected<DWARFLocationExpressionsVector> Locations =
+      Die.getLocations(DW_AT_location);
+  if (!Locations) {
+    // Missing DW_AT_location is fine here.
+    consumeError(Locations.takeError());
+    return;
+  }
+
+  uint64_t Address = UINT64_MAX;
+
+  for (const DWARFLocationExpression &Location : *Locations) {
+    uint8_t AddressSize = getAddressByteSize();
+    DataExtractor Data(Location.Expr, /*IsLittleEndian=*/true, AddressSize);
+    DWARFExpression Expr(Data, AddressSize);
+    auto It = Expr.begin();
+    if (It == Expr.end())
+      continue;
+
+    // Match exactly the main sequence used to describe global variables:
+    // `DW_OP_addr[x] [+ DW_OP_plus_uconst]`. Currently, this is the sequence
+    // that LLVM produces for DILocalVariables and DIGlobalVariables. If, in
+    // future, the DWARF producer (`DwarfCompileUnit::addLocationAttribute()` is
+    // a good starting point) is extended to use further expressions, this code
+    // needs to be updated.
+    uint64_t LocationAddr;
+    if (It->getCode() == dwarf::DW_OP_addr) {
+      LocationAddr = It->getRawOperand(0);
+    } else if (It->getCode() == dwarf::DW_OP_addrx) {
+      uint64_t DebugAddrOffset = It->getRawOperand(0);
+      auto Pointer = getAddrOffsetSectionItem(DebugAddrOffset);
+      // Skip this location when the .debug_addr entry cannot be resolved;
+      // otherwise LocationAddr would be read uninitialized below.
+      if (!Pointer)
+        continue;
+      LocationAddr = Pointer->Address;
+    } else {
+      continue;
+    }
+
+    // Read the optional 2nd operand, a DW_OP_plus_uconst.
+    if (++It != Expr.end()) {
+      if (It->getCode() != dwarf::DW_OP_plus_uconst)
+        continue;
+
+      LocationAddr += It->getRawOperand(0);
+
+      // Probe for a 3rd operand, if it exists, bail.
+      if (++It != Expr.end())
+        continue;
+    }
+
+    Address = LocationAddr;
+    break;
+  }
+
+  // Get the size of the global variable. If all else fails (i.e. the global has
+  // no type), then we use a size of one to still allow symbolization of the
+  // exact address.
+  uint64_t GVSize = 1;
+  if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type))
+    if (Optional<uint64_t> Size = Die.getTypeSize(getAddressByteSize()))
+      GVSize = *Size;
+
+  if (Address != UINT64_MAX)
+    VariableDieMap[Address] = {Address + GVSize, Die};
+}
+
+/// Returns the variable DIE whose [start, end) range recorded in
+/// VariableDieMap contains \p Address, or an invalid DWARFDie if there is none.
+/// The map is filled from the unit DIE the first time this is called.
+DWARFDie DWARFUnit::getVariableForAddress(uint64_t Address) {
+  extractDIEsIfNeeded(false);
+
+  auto RootDie = getUnitDIE();
+
+  auto RootLookup = RootsParsedForVariables.insert(RootDie.getOffset());
+  if (RootLookup.second)
+    updateVariableDieMap(RootDie);
+
+  auto R = VariableDieMap.upper_bound(Address);
+  if (R == VariableDieMap.begin())
+    return DWARFDie();
+
+  // upper_bound's previous item contains Address.
+  --R;
+  if (Address >= R->second.first)
+    return DWARFDie();
+  return R->second.second;
+}
+
 void
 DWARFUnit::getInlinedChainForAddress(uint64_t Address,
                                      SmallVectorImpl<DWARFDie> &InlinedChain) {
diff --git a/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/llvm/lib/DebugInfo/PDB/PDBContext.cpp --- a/llvm/lib/DebugInfo/PDB/PDBContext.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBContext.cpp @@ -64,6 +64,13 @@ return Result; } +DILineInfo +PDBContext::getLineInfoForDataAddress(object::SectionedAddress Address) { + // Unimplemented. S_GDATA and S_LDATA in CodeView (used to describe global + // variables) aren't capable of carrying line information. 
+ return DILineInfo(); +} + DILineInfoTable PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address, uint64_t Size, diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -206,6 +206,10 @@ Name = DILineInfo::Addr2LineBadString; OS << Name << "\n"; OS << Global.Start << " " << Global.Size << "\n"; + if (Global.DeclFile.empty()) + OS << "??:?\n"; + else + OS << Global.DeclFile << ":" << Global.DeclLine << "\n"; printFooter(); } diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -327,6 +327,14 @@ std::string FileName; getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size, FileName); + Res.DeclFile = FileName; + + // Try and get a better filename:lineno pair from the debuginfo, if present. + DILineInfo DL = DebugInfoContext->getLineInfoForDataAddress(ModuleOffset); + if (DL.Line != 0) { + Res.DeclFile = DL.FileName; + Res.DeclLine = DL.Line; + } return Res; } diff --git a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml --- a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml +++ b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml @@ -7,12 +7,15 @@ # CHECK: func # CHECK-NEXT: 4096 1 +# CHECK-NEXT: ??:? # CHECK-EMPTY: # CHECK-NEXT: data # CHECK-NEXT: 8192 2 +# CHECK-NEXT: ??:? # CHECK-EMPTY: # CHECK-NEXT: notype # CHECK-NEXT: 8194 3 +# CHECK-NEXT: ??:? 
# CHECK-EMPTY: --- !ELF diff --git a/llvm/test/tools/llvm-symbolizer/data-location.yaml b/llvm/test/tools/llvm-symbolizer/data-location.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/data-location.yaml @@ -0,0 +1,450 @@ +## Show that when "DATA" is used with an address, it forces the found location +## to be symbolized as data, including the source information. + +# RUN: yaml2obj %s -o %t.so + +# RUN: llvm-symbolizer 'DATA 0x304d0' 'DATA 0x304d1' 'DATA 0x304d3' \ +# RUN: 'DATA 0x304c0' 'DATA 0x304c8' 'DATA 0x304d4' 'DATA 0x304dc' \ +# RUN: 'DATA 0x304d8' --obj=%t.so | FileCheck %s + +# CHECK: bss_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:1 +# CHECK-EMPTY: + +## Check that lookups in the middle of the symbol are also resolved correctly. +# CHECK: bss_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:1 +# CHECK-EMPTY: +# CHECK: bss_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:1 +# CHECK-EMPTY: + +## Now, the remainder of the symbols. +# CHECK-NEXT: data_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:2 +# CHECK-EMPTY: +# CHECK-NEXT: str +# CHECK-NEXT: {{[0-9]+}} 8 +# CHECK-NEXT: /tmp/file.cpp:4 +# CHECK-EMPTY: +# CHECK-NEXT: f()::function_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:8 +# CHECK-EMPTY: + +## Including the one that includes an addend. +# CHECK-NEXT: alpha +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:12 +# CHECK-EMPTY: +# CHECK-NEXT: beta +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:13 +# CHECK-EMPTY: + +## Ensure there's still a global that's offset-based. 
+# RUN: llvm-dwarfdump --debug-info %t.so | FileCheck %s --check-prefix=OFFSET + +# OFFSET: DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4) + +################################################################################ +## File below was generated using: +## +## $ clang++ -g -O3 /tmp/file.cpp -shared -fuse-ld=lld -nostdlib \ +## -target aarch64-linux-gnuabi -mllvm -global-merge-ignore-single-use \ +## -o /tmp/file.so +## +## With /tmp/file.cpp as: +## 1: int bss_global; +## 2: int data_global = 2; +## 3: +## 4: const char* str = +## 5: "12345678"; +## 6: +## 7: int* f() { +## 8: static int function_global; +## 9: return &function_global; +## 10: } +## 11: +## 12: static int alpha; +## 13: static int beta; +## 14: int *f(bool b) { return beta ? &alpha : &beta; } +## 15: +## +## ... then, one can get the offsets using `nm`, like: +## $ nm out.so | grep bss_global +## 00000000000038fc B bss_global +## +## Note the use of the aarch64 target (with -nostdlib in order to allow linkage +## without libraries for cross-compilation) as well as -O3 and +## -global-merge-ignore-single-use. This is a specific combination that makes +## the compiler emit the `alpha` global variable with a more complex +## DW_AT_location than just a DW_OP_addr/DW_OP_addrx. In this instance, it +## outputs a `DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4)`. +## +## Ideally, this would be tested by invoking clang directly on a C source file, +## but unfortunately there's no way to do that for LLVM tests. The other option +## is to compile IR to an objfile, but llvm-symbolizer doesn't understand that +## two symbols can have the same address in different sections. In the code +## above, for example, we'd have bss_global at .bss+0x0, and data_global at +## .data+0x0, and so the symbolizer would only print one of them. Hence, we have +## the ugly dso-to-yaml blob below. +## +## For now, constant strings don't have a debuginfo entry, and so can't be +## symbolized correctly. 
In future (if D123534 gets merged), this can be updated +## to include a check that llvm-symbolizer can also symbolize constant strings, +## like `str` above (basically that &"12345678" should be symbolizable) +## to the specific line. Then, you can find the address of the constant string +## from the relocation: +## +## $ nm out.so | grep str +## 00000000000038c0 D str +## $ llvm-objdump -R out.so | grep 38c0 +## 00000000000038c0 R_X86_64_RELATIVE *ABS*+0x4f8 # <-- 0x4f8 +################################################################################ + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_AARCH64 +ProgramHeaders: + - Type: PT_PHDR + Flags: [ PF_R ] + VAddr: 0x40 + Align: 0x8 + - Type: PT_LOAD + Flags: [ PF_R ] + FirstSec: .dynsym + LastSec: .eh_frame + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + FirstSec: .text + LastSec: .text + VAddr: 0x103E4 + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x20410 + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .data + LastSec: .bss + VAddr: 0x304C0 + Align: 0x10000 + - Type: PT_DYNAMIC + Flags: [ PF_W, PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x20410 + Align: 0x8 + - Type: PT_GNU_RELRO + Flags: [ PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x20410 + - Type: PT_GNU_EH_FRAME + Flags: [ PF_R ] + FirstSec: .eh_frame_hdr + LastSec: .eh_frame_hdr + VAddr: 0x37C + Align: 0x4 + - Type: PT_GNU_STACK + Flags: [ PF_W, PF_R ] + Align: 0x0 +Sections: + - Name: .dynsym + Type: SHT_DYNSYM + Flags: [ SHF_ALLOC ] + Address: 0x238 + Link: .dynstr + AddressAlign: 0x8 + - Name: .gnu.hash + Type: SHT_GNU_HASH + Flags: [ SHF_ALLOC ] + Address: 0x2C8 + Link: .dynsym + AddressAlign: 0x8 + Header: + SymNdx: 0x1 + Shift2: 0x1A + BloomFilter: [ 0x400188002180000C ] + HashBuckets: [ 0x1 ] + HashValues: [ 0xEE8502A, 0xEE85016, 0xC033991C, 0x61F7372E, 0xB88AB7F ] + - Name: 
.hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Address: 0x2F8 + Link: .dynsym + AddressAlign: 0x4 + Bucket: [ 5, 0, 4, 0, 3, 0 ] + Chain: [ 0, 0, 0, 1, 2, 0 ] + - Name: .dynstr + Type: SHT_STRTAB + Flags: [ SHF_ALLOC ] + Address: 0x330 + AddressAlign: 0x1 + - Name: .rela.dyn + Type: SHT_RELA + Flags: [ SHF_ALLOC ] + Address: 0x358 + Link: .dynsym + AddressAlign: 0x8 + Relocations: + - Offset: 0x304C8 + Type: R_AARCH64_RELATIVE + Addend: 880 + - Name: .rodata + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_MERGE, SHF_STRINGS ] + Address: 0x370 + AddressAlign: 0x1 + EntSize: 0x1 + Content: '313233343536373800' + - Name: .eh_frame_hdr + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x37C + AddressAlign: 0x4 + Content: 011B033B18000000020000006800010034000000740001004C000000 + - Name: .eh_frame + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x398 + AddressAlign: 0x8 + Content: 1400000000000000017A5200017C1E011B0C1F0000000000140000001C0000002C0001000C00000000000000000000001400000034000000200001001C000000000000000000000000000000 + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x103E4 + AddressAlign: 0x4 + Content: 0001009000501391C0035FD60801009008611391E90308AA2A4540B85F0100710001899AC0035FD6 + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x20410 + Link: .dynstr + AddressAlign: 0x8 + Entries: + - Tag: DT_RELA + Value: 0x358 + - Tag: DT_RELASZ + Value: 0x18 + - Tag: DT_RELAENT + Value: 0x18 + - Tag: DT_RELACOUNT + Value: 0x1 + - Tag: DT_SYMTAB + Value: 0x238 + - Tag: DT_SYMENT + Value: 0x18 + - Tag: DT_STRTAB + Value: 0x330 + - Tag: DT_STRSZ + Value: 0x28 + - Tag: DT_GNU_HASH + Value: 0x2C8 + - Tag: DT_HASH + Value: 0x2F8 + - Tag: DT_NULL + Value: 0x0 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x304C0 + AddressAlign: 0x8 + Content: '02000000000000000000000000000000' + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 
0x304D0 + AddressAlign: 0x4 + Size: 0x10 + - Name: .debug_abbrev + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 011101252513050325721710171B25111B120673170000023400032549133F193A0B3B0B0218000003240003253E0B0B0B0000040F004913000005260049130000062E01111B120640187A196E2503253A0B3B0B49133F190000073400032549133A0B3B0B02180000083400032549133A0B3B0B02186E25000009050003253A0B3B0B4913000000 + - Name: .debug_info + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: AB0000000500010800000000010021000108000000000000000205280000000800000002032E000000000102A1000304050402052E000000000202A101020648000000000402A102044D00000005520000000307080106050C000000016F0D0E0007A500000007082E000000000802A1030008092E000000000D02A1040A080B2E000000000C04A10423040C06061C000000016F0F0E000EA50000000910000EAA00000000042E0000000311020100 + - Name: .debug_str_offsets + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 4C00000005000000A2000000000000002C00000059000000280000001C00000072000000640000008C0000008700000069000000140000007B0000009C0000001A0000000E0000008500000076000000 + - Name: .comment + Type: SHT_PROGBITS + Flags: [ SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 4C696E6B65723A204C4C442031352E302E300000636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420306462616566363162353666306566306162306366333865613932666663316633356265653366662900 + - Name: .debug_line + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 620000000500080037000000010101FB0E0D00010101010000000100000101011F010E00000003011F020F051E0100000000006C97BBE59F7DC6A9EA956633431DA63E0400000902E4030100000000001805030A140500BF05190A0105120608740204000101 + - Name: .debug_line_str + Type: SHT_PROGBITS + Flags: [ SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 2F746D702F66696C652E637070002F7573722F6C6F63616C2F676F6F676C652F686F6D652F6D69746368702F6C6C766D2D6275696C642F6F707400 +Symbols: + - Name: file.cpp + Type: 
STT_FILE + Index: SHN_ABS + - Name: '$x.0' + Section: .text + Value: 0x103E4 + - Name: _ZZ1fvE15function_global + Type: STT_OBJECT + Section: .bss + Value: 0x304D4 + Size: 0x4 + - Name: '$d.1' + Section: .bss + Value: 0x304D0 + - Name: '$d.2' + Section: .data + Value: 0x304C0 + - Name: '$d.3' + Section: .rodata + Value: 0x370 + - Name: '$d.4' + Section: .debug_abbrev + - Name: '$d.5' + Section: .debug_info + - Name: '$d.6' + Section: .debug_str_offsets + - Name: '$d.7' + Section: .debug_str + Value: 0xA2 + - Name: '$d.8' + Section: .debug_addr + - Name: _ZL4beta + Type: STT_OBJECT + Section: .bss + Value: 0x304D8 + Size: 0x4 + - Name: _ZL5alpha + Type: STT_OBJECT + Section: .bss + Value: 0x304DC + Size: 0x4 + - Name: '$d.9' + Section: .comment + Value: 0x13 + - Name: '$d.10' + Section: .eh_frame + Value: 0x398 + - Name: '$d.11' + Section: .debug_line + - Name: '$d.12' + Section: .debug_line_str + Value: 0xE + - Name: _DYNAMIC + Section: .dynamic + Value: 0x20410 + Other: [ STV_HIDDEN ] + - Name: _Z1fv + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x103E4 + Size: 0xC + - Name: _Z1fb + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x103F0 + Size: 0x1C + - Name: bss_global + Type: STT_OBJECT + Section: .bss + Binding: STB_GLOBAL + Value: 0x304D0 + Size: 0x4 + - Name: data_global + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Value: 0x304C0 + Size: 0x4 + - Name: str + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Value: 0x304C8 + Size: 0x8 +DynamicSymbols: + - Name: _Z1fv + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x103E4 + Size: 0xC + - Name: _Z1fb + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x103F0 + Size: 0x1C + - Name: bss_global + Type: STT_OBJECT + Section: .bss + Binding: STB_GLOBAL + Value: 0x304D0 + Size: 0x4 + - Name: data_global + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Value: 0x304C0 + Size: 0x4 + - Name: str + Type: STT_OBJECT + 
Section: .data + Binding: STB_GLOBAL + Value: 0x304C8 + Size: 0x8 +DWARF: + debug_str: + - '/tmp/file.cpp' + - _Z1fb + - alpha + - f + - data_global + - int + - '/usr/local/google/home/mitchp/llvm-build/opt' + - bss_global + - char + - _ZL4beta + - str + - bool + - _ZL5alpha + - b + - beta + - function_global + - _Z1fv + - 'clang version 15.0.0 (https://github.com/llvm/llvm-project.git 0dbaef61b56f0ef0ab0cf38ea92ffc1f35bee3ff)' + debug_addr: + - Length: 0x3C + Version: 0x5 + AddressSize: 0x8 + Entries: + - Address: 0x304D0 + - Address: 0x304C0 + - Address: 0x304C8 + - Address: 0x304D4 + - Address: 0x304D8 + - Address: 0x103E4 + - Address: 0x103F0 +... diff --git a/llvm/test/tools/llvm-symbolizer/data.s b/llvm/test/tools/llvm-symbolizer/data.s --- a/llvm/test/tools/llvm-symbolizer/data.s +++ b/llvm/test/tools/llvm-symbolizer/data.s @@ -7,9 +7,12 @@ # CHECK: d1 # CHECK-NEXT: 0 8 +# CHECK-NEXT: ??:? # CHECK-EMPTY: # CHECK-NEXT: d2 # CHECK-NEXT: 8 4 +# CHECK-NEXT: ??:? +# CHECK-EMPTY: d1: .quad 0x1122334455667788