diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -254,7 +254,6 @@ and prints the results to standard output. The following markup elements are not yet supported: - * ``{{pc}}`` * ``{{bt}}`` * ``{{hexdict}}`` * ``{{dumpfile}}`` diff --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst --- a/llvm/docs/SymbolizerMarkupFormat.rst +++ b/llvm/docs/SymbolizerMarkupFormat.rst @@ -184,7 +184,7 @@ {{{symbol:_ZN7Mangled4NameEv}}} {{{symbol:foobar}}} -``{{{pc:%p}}}``, ``{{{pc:%p:ra}}}``, ``{{{pc:%p:pc}}}`` [#not_yet_implemented]_ +``{{{pc:%p}}}``, ``{{{pc:%p:ra}}}``, ``{{{pc:%p:pc}}}`` Here ``%p`` is the memory address of a code location. It might be presented as a function name and source location. The second two forms distinguish the kind of diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h --- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h @@ -71,6 +71,15 @@ SmallVector MMaps = {}; }; + // The semantics of a possible program counter value. + enum class PCType { + // The address is a return address and must be adjusted to point to the call + // itself. + ReturnAddress, + // The address is the precise location in the code and needs no adjustment. + PreciseCode, + }; + bool tryContextualElement(const MarkupNode &Node, const SmallVector &DeferredNodes); bool tryMMap(const MarkupNode &Element, @@ -87,6 +96,7 @@ bool tryPresentation(const MarkupNode &Node); bool trySymbol(const MarkupNode &Node); + bool tryPC(const MarkupNode &Node); bool tryData(const MarkupNode &Node); bool trySGR(const MarkupNode &Node); @@ -96,6 +106,9 @@ void restoreColor(); void resetColor(); + void printRawElement(const MarkupNode &Element); + void printValue(Twine Value); + Optional parseModule(const MarkupNode &Element) const; Optional parseMMap(const MarkupNode &Element) const; @@ -104,10 +117,12 @@ Optional parseSize(StringRef Str) const; Optional> parseBuildID(StringRef Str) const; Optional parseMode(StringRef Str) const; + Optional parsePCType(StringRef Str) const; bool checkTag(const MarkupNode &Node) const; bool checkNumFields(const MarkupNode &Element, size_t Size) const; bool checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const; + bool checkNumFieldsAtMost(const MarkupNode &Element, size_t Size) const; void reportTypeError(StringRef Str, StringRef TypeName) const; void reportLocation(StringRef::iterator Loc) const; @@ -115,6 +130,8 @@ const MMap *getOverlappingMMap(const MMap &Map) const; const MMap *getContainingMMap(uint64_t Addr) const; + uint64_t adjustAddr(uint64_t Addr, PCType Type) const; + StringRef lineEnding() const; raw_ostream &OS; diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/Symbolize/Markup.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Debuginfod/Debuginfod.h" @@ -163,18 +164,17 @@ filterNode(Node); beginModuleInfoLine(&Module); OS << "; BuildID="; - highlightValue(); - OS << toHex(Module.BuildID, /*LowerCase=*/true); - highlight(); + printValue(toHex(Module.BuildID, /*LowerCase=*/true)); return true; } void MarkupFilter::beginModuleInfoLine(const Module *M) { highlight(); OS << "[[[ELF module"; - highlightValue(); - OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name); - highlight(); + printValue(formatv(" #{0:x} ", M->ID)); + OS << '"'; + printValue(M->Name); + OS << '"'; MIL = ModuleInfoLine{M}; } @@ -186,13 +186,12 @@ }); for (const MMap *M : MIL->MMaps) { OS << (M == MIL->MMaps.front() ? ' ' : ','); - highlightValue(); - OS << formatv("[{0:x}-{1:x}]", M->Addr, M->Addr + M->Size - 1); - highlight(); - OS << '('; - highlightValue(); - OS << M->Mode; - highlight(); + OS << '['; + printValue(formatv("{0:x}", M->Addr)); + OS << '-'; + printValue(formatv("{0:x}", M->Addr + M->Size - 1)); + OS << "]("; + printValue(M->Mode); OS << ')'; } OS << "]]]" << lineEnding(); @@ -215,6 +214,8 @@ bool MarkupFilter::tryPresentation(const MarkupNode &Node) { if (trySymbol(Node)) return true; + if (tryPC(Node)) + return true; return tryData(Node); } @@ -230,6 +231,61 @@ return true; } +bool MarkupFilter::tryPC(const MarkupNode &Node) { + if (Node.Tag != "pc") + return false; + if (!checkNumFieldsAtLeast(Node, 1)) + return true; + if (!checkNumFieldsAtMost(Node, 2)) + return true; + + Optional Addr = parseAddr(Node.Fields[0]); + if (!Addr) + return true; + + // PC addresses that aren't part of a backtrace are assumed to be precise code + // locations. + PCType Type = PCType::PreciseCode; + if (Node.Fields.size() == 2) { + Optional ParsedType = parsePCType(Node.Fields[1]); + if (!ParsedType) + return true; + Type = *ParsedType; + } + *Addr = adjustAddr(*Addr, Type); + + const MMap *MMap = getContainingMMap(*Addr); + if (!MMap) { + WithColor::error() << "no mmap covers address\n"; + reportLocation(Node.Fields[0].begin()); + printRawElement(Node); + return true; + } + + Expected LI = Symbolizer.symbolizeCode( + MMap->Mod->BuildID, {MMap->getModuleRelativeAddr(*Addr)}); + if (!LI) { + WithColor::defaultErrorHandler(LI.takeError()); + printRawElement(Node); + return true; + } + if (LI->FileName == DILineInfo::BadString && + LI->FunctionName == DILineInfo::BadString && LI->Line == 0) { + printRawElement(Node); + return true; + } + + highlight(); + printValue(LI->FunctionName); + OS << '['; + printValue(LI->FileName); + OS << ':'; + printValue(Twine(LI->Line)); + OS << ']'; + restoreColor(); + return true; +} + bool MarkupFilter::tryData(const MarkupNode &Node) { if (Node.Tag != "data") return false; @@ -239,21 +295,11 @@ if (!Addr) return true; - const auto PrintRaw = [&]() { - highlight(); - OS << "[[[data:"; - highlightValue(); - OS << "0x" << toHex(*Addr, /*LowerCase=*/true); - highlight(); - OS << "]]]\n"; - restoreColor(); - }; - const MMap *MMap = getContainingMMap(*Addr); if (!MMap) { WithColor::error() << "no mmap covers address\n"; reportLocation(Node.Fields[0].begin()); - PrintRaw(); + printRawElement(Node); return true; } @@ -261,7 +307,7 @@ MMap->Mod->BuildID, {MMap->getModuleRelativeAddr(*Addr)}); if (!Symbol) { WithColor::defaultErrorHandler(Symbol.takeError()); - PrintRaw(); + printRawElement(Node); return true; } @@ -343,6 +389,24 @@ OS.resetColor(); } +void MarkupFilter::printRawElement(const MarkupNode &Element) { + highlight(); + OS << "[[["; + printValue(Element.Tag); + for (StringRef Field : Element.Fields) { + OS << ':'; + printValue(Field); + } + OS << "]]]"; + restoreColor(); +} + +void MarkupFilter::printValue(Twine Value) { + highlightValue(); + OS << Value; + highlight(); +} + // This macro helps reduce the amount of indirection done through Optional // below, since the usual case upon returning a None Optional is to return None. #define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR) \ @@ -476,6 +540,17 @@ return Str.lower(); } +Optional MarkupFilter::parsePCType(StringRef Str) const { + Optional Type = + StringSwitch>(Str) + .Case("ra", MarkupFilter::PCType::ReturnAddress) + .Case("pc", MarkupFilter::PCType::PreciseCode) + .Default(None); + if (!Type) + reportTypeError(Str, "PC type"); + return Type; +} + bool MarkupFilter::checkTag(const MarkupNode &Node) const { if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) { WithColor::error(errs()) << "tags must be all lowercase characters\n"; @@ -508,6 +583,18 @@ return true; } +bool MarkupFilter::checkNumFieldsAtMost(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() > Size) { + WithColor::error(errs()) + << "expected at most " << Size << " field(s); found " + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); + return false; + } + return true; +} + void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const { WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str << "'\n"; @@ -556,6 +643,14 @@ return I->second.contains(Addr) ? &I->second : nullptr; } +uint64_t MarkupFilter::adjustAddr(uint64_t Addr, PCType Type) const { + // Decrementing return addresses by one moves them into the call instruction. + // The address doesn't have to be the start of the call instruction, just some + // byte on the inside. Subtracting one avoids needing detailed instruction + // length information here. + return Type == MarkupFilter::PCType::ReturnAddress ? Addr - 1 : Addr; +} + StringRef MarkupFilter::lineEnding() const { return Line.endswith("\r\n") ? "\r\n" : "\n"; } diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-data.test b/llvm/test/DebugInfo/symbolize-filter-markup-data.test --- a/llvm/test/DebugInfo/symbolize-filter-markup-data.test +++ b/llvm/test/DebugInfo/symbolize-filter-markup-data.test @@ -12,7 +12,7 @@ CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x0-0x4](r),[0x10-0x11](r)[[END:\]{3}]] CHECK: long long byte CHECK: long byte -CHECK: [[BEGIN]]data:0x05[[END]] +CHECK: [[BEGIN]]data:0x5[[END]] ERR: error: expected 1 field(s); found 0 ERR: error: no mmap covers address diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-pc.test b/llvm/test/DebugInfo/symbolize-filter-markup-pc.test new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/symbolize-filter-markup-pc.test @@ -0,0 +1,188 @@ +REQUIRES: x86-registered-target +RUN: split-file %s %t +RUN: mkdir -p %t/.build-id/ab +RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/asm.s \ +RUN: -o %t/.build-id/ab/cdef.debug +RUN: llvm-symbolizer --debug-file-directory=%t --filter-markup < %t/input \ +RUN: > %t.output 2> %t.err +RUN: FileCheck %s --input-file=%t.output --match-full-lines \ +RUN: --implicit-check-not {{.}} +RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines + +CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x0-0xff](r)[[END:\]{3}]] +CHECK: first[/dir[[SEP:[/\\]]]tmp.c:3] +CHECK: first[/dir[[SEP]]tmp.c:5] +CHECK: first[/dir[[SEP]]tmp.c:4] +CHECK: first[/dir[[SEP]]tmp.c:5] +CHECK: [[BEGIN]]pc:0xff[[END]] +CHECK: [[BEGIN]]pc:0x100[[END]] + +ERR: error: expected at least 1 field(s); found 0 +ERR: error: no mmap covers address +ERR: error: expected PC type; found '' +ERR: error: expected at most 2 field(s); found 3 + +;--- input +{{{module:0:a.o:elf:abcdef}}} +{{{mmap:0:256:load:0:r:0}}} +{{{pc:0}}} +{{{pc:0x9}}} +{{{pc:0x9:ra}}} +{{{pc:0x9:pc}}} +{{{pc:0xff}}} + +{{{pc}}} +{{{pc:0x100}}} +{{{pc:0x9:}}} +{{{pc:0x9:pc:}}} +;--- asm.s + .text + .file "tmp.c" + .globl first # -- Begin function first + .p2align 4, 0x90 + .type first,@function +first: # @first +.Lfunc_begin0: + .file 1 "/dir" "tmp.c" + .loc 1 3 0 # tmp.c:3:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp0: + .loc 1 4 3 prologue_end # tmp.c:4:3 + callq second + .loc 1 5 1 # tmp.c:5:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size first, .Lfunc_end0-first + .cfi_endproc + # -- End function + .globl second # -- Begin function second + .p2align 4, 0x90 + .type second,@function +second: # @second +.Lfunc_begin1: + .loc 1 7 0 # tmp.c:7:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp2: + .loc 1 8 3 prologue_end # tmp.c:8:3 + callq first + .loc 1 9 1 # tmp.c:9:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp3: +.Lfunc_end1: + .size second, .Lfunc_end1-second + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 39 # DW_AT_prototyped + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x4a DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 12 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 3 # DW_AT_decl_line + # DW_AT_prototyped + # DW_AT_external + .byte 2 # Abbrev [2] 0x3f:0x15 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + # DW_AT_prototyped + # DW_AT_external + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang" # string offset=0 +.Linfo_string1: + .asciz "tmp.c" # string offset=30 +.Linfo_string2: + .asciz "/dir" # string offset=36 +.Linfo_string3: + .asciz "first" # string offset=85 +.Linfo_string4: + .asciz "second" # string offset=91 + .ident "clang" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym first + .addrsig_sym second + .section .debug_line,"",@progbits +.Lline_table_start0: