diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -197,6 +197,30 @@ When printing symbols, only print symbols with a value up to ``address``. +.. option:: --symbolize-operands + + When disassembling, symbolize a branch target operand to print a label instead of a real address. + + When printing a PC-relative global symbol reference, print it as an offset from the leading symbol. + + Only works with an X86 linked image. + + Example: + A non-symbolized branch instruction with a local target and pc-relative memory access like + + .. code-block:: none + + cmp eax, dword ptr [rip + 4112] + jge 0x20117e <_start+0x25> + + might become + + .. code-block:: none + + <L0>: + cmp eax, dword ptr <g> + jge <L0> + .. option:: --triple=<string> Target triple to disassemble for, see ``--version`` for available targets. diff --git a/llvm/include/llvm/MC/MCInstPrinter.h b/llvm/include/llvm/MC/MCInstPrinter.h --- a/llvm/include/llvm/MC/MCInstPrinter.h +++ b/llvm/include/llvm/MC/MCInstPrinter.h @@ -18,6 +18,7 @@ class MCInst; class MCOperand; class MCInstrInfo; +class MCInstrAnalysis; class MCRegisterInfo; class MCSubtargetInfo; class raw_ostream; @@ -48,6 +49,7 @@ const MCAsmInfo &MAI; const MCInstrInfo &MII; const MCRegisterInfo &MRI; + const MCInstrAnalysis *MIA = nullptr; /// True if we are printing marked up assembly. bool UseMarkup = false; @@ -63,6 +65,9 @@ /// (llvm-objdump -d). bool PrintBranchImmAsAddress = false; + /// If true, symbolize branch target and memory reference operands. + bool SymbolizeOperands = false; + /// Utility function for printing annotations. 
void printAnnotation(raw_ostream &OS, StringRef Annot); @@ -115,6 +120,9 @@ PrintBranchImmAsAddress = Value; } + void setSymbolizeOperands(bool Value) { SymbolizeOperands = Value; } + void setMCInstrAnalysis(const MCInstrAnalysis *Value) { MIA = Value; } + /// Utility function to print immediates in decimal or hex. format_object formatImm(int64_t Value) const { return PrintImmHex ? formatHex(Value) : formatDec(Value); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -16,6 +16,7 @@ #include "X86InstComments.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Casting.h" @@ -384,6 +385,16 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O) { + // Do not print the exact form of the memory operand if it references a known + // binary object. + if (SymbolizeOperands && MIA) { + uint64_t Target; + if (MIA->evaluateBranch(*MI, 0, 0, Target)) + return; + if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0)) + return; + } + const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg); const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg); const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -295,6 +295,10 @@ /// \see MCInstPrinter::printInst void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address, unsigned OpNo, raw_ostream &O) { + // Do not print the numeric target address when symbolizing. 
+ if (SymbolizeOperands) + return; + const MCOperand &Op = MI->getOperand(OpNo); if (Op.isImm()) { if (PrintBranchImmAsAddress) { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -16,6 +16,7 @@ #include "X86InstComments.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -342,6 +343,15 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O) { + // Do not print the exact form of the memory operand if it references a known + // binary object. + if (SymbolizeOperands && MIA) { + uint64_t Target; + if (MIA->evaluateBranch(*MI, 0, 0, Target)) + return; + if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0)) + return; + } const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg); unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm(); const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg); diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbololize-operands.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbololize-operands.yaml @@ -0,0 +1,48 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --symbolize-operands --x86-asm-syntax=intel --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines --check-prefix=INTEL +# RUN: llvm-objdump %t -d --symbolize-operands --x86-asm-syntax=att --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines --check-prefix=ATT + +## Expect to find the branch labels and global variable name. 
+# ATT: <_start>: +# ATT-NEXT: pushq %rax +# ATT-NEXT: <L1>: +# ATT-NEXT: cmpl <symbol>, %eax +# ATT-NEXT: jge <L0> +# ATT-NEXT: jmp <L1> +# ATT-NEXT: <L0>: +# ATT-NEXT: retq + +# INTEL: <_start>: +# INTEL-NEXT: push rax +# INTEL-NEXT: <L1>: +# INTEL-NEXT: cmp eax, dword ptr <symbol> +# INTEL-NEXT: jge <L0> +# INTEL-NEXT: jmp <L1> +# INTEL-NEXT: <L0>: +# INTEL-NEXT: ret + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Address: 0x4000 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: '503b05051000007d02ebf6c3' + - Name: .data + Type: SHT_PROGBITS + Flags: [SHF_ALLOC, SHF_WRITE] + Address: 0x5000 +Symbols: + - Name: _start + Section: .text + Value: 0x4000 + - Name: symbol + Section: .data + Value: 0x500c diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -321,6 +321,11 @@ cl::NotHidden, cl::Grouping, cl::aliasopt(SymbolTable)); +static cl::opt<bool> SymbolizeOperands( + "symbolize-operands", + cl::desc("Symbolize instruction operands when disassembling"), + cl::cat(ObjdumpCat)); + static cl::opt<bool> DynamicSymbolTable( "dynamic-syms", cl::desc("Display the contents of the dynamic symbol table"), @@ -1578,6 +1583,42 @@ return SymbolInfoTy(Addr, Name, Type); } +static void +collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA, + MCDisassembler *DisAsm, MCInstPrinter *IP, + const MCSubtargetInfo *STI, uint64_t SectionAddr, + uint64_t Start, uint64_t End, + std::unordered_map<uint64_t, std::string> &Labels) { + // Always reset the output map, then bail out: so far only X86 is supported. + Labels.clear(); + if (!STI->getTargetTriple().isX86()) + return; + + unsigned LabelCount = 0; + Start += SectionAddr; + End += SectionAddr; + uint64_t Index = Start; + while (Index < End) { + // Disassemble a real instruction and record function-local branch labels. 
+ MCInst Inst; + uint64_t Size; + bool Disassembled = DisAsm->getInstruction( + Inst, Size, Bytes.slice(Index - SectionAddr), Index, nulls()); + if (Size == 0) + Size = 1; + + if (Disassembled && MIA) { + uint64_t Target; + bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target); + if (TargetKnown && (Target >= Start && Target < End) && + !Labels.count(Target)) + Labels[Target] = ("L" + Twine(LabelCount++)).str(); + } + + Index += Size; + } +} + static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, @@ -1894,6 +1935,12 @@ !DisassembleAll; bool DumpARMELFData = false; formatted_raw_ostream FOS(outs()); + + std::unordered_map<uint64_t, std::string> AllLabels; + if (SymbolizeOperands) + collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI, + SectionAddr, Index, End, AllLabels); + while (Index < End) { // ARM and AArch64 ELF binaries can interleave data and text in the // same section. We rely on the markers introduced to understand what @@ -1934,6 +1981,11 @@ } } + // Print local label if there's any. + auto Iter = AllLabels.find(SectionAddr + Index); + if (Iter != AllLabels.end()) + FOS << "<" << Iter->second << ">:\n"; + // Disassemble a real instruction or a data when disassemble all is // provided MCInst Inst; @@ -1967,7 +2019,9 @@ Inst, SectionAddr + Index, Size)) { Target = *MaybeTarget; PrintTarget = true; - FOS << " # " << Twine::utohexstr(Target); + // Do not print real address when symbolizing. + if (!SymbolizeOperands) + FOS << " # " << Twine::utohexstr(Target); } if (PrintTarget) { // In a relocatable object, the target's section must reside in @@ -2017,17 +2071,30 @@ } } + // Print the labels corresponding to the target if there's any. 
+ bool LabelAvailable = AllLabels.count(Target); if (TargetSym != nullptr) { uint64_t TargetAddress = TargetSym->Addr; + uint64_t Disp = Target - TargetAddress; std::string TargetName = TargetSym->Name.str(); if (Demangle) TargetName = demangle(TargetName); - FOS << " <" << TargetName; - uint64_t Disp = Target - TargetAddress; - if (Disp) - FOS << "+0x" << Twine::utohexstr(Disp); - FOS << '>'; + FOS << " <"; + if (!Disp) { + // Always print the binary symbol precisely corresponding to + // the target address. + FOS << TargetName; + } else if (!LabelAvailable) { + // Always print the binary symbol plus an offset if there's no + // local label corresponding to the target address. + FOS << TargetName << "+0x" << Twine::utohexstr(Disp); + } else { + FOS << AllLabels[Target]; + } + FOS << ">"; + } else if (LabelAvailable) { + FOS << " <" << AllLabels[Target] << ">"; } } } @@ -2149,6 +2216,8 @@ "no instruction printer for target " + TripleName); IP->setPrintImmHex(PrintImmHex); IP->setPrintBranchImmAsAddress(true); + IP->setSymbolizeOperands(SymbolizeOperands); + IP->setMCInstrAnalysis(MIA.get()); PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName)); SourcePrinter SP(Obj, TheTarget->getName());