diff --git a/llvm/include/llvm/MC/MCInstPrinter.h b/llvm/include/llvm/MC/MCInstPrinter.h --- a/llvm/include/llvm/MC/MCInstPrinter.h +++ b/llvm/include/llvm/MC/MCInstPrinter.h @@ -18,6 +18,7 @@ class MCInst; class MCOperand; class MCInstrInfo; +class MCInstrAnalysis; class MCRegisterInfo; class MCSubtargetInfo; class raw_ostream; @@ -48,6 +49,7 @@ const MCAsmInfo &MAI; const MCInstrInfo &MII; const MCRegisterInfo &MRI; + const MCInstrAnalysis *MIA = nullptr; /// True if we are printing marked up assembly. bool UseMarkup = false; @@ -63,6 +65,9 @@ /// (llvm-objdump -d). bool PrintBranchImmAsAddress = false; + /// If true, symbolize branch target operands and memory reference operands. + bool SymbolizeOperands = false; + /// Utility function for printing annotations. void printAnnotation(raw_ostream &OS, StringRef Annot); @@ -115,6 +120,9 @@ PrintBranchImmAsAddress = Value; } + void setSymbolizeOperands(bool Value) { SymbolizeOperands = Value; } + void setMCInstrAnalysis(const MCInstrAnalysis *Value) { MIA = Value; } + /// Utility function to print immediates in decimal or hex. format_object<int64_t> formatImm(int64_t Value) const { return PrintImmHex ? formatHex(Value) : formatDec(Value); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -295,6 +295,10 @@ /// \see MCInstPrinter::printInst void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address, unsigned OpNo, raw_ostream &O) { + // Do not print the numeric target address when symbolizing.
+ if (SymbolizeOperands) + return; + const MCOperand &Op = MI->getOperand(OpNo); if (Op.isImm()) { if (PrintBranchImmAsAddress) { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -16,6 +16,7 @@ #include "X86InstComments.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -342,6 +343,15 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O) { + // Do not print the exact form of the memory operand if it references a known + // binary object. + if (SymbolizeOperands && MIA) { + uint64_t Target; + if (MIA->evaluateBranch(*MI, 0, 0, Target)) + return; + if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0)) + return; + } const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg); unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm(); const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg); diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-symbolize-operands.s b/llvm/test/tools/llvm-objdump/X86/disassemble-symbolize-operands.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-symbolize-operands.s @@ -0,0 +1,33 @@ +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t +# RUN: ld.lld %t -o %t2 +# RUN: llvm-objdump -d --symbolize-operands --x86-asm-syntax=intel --no-show-raw-insn --no-leading-addr %t2 | FileCheck %s + + .text +foo: + retq +# CHECK: <_start>: +_start: + pushq %rax + movl $0, 4(%rsp) + movl $0, (%rsp) +# CHECK: <L1>: +.LBB1_1: + movl (%rsp), %eax +# CHECK: cmp eax, dword ptr <g> + cmpl g(%rip), %eax +# CHECK: jge <L0> + jge .LBB1_3 +# CHECK: call <foo> + callq foo + incl (%rsp) +# CHECK: jmp <L1> + jmp .LBB1_1 +# CHECK: <L0>
: +.LBB1_3: + xorl %eax, %eax + popq %rcx + retq + .bss +g: + .long 0 + .size g, 4 diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -321,6 +321,11 @@ cl::NotHidden, cl::Grouping, cl::aliasopt(SymbolTable)); +static cl::opt<bool> SymbolizeOperands( + "symbolize-operands", + cl::desc("Symbolize instruction operands when disassembling"), + cl::cat(MachOCat)); + static cl::opt<bool> DynamicSymbolTable( "dynamic-syms", cl::desc("Display the contents of the dynamic symbol table"), @@ -1568,6 +1573,46 @@ return SymbolInfoTy(Addr, Name, Type); } +static void collectLocalBranchTargets( + ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA, MCDisassembler *DisAsm, + MCInstPrinter *IP, const MCSubtargetInfo *STI, uint64_t SectionAddr, + uint64_t Start, uint64_t End, DenseMap<uint64_t, std::string> &Labels) { + // Only X86 is supported so far. Other targets like Arm64 may embed data in + // the text section. + if (!STI->getTargetTriple().isX86()) + return; + + // Only the X86 Intel asm syntax is supported. + if (!DisAsm->getContext().getAsmInfo()->getAssemblerDialect()) + return; + + Labels.clear(); + unsigned LabelCount = 0; + Start += SectionAddr; + End += SectionAddr; + uint64_t Index = Start; + while (Index < End) { + // Disassemble a real instruction and record function-local branch labels.
+ MCInst Inst; + uint64_t Size; + bool Disassembled = DisAsm->getInstruction( + Inst, Size, Bytes.slice(Index - SectionAddr), Index, nulls()); + if (Size == 0) + Size = 1; + + if (Disassembled && MIA) { + uint64_t Target; + bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target); + if (TargetKnown && !Labels.count(Target)) { + if (Target >= Start && Target < End) + Labels[Target] = "L" + Twine(LabelCount++).str(); + } + } + + Index += Size; + } +} + static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, @@ -1880,6 +1925,12 @@ !DisassembleAll; bool DumpARMELFData = false; formatted_raw_ostream FOS(outs()); + + DenseMap<uint64_t, std::string> AllLabels; + if (SymbolizeOperands) + collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI, + SectionAddr, Index, End, AllLabels); + while (Index < End) { // ARM and AArch64 ELF binaries can interleave data and text in the // same section. We rely on the markers introduced to understand what @@ -1920,6 +1971,11 @@ } } + // Print the local label if there is one. + auto Iter = AllLabels.find(SectionAddr + Index); + if (Iter != AllLabels.end()) + FOS << "<" << Iter->second << ">:\n"; + // Disassemble a real instruction or a data when disassemble all is // provided MCInst Inst; @@ -1953,7 +2009,9 @@ Inst, SectionAddr + Index, Size)) { Target = *MaybeTarget; PrintTarget = true; - FOS << " # " << Twine::utohexstr(Target); + // Do not print the real address when symbolizing. + if (!SymbolizeOperands) + FOS << " # " << Twine::utohexstr(Target); } if (PrintTarget) { // In a relocatable object, the target's section must reside in @@ -2003,17 +2061,29 @@ } } + // Print the label corresponding to the target if there is one.
+ bool LabelAvailable = AllLabels.count(Target); if (TargetSym != nullptr) { uint64_t TargetAddress = TargetSym->Addr; + uint64_t Disp = Target - TargetAddress; std::string TargetName = TargetSym->Name.str(); if (Demangle) TargetName = demangle(TargetName); - FOS << " <" << TargetName; - uint64_t Disp = Target - TargetAddress; - if (Disp) - FOS << "+0x" << Twine::utohexstr(Disp); - FOS << '>'; + FOS << " <"; + if (!Disp) + // Always print the binary symbol precisely corresponding to + // the target address. + FOS << TargetName; + else if (!LabelAvailable) { + // Always print the binary symbol plus an offset if there's no + // local label corresponding to the target address. + FOS << TargetName << "+0x" << Twine::utohexstr(Disp); + } else + FOS << AllLabels[Target]; + FOS << ">"; + } else if (LabelAvailable) { + FOS << " <" << AllLabels[Target] << ">"; } } } @@ -2135,6 +2205,8 @@ "no instruction printer for target " + TripleName); IP->setPrintImmHex(PrintImmHex); IP->setPrintBranchImmAsAddress(true); + IP->setSymbolizeOperands(SymbolizeOperands); + IP->setMCInstrAnalysis(MIA.get()); PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName)); SourcePrinter SP(Obj, TheTarget->getName());