Index: llvm/include/llvm/MC/MCInstPrinter.h =================================================================== --- llvm/include/llvm/MC/MCInstPrinter.h +++ llvm/include/llvm/MC/MCInstPrinter.h @@ -34,6 +34,72 @@ } // end namespace HexStyle +enum class MarkupType { + Reg, + Imm, + Mem, +}; + +// MarkupSpan represents a marked up range in the disassembly. For example: +// +// Pos InnerPos +// v v +// ... )> ... +// ~~~~~~~~~~~~~~~ InnerLength +// ~~~~~~~~~~~~~~~~~~~~~ Length +// +struct MarkupSpan { + MarkupType Type; + // The offset of the beginning of the marked up range. + size_t Pos; + // The length of the marked up range. + size_t Length; + // The offset of the beginning of the inner text. + size_t InnerPos; + // The length of the inner text. + size_t InnerLength; + // Marked up ranges in the inner text. In the example above, + // InnerSpans contains one MarkupSpan which represents ``. + std::unique_ptr> InnerSpans; + + MarkupSpan(MarkupType Type, size_t Pos, size_t Length, size_t InnerPos, + size_t InnerLength) + : Type(Type), Pos(Pos), Length(Length), InnerPos(InnerPos), + InnerLength(InnerLength), InnerSpans(new std::vector()) {} +}; + +// MarkupState holds the state used by llvm::operator<<(raw_ostream&) implementations +// for MarkupStart and MarkupEnd. +struct MarkupState { + // True if the marked up disassembly is enabled. Defaults to false in case + // resetMarkup() isn't called in the printer. + bool Enabled = false; + // The offset of the beginning of the disassembly in the stream. + size_t StartOffset; + // A stack which holds pointers to SpansOut_ and InnerSpans of unclosed Spans. + // TODO: Rename to an appropriate name. 
+ std::vector *> UnclosedSpanInner; + + void reset(raw_ostream &OS, bool Enabled_, + std::vector *SpansOut_); + size_t offset(raw_ostream &OS) const; +}; + +struct MarkupStart { + MarkupState &State; + MarkupType Type; + + MarkupStart(MarkupState &State, MarkupType Type) : State(State), Type(Type) {} + friend raw_ostream &operator<<(raw_ostream &OS, const MarkupStart &M); +}; + +struct MarkupEnd { + MarkupState &State; + + MarkupEnd(MarkupState &State) : State(State) {} + friend raw_ostream &operator<<(raw_ostream &OS, const MarkupEnd &M); +}; + /// This is an instance of a target assembly language printer that /// converts an MCInst to valid target assembly syntax. class MCInstPrinter { @@ -55,6 +121,10 @@ /// Which style to use for printing hexadecimal values. HexStyle::Style PrintHexStyle = HexStyle::C; + /// Markup states. + MarkupState MarkupState; + std::vector *MarkupSpans = nullptr; + /// Utility function for printing annotations. void printAnnotation(raw_ostream &OS, StringRef Annot); @@ -85,9 +155,17 @@ bool getUseMarkup() const { return UseMarkup; } void setUseMarkup(bool Value) { UseMarkup = Value; } + // Specify an output vector of marked up ranges. + void setMarkupSpans(std::vector &MS) { MarkupSpans = &MS; } + + // Resets the MarkupState. This should be called first in printInst(). + void resetMarkup(raw_ostream &OS); + /// Utility functions to make adding mark ups simpler. 
StringRef markup(StringRef s) const; StringRef markup(StringRef a, StringRef b) const; + MarkupStart startMarkup(MarkupType Type); + MarkupEnd endMarkup(); bool getPrintImmHex() const { return PrintImmHex; } void setPrintImmHex(bool Value) { PrintImmHex = Value; } Index: llvm/lib/MC/MCInstPrinter.cpp =================================================================== --- llvm/lib/MC/MCInstPrinter.cpp +++ llvm/lib/MC/MCInstPrinter.cpp @@ -71,11 +71,13 @@ return b; } +void MCInstPrinter::resetMarkup(raw_ostream &OS) { + MarkupState.reset(OS, getUseMarkup(), MarkupSpans); +} + // For asm-style hex (e.g. 0ffh) the first digit always has to be a number. -static bool needsLeadingZero(uint64_t Value) -{ - while (Value) - { +static bool needsLeadingZero(uint64_t Value) { + while (Value) { uint64_t digit = (Value >> 60) & 0xf; if (digit != 0) return (digit >= 0xa); @@ -123,3 +125,80 @@ } llvm_unreachable("unsupported print style"); } + +MarkupStart MCInstPrinter::startMarkup(MarkupType Type) { + return MarkupStart(MarkupState, Type); +} + +MarkupEnd MCInstPrinter::endMarkup() { + return MarkupEnd(MarkupState); +} + + +void MarkupState::reset(raw_ostream &OS, bool Enabled_, + std::vector *SpansOut) { + Enabled = Enabled_; + StartOffset = OS.tell(); + UnclosedSpanInner.clear(); + if (SpansOut) { + UnclosedSpanInner.push_back(SpansOut); + } +} + +size_t MarkupState::offset(raw_ostream &OS) const { + return OS.tell() - StartOffset; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MarkupStart &M) { + if (M.State.Enabled) { + StringRef TypeStr; + switch (M.Type) { + case MarkupType::Imm: + TypeStr = "imm"; + break; + case MarkupType::Reg: + TypeStr = "reg"; + break; + case MarkupType::Mem: + TypeStr = "mem"; + break; + } + + // TODO: support tag-modifier-list. As far as I investigated, it is not + // used though. 
See: https://llvm.org/docs/MarkedUpDisassembly.html + size_t Length = 2 + TypeStr.size(); + if (!M.State.UnclosedSpanInner.empty()) { + std::vector *CurrentInnerSpans = + M.State.UnclosedSpanInner.back(); + + /* we'll set Length and InnerLength later. */ + MarkupSpan Span = MarkupSpan(M.Type, M.State.offset(OS), 0, + M.State.offset(OS) + Length, 0); + + auto *InnerSpans = + const_cast *>(Span.InnerSpans.get()); + CurrentInnerSpans->push_back(std::move(Span)); + M.State.UnclosedSpanInner.push_back(InnerSpans); + } + OS << "<" << TypeStr << ":"; + } + + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MarkupEnd &M) { + if (M.State.Enabled) { + if (!M.State.UnclosedSpanInner.empty()) { + assert(M.State.UnclosedSpanInner.size() > 1 && + "Missing the corresponding markupStart()."); + + M.State.UnclosedSpanInner.pop_back(); + MarkupSpan &Span = M.State.UnclosedSpanInner.back()->back(); + Span.Length = M.State.offset(OS) - Span.Pos + 1; + Span.InnerLength = M.State.offset(OS) - Span.InnerPos; + } + OS << ">"; + } + + return OS; +} Index: llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h +++ llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h @@ -117,6 +117,8 @@ private: bool HasCustomInstComment; + + void printMarkedUpRegName(raw_ostream &OS, unsigned RegNo); }; } // end namespace llvm Index: llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -38,8 +38,17 @@ OS << markup(""); } +// Since printRegName is a const method, we need this non-const version to +// update MarkupState and MarkupSpans. 
+void X86ATTInstPrinter::printMarkedUpRegName(raw_ostream &OS, unsigned RegNo) { + OS << startMarkup(MarkupType::Reg) << '%' << getRegisterName(RegNo) + << endMarkup(); +} + void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, const MCSubtargetInfo &STI) { + resetMarkup(OS); + // If verbose assembly is enabled, we can print some informative comments. if (CommentStream) HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII); @@ -353,11 +362,11 @@ raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { - printRegName(O, Op.getReg()); + printMarkedUpRegName(O, Op.getReg()); } else if (Op.isImm()) { // Print immediates as signed values. int64_t Imm = Op.getImm(); - O << markup(""); + O << startMarkup(MarkupType::Imm) << '$' << formatImm(Imm) << endMarkup(); // TODO: This should be in a helper function in the base class, so it can // be used by other printers. @@ -365,6 +374,7 @@ // If there are no instruction-specific comments, add a comment clarifying // the hex value of the immediate operand when it isn't in the range // [-256,255]. + errs() << "CommentStream:" << ((size_t) CommentStream) << "\n"; if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) { // Don't print unnecessary hex sign bits. if (Imm == (int16_t)(Imm)) @@ -376,9 +386,9 @@ } } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << markup("print(O, &MAI); - O << markup(">"); + O << endMarkup(); } } @@ -388,7 +398,7 @@ const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg); const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp); - O << markup("getOperand(Op + X86::AddrScaleAmt).getImm(); if (ScaleVal != 1) { - O << ',' << markup(""); + O << ',' << startMarkup(MarkupType::Imm) + << ScaleVal // never printed in hex. 
+ << endMarkup(); } } O << ')'; } - O << markup(">"); + O << endMarkup(); } void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O) { - O << markup(""); + O << endMarkup(); } void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op, raw_ostream &O) { - O << markup(""); + O << endMarkup(); } void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &DispSpec = MI->getOperand(Op); - O << markup("print(O, &MAI); } - O << markup(">"); + O << endMarkup(); } void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op, @@ -471,8 +482,8 @@ if (MI->getOperand(Op).isExpr()) return printOperand(MI, Op, O); - O << markup("getOperand(Op).getImm() & 0xff) - << markup(">"); + O << startMarkup(MarkupType::Imm) << '$' + << formatImm(MI->getOperand(Op).getImm() & 0xff) << endMarkup(); } void X86ATTInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo, @@ -481,7 +492,7 @@ unsigned Reg = Op.getReg(); // Override the default printing to print st(0) instead st. 
if (Reg == X86::ST0) - OS << markup(""); + OS << startMarkup(MarkupType::Reg) << "%st(0)" << endMarkup(); else - printRegName(OS, Reg); + printMarkedUpRegName(OS, Reg); } Index: llvm/tools/llvm-objdump/llvm-objdump.cpp =================================================================== --- llvm/tools/llvm-objdump/llvm-objdump.cpp +++ llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -332,6 +332,10 @@ cl::cat(ObjdumpCat)); static cl::alias WideShort("w", cl::Grouping, cl::aliasopt(Wide)); +static cl::opt Highlight("highlight", + cl::desc("Enable syntax highlighting"), + cl::cat(ObjdumpCat)); + static cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); @@ -660,8 +664,9 @@ ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, - SourcePrinter *SP, + SourcePrinter *SP, size_t &HeaderLength, std::vector *Rels = nullptr) { + uint64_t BeforeOffset = OS.tell(); if (SP && (PrintSource || PrintLines)) SP->printSourceLine(OS, Address); @@ -683,6 +688,7 @@ // The dtor calls flush() to ensure the indent comes before printInst(). 
} + HeaderLength = OS.tell() - BeforeOffset; if (MI) IP.printInst(MI, OS, "", STI); else @@ -708,7 +714,9 @@ void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + size_t &HeaderLength, std::vector *Rels) override { + size_t StartOffset = OS.tell(); if (SP && (PrintSource || PrintLines)) SP->printSourceLine(OS, Address, ""); if (!MI) { @@ -716,6 +724,8 @@ OS << " "; return; } + + HeaderLength = OS.tell() - StartOffset; std::string Buffer; { raw_string_ostream TempStream(Buffer); @@ -777,7 +787,9 @@ void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + size_t &HeaderLength, std::vector *Rels) override { + size_t StartOffset = OS.tell(); if (SP && (PrintSource || PrintLines)) SP->printSourceLine(OS, Address); @@ -785,6 +797,7 @@ SmallString<40> InstStr; raw_svector_ostream IS(InstStr); + HeaderLength = OS.tell() - StartOffset; IP.printInst(MI, IS, "", STI); OS << left_justify(IS.str(), 60); @@ -828,7 +841,9 @@ void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + size_t &HeaderLength, std::vector *Rels) override { + size_t StartOffset = OS.tell(); if (SP && (PrintSource || PrintLines)) SP->printSourceLine(OS, Address); if (!NoLeadingAddr) @@ -837,6 +852,7 @@ OS << "\t"; dumpBytes(Bytes, OS); } + HeaderLength = OS.tell() - StartOffset; if (MI) IP.printInst(MI, OS, "", STI); else @@ -1072,14 +1088,68 @@ } } +static void printMarkupSpans(StringRef Text, + const std::vector &Spans, + size_t &NextPos) { + for (const MarkupSpan &Span : Spans) { + StringRef BeforeText = Text.substr(NextPos, Span.Pos - NextPos); + outs() << BeforeText; + + if (Span.InnerSpans->empty()) { + switch 
(Span.Type) { + case MarkupType::Reg: + outs().changeColor(raw_ostream::BLUE); + break; + case MarkupType::Imm: + outs().changeColor(raw_ostream::RED); + break; + default: + // Do nothing. + break; + } + + StringRef InnerText = Text.substr(Span.InnerPos, Span.InnerLength); + outs() << InnerText; + outs().resetColor(); + } else { + NextPos = Span.InnerPos; + printMarkupSpans(Text, *Span.InnerSpans, NextPos); + StringRef AfterText = + Text.substr(NextPos, Span.InnerLength - (NextPos - Span.InnerPos)); + outs() << AfterText; + } + + NextPos = Span.Pos + Span.Length; + } +} + +static void printMarkedUpInst(StringRef Text, + const std::vector &Spans, + size_t HeaderLength) { + if (!Highlight) { + outs() << Text; + return; + } + + // Print hexdump, etc. + outs() << Text.slice(0, HeaderLength); + + StringRef Disasm = Text.slice(HeaderLength, Text.size()); + size_t NextPos = 0; + printMarkupSpans(Disasm, Spans, NextPos); + + // Print the remaining part. + outs() << Disasm.substr(NextPos); +} + static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, const MCInstrAnalysis *MIA, MCInstPrinter *IP, const MCSubtargetInfo *PrimarySTI, const MCSubtargetInfo *SecondarySTI, - PrettyPrinter &PIP, - SourcePrinter &SP, bool InlineRelocs) { + PrettyPrinter &PIP, SourcePrinter &SP, + bool InlineRelocs) { const MCSubtargetInfo *STI = PrimarySTI; MCDisassembler *DisAsm = PrimaryDisAsm; bool PrimaryIsThumb = false; @@ -1221,6 +1291,9 @@ SmallString<40> Comments; raw_svector_ostream CommentStream(Comments); + std::vector MarkupSpans; + IP->setMarkupSpans(MarkupSpans); + ArrayRef Bytes = arrayRefFromStringRef( unwrapOrError(Section.getContents(), Obj->getFileName())); @@ -1378,12 +1451,18 @@ if (Size == 0) Size = 1; - PIP.printInst( - *IP, Disassembled ? 
&Inst : nullptr, Bytes.slice(Index, Size), - {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(), - "", *STI, &SP, &Rels); + std::string InstText; + raw_string_ostream InstTextStream(InstText); + size_t HeaderLength; + PIP.printInst(*IP, Disassembled ? &Inst : nullptr, + Bytes.slice(Index, Size), + {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, + InstTextStream, "", *STI, &SP, HeaderLength, &Rels); + printMarkedUpInst(StringRef(InstTextStream.str()), MarkupSpans, + HeaderLength); outs() << CommentStream.str(); Comments.clear(); + MarkupSpans.clear(); // Try to resolve the target of a call, tail call, etc. to a specific // symbol. @@ -1433,11 +1512,17 @@ --TargetSym; uint64_t TargetAddress = std::get<0>(*TargetSym); StringRef TargetName = std::get<1>(*TargetSym); + if (Highlight) + outs().changeColor(raw_ostream::YELLOW, true); + outs() << " <" << TargetName; uint64_t Disp = Target - TargetAddress; if (Disp) outs() << "+0x" << Twine::utohexstr(Disp); outs() << '>'; + + if (Highlight) + outs().resetColor(); } } } @@ -1549,6 +1634,7 @@ report_error(Obj->getFileName(), "no instruction printer for target " + TripleName); IP->setPrintImmHex(PrintImmHex); + IP->setUseMarkup(Highlight); PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName)); SourcePrinter SP(Obj, TheTarget->getName());