Index: llvm/include/llvm/MC/MCInstPrinter.h
===================================================================
--- llvm/include/llvm/MC/MCInstPrinter.h
+++ llvm/include/llvm/MC/MCInstPrinter.h
@@ -34,6 +34,36 @@
 
 } // end namespace HexStyle
 
+enum class MarkupType {
+  Reg,
+  Imm,
+};
+
+//     Pos  InnerPos
+//     v    v
+// ... <reg:%eax> ...
+//          ~~~~       InnerLength
+//     ~~~~~~~~~~      Length
+//
+struct MarkupSpan {
+  MarkupType Type;
+  size_t Pos;         // Offset of the opening '<'.
+  size_t Length;      // Size of the whole markup, '<' through '>'.
+  size_t InnerPos;    // Offset of the first character after "<tag:".
+  size_t InnerLength; // Size of the text between "<tag:" and '>'.
+
+  MarkupSpan(MarkupType Type, size_t Pos, size_t Length, size_t InnerPos,
+             size_t InnerLength)
+      : Type(Type), Pos(Pos), Length(Length), InnerPos(InnerPos),
+        InnerLength(InnerLength) {}
+};
+
+struct PrinterMarkupState {
+  size_t WrittenLen = 0;
+
+  void reset() { WrittenLen = 0; }
+};
+
 /// This is an instance of a target assembly language printer that
 /// converts an MCInst to valid target assembly syntax.
 class MCInstPrinter {
@@ -55,6 +85,9 @@
   /// Which style to use for printing hexadecimal values.
   HexStyle::Style PrintHexStyle = HexStyle::C;
 
+  /// Where markupStart()/markupEnd() record spans; null disables recording.
+  std::vector<MarkupSpan> *MarkupSpans = nullptr;
+
   /// Utility function for printing annotations.
   void printAnnotation(raw_ostream &OS, StringRef Annot);
 
@@ -89,6 +122,12 @@
   StringRef markup(StringRef s) const;
   StringRef markup(StringRef a, StringRef b) const;
 
+  void markupStart(raw_ostream &O, PrinterMarkupState &State, MarkupType Type);
+  void write(raw_ostream &O, PrinterMarkupState &State, StringRef Text);
+  void markupEnd(raw_ostream &O, PrinterMarkupState &State);
+
+  void setMarkupSpans(std::vector<MarkupSpan> &MS) { MarkupSpans = &MS; }
+
   bool getPrintImmHex() const { return PrintImmHex; }
   void setPrintImmHex(bool Value) { PrintImmHex = Value; }
 
Index: llvm/lib/MC/MCInstPrinter.cpp
===================================================================
--- llvm/lib/MC/MCInstPrinter.cpp
+++ llvm/lib/MC/MCInstPrinter.cpp
@@ -71,9 +71,70 @@
     return b;
 }
 
+/// Emit the opening "<tag:" for \p Type and, if a span list is attached,
+/// record a new open span at the current offset. No-op without markup.
+void MCInstPrinter::markupStart(raw_ostream &O, PrinterMarkupState &State,
+                                MarkupType Type) {
+  if (getUseMarkup()) {
+    StringRef TypeStr;
+    switch (Type) {
+    case MarkupType::Imm:
+      TypeStr = "imm";
+      break;
+    case MarkupType::Reg:
+      TypeStr = "reg";
+      break;
+    }
+
+    // TODO: support tag-modifier-list (see:
+    // https://llvm.org/docs/MarkedUpDisassembly.html)
+    size_t Length = 2 + TypeStr.size();
+    O << "<" << TypeStr << ":";
+    if (MarkupSpans) {
+      // Length and InnerLength stay 0 until markupEnd() fills them in.
+      MarkupSpans->emplace_back(Type, State.WrittenLen, 0,
+                                State.WrittenLen + Length, 0);
+    }
+    State.WrittenLen += Length;
+  }
+}
+
+/// Write \p Text to \p O; with markup enabled, also advance State.WrittenLen
+/// by its size so that later span offsets account for it.
+void MCInstPrinter::write(raw_ostream &O, PrinterMarkupState &State,
+                          StringRef Text) {
+  O << Text;
+  if (getUseMarkup()) {
+    State.WrittenLen += Text.size();
+  }
+}
+
+/// Emit the closing ">" and complete the innermost span that is still open.
+/// No-op without markup.
+void MCInstPrinter::markupEnd(raw_ostream &O, PrinterMarkupState &State) {
+  if (getUseMarkup()) {
+    O << ">";
+    if (MarkupSpans) {
+      // markupStart() leaves Length == 0 on an open span and a closed span is
+      // never empty, so the last such entry is the one being closed. Using
+      // back() instead would pick the wrong span once markups are nested.
+      auto Open = MarkupSpans->rbegin();
+      while (Open != MarkupSpans->rend() && Open->Length != 0)
+        ++Open;
+      assert(Open != MarkupSpans->rend() &&
+             "missing the corresponding markupStart()");
+      if (Open != MarkupSpans->rend()) {
+        // WrittenLen does not count the '>' yet, hence the + 1 for Length.
+        Open->Length = State.WrittenLen - Open->Pos + 1;
+        Open->InnerLength = State.WrittenLen - Open->InnerPos;
+      }
+    }
+    State.WrittenLen += 1;
+  }
+}
+
 // For asm-style hex (e.g. 0ffh) the first digit always has to be a number.
-static bool needsLeadingZero(uint64_t Value)
-{
+static bool needsLeadingZero(uint64_t Value) {
   while (Value) {
     uint64_t digit = (Value >> 60) & 0xf;
Index: llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
+++ llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
@@ -117,6 +117,9 @@
 
 private:
   bool HasCustomInstComment;
+  PrinterMarkupState MarkupState;
+
+  void printMarkedUpRegName(raw_ostream &OS, unsigned RegNo);
 };
 
 } // end namespace llvm
Index: llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -38,12 +38,20 @@
   OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
 }
 
+void X86ATTInstPrinter::printMarkedUpRegName(raw_ostream &OS, unsigned RegNo) {
+  markupStart(OS, MarkupState, MarkupType::Reg);
+  write(OS, MarkupState, "%");
+  write(OS, MarkupState, getRegisterName(RegNo));
+  markupEnd(OS, MarkupState);
+}
+
 void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
                                   StringRef Annot, const MCSubtargetInfo &STI) {
   // If verbose assembly is enabled, we can print some informative comments.
   if (CommentStream)
     HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);
 
+  MarkupState.reset();
   printInstFlags(MI, OS);
 
   // Output CALLpcrel32 as "callq" in 64-bit mode.
@@ -353,7 +361,7 @@
                                      raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isReg()) {
-    printRegName(O, Op.getReg());
+    printMarkedUpRegName(O, Op.getReg());
   } else if (Op.isImm()) {
     // Print immediates as signed values.
     int64_t Imm = Op.getImm();
@@ -483,5 +491,5 @@
   if (Reg == X86::ST0)
     OS << markup("<reg:") << "%st(0)" << markup(">");
   else
-    printRegName(OS, Reg);
+    printMarkedUpRegName(OS, Reg);
 }
Index: llvm/tools/llvm-objdump/llvm-objdump.cpp
===================================================================
--- llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -332,6 +332,10 @@
                           cl::cat(ObjdumpCat));
 static cl::alias WideShort("w", cl::Grouping, cl::aliasopt(Wide));
 
+static cl::opt<bool> Highlight("highlight",
+                               cl::desc("Enable syntax highlighting"),
+                               cl::cat(ObjdumpCat));
+
 static cl::extrahelp
     HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
 
@@ -656,12 +660,12 @@
 class PrettyPrinter {
 public:
   virtual ~PrettyPrinter() = default;
-  virtual void printInst(MCInstPrinter &IP, const MCInst *MI,
-                         ArrayRef<uint8_t> Bytes,
-                         object::SectionedAddress Address, raw_ostream &OS,
-                         StringRef Annot, MCSubtargetInfo const &STI,
-                         SourcePrinter *SP,
-                         std::vector<RelocationRef> *Rels = nullptr) {
+  virtual void
+  printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
+            object::SectionedAddress Address, raw_ostream &OS, StringRef Annot,
+            MCSubtargetInfo const &STI, SourcePrinter *SP,
+            size_t &HeaderLength, std::vector<RelocationRef> *Rels = nullptr) {
+    uint64_t BeforeOffset = OS.tell();
     if (SP && (PrintSource || PrintLines))
       SP->printSourceLine(OS, Address);
 
@@ -683,6 +687,7 @@
       // The dtor calls flush() to ensure the indent comes before printInst().
     }
 
+    HeaderLength = OS.tell() - BeforeOffset;
     if (MI)
       IP.printInst(MI, OS, "", STI);
     else
@@ -708,7 +713,9 @@
   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
                  object::SectionedAddress Address, raw_ostream &OS,
                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
-                 std::vector<RelocationRef> *Rels) override {
+                 size_t &HeaderLength,
+                 std::vector<RelocationRef> *Rels)
+      override {
     if (SP && (PrintSource || PrintLines))
       SP->printSourceLine(OS, Address, "");
     if (!MI) {
@@ -716,6 +723,8 @@
       OS << " <unknown>";
       return;
     }
+
+    HeaderLength = 0; // TODO: report the length of the text printed first.
     std::string Buffer;
     {
       raw_string_ostream TempStream(Buffer);
@@ -777,10 +786,12 @@
   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
                  object::SectionedAddress Address, raw_ostream &OS,
                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
+                 size_t &HeaderLength,
                  std::vector<RelocationRef> *Rels) override {
     if (SP && (PrintSource || PrintLines))
       SP->printSourceLine(OS, Address);
 
+    HeaderLength = 0; // TODO: report the length of the text printed first.
     if (MI) {
       SmallString<40> InstStr;
       raw_svector_ostream IS(InstStr);
@@ -828,6 +839,7 @@
   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
                  object::SectionedAddress Address, raw_ostream &OS,
                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
+                 size_t &HeaderLength,
                  std::vector<RelocationRef> *Rels) override {
     if (SP && (PrintSource || PrintLines))
       SP->printSourceLine(OS, Address);
@@ -837,6 +849,7 @@
       OS << "\t";
       dumpBytes(Bytes, OS);
     }
+    HeaderLength = 0; // TODO: report the length of the text printed first.
     if (MI)
       IP.printInst(MI, OS, "", STI);
     else
@@ -1072,14 +1085,55 @@
   }
 }
 
+/// Write \p Text to outs(), coloring the register spans recorded in \p Spans.
+/// Span offsets are relative to the instruction text, which starts at
+/// \p HeaderLength within \p Text. Without --highlight, \p Text is written
+/// unchanged. Only the inner text of a span is printed; its markup is dropped.
+static void printMarkedUpInst(StringRef Text, std::vector<MarkupSpan> &Spans,
+                              size_t HeaderLength) {
+  if (!Highlight) {
+    outs() << Text;
+    return;
+  }
+
+  outs() << Text.slice(0, HeaderLength);
+  // End of the last emitted span, relative to the instruction text.
+  size_t NextPos = 0;
+  for (const MarkupSpan &Span : Spans) {
+    // TODO: support nested spans. A span that starts before the end of the
+    // previous one is skipped so that the subtraction below cannot wrap.
+    if (Span.Pos < NextPos)
+      continue;
+
+    StringRef BeforeText =
+        Text.substr(HeaderLength + NextPos, Span.Pos - NextPos);
+    StringRef InnerText =
+        Text.substr(HeaderLength + Span.InnerPos, Span.InnerLength);
+
+    outs() << BeforeText;
+
+    // WIP: Highlight only register names for now.
+    if (Span.Type == MarkupType::Reg)
+      outs().changeColor(raw_ostream::RED);
+    outs() << InnerText;
+    outs().resetColor();
+
+    // Keep NextPos instruction-relative; HeaderLength is added when indexing.
+    NextPos = Span.Pos + Span.Length;
+  }
+
+  // Print the remaining part.
+  outs() << Text.substr(HeaderLength + NextPos);
+}
+
 static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
                               MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
                               MCDisassembler *SecondaryDisAsm,
                               const MCInstrAnalysis *MIA, MCInstPrinter *IP,
                               const MCSubtargetInfo *PrimarySTI,
                               const MCSubtargetInfo *SecondarySTI,
-                              PrettyPrinter &PIP,
-                              SourcePrinter &SP, bool InlineRelocs) {
+                              PrettyPrinter &PIP, SourcePrinter &SP,
+                              bool InlineRelocs) {
   const MCSubtargetInfo *STI = PrimarySTI;
   MCDisassembler *DisAsm = PrimaryDisAsm;
   bool PrimaryIsThumb = false;
@@ -1221,6 +1275,9 @@
     SmallString<40> Comments;
     raw_svector_ostream CommentStream(Comments);
 
+    std::vector<MarkupSpan> MarkupSpans;
+    IP->setMarkupSpans(MarkupSpans);
+
     ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
         unwrapOrError(Section.getContents(), Obj->getFileName()));
 
@@ -1378,12 +1435,18 @@
         if (Size == 0)
           Size = 1;
 
-        PIP.printInst(
-            *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
-            {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(),
-            "", *STI, &SP, &Rels);
+        std::string InstText;
+        raw_string_ostream InstTextStream(InstText);
+        size_t HeaderLength = 0;
+        PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
+                      Bytes.slice(Index, Size),
+                      {SectionAddr + Index + VMAAdjustment, Section.getIndex()},
+                      InstTextStream, "", *STI, &SP, HeaderLength, &Rels);
+        printMarkedUpInst(StringRef(InstTextStream.str()), MarkupSpans,
+                          HeaderLength);
         outs() << CommentStream.str();
         Comments.clear();
+        MarkupSpans.clear();
 
         // Try to resolve the target of a call, tail call, etc. to a specific
         // symbol.
@@ -1549,6 +1612,7 @@
     report_error(Obj->getFileName(),
                  "no instruction printer for target " + TripleName);
   IP->setPrintImmHex(PrintImmHex);
+  IP->setUseMarkup(Highlight);
 
   PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
   SourcePrinter SP(Obj, TheTarget->getName());