diff --git a/llvm/include/llvm/MC/MCInstPrinter.h b/llvm/include/llvm/MC/MCInstPrinter.h
--- a/llvm/include/llvm/MC/MCInstPrinter.h
+++ b/llvm/include/llvm/MC/MCInstPrinter.h
@@ -34,6 +34,81 @@
 
 } // end namespace HexStyle
 
+enum class MarkupType {
+  Reg,
+  Imm,
+  Mem,
+};
+
+// MarkupSpan represents a marked up range in the disassembly. For example:
+//
+//      Pos  InnerPos
+//      v    v
+//  ... <mem:-16(<reg:%rbp>)> ...
+//           ~~~~~~~~~~~~~~~ InnerLength
+//      ~~~~~~~~~~~~~~~~~~~~~ Length
+//
+struct MarkupSpan {
+  MarkupType Type;
+  // The offset of the beginning of the marked up range.
+  size_t Pos;
+  // The length of the marked up range.
+  size_t Length;
+  // The offset of the beginning of the inner text.
+  size_t InnerPos;
+  // The length of the inner text.
+  size_t InnerLength;
+  // Marked up ranges in the inner text. In the example above,
+  // InnerSpans contains one MarkupSpan which represents `<reg:%rbp>`.
+  // The vector lives on the heap so that its address survives moving the
+  // MarkupSpan; MarkupState keeps raw pointers to it while the span is open.
+  std::unique_ptr<std::vector<MarkupSpan>> InnerSpans;
+
+  MarkupSpan(MarkupType Type, size_t Pos, size_t Length, size_t InnerPos,
+             size_t InnerLength)
+      : Type(Type), Pos(Pos), Length(Length), InnerPos(InnerPos),
+        InnerLength(InnerLength), InnerSpans(new std::vector<MarkupSpan>()) {}
+};
+
+// MarkupState holds the state used by llvm::operator<<(raw_ostream&)
+// implementations for MarkupStart and MarkupEnd.
+struct MarkupState {
+  // True if the marked up disassembly is enabled. Defaults to false in case
+  // resetMarkup() isn't called in the printer.
+  bool Enabled = false;
+  // The offset of the beginning of the disassembly in the stream. Zero until
+  // reset() records the stream position.
+  size_t StartOffset = 0;
+  // A stack of pointers which points to SpansOut_ or InnerSpans of unclosed
+  // Spans.
+  // TODO: Rename to an appropriate name.
+  std::vector<std::vector<MarkupSpan> *> SpansVectors;
+
+  // Records OS's current position as StartOffset, sets Enabled, and restarts
+  // the stack with SpansOut_ as its only entry (empty if SpansOut_ is null).
+  void reset(raw_ostream &OS, bool Enabled_,
+             std::vector<MarkupSpan> *SpansOut_);
+  // Returns OS's current position relative to StartOffset.
+  size_t offset(raw_ostream &OS) const;
+};
+
+// Stream manipulator that opens a marked up range of the given type.
+struct MarkupStart {
+  MarkupState &State;
+  MarkupType Type;
+
+  MarkupStart(MarkupState &State, MarkupType Type) : State(State), Type(Type) {}
+  friend raw_ostream &operator<<(raw_ostream &OS, const MarkupStart &M);
+};
+
+// Stream manipulator that closes the most recently opened marked up range.
+struct MarkupEnd {
+  MarkupState &State;
+
+  MarkupEnd(MarkupState &State) : State(State) {}
+  friend raw_ostream &operator<<(raw_ostream &OS, const MarkupEnd &M);
+};
+
 /// This is an instance of a target assembly language printer that
 /// converts an MCInst to valid target assembly syntax.
 class MCInstPrinter {
@@ -55,6 +130,12 @@
   /// Which style to use for printing hexadecimal values.
   HexStyle::Style PrintHexStyle = HexStyle::C;
 
+  /// Markup states.
+  // NOTE(review): this member shares its name with its type; GCC is known to
+  // reject that ("changes meaning of") -- confirm and consider renaming.
+  MarkupState MarkupState;
+  std::vector<MarkupSpan> *MarkupSpans = nullptr;
+
   /// Utility function for printing annotations.
   void printAnnotation(raw_ostream &OS, StringRef Annot);
 
@@ -85,9 +166,17 @@
   bool getUseMarkup() const { return UseMarkup; }
   void setUseMarkup(bool Value) { UseMarkup = Value; }
 
+  // Specify an output vector of marked up ranges.
+  void setMarkupSpans(std::vector<MarkupSpan> &MS) { MarkupSpans = &MS; }
+
+  // Resets the MarkupState. This should be called first in printInst().
+  void resetMarkup(raw_ostream &OS);
+
   /// Utility functions to make adding mark ups simpler.
   StringRef markup(StringRef s) const;
   StringRef markup(StringRef a, StringRef b) const;
+  MarkupStart startMarkup(MarkupType Type);
+  MarkupEnd endMarkup();
 
   bool getPrintImmHex() const { return PrintImmHex; }
   void setPrintImmHex(bool Value) { PrintImmHex = Value; }
diff --git a/llvm/lib/MC/MCInstPrinter.cpp b/llvm/lib/MC/MCInstPrinter.cpp
--- a/llvm/lib/MC/MCInstPrinter.cpp
+++ b/llvm/lib/MC/MCInstPrinter.cpp
@@ -71,6 +71,10 @@
     return b;
 }
 
+void MCInstPrinter::resetMarkup(raw_ostream &OS) {
+  MarkupState.reset(OS, getUseMarkup(), MarkupSpans);
+}
+
 // For asm-style hex (e.g. 0ffh) the first digit always has to be a number.
 static bool needsLeadingZero(uint64_t Value)
 {
@@ -123,3 +127,80 @@
   }
   llvm_unreachable("unsupported print style");
 }
+
+MarkupStart MCInstPrinter::startMarkup(MarkupType Type) {
+  return MarkupStart(MarkupState, Type);
+}
+
+MarkupEnd MCInstPrinter::endMarkup() { return MarkupEnd(MarkupState); }
+
+void MarkupState::reset(raw_ostream &OS, bool Enabled_,
+                        std::vector<MarkupSpan> *SpansOut) {
+  Enabled = Enabled_;
+  StartOffset = OS.tell();
+  SpansVectors.clear();
+  if (SpansOut)
+    SpansVectors.push_back(SpansOut);
+}
+
+size_t MarkupState::offset(raw_ostream &OS) const {
+  return OS.tell() - StartOffset;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MarkupStart &M) {
+  if (!M.State.Enabled)
+    return OS;
+
+  StringRef TypeStr;
+  switch (M.Type) {
+  case MarkupType::Imm:
+    TypeStr = "imm";
+    break;
+  case MarkupType::Reg:
+    TypeStr = "reg";
+    break;
+  case MarkupType::Mem:
+    TypeStr = "mem";
+    break;
+  }
+
+  // TODO: support tag-modifier-list. As far as I investigated, it is not
+  // used though. See: https://llvm.org/docs/MarkedUpDisassembly.html
+  // The inner text starts after "<", the tag name and ":".
+  const size_t PrefixLength = 2 + TypeStr.size();
+  if (!M.State.SpansVectors.empty()) {
+    std::vector<MarkupSpan> *CurrentInnerSpans = M.State.SpansVectors.back();
+
+    // Length and InnerLength are filled in by the matching MarkupEnd.
+    const size_t Pos = M.State.offset(OS);
+    MarkupSpan Span(M.Type, Pos, 0, Pos + PrefixLength, 0);
+
+    // Grab the heap-allocated inner vector before Span is moved from; the
+    // vector itself does not move, so the pointer stays valid.
+    std::vector<MarkupSpan> *InnerSpans = Span.InnerSpans.get();
+    CurrentInnerSpans->push_back(std::move(Span));
+    M.State.SpansVectors.push_back(InnerSpans);
+  }
+  OS << "<" << TypeStr << ":";
+  return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MarkupEnd &M) {
+  if (!M.State.Enabled)
+    return OS;
+
+  // The bottom stack entry is the caller-provided output vector, so a span is
+  // open only while the stack holds more than one entry.
+  assert(M.State.SpansVectors.size() != 1 &&
+         "Missing the corresponding startMarkup().");
+  if (M.State.SpansVectors.size() > 1) {
+    M.State.SpansVectors.pop_back();
+    MarkupSpan &Span = M.State.SpansVectors.back()->back();
+    const size_t End = M.State.offset(OS);
+    // Length also covers the closing ">" written below.
+    Span.Length = End - Span.Pos + 1;
+    Span.InnerLength = End - Span.InnerPos;
+  }
+  OS << ">";
+  return OS;
+}