diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h --- a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h @@ -19,6 +19,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/Regex.h" namespace llvm { @@ -52,7 +53,7 @@ /// Parses a log containing symbolizer markup into a sequence of elements. class MarkupParser { public: - MarkupParser(); + MarkupParser(StringSet<> MultilineTags = {}); /// Parses an individual \p Line of input. /// @@ -62,6 +63,10 @@ /// the last use. void parseLine(StringRef Line); + /// Abort any unfinished multi-line element and output its contents as text. + /// Does nothing if none is in progress. Typically used when EOF is reached. + void flush(); + /// Returns the next element in the input sequence. /// /// This is either a markup element or a region of text. The next call to @@ -75,6 +80,7 @@ if (*NextIdx == Buffer.size()) { NextIdx.reset(); Buffer.clear(); + FinishedMultiline.clear(); return None; } return std::move(Buffer[(*NextIdx)++]); @@ -83,6 +89,19 @@ private: Optional parseElement(StringRef Line); void parseTextOutsideMarkup(StringRef Text); + Optional parseMultiLineBegin(StringRef Line); + Optional parseMultiLineEnd(StringRef Line); + + // Tags of elements that can span multiple lines. + const StringSet<> MultilineTags; + + // Contents of a multi-line element that has finished being parsed. Retained + // to keep returned StringRefs for the contents valid. + std::string FinishedMultiline; + + // Contents of a multi-line element that is still in the process of receiving + // lines. + std::string InProgressMultiline; // Buffer for elements parsed from the current line. 
SmallVector Buffer; diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp --- a/llvm/lib/DebugInfo/Symbolize/Markup.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/Symbolize/Markup.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" namespace llvm { @@ -24,7 +25,8 @@ // "\033[30m" -- "\033[37m" static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; -MarkupParser::MarkupParser() : SGRSyntax(SGRSyntaxStr) {} +MarkupParser::MarkupParser(StringSet<> MultilineTags) + : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { return Str.take_front(Pos - Str.begin()); @@ -37,19 +39,57 @@ assert(Buffer.empty() && "Cannot call parseLine before all elements have been extracted."); while (!Line.empty()) { + if (!InProgressMultiline.empty()) { + if (Optional MultilineEnd = parseMultiLineEnd(Line)) { + llvm::append_range(InProgressMultiline, *MultilineEnd); + assert(FinishedMultiline.empty() && + "At most one multiline can be finished at a time."); + FinishedMultiline.swap(InProgressMultiline); + // Parse the multi-line element as if it were contiguous. + parseLine(FinishedMultiline); + advanceTo(Line, MultilineEnd->end()); + continue; + } + + // The whole line is part of the multi-line element. + llvm::append_range(InProgressMultiline, Line); + return; + } + // Find the first valid markup element, if any. if (Optional Element = parseElement(Line)) { parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); Buffer.push_back(std::move(*Element)); advanceTo(Line, Element->Text.end()); - } else { - // The line doesn't contain any more markup elements, so emit it as text. - parseTextOutsideMarkup(Line); + continue; + } + + // Since there were no valid elements remaining, see if the line opens a + // multi-line element. 
+ if (Optional MultilineBegin = parseMultiLineBegin(Line)) { + // Emit any text before the element. + parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); + + // Begin recording the multi-line element. + assert(InProgressMultiline.empty() && + "At most one multiline can begin at a time."); + llvm::append_range(InProgressMultiline, *MultilineBegin); return; } + + // The line doesn't contain any more markup elements, so emit it as text. + parseTextOutsideMarkup(Line); + return; } } +void MarkupParser::flush() { + if (InProgressMultiline.empty()) + return; // No multi-line element is pending. + FinishedMultiline.swap(InProgressMultiline); + parseTextOutsideMarkup(FinishedMultiline); +} + // Finds and returns the next valid markup element in the given line. Returns // None if the line contains no valid elements. Optional MarkupParser::parseElement(StringRef Line) { @@ -109,5 +149,39 @@ Buffer.push_back(textElement(Text)); } +// Given that a line doesn't contain any valid markup, see if it ends with the +// start of a registered multi-line element. If so, returns the beginning. +Optional MarkupParser::parseMultiLineBegin(StringRef Line) { + // A multi-line begin marker must be the last one on the line. + size_t BeginPos = Line.rfind("{{{"); + if (BeginPos == StringRef::npos) + return None; + size_t BeginTagPos = BeginPos + 3; + + // If there are any end markers afterwards, the begin marker cannot belong to + // a multi-line element. + size_t EndPos = Line.find("}}}", BeginTagPos); + if (EndPos != StringRef::npos) + return None; + + // Check whether the tag is registered multi-line. + size_t EndTagPos = Line.find(':', BeginTagPos); + if (EndTagPos == StringRef::npos) + return None; + StringRef Tag = Line.slice(BeginTagPos, EndTagPos); + if (!MultilineTags.contains(Tag)) + return None; + return Line.substr(BeginPos); +} + +// See if the line contains a "}}}" end marker for the in-progress multi-line +// element. 
If so, return the line's prefix up to and including that marker. 
+Optional MarkupParser::parseMultiLineEnd(StringRef Line) { + size_t EndPos = Line.find("}}}"); + if (EndPos == StringRef::npos) + return None; + return Line.take_front(EndPos + 3); +} + } // end namespace symbolize } // end namespace llvm diff --git a/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp --- a/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp +++ b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp @@ -42,6 +42,14 @@ EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("text"))); EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("text\n"); + EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("text\n"))); + EXPECT_THAT(Parser.nextElement(), None); + + Parser.parseLine("text\r\n"); + EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("text\r\n"))); + EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("{{{"); EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{"))); EXPECT_THAT(Parser.nextElement(), None); @@ -152,4 +160,67 @@ EXPECT_THAT(Parser.nextElement(), None); } +TEST(SymbolizerMarkup, MultilineElements) { + MarkupParser Parser(/*MultilineTags=*/{"first", "second"}); + + Parser.parseLine("{{{tag:"); // "tag" is not a registered multi-line tag. + EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{tag:"))); + EXPECT_THAT(Parser.nextElement(), None); + + Parser.parseLine("{{{first:"); + EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("}}}{{{second:"); // Closes "first" and opens "second". + EXPECT_THAT(Parser.nextElement(), + testing::Optional(isElement("{{{first:}}}", "first"))); + EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("}}}"); + EXPECT_THAT(Parser.nextElement(), + testing::Optional(isElement("{{{second:}}}", "second"))); + EXPECT_THAT(Parser.nextElement(), None); + + Parser.parseLine("{{{before{{{first:"); // Only the last "{{{" can begin one. 
+ EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{before"))); + EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("line"); + 
EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("}}}after"); + EXPECT_THAT(Parser.nextElement(), + testing::Optional( + isElement("{{{first:line}}}", "first", ElementsAre("line")))); + EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("after"))); + EXPECT_THAT(Parser.nextElement(), None); + + Parser.parseLine("{{{first:"); + EXPECT_THAT(Parser.nextElement(), None); + Parser.flush(); + EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{first:"))); + EXPECT_THAT(Parser.nextElement(), None); + + Parser.parseLine("{{{first:\n"); + EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("}}}\n"); + EXPECT_THAT(Parser.nextElement(), + testing::Optional( + isElement("{{{first:\n}}}", "first", ElementsAre("\n")))); + EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("\n"))); + EXPECT_THAT(Parser.nextElement(), None); + + Parser.parseLine("{{{first:\r\n"); + EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("}}}\r\n"); + EXPECT_THAT(Parser.nextElement(), + testing::Optional( + isElement("{{{first:\r\n}}}", "first", ElementsAre("\r\n")))); + EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("\r\n"))); + EXPECT_THAT(Parser.nextElement(), None); + + Parser.parseLine("{{{first:"); + EXPECT_THAT(Parser.nextElement(), None); + Parser.parseLine("\033[0m}}}"); + EXPECT_THAT(Parser.nextElement(), + testing::Optional(isElement("{{{first:\033[0m}}}", "first", + ElementsAre("\033[0m")))); + EXPECT_THAT(Parser.nextElement(), None); +} + } // namespace