diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
--- a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Regex.h"
 
 namespace llvm {
@@ -52,7 +53,7 @@
 /// Parses a log containing symbolizer markup into a sequence of nodes.
 class MarkupParser {
 public:
-  MarkupParser();
+  MarkupParser(StringSet<> MultilineTags = {});
 
   /// Parses an individual \p Line of input.
   ///
@@ -60,28 +61,48 @@
   /// by nextNode() are discarded. The nodes returned by nextNode() may
   /// reference the input string, so it must be retained by the caller until the
   /// last use.
+  ///
+  /// Note that some elements may span multiple lines. If a line ends with the
+  /// start of one of these elements, then no nodes will be produced until
+  /// either the end or something that cannot be part of an element is
+  /// encountered. This may only occur after multiple calls to parseLine(),
+  /// corresponding to the lines of the multi-line element.
   void parseLine(StringRef Line);
 
-  /// Returns the next node from the most recent parseLine() call.
+  /// Inform the parser that the input stream has ended.
+  ///
+  /// This allows the parser to finish any deferred processing (e.g., an
+  /// in-progress multi-line element) and may cause nextNode() to return
+  /// additional nodes.
+  void flush();
+
+  /// Returns the next node in the input sequence.
   ///
   /// Calling nextNode() may invalidate the contents of the node returned by the
   /// previous call.
   ///
   /// \returns the next markup node or None if none remain.
-  Optional<MarkupNode> nextNode() {
-    if (!NextIdx)
-      NextIdx = 0;
-    if (*NextIdx == Buffer.size()) {
-      NextIdx.reset();
-      Buffer.clear();
-      return None;
-    }
-    return std::move(Buffer[(*NextIdx)++]);
-  }
+  Optional<MarkupNode> nextNode();
 
 private:
   Optional<MarkupNode> parseElement(StringRef Line);
   void parseTextOutsideMarkup(StringRef Text);
+  Optional<StringRef> parseMultiLineBegin(StringRef Line);
+  Optional<StringRef> parseMultiLineEnd(StringRef Line);
+
+  // Tags of elements that can span multiple lines.
+  const StringSet<> MultilineTags;
+
+  // Contents of a multi-line element that has finished being parsed. Retained
+  // to keep returned StringRefs for the contents valid.
+  std::string FinishedMultiline;
+
+  // Contents of a multi-line element that is still in the process of receiving
+  // lines.
+  std::string InProgressMultiline;
+
+  // The line currently being parsed.
+  StringRef Line;
 
   // Buffer for nodes parsed from the current line.
   SmallVector<MarkupNode> Buffer;
diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
--- a/llvm/lib/DebugInfo/Symbolize/Markup.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/DebugInfo/Symbolize/Markup.h"
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 
 namespace llvm {
@@ -24,7 +25,8 @@
 // "\033[30m" -- "\033[37m"
 static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
 
-MarkupParser::MarkupParser() : SGRSyntax(SGRSyntaxStr) {}
+MarkupParser::MarkupParser(StringSet<> MultilineTags)
+    : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
 
 static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
   return Str.take_front(Pos - Str.begin());
@@ -35,18 +37,77 @@
 
 void MarkupParser::parseLine(StringRef Line) {
   Buffer.clear();
-  while (!Line.empty()) {
-    // Find the first valid markup element, if any.
-    if (Optional<MarkupNode> Element = parseElement(Line)) {
-      parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
-      Buffer.push_back(std::move(*Element));
-      advanceTo(Line, Element->Text.end());
-    } else {
-      // The line doesn't contain any more markup elements, so emit it as text.
-      parseTextOutsideMarkup(Line);
-      return;
+  // Also drop the read cursor: nodes left unread from the previous line are
+  // discarded, and a stale index would skip nodes parsed from this line.
+  NextIdx.reset();
+  FinishedMultiline.clear();
+  this->Line = Line;
+}
+
+Optional<MarkupNode> MarkupParser::nextNode() {
+  // Pull something out of the buffer if possible.
+  if (!Buffer.empty()) {
+    if (!NextIdx)
+      NextIdx = 0;
+    if (NextIdx < Buffer.size())
+      return std::move(Buffer[(*NextIdx)++]);
+    NextIdx.reset();
+    Buffer.clear();
+  }
+
+  // The buffer is empty, so parse the next bit of the line.
+
+  if (Line.empty())
+    return None;
+
+  if (!InProgressMultiline.empty()) {
+    if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
+      llvm::append_range(InProgressMultiline, *MultilineEnd);
+      assert(FinishedMultiline.empty() &&
+             "At most one multi-line element can be finished at a time.");
+      FinishedMultiline.swap(InProgressMultiline);
+      // Parse the multi-line element as if it were contiguous.
+      advanceTo(Line, MultilineEnd->end());
+      return *parseElement(FinishedMultiline);
     }
+
+    // The whole line is part of the multi-line element.
+    llvm::append_range(InProgressMultiline, Line);
+    Line = Line.drop_front(Line.size());
+    return None;
+  }
+
+  // Find the first valid markup element, if any.
+  if (Optional<MarkupNode> Element = parseElement(Line)) {
+    parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
+    Buffer.push_back(std::move(*Element));
+    advanceTo(Line, Element->Text.end());
+    return nextNode();
+  }
+
+  // Since there were no valid elements remaining, see if the line opens a
+  // multi-line element.
+  if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
+    // Emit any text before the element.
+    parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
+
+    // Begin recording the multi-line element.
+    llvm::append_range(InProgressMultiline, *MultilineBegin);
+    Line = Line.drop_front(Line.size());
+    return nextNode();
   }
+
+  // The line doesn't contain any more markup elements, so emit it as text.
+  parseTextOutsideMarkup(Line);
+  Line = Line.drop_front(Line.size());
+  return nextNode();
+}
+
+void MarkupParser::flush() {
+  if (InProgressMultiline.empty())
+    return;
+  FinishedMultiline.swap(InProgressMultiline);
+  parseTextOutsideMarkup(FinishedMultiline);
 }
 
 // Finds and returns the next valid markup element in the given line. Returns
@@ -107,5 +168,39 @@
   Buffer.push_back(textNode(Text));
 }
 
+// Given that a line doesn't contain any valid markup, see if it ends with the
+// start of a multi-line element. If so, returns the beginning.
+Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
+  // A multi-line begin marker must be the last one on the line.
+  size_t BeginPos = Line.rfind("{{{");
+  if (BeginPos == StringRef::npos)
+    return None;
+  size_t BeginTagPos = BeginPos + 3;
+
+  // If there are any end markers afterwards, the begin marker cannot belong to
+  // a multi-line element.
+  size_t EndPos = Line.find("}}}", BeginTagPos);
+  if (EndPos != StringRef::npos)
+    return None;
+
+  // Check whether the tag is registered multi-line.
+  size_t EndTagPos = Line.find(':', BeginTagPos);
+  if (EndTagPos == StringRef::npos)
+    return None;
+  StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
+  if (!MultilineTags.contains(Tag))
+    return None;
+  return Line.substr(BeginPos);
+}
+
+// See if the line begins with the ending of an in-progress multi-line element.
+// If so, return the ending.
+Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
+  size_t EndPos = Line.find("}}}");
+  if (EndPos == StringRef::npos)
+    return None;
+  return Line.take_front(EndPos + 3);
+}
+
 } // end namespace symbolize
 } // end namespace llvm
diff --git a/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
--- a/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
+++ b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
@@ -44,6 +44,14 @@
   EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("kept")));
   EXPECT_THAT(Parser.nextNode(), None);
 
+  Parser.parseLine("text\n");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("text\n")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("text\r\n");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("text\r\n")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
   Parser.parseLine("{{{");
   EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{")));
   EXPECT_THAT(Parser.nextNode(), None);
@@ -145,4 +153,69 @@
   EXPECT_THAT(Parser.nextNode(), None);
 }
 
+TEST(SymbolizerMarkup, MultilineElements) {
+  MarkupParser Parser(/*MultilineTags=*/{"first", "second"});
+
+  Parser.parseLine("{{{tag:");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{tag:")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{first:");
+  EXPECT_THAT(Parser.nextNode(), None);
+  Parser.parseLine("}}}{{{second:");
+  EXPECT_THAT(
+      Parser.nextNode(),
+      testing::Optional(isNode("{{{first:}}}", "first", ElementsAre(""))));
+  EXPECT_THAT(Parser.nextNode(), None);
+  Parser.parseLine("}}}");
+  EXPECT_THAT(
+      Parser.nextNode(),
+      testing::Optional(isNode("{{{second:}}}", "second", ElementsAre(""))));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{before{{{first:");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{before")));
+  EXPECT_THAT(Parser.nextNode(), None);
+  Parser.parseLine("line");
+  EXPECT_THAT(Parser.nextNode(), None);
+  Parser.parseLine("}}}after");
+  EXPECT_THAT(Parser.nextNode(),
+              testing::Optional(
+                  isNode("{{{first:line}}}", "first", ElementsAre("line"))));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("after")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{first:");
+  EXPECT_THAT(Parser.nextNode(), None);
+  Parser.flush();
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{first:")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{first:\n");
+  EXPECT_THAT(Parser.nextNode(), None);
+  Parser.parseLine("}}}\n");
+  EXPECT_THAT(
+      Parser.nextNode(),
+      testing::Optional(isNode("{{{first:\n}}}", "first", ElementsAre("\n"))));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("\n")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{first:\r\n");
+  EXPECT_THAT(Parser.nextNode(), None);
+  Parser.parseLine("}}}\r\n");
+  EXPECT_THAT(Parser.nextNode(),
+              testing::Optional(
+                  isNode("{{{first:\r\n}}}", "first", ElementsAre("\r\n"))));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("\r\n")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{first:");
+  EXPECT_THAT(Parser.nextNode(), None);
+  Parser.parseLine("\033[0m}}}");
+  EXPECT_THAT(Parser.nextNode(),
+              testing::Optional(isNode("{{{first:\033[0m}}}", "first",
+                                       ElementsAre("\033[0m"))));
+  EXPECT_THAT(Parser.nextNode(), None);
+}
+
 } // namespace