diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
--- a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 
 namespace llvm {
 namespace symbolize {
@@ -50,6 +51,8 @@
 /// Parses a log containing symbolizer markup into a sequence of elements.
 class MarkupParser {
 public:
+  MarkupParser(StringSet<> MultilineTags = {});
+
   /// Parses an individual \p Line of input.
   ///
   /// After parseLine() is called, it must not be called again until
@@ -58,6 +61,10 @@
   /// the last use.
   void parseLine(StringRef Line);
 
+  /// Abort any unfinished multi-line tags and output the contents as a text
+  /// element. This is typically used when EOF is reached on the input stream.
+  void flush();
+
   /// Returns the next element in the input sequence.
   ///
   /// This is either a markup element or a region of text. The next call to
@@ -66,8 +73,10 @@
   ///
   /// \returns the next markup element or None if none remain.
   Optional<MarkupElement> nextElement() {
-    if (Buffer.empty())
+    if (Buffer.empty()) {
+      FinishedMultiline.clear();
       return None;
+    }
     Optional<MarkupElement> Next = std::move(Buffer.front());
     Buffer.pop();
     return Next;
@@ -75,6 +84,19 @@
 
 private:
   Optional<MarkupElement> parseElement(StringRef Line);
+  Optional<StringRef> parseMultiLineBegin(StringRef Line);
+  Optional<StringRef> parseMultiLineEnd(StringRef Line);
+
+  // Tags of elements that can span multiple lines.
+  const StringSet<> MultilineTags;
+
+  // Contents of a multi-line element that has finished being parsed. Retained
+  // to keep returned StringRefs for the contents valid.
+  std::string FinishedMultiline;
+
+  // Contents of a multi-line element that is still in the process of receiving
+  // lines.
+  std::string InProgressMultiline;
 
   std::queue<MarkupElement> Buffer;
 };
diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
--- a/llvm/lib/DebugInfo/Symbolize/Markup.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
@@ -13,36 +13,78 @@
 
 #include "llvm/DebugInfo/Symbolize/Markup.h"
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 
 namespace llvm {
 namespace symbolize {
 
+MarkupParser::MarkupParser(StringSet<> MultilineTags)
+    : MultilineTags(std::move(MultilineTags)) {}
+
+static MarkupElement textElement(StringRef Text) {
+  MarkupElement Element;
+  Element.Text = Text;
+  return Element;
+}
+
 void MarkupParser::parseLine(StringRef Line) {
   assert(Buffer.empty() &&
          "Cannot call parseLine before all elements have been extracted.");
   while (!Line.empty()) {
+    if (!InProgressMultiline.empty()) {
+      if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
+        llvm::append_range(InProgressMultiline, *MultilineEnd);
+        assert(FinishedMultiline.empty() &&
+               "At most one multiline can be finished at a time.");
+        FinishedMultiline.swap(InProgressMultiline);
+        // Parse the multi-line element as if it were contiguous.
+        parseLine(FinishedMultiline);
+        Line = Line.drop_front(MultilineEnd->end() - Line.begin());
+        continue;
+      }
+
+      // The whole line is part of the multi-line tag.
+      llvm::append_range(InProgressMultiline, Line);
+      return;
+    }
+
     if (Optional<MarkupElement> Element = parseElement(Line)) {
       // Emit any text before the first valid element.
-      if (Element->Text.begin() != Line.begin()) {
-        MarkupElement PrecedingText;
-        PrecedingText.Text =
-            Line.take_front(Element->Text.begin() - Line.begin());
-        Buffer.push(std::move(PrecedingText));
-      }
+      if (Element->Text.begin() != Line.begin())
+        Buffer.push(
+            textElement(Line.take_front(Element->Text.begin() - Line.begin())));
       // Emit the element and advance the line.
       Line = Line.drop_front(Element->Text.end() - Line.begin());
       Buffer.push(std::move(*Element));
-    } else {
-      // The line doesn't contain any markup elements, so emit it as text.
-      MarkupElement LineElement;
-      LineElement.Text = Line;
-      Buffer.push(std::move(LineElement));
+      continue;
+    }
+
+    if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
+      // Emit any text before the multi-line begin marker.
+      if (MultilineBegin->begin() != Line.begin())
+        Buffer.push(textElement(
+            Line.take_front(MultilineBegin->begin() - Line.begin())));
+      // Begin recording the multi-line tag.
+      assert(InProgressMultiline.empty() &&
+             "At most one multiline can begin at a time.");
+      llvm::append_range(InProgressMultiline, *MultilineBegin);
       return;
     }
+
+    // The line doesn't contain any markup elements, so emit it as text.
+    Buffer.push(textElement(Line));
+    return;
   }
 }
 
+void MarkupParser::flush() {
+  if (InProgressMultiline.empty())
+    return;
+  FinishedMultiline.swap(InProgressMultiline);
+  Buffer.push(textElement(FinishedMultiline));
+}
+
 // Finds and returns the next valid markup element in the given line. Returns
 // None if the line contains no valid elements.
 Optional<MarkupElement> MarkupParser::parseElement(StringRef Line) {
@@ -79,5 +121,39 @@
   }
 }
 
+// Given that a line doesn't contain any valid markup, see if it ends with the
+// start of a multi-line element. If so, returns the beginning.
+Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
+  // A multi-line begin marker must be the last one on the line.
+  size_t BeginPos = Line.rfind("{{{");
+  if (BeginPos == StringRef::npos)
+    return None;
+  size_t BeginTagPos = BeginPos + 3;
+
+  // If there are any end markers afterwards, the begin marker cannot belong to
+  // a multi-line tag.
+  size_t EndPos = Line.find("}}}", BeginTagPos);
+  if (EndPos != StringRef::npos)
+    return None;
+
+  // Check whether the tag is registered multi-line.
+  size_t EndTagPos = Line.find(':', BeginTagPos);
+  if (EndTagPos == StringRef::npos)
+    return None;
+  StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
+  if (!MultilineTags.contains(Tag))
+    return None;
+  return Line.substr(BeginPos);
+}
+
+// See if the line begins with the ending of an in-progress multi-line element.
+// If so, return the ending.
+Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
+  size_t EndPos = Line.find("}}}");
+  if (EndPos == StringRef::npos)
+    return None;
+  return Line.take_front(EndPos + 3);
+}
+
 } // end namespace symbolize
 } // end namespace llvm
diff --git a/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
--- a/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
+++ b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
@@ -42,6 +42,14 @@
   EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("text")));
   EXPECT_THAT(Parser.nextElement(), None);
 
+  Parser.parseLine("text\n");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("text\n")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("text\r\n");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("text\r\n")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
   Parser.parseLine("{{{");
   EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{")));
   EXPECT_THAT(Parser.nextElement(), None);
@@ -119,4 +127,59 @@
   EXPECT_THAT(Parser.nextElement(), None);
 }
 
+TEST(SymbolizerMarkup, MultilineElements) {
+  MarkupParser Parser(/*MultilineTags=*/{"first", "second"});
+
+  Parser.parseLine("{{{tag:");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{tag:")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{first:");
+  EXPECT_THAT(Parser.nextElement(), None);
+  Parser.parseLine("}}}{{{second:");
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{first:}}}", "first")));
+  EXPECT_THAT(Parser.nextElement(), None);
+  Parser.parseLine("}}}");
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{second:}}}", "second")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{before{{{first:");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{before")));
+  EXPECT_THAT(Parser.nextElement(), None);
+  Parser.parseLine("line");
+  EXPECT_THAT(Parser.nextElement(), None);
+  Parser.parseLine("}}}after");
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(
+                  isElement("{{{first:line}}}", "first", ElementsAre("line"))));
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("after")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{first:");
+  EXPECT_THAT(Parser.nextElement(), None);
+  Parser.flush();
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{first:")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{first:\n");
+  EXPECT_THAT(Parser.nextElement(), None);
+  Parser.parseLine("}}}\n");
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(
+                  isElement("{{{first:\n}}}", "first", ElementsAre("\n"))));
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("\n")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{first:\r\n");
+  EXPECT_THAT(Parser.nextElement(), None);
+  Parser.parseLine("}}}\r\n");
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(
+                  isElement("{{{first:\r\n}}}", "first", ElementsAre("\r\n"))));
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("\r\n")));
+  EXPECT_THAT(Parser.nextElement(), None);
+}
+
 } // namespace