diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
@@ -0,0 +1,101 @@
+//===- Markup.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the log symbolizer markup data model and parser.
+///
+/// \todo Add a link to the reference documentation once added.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
+
+#include <cstddef>
+#include <utility>
+
+namespace llvm {
+namespace symbolize {
+
+/// A node of symbolizer markup.
+///
+/// If only the Text field is set, this represents a region of text outside a
+/// markup element. ANSI SGR control codes are also reported this way; if
+/// detected, then the control code will be the entirety of the Text field, and
+/// any surrounding text will be reported as preceding and following nodes.
+struct MarkupNode {
+  /// The full text of this node in the input.
+  StringRef Text;
+
+  /// If this represents an element, the tag. Otherwise, empty.
+  StringRef Tag;
+
+  /// If this represents an element with fields, a list of the field contents.
+  /// Otherwise, empty.
+  SmallVector<StringRef> Fields;
+
+  bool operator==(const MarkupNode &Other) const {
+    return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
+  }
+  bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
+};
+
+/// Parses a log containing symbolizer markup into a sequence of nodes.
+class MarkupParser {
+public:
+  MarkupParser();
+
+  /// Parses an individual \p Line of input.
+  ///
+  /// Nodes from the previous parseLine() call that haven't yet been extracted
+  /// by nextNode() are discarded. The nodes returned by nextNode() may
+  /// reference the input string, so it must be retained by the caller until the
+  /// last use.
+  void parseLine(StringRef Line);
+
+  /// Returns the next node from the most recent parseLine() call.
+  ///
+  /// Calling nextNode() may invalidate the contents of the node returned by the
+  /// previous call.
+  ///
+  /// \returns the next markup node or None if none remain.
+  Optional<MarkupNode> nextNode() {
+    if (!NextIdx)
+      NextIdx = 0;
+    if (*NextIdx == Buffer.size()) {
+      NextIdx.reset();
+      Buffer.clear();
+      return None;
+    }
+    return std::move(Buffer[(*NextIdx)++]);
+  }
+
+private:
+  Optional<MarkupNode> parseElement(StringRef Line);
+  void parseTextOutsideMarkup(StringRef Text);
+
+  // Buffer for nodes parsed from the current line.
+  SmallVector<MarkupNode> Buffer;
+
+  // Next buffer index to return, or None if nextNode() has not been called
+  // since the most recent parseLine().
+  Optional<size_t> NextIdx;
+
+  // Regular expression matching supported ANSI SGR escape sequences.
+  const Regex SGRSyntax;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
diff --git a/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt b/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt
--- a/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_component_library(LLVMSymbolize
   DIFetcher.cpp
   DIPrinter.cpp
+  Markup.cpp
   SymbolizableObjectFile.cpp
   Symbolize.cpp
 
diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
@@ -0,0 +1,122 @@
+//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the log symbolizer markup data model and parser.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+
+#include "llvm/ADT/StringExtras.h"
+
+#include <tuple>
+
+namespace llvm {
+namespace symbolize {
+
+// Matches the following:
+//   "\033[0m"
+//   "\033[1m"
+//   "\033[30m" -- "\033[37m"
+static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
+
+MarkupParser::MarkupParser() : SGRSyntax(SGRSyntaxStr) {}
+
+// Returns the prefix of Str that ends at Pos, which must point into Str.
+static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
+  return Str.take_front(Pos - Str.begin());
+}
+
+// Removes the prefix of Str that ends at Pos, which must point into Str.
+static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
+  Str = Str.drop_front(Pos - Str.begin());
+}
+
+void MarkupParser::parseLine(StringRef Line) {
+  // Discard any unread nodes from the previous line and restart iteration; a
+  // stale NextIdx would otherwise skip nodes or index past the new buffer.
+  Buffer.clear();
+  NextIdx.reset();
+  while (!Line.empty()) {
+    // Find the first valid markup element, if any.
+    if (Optional<MarkupNode> Element = parseElement(Line)) {
+      parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
+      // Advance past the element before moving it into the buffer so that it
+      // is never read after being moved from.
+      advanceTo(Line, Element->Text.end());
+      Buffer.push_back(std::move(*Element));
+    } else {
+      // The line doesn't contain any more markup elements, so emit it as text.
+      parseTextOutsideMarkup(Line);
+      return;
+    }
+  }
+}
+
+// Finds and returns the next valid markup element in the given line. Returns
+// None if the line contains no valid elements.
+Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
+  while (true) {
+    // Find next element using begin and end markers.
+    size_t BeginPos = Line.find("{{{");
+    if (BeginPos == StringRef::npos)
+      return None;
+    size_t EndPos = Line.find("}}}", BeginPos + 3);
+    if (EndPos == StringRef::npos)
+      return None;
+    EndPos += 3;
+    MarkupNode Element;
+    Element.Text = Line.slice(BeginPos, EndPos);
+    Line = Line.substr(EndPos);
+
+    // Parse tag. Elements with an empty tag are invalid, so skip them.
+    StringRef Content = Element.Text.drop_front(3).drop_back(3);
+    StringRef FieldsContent;
+    std::tie(Element.Tag, FieldsContent) = Content.split(':');
+    if (Element.Tag.empty())
+      continue;
+
+    // Parse fields. A trailing ':' with no content denotes one empty field.
+    if (!FieldsContent.empty())
+      FieldsContent.split(Element.Fields, ":");
+    else if (Content.back() == ':')
+      Element.Fields.push_back(FieldsContent);
+
+    return Element;
+  }
+}
+
+// Returns a node that represents plain text (no tag and no fields).
+static MarkupNode textNode(StringRef Text) {
+  MarkupNode Node;
+  Node.Text = Text;
+  return Node;
+}
+
+// Parses a region of text known to be outside any markup elements. Such text
+// may still contain SGR control codes, so the region is further subdivided into
+// control codes and true text regions.
+void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
+  if (Text.empty())
+    return;
+  SmallVector<StringRef> Matches;
+  while (SGRSyntax.match(Text, &Matches)) {
+    // Emit any text before the SGR element.
+    if (Matches.begin()->begin() != Text.begin())
+      Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
+
+    Buffer.push_back(textNode(*Matches.begin()));
+    advanceTo(Text, Matches.begin()->end());
+  }
+  if (!Text.empty())
+    Buffer.push_back(textNode(Text));
+}
+
+} // end namespace symbolize
+} // end namespace llvm
diff --git a/llvm/unittests/DebugInfo/CMakeLists.txt b/llvm/unittests/DebugInfo/CMakeLists.txt
--- a/llvm/unittests/DebugInfo/CMakeLists.txt
+++ b/llvm/unittests/DebugInfo/CMakeLists.txt
@@ -3,3 +3,4 @@
 add_subdirectory(GSYM)
 add_subdirectory(MSF)
 add_subdirectory(PDB)
+add_subdirectory(Symbolizer)
diff --git a/llvm/unittests/DebugInfo/Symbolizer/CMakeLists.txt b/llvm/unittests/DebugInfo/Symbolizer/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/DebugInfo/Symbolizer/CMakeLists.txt
@@ -0,0 +1,3 @@
+set(LLVM_LINK_COMPONENTS Symbolize)
+add_llvm_unittest(DebugInfoSymbolizerTests MarkupTest.cpp)
+target_link_libraries(DebugInfoSymbolizerTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
@@ -0,0 +1,166 @@
+//===- unittest/DebugInfo/Symbolizer/MarkupTest.cpp - Markup parser tests -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+using namespace llvm;
+using namespace llvm::symbolize;
+using namespace testing;
+
+// Matches a MarkupNode with the given Text, Tag, and Fields.
+Matcher<MarkupNode> isNode(StringRef Text, StringRef Tag = "",
+                           Matcher<SmallVector<StringRef>> Fields = IsEmpty()) {
+  return AllOf(Field("Text", &MarkupNode::Text, Text),
+               Field("Tag", &MarkupNode::Tag, Tag),
+               Field("Fields", &MarkupNode::Fields, Fields));
+}
+
+TEST(SymbolizerMarkup, NoLines) { EXPECT_EQ(MarkupParser{}.nextNode(), None); }
+
+TEST(SymbolizerMarkup, LinesWithoutMarkup) {
+  MarkupParser Parser;
+
+  Parser.parseLine("text");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("text")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("discarded");
+  Parser.parseLine("kept");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("kept")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{}}")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{}}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{}}}")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{}}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{}}}")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{:field}}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{:field}}}")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{tag:");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{tag:")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{tag:field}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{tag:field}}")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("a\033[2mb");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("a\033[2mb")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("a\033[38mb");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("a\033[38mb")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("a\033[4mb");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("a\033[4mb")));
+  EXPECT_THAT(Parser.nextNode(), None);
+}
+
+TEST(SymbolizerMarkup, LinesWithMarkup) {
+  MarkupParser Parser;
+
+  Parser.parseLine("{{{tag}}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{tag}}}", "tag")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{tag:f1:f2:f3}}}");
+  EXPECT_THAT(Parser.nextNode(),
+              testing::Optional(isNode("{{{tag:f1:f2:f3}}}", "tag",
+                                       ElementsAre("f1", "f2", "f3"))));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{tag:}}}");
+  EXPECT_THAT(Parser.nextNode(),
+              testing::Optional(isNode("{{{tag:}}}", "tag", ElementsAre(""))));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{tag:}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{tag:}}")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{t2g}}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{t2g}}}", "t2g")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{tAg}}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{tAg}}}", "tAg")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("a{{{b}}}c{{{d}}}e");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("a")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{b}}}", "b")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("c")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{d}}}", "d")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("e")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{}}}{{{tag}}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{}}}")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{tag}}}", "tag")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("\033[0mA\033[1mB\033[30mC\033[37m");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("\033[0m")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("A")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("\033[1m")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("B")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("\033[30m")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("C")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("\033[37m")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  Parser.parseLine("{{{tag:\033[0m}}}");
+  EXPECT_THAT(Parser.nextNode(),
+              testing::Optional(
+                  isNode("{{{tag:\033[0m}}}", "tag", ElementsAre("\033[0m"))));
+  EXPECT_THAT(Parser.nextNode(), None);
+}
+
+TEST(SymbolizerMarkup, ParseLineResetsIteration) {
+  MarkupParser Parser;
+
+  // Parse a new line after only partially consuming the previous one.
+  Parser.parseLine("a{{{b}}}c");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("a")));
+  Parser.parseLine("d{{{e}}}");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("d")));
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("{{{e}}}", "e")));
+  EXPECT_THAT(Parser.nextNode(), None);
+
+  // A partially consumed line followed by an empty line yields no nodes.
+  Parser.parseLine("a{{{b}}}c");
+  EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode("a")));
+  Parser.parseLine("");
+  EXPECT_THAT(Parser.nextNode(), None);
+}
+
+} // namespace