diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
@@ -0,0 +1,97 @@
+//===- Markup.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the log symbolizer markup data model and parser.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <cstddef>
+#include <utility>
+
+namespace llvm {
+namespace symbolize {
+
+/// An element of symbolizer markup.
+///
+/// If only the Text field is set, this represents a region of text outside a
+/// markup element.
+struct MarkupElement {
+  /// The full text of this element in the input.
+  StringRef Text;
+
+  /// If this represents a tag, the tag itself. Otherwise, empty.
+  StringRef Tag;
+
+  /// If this represents a tag with fields, a list of the field contents.
+  /// Otherwise, empty.
+  SmallVector<StringRef> Fields;
+
+  bool operator==(const MarkupElement &Other) const {
+    return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
+  }
+  bool operator!=(const MarkupElement &Other) const {
+    return !(*this == Other);
+  }
+};
+
+/// Parses a log containing symbolizer markup into a sequence of elements.
+class MarkupParser {
+public:
+  /// Parses an individual \p Line of input.
+  ///
+  /// After parseLine() is called, it must not be called again until
+  /// nextElement() returns None. The markup elements returned by nextElement()
+  /// may reference the input string, so it must be retained by the caller until
+  /// the last use.
+  void parseLine(StringRef Line);
+
+  /// Returns the next element in the input sequence.
+  ///
+  /// This is either a markup element or a region of text. The next call to
+  /// nextElement() may invalidate the contents of the element returned by the
+  /// previous call.
+  ///
+  /// \returns the next markup element or None if none remain.
+  Optional<MarkupElement> nextElement() {
+    if (!CurEntry)
+      CurEntry = 0;
+    if (*CurEntry == Buffer.size()) {
+      // Every buffered element has been handed out; reset the parser so that
+      // parseLine() may be called again.
+      CurEntry.reset();
+      Buffer.clear();
+      return None;
+    }
+    // Elements are moved out of the buffer, so each is returned only once.
+    return std::move(Buffer[(*CurEntry)++]);
+  }
+
+private:
+  /// Finds the next valid markup element in \p Line, if any.
+  Optional<MarkupElement> parseElement(StringRef Line);
+
+  // Buffer for elements parsed from the current line.
+  SmallVector<MarkupElement> Buffer;
+
+  // Current entry being returned, or None if nextElement has not yet been
+  // called.
+  Optional<size_t> CurEntry;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
diff --git a/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt b/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt
--- a/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_component_library(LLVMSymbolize
   DIFetcher.cpp
   DIPrinter.cpp
+  Markup.cpp
   SymbolizableObjectFile.cpp
   Symbolize.cpp
 
diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
@@ -0,0 +1,83 @@
+//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the log symbolizer markup data model and parser.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+
+#include "llvm/ADT/StringExtras.h"
+
+#include <cassert>
+#include <tuple>
+
+namespace llvm {
+namespace symbolize {
+
+void MarkupParser::parseLine(StringRef Line) {
+  assert(Buffer.empty() &&
+         "Cannot call parseLine before all elements have been extracted.");
+  while (!Line.empty()) {
+    Optional<MarkupElement> Element = parseElement(Line);
+    if (!Element) {
+      // No valid markup remains, so emit the rest of the line as text.
+      MarkupElement LineElement;
+      LineElement.Text = Line;
+      Buffer.push_back(std::move(LineElement));
+      return;
+    }
+    // Emit any text that precedes the element as its own text element.
+    if (Element->Text.begin() != Line.begin()) {
+      MarkupElement PrecedingText;
+      PrecedingText.Text =
+          Line.take_front(Element->Text.begin() - Line.begin());
+      Buffer.push_back(std::move(PrecedingText));
+    }
+    // Advance past the element before moving it into the buffer.
+    Line = Line.drop_front(Element->Text.end() - Line.begin());
+    Buffer.push_back(std::move(*Element));
+  }
+}
+
+// Finds and returns the next valid markup element in the given line. Returns
+// None if the line contains no valid elements.
+Optional<MarkupElement> MarkupParser::parseElement(StringRef Line) {
+  while (true) {
+    // Find the next candidate element using its begin and end markers.
+    size_t BeginPos = Line.find("{{{");
+    if (BeginPos == StringRef::npos)
+      return None;
+    size_t EndPos = Line.find("}}}", BeginPos + 3);
+    if (EndPos == StringRef::npos)
+      return None;
+    EndPos += 3;
+    MarkupElement Element;
+    Element.Text = Line.slice(BeginPos, EndPos);
+    // If this candidate is rejected below, the search resumes after it.
+    Line = Line.substr(EndPos);
+
+    // Parse tag. Tags must be non-empty runs of lowercase ASCII letters.
+    StringRef Content = Element.Text.drop_front(3).drop_back(3);
+    StringRef FieldsContent;
+    std::tie(Element.Tag, FieldsContent) = Content.split(':');
+    if (Element.Tag.empty() ||
+        any_of(Element.Tag, [](char C) { return C < 'a' || C > 'z'; }))
+      continue;
+
+    // Parse fields.
+    if (!FieldsContent.empty())
+      FieldsContent.split(Element.Fields, ":");
+
+    return Element;
+  }
+}
+
+} // end namespace symbolize
+} // end namespace llvm
diff --git a/llvm/unittests/DebugInfo/CMakeLists.txt b/llvm/unittests/DebugInfo/CMakeLists.txt
--- a/llvm/unittests/DebugInfo/CMakeLists.txt
+++ b/llvm/unittests/DebugInfo/CMakeLists.txt
@@ -3,3 +3,4 @@
 add_subdirectory(GSYM)
 add_subdirectory(MSF)
 add_subdirectory(PDB)
+add_subdirectory(Symbolizer)
diff --git a/llvm/unittests/DebugInfo/Symbolizer/CMakeLists.txt b/llvm/unittests/DebugInfo/Symbolizer/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/DebugInfo/Symbolizer/CMakeLists.txt
@@ -0,0 +1,3 @@
+set(LLVM_LINK_COMPONENTS Symbolize)
+add_llvm_unittest(DebugInfoSymbolizerTests MarkupTest.cpp)
+target_link_libraries(DebugInfoSymbolizerTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/DebugInfo/Symbolizer/MarkupTest.cpp
@@ -0,0 +1,122 @@
+
+//===- unittest/DebugInfo/Symbolizer/MarkupTest.cpp - Markup parser tests -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+using namespace llvm;
+using namespace llvm::symbolize;
+using namespace testing;
+
+// Matches a MarkupElement whose members match Text, Tag, and Fields.
+Matcher<MarkupElement>
+isElement(StringRef Text, StringRef Tag = "",
+          Matcher<SmallVector<StringRef>> Fields = IsEmpty()) {
+  return AllOf(Field("Text", &MarkupElement::Text, Text),
+               Field("Tag", &MarkupElement::Tag, Tag),
+               Field("Fields", &MarkupElement::Fields, Fields));
+}
+
+TEST(SymbolizerMarkup, NoLines) {
+  EXPECT_EQ(MarkupParser{}.nextElement(), None);
+}
+
+TEST(SymbolizerMarkup, LinesWithoutMarkup) {
+  MarkupParser Parser;
+  // Incomplete or ill-formed elements are passed through as plain text.
+  Parser.parseLine("text");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("text")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{}}");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{}}")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{}}}");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{}}}")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{tag:");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{tag:")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{tag:}}");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{tag:}}")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{tag:field}}");
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{tag:field}}")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{t2g}}}");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{t2g}}}")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{tAg}}}");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{tAg}}}")));
+  EXPECT_THAT(Parser.nextElement(), None);
+}
+
+TEST(SymbolizerMarkup, LinesWithMarkup) {
+  MarkupParser Parser;
+  // Valid elements are split out; surrounding text becomes text elements.
+  Parser.parseLine("{{{tag}}}");
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{tag}}}", "tag")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{tag:f1:f2:f3}}}");
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{tag:f1:f2:f3}}}", "tag",
+                                          ElementsAre("f1", "f2", "f3"))));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("a{{{b}}}c{{{d}}}e");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("a")));
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{b}}}", "b")));
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("c")));
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{d}}}", "d")));
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("e")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{}}}{{{tag}}}");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{}}}")));
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{tag}}}", "tag")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{t2g}}}{{{tag}}}");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{t2g}}}")));
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{tag}}}", "tag")));
+  EXPECT_THAT(Parser.nextElement(), None);
+
+  Parser.parseLine("{{{tAg}}}{{{tag}}}");
+  EXPECT_THAT(Parser.nextElement(), testing::Optional(isElement("{{{tAg}}}")));
+  EXPECT_THAT(Parser.nextElement(),
+              testing::Optional(isElement("{{{tag}}}", "tag")));
+  EXPECT_THAT(Parser.nextElement(), None);
+}
+
+} // namespace