diff --git a/lldb/include/lldb/DataFormatters/StringPrinter.h b/lldb/include/lldb/DataFormatters/StringPrinter.h --- a/lldb/include/lldb/DataFormatters/StringPrinter.h +++ b/lldb/include/lldb/DataFormatters/StringPrinter.h @@ -24,6 +24,8 @@ enum class GetPrintableElementType { ASCII, UTF8 }; + enum class EscapeStyle { CXX, Swift }; + class DumpToStreamOptions { public: DumpToStreamOptions() = default; @@ -68,9 +70,9 @@ bool GetIgnoreMaxLength() const { return m_ignore_max_length; } - void SetLanguage(lldb::LanguageType l) { m_language_type = l; } + void SetEscapeStyle(EscapeStyle style) { m_escape_style = style; } - lldb::LanguageType GetLanguage() const { return m_language_type; } + EscapeStyle GetEscapeStyle() const { return m_escape_style; } private: /// The used output stream. @@ -93,12 +95,8 @@ /// True iff a zero bytes ('\0') should terminate the memory region that /// is being dumped. bool m_zero_is_terminator = true; - /// The language that the generated string literal is supposed to be valid - /// for. This changes for example what and how certain characters are - /// escaped. - /// For example, printing the a string containing only a quote (") char - /// with eLanguageTypeC would escape the quote character. - lldb::LanguageType m_language_type = lldb::eLanguageTypeUnknown; + /// The language-specific style for escaping special characters. + EscapeStyle m_escape_style = EscapeStyle::CXX; }; class ReadStringAndDumpToStreamOptions : public DumpToStreamOptions { @@ -147,71 +145,6 @@ bool m_is_truncated = false; }; - // I can't use a std::unique_ptr for this because the Deleter is a template - // argument there - // and I want the same type to represent both pointers I want to free and - // pointers I don't need to free - which is what this class essentially is - // It's very specialized to the needs of this file, and not suggested for - // general use - struct StringPrinterBufferPointer { - public: - typedef std::function Deleter; - - StringPrinterBufferPointer(std::nullptr_t ptr) - : m_data(nullptr), m_size(0), m_deleter() {} - - StringPrinterBufferPointer(const uint8_t *bytes, size_t size, - Deleter deleter = nullptr) - : m_data(bytes), m_size(size), m_deleter(deleter) {} - - StringPrinterBufferPointer(const char *bytes, size_t size, - Deleter deleter = nullptr) - : m_data(reinterpret_cast(bytes)), m_size(size), - m_deleter(deleter) {} - - StringPrinterBufferPointer(StringPrinterBufferPointer &&rhs) - : m_data(rhs.m_data), m_size(rhs.m_size), m_deleter(rhs.m_deleter) { - rhs.m_data = nullptr; - } - - ~StringPrinterBufferPointer() { - if (m_data && m_deleter) - m_deleter(m_data); - m_data = nullptr; - } - - const uint8_t *GetBytes() const { return m_data; } - - size_t GetSize() const { return m_size; } - - StringPrinterBufferPointer & - operator=(StringPrinterBufferPointer &&rhs) { - if (m_data && m_deleter) - m_deleter(m_data); - m_data = rhs.m_data; - m_size = rhs.m_size; - m_deleter = rhs.m_deleter; - rhs.m_data = nullptr; - return *this; - } - - private: - DISALLOW_COPY_AND_ASSIGN(StringPrinterBufferPointer); - - const uint8_t *m_data; - size_t m_size; - Deleter m_deleter; - }; - - typedef std::function - EscapingHelper; - typedef std::function - EscapingHelperGenerator; - - static EscapingHelper - GetDefaultEscapingHelper(GetPrintableElementType elem_type); - template static bool ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions &options); diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -180,10 +180,6 @@ GetPossibleFormattersMatches(ValueObject &valobj, lldb::DynamicValueType use_dynamic); - virtual lldb_private::formatters::StringPrinter::EscapingHelper - GetStringPrinterEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType); - virtual std::unique_ptr GetTypeScavenger(); virtual const char *GetLanguageSpecificTypeLookupHelp(); diff --git a/lldb/source/DataFormatters/StringPrinter.cpp b/lldb/source/DataFormatters/StringPrinter.cpp --- a/lldb/source/DataFormatters/StringPrinter.cpp +++ b/lldb/source/DataFormatters/StringPrinter.cpp @@ -24,15 +24,73 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::formatters; +using GetPrintableElementType = StringPrinter::GetPrintableElementType; +using StringElementType = StringPrinter::StringElementType; + +/// StringPrinterBufferPointer is basically a unique_ptr specialized for the +/// needs of this file: the buffer pointer doesn't /have/ to be heap-allocated, +/// and the convenience constructors make it easier to create string chunks. +struct StringPrinterBufferPointer { +public: + using Deleter = std::function; + + StringPrinterBufferPointer(std::nullptr_t ptr) + : m_data(nullptr), m_size(0), m_deleter() {} + + StringPrinterBufferPointer(const uint8_t *bytes, size_t size, + Deleter deleter = nullptr) + : m_data(bytes), m_size(size), m_deleter(deleter) {} + + StringPrinterBufferPointer(const char *bytes, size_t size, + Deleter deleter = nullptr) + : m_data(reinterpret_cast(bytes)), m_size(size), + m_deleter(deleter) {} + + StringPrinterBufferPointer(StringPrinterBufferPointer &&rhs) + : m_data(rhs.m_data), m_size(rhs.m_size), m_deleter(rhs.m_deleter) { + rhs.m_data = nullptr; + } + + ~StringPrinterBufferPointer() { + if (m_data && m_deleter) + m_deleter(m_data); + m_data = nullptr; + } + + const uint8_t *GetBytes() const { return m_data; } + + size_t GetSize() const { return m_size; } + + StringPrinterBufferPointer &operator=(StringPrinterBufferPointer &&rhs) { + if (m_data && m_deleter) + m_deleter(m_data); + m_data = rhs.m_data; + m_size = rhs.m_size; + m_deleter = rhs.m_deleter; + rhs.m_data = nullptr; + return *this; + } + +private: + DISALLOW_COPY_AND_ASSIGN(StringPrinterBufferPointer); + + const uint8_t *m_data; + size_t m_size; + Deleter m_deleter; +}; + +using EscapingHelper = + std::function; // we define this for all values of type but only implement it for those we // care about that's good because we get linker errors for any unsupported type -template -static StringPrinter::StringPrinterBufferPointer -GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next); +template +static StringPrinterBufferPointer +GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, + StringPrinter::EscapeStyle escape_style); -// mimic isprint() for Unicode codepoints -static bool isprint(char32_t codepoint) { +// Mimic isprint() for Unicode codepoints. +static bool isprint32(char32_t codepoint) { if (codepoint <= 0x1F || codepoint == 0x7F) // C0 { return false; @@ -59,57 +117,73 @@ return true; } -template <> -StringPrinter::StringPrinterBufferPointer -GetPrintableImpl(uint8_t *buffer, - uint8_t *buffer_end, - uint8_t *&next) { - StringPrinter::StringPrinterBufferPointer retval = {nullptr}; - - switch (*buffer) { +StringPrinterBufferPointer +attemptASCIIEscape(char32_t c, StringPrinter::EscapeStyle escape_style) { + const bool is_swift_escape_style = + escape_style == StringPrinter::EscapeStyle::Swift; + switch (c) { case 0: - retval = {"\\0", 2}; - break; + return {"\\0", 2}; case '\a': - retval = {"\\a", 2}; - break; + return {"\\a", 2}; case '\b': - retval = {"\\b", 2}; - break; + if (is_swift_escape_style) + return nullptr; + return {"\\b", 2}; case '\f': - retval = {"\\f", 2}; - break; + if (is_swift_escape_style) + return nullptr; + return {"\\f", 2}; case '\n': - retval = {"\\n", 2}; - break; + return {"\\n", 2}; case '\r': - retval = {"\\r", 2}; - break; + return {"\\r", 2}; case '\t': - retval = {"\\t", 2}; - break; + return {"\\t", 2}; case '\v': - retval = {"\\v", 2}; - break; + if (is_swift_escape_style) + return nullptr; + return {"\\v", 2}; case '\"': - retval = {"\\\"", 2}; - break; + return {"\\\"", 2}; + case '\'': + if (is_swift_escape_style) + return {"\\'", 2}; + return nullptr; case '\\': - retval = {"\\\\", 2}; - break; - default: - if (isprint(*buffer)) - retval = {buffer, 1}; - else { - uint8_t *data = new uint8_t[5]; - sprintf((char *)data, "\\x%02x", *buffer); - retval = {data, 4, [](const uint8_t *c) { delete[] c; }}; - break; - } + return {"\\\\", 2}; } + return nullptr; +} +template <> +StringPrinterBufferPointer GetPrintableImpl( + uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, + StringPrinter::EscapeStyle escape_style) { + // The ASCII helper always advances 1 byte at a time. next = buffer + 1; - return retval; + + StringPrinterBufferPointer retval = attemptASCIIEscape(*buffer, escape_style); + if (retval.GetSize()) + return retval; + if (isprint(*buffer)) + return {buffer, 1}; + + unsigned escaped_len; + const unsigned max_buffer_size = 7; + uint8_t *data = new uint8_t[max_buffer_size]; + switch (escape_style) { + case StringPrinter::EscapeStyle::CXX: + // Prints 4 characters, then a \0 terminator. + escaped_len = sprintf((char *)data, "\\x%02x", *buffer); + break; + case StringPrinter::EscapeStyle::Swift: + // Prints up to 6 characters, then a \0 terminator. + escaped_len = sprintf((char *)data, "\\u{%x}", *buffer); + break; + } + lldbassert(escaped_len > 0 && "unknown string escape style"); + return {data, escaped_len, [](const uint8_t *c) { delete[] c; }}; } static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { @@ -125,28 +199,25 @@ } template <> -StringPrinter::StringPrinterBufferPointer -GetPrintableImpl(uint8_t *buffer, - uint8_t *buffer_end, - uint8_t *&next) { - StringPrinter::StringPrinterBufferPointer retval{nullptr}; - +StringPrinterBufferPointer GetPrintableImpl( + uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, + StringPrinter::EscapeStyle escape_style) { const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); // If the utf8 encoded length is invalid, or if there aren't enough bytes to // print, this is some kind of corrupted string. if (utf8_encoded_len == 0 || utf8_encoded_len > 4) - return retval; + return nullptr; if ((buffer_end - buffer) < utf8_encoded_len) // There's no room in the buffer for the utf8 sequence. - return retval; + return nullptr; char32_t codepoint = 0; switch (utf8_encoded_len) { case 1: // this is just an ASCII byte - ask ASCII - return GetPrintableImpl( - buffer, buffer_end, next); + return GetPrintableImpl(buffer, buffer_end, next, + escape_style); case 2: codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer + 1)); @@ -163,92 +234,72 @@ break; } - if (codepoint) { - switch (codepoint) { - case 0: - retval = {"\\0", 2}; - break; - case '\a': - retval = {"\\a", 2}; - break; - case '\b': - retval = {"\\b", 2}; - break; - case '\f': - retval = {"\\f", 2}; - break; - case '\n': - retval = {"\\n", 2}; - break; - case '\r': - retval = {"\\r", 2}; - break; - case '\t': - retval = {"\\t", 2}; - break; - case '\v': - retval = {"\\v", 2}; - break; - case '\"': - retval = {"\\\"", 2}; - break; - case '\\': - retval = {"\\\\", 2}; - break; - default: - if (isprint(codepoint)) - retval = {buffer, utf8_encoded_len}; - else { - uint8_t *data = new uint8_t[11]; - sprintf((char *)data, "\\U%08x", (unsigned)codepoint); - retval = {data, 10, [](const uint8_t *c) { delete[] c; }}; - break; - } - } + // We couldn't figure out how to print this codepoint. + if (!codepoint) + return nullptr; - next = buffer + utf8_encoded_len; + // The UTF8 helper always advances by the utf8 encoded length. + next = buffer + utf8_encoded_len; + StringPrinterBufferPointer retval = + attemptASCIIEscape(codepoint, escape_style); + if (retval.GetSize()) return retval; + if (isprint32(codepoint)) + return {buffer, utf8_encoded_len}; + + unsigned escaped_len; + const unsigned max_buffer_size = 15; + uint8_t *data = new uint8_t[max_buffer_size]; + switch (escape_style) { + case StringPrinter::EscapeStyle::CXX: + // Prints 10 characters, then a \0 terminator. + escaped_len = sprintf((char *)data, "\\U%08x", (unsigned)codepoint); + break; + case StringPrinter::EscapeStyle::Swift: + // Prints up to 14 characters, then a \0 terminator. + escaped_len = sprintf((char *)data, "\\u{%x}", (unsigned)codepoint); + break; } - - // We couldn't figure out how to print this string. - return retval; + lldbassert(escaped_len > 0 && "unknown string escape style"); + return {data, escaped_len, [](const uint8_t *c) { delete[] c; }}; } // Given a sequence of bytes, this function returns: a sequence of bytes to // actually print out + a length the following unscanned position of the buffer // is in next -static StringPrinter::StringPrinterBufferPointer -GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, - uint8_t *buffer_end, uint8_t *&next) { +static StringPrinterBufferPointer +GetPrintable(StringElementType type, uint8_t *buffer, uint8_t *buffer_end, + uint8_t *&next, StringPrinter::EscapeStyle escape_style) { if (!buffer || buffer >= buffer_end) return {nullptr}; switch (type) { - case StringPrinter::StringElementType::ASCII: - return GetPrintableImpl( - buffer, buffer_end, next); - case StringPrinter::StringElementType::UTF8: - return GetPrintableImpl( - buffer, buffer_end, next); + case StringElementType::ASCII: + return GetPrintableImpl(buffer, buffer_end, next, + escape_style); + case StringElementType::UTF8: + return GetPrintableImpl(buffer, buffer_end, next, + escape_style); default: return {nullptr}; } } -StringPrinter::EscapingHelper -StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) { +static EscapingHelper +GetDefaultEscapingHelper(GetPrintableElementType elem_type, + StringPrinter::EscapeStyle escape_style) { switch (elem_type) { case GetPrintableElementType::UTF8: - return [](uint8_t *buffer, uint8_t *buffer_end, - uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer { - return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, - buffer_end, next); + return [escape_style](uint8_t *buffer, uint8_t *buffer_end, + uint8_t *&next) -> StringPrinterBufferPointer { + return GetPrintable(StringElementType::UTF8, buffer, buffer_end, next, + escape_style); }; case GetPrintableElementType::ASCII: - return [](uint8_t *buffer, uint8_t *buffer_end, - uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer { - return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, - buffer_end, next); + return [escape_style](uint8_t *buffer, uint8_t *buffer_end, + uint8_t *&next) -> StringPrinterBufferPointer { + return GetPrintable(StringElementType::ASCII, buffer, buffer_end, next, + escape_style); }; } llvm_unreachable("bad element type"); @@ -321,18 +372,10 @@ } const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); - lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; - if (escape_non_printables) { - if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) - escaping_callback = language->GetStringPrinterEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType:: - UTF8); - else - escaping_callback = - lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( - lldb_private::formatters::StringPrinter:: - GetPrintableElementType::UTF8); - } + EscapingHelper escaping_callback; + if (escape_non_printables) + escaping_callback = GetDefaultEscapingHelper( + GetPrintableElementType::UTF8, dump_options.GetEscapeStyle()); // since we tend to accept partial data (and even partially malformed data) // we might end up with no NULL terminator before the end_ptr hence we need @@ -394,16 +437,19 @@ SetQuote(options.GetQuote()); SetEscapeNonPrintables(options.GetEscapeNonPrintables()); SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); - SetLanguage(options.GetLanguage()); + SetEscapeStyle(options.GetEscapeStyle()); } namespace lldb_private { namespace formatters { +// FIXME: In practice, do we ever prefer ASCII-only formatting over UTF8 +// formatting? The NSString formatter is the only one that makes a distinction +// between the two: if it doesn't need to, we can simply delete all this +// duplicated ASCII-specific code. template <> -bool StringPrinter::ReadStringAndDumpToStream< - StringPrinter::StringElementType::ASCII>( +bool StringPrinter::ReadStringAndDumpToStream( const ReadStringAndDumpToStreamOptions &options) { assert(options.GetStream() && "need a Stream to print the string to"); Status my_error; @@ -447,18 +493,10 @@ uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize(); const bool escape_non_printables = options.GetEscapeNonPrintables(); - lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; - if (escape_non_printables) { - if (Language *language = Language::FindPlugin(options.GetLanguage())) - escaping_callback = language->GetStringPrinterEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType:: - ASCII); - else - escaping_callback = - lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType:: - ASCII); - } + EscapingHelper escaping_callback; + if (escape_non_printables) + escaping_callback = GetDefaultEscapingHelper(GetPrintableElementType::ASCII, + options.GetEscapeStyle()); // since we tend to accept partial data (and even partially malformed data) // we might end up with no NULL terminator before the end_ptr hence we need @@ -582,31 +620,27 @@ } template <> -bool StringPrinter::ReadStringAndDumpToStream< - StringPrinter::StringElementType::UTF8>( +bool StringPrinter::ReadStringAndDumpToStream( const ReadStringAndDumpToStreamOptions &options) { return ReadUTFBufferAndDumpToStream(options, nullptr); } template <> -bool StringPrinter::ReadStringAndDumpToStream< - StringPrinter::StringElementType::UTF16>( +bool StringPrinter::ReadStringAndDumpToStream( const ReadStringAndDumpToStreamOptions &options) { return ReadUTFBufferAndDumpToStream(options, llvm::ConvertUTF16toUTF8); } template <> -bool StringPrinter::ReadStringAndDumpToStream< - StringPrinter::StringElementType::UTF32>( +bool StringPrinter::ReadStringAndDumpToStream( const ReadStringAndDumpToStreamOptions &options) { return ReadUTFBufferAndDumpToStream(options, llvm::ConvertUTF32toUTF8); } template <> -bool StringPrinter::ReadBufferAndDumpToStream< - StringPrinter::StringElementType::UTF8>( +bool StringPrinter::ReadBufferAndDumpToStream( const ReadBufferAndDumpToStreamOptions &options) { assert(options.GetStream() && "need a Stream to print the string to"); @@ -614,8 +648,7 @@ } template <> -bool StringPrinter::ReadBufferAndDumpToStream< - StringPrinter::StringElementType::ASCII>( +bool StringPrinter::ReadBufferAndDumpToStream( const ReadBufferAndDumpToStreamOptions &options) { // treat ASCII the same as UTF8 // FIXME: can we optimize ASCII some more? @@ -623,8 +656,7 @@ } template <> -bool StringPrinter::ReadBufferAndDumpToStream< - StringPrinter::StringElementType::UTF16>( +bool StringPrinter::ReadBufferAndDumpToStream( const ReadBufferAndDumpToStreamOptions &options) { assert(options.GetStream() && "need a Stream to print the string to"); @@ -632,8 +664,7 @@ } template <> -bool StringPrinter::ReadBufferAndDumpToStream< - StringPrinter::StringElementType::UTF32>( +bool StringPrinter::ReadBufferAndDumpToStream( const ReadBufferAndDumpToStreamOptions &options) { assert(options.GetStream() && "need a Stream to print the string to"); diff --git a/lldb/source/Plugins/Language/ObjC/NSString.cpp b/lldb/source/Plugins/Language/ObjC/NSString.cpp --- a/lldb/source/Plugins/Language/ObjC/NSString.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSString.cpp @@ -175,7 +175,6 @@ options.SetIgnoreMaxLength(summary_options.GetCapping() == TypeSummaryCapping::eTypeSummaryUncapped); options.SetBinaryZeroIsTerminator(false); - options.SetLanguage(summary_options.GetLanguage()); return StringPrinter::ReadStringAndDumpToStream< StringPrinter::StringElementType::UTF16>(options); } else { @@ -188,7 +187,6 @@ options.SetIgnoreMaxLength(summary_options.GetCapping() == TypeSummaryCapping::eTypeSummaryUncapped); options.SetBinaryZeroIsTerminator(false); - options.SetLanguage(summary_options.GetLanguage()); return StringPrinter::ReadStringAndDumpToStream< StringPrinter::StringElementType::ASCII>(options); } @@ -204,7 +202,6 @@ options.SetHasSourceSize(has_explicit_length); options.SetIgnoreMaxLength(summary_options.GetCapping() == TypeSummaryCapping::eTypeSummaryUncapped); - options.SetLanguage(summary_options.GetLanguage()); return StringPrinter::ReadStringAndDumpToStream< StringPrinter::StringElementType::ASCII>(options); } else if (is_unicode) { @@ -229,7 +226,6 @@ options.SetIgnoreMaxLength(summary_options.GetCapping() == TypeSummaryCapping::eTypeSummaryUncapped); options.SetBinaryZeroIsTerminator(!has_explicit_length); - options.SetLanguage(summary_options.GetLanguage()); return StringPrinter::ReadStringAndDumpToStream< StringPrinter::StringElementType::UTF16>(options); } else if (is_path_store) { @@ -250,7 +246,6 @@ options.SetIgnoreMaxLength(summary_options.GetCapping() == TypeSummaryCapping::eTypeSummaryUncapped); options.SetBinaryZeroIsTerminator(!has_explicit_length); - options.SetLanguage(summary_options.GetLanguage()); return StringPrinter::ReadStringAndDumpToStream< StringPrinter::StringElementType::UTF16>(options); } else if (is_inline) { @@ -273,7 +268,6 @@ options.SetIgnoreMaxLength(summary_options.GetCapping() == TypeSummaryCapping::eTypeSummaryUncapped); options.SetBinaryZeroIsTerminator(!has_explicit_length); - options.SetLanguage(summary_options.GetLanguage()); if (has_explicit_length) return StringPrinter::ReadStringAndDumpToStream< StringPrinter::StringElementType::UTF8>(options); @@ -295,7 +289,6 @@ options.SetHasSourceSize(has_explicit_length); options.SetIgnoreMaxLength(summary_options.GetCapping() == TypeSummaryCapping::eTypeSummaryUncapped); - options.SetLanguage(summary_options.GetLanguage()); return StringPrinter::ReadStringAndDumpToStream< StringPrinter::StringElementType::ASCII>(options); } diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp --- a/lldb/source/Target/Language.cpp +++ b/lldb/source/Target/Language.cpp @@ -139,13 +139,6 @@ return {}; } -lldb_private::formatters::StringPrinter::EscapingHelper -Language::GetStringPrinterEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType - elem_type) { - return StringPrinter::GetDefaultEscapingHelper(elem_type); -} - struct language_name_pair { const char *name; LanguageType type; diff --git a/lldb/unittests/DataFormatter/CMakeLists.txt b/lldb/unittests/DataFormatter/CMakeLists.txt --- a/lldb/unittests/DataFormatter/CMakeLists.txt +++ b/lldb/unittests/DataFormatter/CMakeLists.txt @@ -1,5 +1,6 @@ add_lldb_unittest(LLDBFormatterTests FormatManagerTests.cpp + StringPrinterTests.cpp LINK_LIBS lldbCore diff --git a/lldb/unittests/DataFormatter/StringPrinterTests.cpp b/lldb/unittests/DataFormatter/StringPrinterTests.cpp new file mode 100644 --- /dev/null +++ b/lldb/unittests/DataFormatter/StringPrinterTests.cpp @@ -0,0 +1,135 @@ +//===-- StringPrinterTests.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/DataFormatters/StringPrinter.h" +#include "lldb/Utility/DataExtractor.h" +#include "lldb/Utility/Endian.h" +#include "lldb/Utility/StreamString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" +#include + +using namespace lldb; +using namespace lldb_private; +using lldb_private::formatters::StringPrinter; +using llvm::StringRef; + +#define QUOTE(x) "\"" x "\"" + +/// Dump out "name: { }". +static void dumpStr(StringRef name, StringRef str) { + llvm::errs() << name << ": { "; + for (char c : str) + llvm::errs() << c << " "; + llvm::errs() << "}\n"; +} + +/// Format \p input according to the options specified in the template params, +/// then check whether the result is equal to \p reference. If not, dump the +/// expeted vs. actual results. +template +static bool isFormatCorrect(StringRef input, StringRef reference) { + StreamString out; + StringPrinter::ReadBufferAndDumpToStreamOptions opts; + opts.SetStream(&out); + opts.SetSourceSize(input.size()); + opts.SetNeedsZeroTermination(true); + opts.SetEscapeNonPrintables(true); + opts.SetIgnoreMaxLength(false); + opts.SetEscapeStyle(escape_style); + DataExtractor extractor(input.data(), input.size(), + endian::InlHostByteOrder(), sizeof(void *)); + opts.SetData(extractor); + const bool success = StringPrinter::ReadBufferAndDumpToStream(opts); + const bool matches = out.GetString() == reference; + if (!success || !matches) { + dumpStr("expected", reference); + dumpStr(" but got", out.GetString()); + } + return matches; +} + +// The "StringElementType::ASCII + EscapeStyle::CXX" combination is not tested +// because it probably should not be supported (see FIXME in StringPrinter.cpp), +// and because it's implemented by calling into the UTF8 logic anyway. + +// Test UTF8 formatting for C++. +TEST(StringPrinterTests, CxxUTF8) { + auto matches = [](StringRef str, StringRef reference) { + return isFormatCorrect(str, reference); + }; + + // Special escapes. + EXPECT_TRUE(matches({"\0", 1}, QUOTE(""))); + EXPECT_TRUE(matches("\a", QUOTE("\\a"))); + EXPECT_TRUE(matches("\b", QUOTE("\\b"))); + EXPECT_TRUE(matches("\f", QUOTE("\\f"))); + EXPECT_TRUE(matches("\n", QUOTE("\\n"))); + EXPECT_TRUE(matches("\r", QUOTE("\\r"))); + EXPECT_TRUE(matches("\t", QUOTE("\\t"))); + EXPECT_TRUE(matches("\v", QUOTE("\\v"))); + EXPECT_TRUE(matches("\"", QUOTE("\\\""))); + EXPECT_TRUE(matches("\'", QUOTE("'"))); + EXPECT_TRUE(matches("\\", QUOTE("\\\\"))); + + // Printable characters. + EXPECT_TRUE(matches("'", QUOTE("'"))); + EXPECT_TRUE(matches("a", QUOTE("a"))); + EXPECT_TRUE(matches("Z", QUOTE("Z"))); + EXPECT_TRUE(matches("🥑", QUOTE("🥑"))); + + // Octal (\nnn), hex (\xnn), extended octal (\unnnn or \Unnnnnnnn). + EXPECT_TRUE(matches("\uD55C", QUOTE("한"))); + EXPECT_TRUE(matches("\U00010348", QUOTE("𐍈"))); + + // FIXME: These strings are all rejected, but shouldn't be AFAICT. LLDB finds + // that these are not valid utf8 sequences, but that's OK, the raw values + // should still be printed out. + EXPECT_FALSE(matches("\376", QUOTE("\\xfe"))); // \376 is 254 in decimal. + EXPECT_FALSE(matches("\xfe", QUOTE("\\xfe"))); // \xfe is 254 in decimal. +} + +// Test UTF8 formatting for Swift. +TEST(StringPrinterTests, SwiftUTF8) { + auto matches = [](StringRef str, StringRef reference) { + return isFormatCorrect(str, reference); + }; + + // Special escapes. + EXPECT_TRUE(matches({"\0", 1}, QUOTE(""))); + EXPECT_TRUE(matches("\a", QUOTE("\\a"))); + EXPECT_TRUE(matches("\b", QUOTE("\\u{8}"))); + EXPECT_TRUE(matches("\f", QUOTE("\\u{c}"))); + EXPECT_TRUE(matches("\n", QUOTE("\\n"))); + EXPECT_TRUE(matches("\r", QUOTE("\\r"))); + EXPECT_TRUE(matches("\t", QUOTE("\\t"))); + EXPECT_TRUE(matches("\v", QUOTE("\\u{b}"))); + EXPECT_TRUE(matches("\"", QUOTE("\\\""))); + EXPECT_TRUE(matches("\'", QUOTE("\\'"))); + EXPECT_TRUE(matches("\\", QUOTE("\\\\"))); + + // Printable characters. + EXPECT_TRUE(matches("'", QUOTE("\\'"))); + EXPECT_TRUE(matches("a", QUOTE("a"))); + EXPECT_TRUE(matches("Z", QUOTE("Z"))); + EXPECT_TRUE(matches("🥑", QUOTE("🥑"))); + + // Octal (\nnn), hex (\xnn), extended octal (\unnnn or \Unnnnnnnn). + EXPECT_TRUE(matches("\uD55C", QUOTE("한"))); + EXPECT_TRUE(matches("\U00010348", QUOTE("𐍈"))); + + // FIXME: These strings are all rejected, but shouldn't be AFAICT. LLDB finds + // that these are not valid utf8 sequences, but that's OK, the raw values + // should still be printed out. + EXPECT_FALSE(matches("\376", QUOTE("\\xfe"))); // \376 is 254 in decimal. + EXPECT_FALSE(matches("\xfe", QUOTE("\\xfe"))); // \xfe is 254 in decimal. +}