Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py =================================================================== --- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py +++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py @@ -51,6 +51,8 @@ "settings set target.max-children-count 256", check=False) + is_64_bit = self.process().GetAddressByteSize() == 8 + # Execute the cleanup function during test case tear down. self.addTearDownHook(cleanup) @@ -114,3 +116,7 @@ '(%s::basic_string, ' '%s::allocator >) uchar = "aaaaa"'%(ns,ns,ns), ]) + + if is_64_bit: + self.expect("frame variable garbage1", substrs=['garbage1 = Summary Unavailable']) + self.expect("frame variable garbage2", substrs=['garbage2 = Summary Unavailable']) Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp =================================================================== --- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp +++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp @@ -1,4 +1,27 @@ #include +#include + +// For more information about libc++'s std::string ABI, see: +// +// https://joellaity.com/2020/01/31/string.html + +// A corrupt string which hits the SSO code path, but has an invalid size. +static struct { + // Set the size of this short-mode string to 116. Note that in short mode, + // the size is encoded as `size << 1`. + unsigned char size = 232; + + // 23 garbage bytes for the inline string payload. + char inline_buf[23] = {0}; +} garbage_string_short_mode; + +// A corrupt libcxx string which points to garbage and has a crazy length. +static unsigned char garbage_utf8_payload[] = {0 /* illegal utf8 encoding */, 1, 2, 3}; +static struct { + uint64_t cap = 100; + uint64_t size = 200; + unsigned char *data = &garbage_utf8_payload[0]; +} garbage_string_long_mode; int main() { @@ -17,6 +40,17 @@ std::u32string u32_string(U"🍄🍅🍆🍌"); std::u32string u32_empty(U""); std::basic_string uchar(5, 'a'); + std::string garbage1, garbage2; + +#if _LIBCPP_ABI_VERSION == 1 + if (sizeof(std::string) == sizeof(garbage_string_short_mode)) + memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode)) + memcpy((void *)&garbage2, &garbage_string_long_mode, sizeof(std::string)); +#else +#error "Test potentially needs to be updated for a new std::string ABI." +#endif + S.assign(L"!!!!!"); // Set break point at this line. return 0; } Index: lldb/source/DataFormatters/StringPrinter.cpp =================================================================== --- lldb/source/DataFormatters/StringPrinter.cpp +++ lldb/source/DataFormatters/StringPrinter.cpp @@ -15,6 +15,7 @@ #include "lldb/Target/Target.h" #include "lldb/Utility/Status.h" +#include "llvm/Support/CheckedArithmetic.h" #include "llvm/Support/ConvertUTF.h" #include @@ -133,12 +134,17 @@ unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); - if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) { - // I don't have enough bytes - print whatever I have left - retval = {buffer, static_cast(1 + buffer_end - buffer)}; - next = buffer_end + 1; + // If the utf8 encoded length is invalid, or if there aren't enough bytes to + // print, this is some kind of corrupted string. + if (utf8_encoded_len == 0 || utf8_encoded_len > 4) + return retval; + if ((buffer_end - buffer) < utf8_encoded_len) + // There's no room in the buffer for the utf8 sequence. + return retval; + if (!llvm::checkedAdd(reinterpret_cast(buffer), + static_cast(utf8_encoded_len))) + // Accessing the utf8 sequence would cause a pointer overflow. return retval; - } char32_t codepoint = 0; switch (utf8_encoded_len) { @@ -160,12 +166,6 @@ (unsigned char)*buffer, (unsigned char)*(buffer + 1), (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); break; - default: - // this is probably some bogus non-character thing just print it as-is and - // hope to sync up again soon - retval = {buffer, 1}; - next = buffer + 1; - return retval; } if (codepoint) { @@ -215,9 +215,7 @@ return retval; } - // this should not happen - but just in case.. try to resync at some point - retval = {buffer, 1}; - next = buffer + 1; + // We couldn't figure out how to print this string. return retval; } @@ -227,7 +225,7 @@ static StringPrinter::StringPrinterBufferPointer<> GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next) { - if (!buffer) + if (!buffer || buffer >= buffer_end) return {nullptr}; switch (type) { @@ -354,13 +352,11 @@ escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); auto printable_bytes = printable.GetBytes(); auto printable_size = printable.GetSize(); - if (!printable_bytes || !next_data) { - // GetPrintable() failed on us - print one byte in a desperate resync - // attempt - printable_bytes = utf8_data_ptr; - printable_size = 1; - next_data = utf8_data_ptr + 1; - } + + // We failed to figure out how to print this string. + if (!printable_bytes || !next_data) + return false; + for (unsigned c = 0; c < printable_size; c++) stream.Printf("%c", *(printable_bytes + c)); utf8_data_ptr = (uint8_t *)next_data; @@ -478,13 +474,11 @@ auto printable = escaping_callback(data, data_end, next_data); auto printable_bytes = printable.GetBytes(); auto printable_size = printable.GetSize(); - if (!printable_bytes || !next_data) { - // GetPrintable() failed on us - print one byte in a desperate resync - // attempt - printable_bytes = data; - printable_size = 1; - next_data = data + 1; - } + + // We failed to figure out how to print this string. + if (!printable_bytes || !next_data) + return false; + for (unsigned c = 0; c < printable_size; c++) options.GetStream()->Printf("%c", *(printable_bytes + c)); data = (uint8_t *)next_data; Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp =================================================================== --- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -527,6 +527,16 @@ size = (layout == eLibcxxStringLayoutModeDSC) ? size_mode_value : ((size_mode_value >> 1) % 256); + + // When the small-string optimization takes place, the data must fit in the + // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's + // likely that the string isn't initialized and we're reading garbage. + ExecutionContext exe_ctx(location_sp->GetExecutionContextRef()); + auto max_bytes = location_sp->GetCompilerType().GetByteSize( + exe_ctx.GetBestExecutionContextScope()); + if (size > max_bytes) + return false; + return (location_sp.get() != nullptr); } else { ValueObjectSP l(D->GetChildAtIndex(0, true)); @@ -549,8 +559,10 @@ const TypeSummaryOptions &summary_options) { uint64_t size = 0; ValueObjectSP location_sp; - if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) - return false; + if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) { + stream.Printf("Summary Unavailable"); + return true; + } if (size == 0) { stream.Printf("L\"\""); return true; @@ -591,29 +603,24 @@ switch (*wchar_t_size) { case 1: - StringPrinter::ReadBufferAndDumpToStream< + return StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF8>( options); - break; case 2: - lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream< + return lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF16>( options); - break; case 4: - lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream< + return lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF32>( options); - break; default: stream.Printf("size for wchar_t is not valid"); return true; } - - return true; } template @@ -623,8 +630,10 @@ uint64_t size = 0; ValueObjectSP location_sp; - if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) - return false; + if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) { + stream.Printf("Summary Unavailable"); + return true; + } if (size == 0) { stream.Printf("\"\""); @@ -657,9 +666,7 @@ options.SetQuote('"'); options.SetSourceSize(size); options.SetBinaryZeroIsTerminator(false); - StringPrinter::ReadBufferAndDumpToStream(options); - - return true; + return StringPrinter::ReadBufferAndDumpToStream(options); } bool lldb_private::formatters::LibcxxStringSummaryProviderASCII(