Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py =================================================================== --- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py +++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py @@ -51,6 +51,8 @@ "settings set target.max-children-count 256", check=False) + is_64_bit = self.process().GetAddressByteSize() == 8 + # Execute the cleanup function during test case tear down. self.addTearDownHook(cleanup) @@ -114,3 +116,10 @@ '(%s::basic_string, ' '%s::allocator >) uchar = "aaaaa"'%(ns,ns,ns), ]) + + if is_64_bit: + self.expect("frame variable garbage1", substrs=['garbage1 = Summary Unavailable']) + self.expect("frame variable garbage2", substrs=['garbage2 = Summary Unavailable']) + self.expect("frame variable garbage3", substrs=['garbage3 = Summary Unavailable']) + self.expect("frame variable garbage4", substrs=['garbage4 = Summary Unavailable']) + self.expect("frame variable garbage5", substrs=['garbage5 = Summary Unavailable']) Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp =================================================================== --- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp +++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp @@ -1,4 +1,58 @@ #include +#include + +// For more information about libc++'s std::string ABI, see: +// +// https://joellaity.com/2020/01/31/string.html + +// A corrupt string which hits the SSO code path, but has an invalid size. +static struct { + // Set the size of this short-mode string to 116. Note that in short mode, + // the size is encoded as `size << 1`. + unsigned char size = 232; + + // 23 garbage bytes for the inline string payload. + char inline_buf[23] = {0}; +} garbage_string_short_mode; + +// A corrupt libcxx string in long mode with a payload that contains a utf8 +// sequence that's inherently too long. +static unsigned char garbage_utf8_payload1[] = { + 250 // This means that we expect a 5-byte sequence, this is invalid. +}; +static struct { + uint64_t cap = 5; + uint64_t size = 4; + unsigned char *data = &garbage_utf8_payload1[0]; +} garbage_string_long_mode1; + +// A corrupt libcxx string in long mode with a payload that contains a utf8 +// sequence that's too long to fit in the buffer. +static unsigned char garbage_utf8_payload2[] = { + 240 // This means that we expect a 4-byte sequence, but the buffer is too + // small for this. +}; +static struct { + uint64_t cap = 3; + uint64_t size = 2; + unsigned char *data = &garbage_utf8_payload1[0]; +} garbage_string_long_mode2; + +// A corrupt libcxx string which has an invalid size (i.e. a size greater than +// the capacity of the string). +static struct { + uint64_t cap = 5; + uint64_t size = 7; + const char *data = "foo"; +} garbage_string_long_mode3; + +// A corrupt libcxx string in long mode with a payload that would trigger a +// buffer overflow. +static struct { + uint64_t cap = 5; + uint64_t size = 2; + uint64_t data = 0xfffffffffffffffeULL; +} garbage_string_long_mode4; int main() { @@ -17,6 +71,23 @@ std::u32string u32_string(U"🍄🍅🍆🍌"); std::u32string u32_empty(U""); std::basic_string uchar(5, 'a'); + +#if _LIBCPP_ABI_VERSION == 1 + std::string garbage1, garbage2, garbage3, garbage4, garbage5; + if (sizeof(std::string) == sizeof(garbage_string_short_mode)) + memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode1)) + memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode2)) + memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode3)) + memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode4)) + memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string)); +#else +#error "Test potentially needs to be updated for a new std::string ABI." +#endif + S.assign(L"!!!!!"); // Set break point at this line. return 0; } Index: lldb/source/DataFormatters/StringPrinter.cpp =================================================================== --- lldb/source/DataFormatters/StringPrinter.cpp +++ lldb/source/DataFormatters/StringPrinter.cpp @@ -131,14 +131,15 @@ uint8_t *&next) { StringPrinter::StringPrinterBufferPointer<> retval{nullptr}; - unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); + const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); - if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) { - // I don't have enough bytes - print whatever I have left - retval = {buffer, static_cast(1 + buffer_end - buffer)}; - next = buffer_end + 1; + // If the utf8 encoded length is invalid, or if there aren't enough bytes to + // print, this is some kind of corrupted string. + if (utf8_encoded_len == 0 || utf8_encoded_len > 4) + return retval; + if ((buffer_end - buffer) < utf8_encoded_len) + // There's no room in the buffer for the utf8 sequence. return retval; - } char32_t codepoint = 0; switch (utf8_encoded_len) { @@ -160,12 +161,6 @@ (unsigned char)*buffer, (unsigned char)*(buffer + 1), (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); break; - default: - // this is probably some bogus non-character thing just print it as-is and - // hope to sync up again soon - retval = {buffer, 1}; - next = buffer + 1; - return retval; } if (codepoint) { @@ -215,9 +210,7 @@ return retval; } - // this should not happen - but just in case.. try to resync at some point - retval = {buffer, 1}; - next = buffer + 1; + // We couldn't figure out how to print this string. return retval; } @@ -227,7 +220,7 @@ static StringPrinter::StringPrinterBufferPointer<> GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next) { - if (!buffer) + if (!buffer || buffer >= buffer_end) return {nullptr}; switch (type) { @@ -354,13 +347,11 @@ escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); auto printable_bytes = printable.GetBytes(); auto printable_size = printable.GetSize(); - if (!printable_bytes || !next_data) { - // GetPrintable() failed on us - print one byte in a desperate resync - // attempt - printable_bytes = utf8_data_ptr; - printable_size = 1; - next_data = utf8_data_ptr + 1; - } + + // We failed to figure out how to print this string. + if (!printable_bytes || !next_data) + return false; + for (unsigned c = 0; c < printable_size; c++) stream.Printf("%c", *(printable_bytes + c)); utf8_data_ptr = (uint8_t *)next_data; @@ -478,13 +469,11 @@ auto printable = escaping_callback(data, data_end, next_data); auto printable_bytes = printable.GetBytes(); auto printable_size = printable.GetSize(); - if (!printable_bytes || !next_data) { - // GetPrintable() failed on us - print one byte in a desperate resync - // attempt - printable_bytes = data; - printable_size = 1; - next_data = data + 1; - } + + // We failed to figure out how to print this string. + if (!printable_bytes || !next_data) + return false; + for (unsigned c = 0; c < printable_size; c++) options.GetStream()->Printf("%c", *(printable_bytes + c)); data = (uint8_t *)next_data; Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp =================================================================== --- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -8,7 +8,6 @@ #include "LibCxx.h" -#include "llvm/ADT/ScopeExit.h" #include "lldb/Core/Debugger.h" #include "lldb/Core/FormatEntity.h" #include "lldb/Core/ValueObject.h" @@ -527,6 +526,17 @@ size = (layout == eLibcxxStringLayoutModeDSC) ? size_mode_value : ((size_mode_value >> 1) % 256); + + // When the small-string optimization takes place, the data must fit in the + // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's + // likely that the string isn't initialized and we're reading garbage. + ExecutionContext exe_ctx(location_sp->GetExecutionContextRef()); + const llvm::Optional max_bytes = + location_sp->GetCompilerType().GetByteSize( + exe_ctx.GetBestExecutionContextScope()); + if (!max_bytes || size > *max_bytes) + return false; + return (location_sp.get() != nullptr); } else { ValueObjectSP l(D->GetChildAtIndex(0, true)); @@ -537,9 +547,15 @@ ? layout_decider : l->GetChildAtIndex(2, true); ValueObjectSP size_vo(l->GetChildAtIndex(1, true)); - if (!size_vo || !location_sp) + const unsigned capacity_index = + (layout == eLibcxxStringLayoutModeDSC) ? 2 : 0; + ValueObjectSP capacity_vo(l->GetChildAtIndex(capacity_index, true)); + if (!size_vo || !location_sp || !capacity_vo) + return false; + size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET); + const uint64_t cap = capacity_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET); + if (size == LLDB_INVALID_OFFSET || cap == LLDB_INVALID_OFFSET || cap < size) return false; - size = size_vo->GetValueAsUnsigned(0); return true; } } @@ -569,7 +585,9 @@ options.SetIsTruncated(true); } } - location_sp->GetPointeeData(extractor, 0, size); + const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size); + if (bytes_read < size) + return false; // std::wstring::size() is measured in 'characters', not bytes TypeSystemClang *ast_context = @@ -591,41 +609,33 @@ switch (*wchar_t_size) { case 1: - StringPrinter::ReadBufferAndDumpToStream< + return StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF8>( options); break; case 2: - lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream< + return StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF16>( options); break; case 4: - lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream< + return StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF32>( options); - break; - - default: - stream.Printf("size for wchar_t is not valid"); - return true; } - - return true; + return false; } template bool LibcxxStringSummaryProvider(ValueObject &valobj, Stream &stream, const TypeSummaryOptions &summary_options, - std::string prefix_token = "") { + std::string prefix_token) { uint64_t size = 0; ValueObjectSP location_sp; - if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) return false; - if (size == 0) { stream.Printf("\"\""); return true; @@ -644,7 +654,9 @@ options.SetIsTruncated(true); } } - location_sp->GetPointeeData(extractor, 0, size); + const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size); + if (bytes_read < size) + return false; options.SetData(extractor); options.SetStream(&stream); @@ -657,28 +669,40 @@ options.SetQuote('"'); options.SetSourceSize(size); options.SetBinaryZeroIsTerminator(false); - StringPrinter::ReadBufferAndDumpToStream(options); + return StringPrinter::ReadBufferAndDumpToStream(options); +} +template +static bool formatStringImpl(ValueObject &valobj, Stream &stream, + const TypeSummaryOptions &summary_options, + std::string prefix_token) { + StreamString scratch_stream; + const bool success = LibcxxStringSummaryProvider( + valobj, scratch_stream, summary_options, prefix_token); + if (success) + stream << scratch_stream.GetData(); + else + stream << "Summary Unavailable"; return true; } bool lldb_private::formatters::LibcxxStringSummaryProviderASCII( ValueObject &valobj, Stream &stream, const TypeSummaryOptions &summary_options) { - return LibcxxStringSummaryProvider( - valobj, stream, summary_options); + return formatStringImpl( + valobj, stream, summary_options, ""); } bool lldb_private::formatters::LibcxxStringSummaryProviderUTF16( ValueObject &valobj, Stream &stream, const TypeSummaryOptions &summary_options) { - return LibcxxStringSummaryProvider( + return formatStringImpl( valobj, stream, summary_options, "u"); } bool lldb_private::formatters::LibcxxStringSummaryProviderUTF32( ValueObject &valobj, Stream &stream, const TypeSummaryOptions &summary_options) { - return LibcxxStringSummaryProvider( + return formatStringImpl( valobj, stream, summary_options, "U"); }