Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp =================================================================== --- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp +++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp @@ -1,5 +1,12 @@ #include +// A corrupt libcxx string which hits the SSO code path, but has an invalid +// size. +static unsigned char garbage_string_sso[] = {232, 32, 175, 29, 1, 0, 0, 0, 32, 30, 175, 29, 1, 0, 0, 0, 184, 30, 175, 29, 1, 0, 0, 0}; + +// A corrupt libcxx string which points to garbage and has a crazy length. +static unsigned char garbage_string_long[] = {185, 52, 168, 29, 1, 0, 0, 0, 168, 61, 175, 29, 1, 0, 0, 0, 104, 222, 174, 29, 1, 0, 0, 0}; + int main() { std::wstring wempty(L""); @@ -17,6 +24,11 @@ std::u32string u32_string(U"🍄🍅🍆🍌"); std::u32string u32_empty(U""); std::basic_string uchar(5, 'a'); + std::string garbage1, garbage2; + if (sizeof(std::string) == sizeof(garbage_string_sso)) + memcpy((void *)&garbage1, &garbage_string_sso, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long)) + memcpy((void *)&garbage2, &garbage_string_long, sizeof(std::string)); S.assign(L"!!!!!"); // Set break point at this line. return 0; } Index: lldb/source/DataFormatters/StringPrinter.cpp =================================================================== --- lldb/source/DataFormatters/StringPrinter.cpp +++ lldb/source/DataFormatters/StringPrinter.cpp @@ -59,11 +59,19 @@ return true; } +/// Return true if \p Needle lies within the range [\p Start, \p End). +static bool isInHalfOpenRange(uint8_t *Needle, uint8_t *Start, uint8_t *End) { + return uintptr_t(Needle) >= uintptr_t(Start) && + uintptr_t(Needle) < uintptr_t(End); +} + template <> StringPrinter::StringPrinterBufferPointer<> GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next) { + assert(isInHalfOpenRange(buffer, buffer, buffer_end) && + "Cannot read the first byte of ASCII string buffer"); StringPrinter::StringPrinterBufferPointer<> retval = {nullptr}; switch (*buffer) { @@ -129,16 +137,17 @@ GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next) { + assert(isInHalfOpenRange(buffer, buffer, buffer_end) && + "Cannot read the first byte of UTF8 string buffer"); StringPrinter::StringPrinterBufferPointer<> retval{nullptr}; unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); - if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) { - // I don't have enough bytes - print whatever I have left - retval = {buffer, static_cast(1 + buffer_end - buffer)}; - next = buffer_end + 1; + // If the utf8 encoded length is invalid, or if there aren't enough bytes to + // print, this is some kind of corrupted string. + if ((utf8_encoded_len == 0 || utf8_encoded_len > 4) || + !isInHalfOpenRange(buffer + (utf8_encoded_len - 1), buffer, buffer_end)) return retval; - } char32_t codepoint = 0; switch (utf8_encoded_len) { @@ -160,12 +169,6 @@ (unsigned char)*buffer, (unsigned char)*(buffer + 1), (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); break; - default: - // this is probably some bogus non-character thing just print it as-is and - // hope to sync up again soon - retval = {buffer, 1}; - next = buffer + 1; - return retval; } if (codepoint) { @@ -215,9 +218,7 @@ return retval; } - // this should not happen - but just in case.. try to resync at some point - retval = {buffer, 1}; - next = buffer + 1; + // We couldn't figure out how to print this string. return retval; } @@ -354,13 +355,11 @@ escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); auto printable_bytes = printable.GetBytes(); auto printable_size = printable.GetSize(); - if (!printable_bytes || !next_data) { - // GetPrintable() failed on us - print one byte in a desperate resync - // attempt - printable_bytes = utf8_data_ptr; - printable_size = 1; - next_data = utf8_data_ptr + 1; - } + + // We failed to figure out how to print this string. + if (!printable_bytes || !next_data) + return false; + for (unsigned c = 0; c < printable_size; c++) stream.Printf("%c", *(printable_bytes + c)); utf8_data_ptr = (uint8_t *)next_data; Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp =================================================================== --- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -527,6 +527,16 @@ size = (layout == eLibcxxStringLayoutModeDSC) ? size_mode_value : ((size_mode_value >> 1) % 256); + + // When the small-string optimization takes place, the data must fit in the + // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's + // likely that the string isn't initialized and we're reading garbage. + ExecutionContext exe_ctx(location_sp->GetExecutionContextRef()); + auto max_bytes = location_sp->GetCompilerType().GetByteSize( + exe_ctx.GetBestExecutionContextScope()); + if (size > max_bytes) + return false; + return (location_sp.get() != nullptr); } else { ValueObjectSP l(D->GetChildAtIndex(0, true)); @@ -657,9 +667,7 @@ options.SetQuote('"'); options.SetSourceSize(size); options.SetBinaryZeroIsTerminator(false); - StringPrinter::ReadBufferAndDumpToStream(options); - - return true; + return StringPrinter::ReadBufferAndDumpToStream(options); } bool lldb_private::formatters::LibcxxStringSummaryProviderASCII(