diff --git a/libcxx/include/string b/libcxx/include/string --- a/libcxx/include/string +++ b/libcxx/include/string @@ -532,6 +532,8 @@ #include <__utility/auto_cast.h> #include <__utility/move.h> #include <__utility/swap.h> +#include <__utility/unreachable.h> +#include #include #include // EOF #include @@ -539,6 +541,7 @@ #include #include #include +#include #include #include #include @@ -674,17 +677,10 @@ { pointer __data_; size_type __size_; - size_type __cap_; + size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1; + size_type __is_long_ : 1; }; -#ifdef _LIBCPP_BIG_ENDIAN - static const size_type __short_mask = 0x01; - static const size_type __long_mask = 0x1ul; -#else // _LIBCPP_BIG_ENDIAN - static const size_type __short_mask = 0x80; - static const size_type __long_mask = ~(size_type(~0) >> 1); -#endif // _LIBCPP_BIG_ENDIAN - enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ? (sizeof(__long) - 1)/sizeof(value_type) : 2}; @@ -692,26 +688,46 @@ { value_type __data_[__min_cap]; unsigned char __padding[sizeof(value_type) - 1]; - unsigned char __size_; + unsigned char __size_ : 7; + unsigned char __is_long_ : 1; }; +// The __endian_factor is required because the field we use to store the size +// (either size_type or unsigned char depending on long/short) has one fewer +// bit than it would if it were not a bitfield. +// +// If the LSB is used to store the short-flag in the short string representation, +// we have to multiply the size by two when it is stored and divide it by two when +// it is loaded to make sure that we always store an even number. In the long string +// representation, we can ignore this because we can assume that we always allocate +// an even amount of value_types. +// +// If the MSB is used for the short-flag, the max_size() is numeric_limits::max() / 2. +// This does not impact the short string representation, since we never need the MSB +// for representing the size of a short string anyway. + +#ifdef _LIBCPP_BIG_ENDIAN + static const size_type __endian_factor = 2; #else + static const size_type __endian_factor = 1; +#endif + +#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT + +#ifdef _LIBCPP_BIG_ENDIAN + static const size_type __endian_factor = 1; +#else + static const size_type __endian_factor = 2; +#endif struct __long { - size_type __cap_; + size_type __is_long_ : 1; + size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1; size_type __size_; pointer __data_; }; -#ifdef _LIBCPP_BIG_ENDIAN - static const size_type __short_mask = 0x80; - static const size_type __long_mask = ~(size_type(~0) >> 1); -#else // _LIBCPP_BIG_ENDIAN - static const size_type __short_mask = 0x01; - static const size_type __long_mask = 0x1ul; -#endif // _LIBCPP_BIG_ENDIAN - enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ? (sizeof(__long) - 1)/sizeof(value_type) : 2}; @@ -719,7 +735,10 @@ { union { - unsigned char __size_; + struct { + unsigned char __is_long_ : 1; + unsigned char __size_ : 7; + }; value_type __lx; }; value_type __data_[__min_cap]; @@ -1426,8 +1445,9 @@ _LIBCPP_INLINE_VISIBILITY void __shrink_or_extend(size_type __target_capacity); _LIBCPP_INLINE_VISIBILITY - bool __is_long() const _NOEXCEPT - {return bool(__r_.first().__s.__size_ & __short_mask);} + bool __is_long() const _NOEXCEPT { + return __r_.first().__s.__is_long_; + } #if _LIBCPP_DEBUG_LEVEL == 2 @@ -1474,43 +1494,18 @@ _LIBCPP_HIDE_FROM_ABI allocator_type& __alloc() _NOEXCEPT { return __r_.second(); } _LIBCPP_HIDE_FROM_ABI const allocator_type& __alloc() const _NOEXCEPT { return __r_.second(); } -#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT - - _LIBCPP_INLINE_VISIBILITY - void __set_short_size(size_type __s) _NOEXCEPT -# ifdef _LIBCPP_BIG_ENDIAN - {__r_.first().__s.__size_ = (unsigned char)(__s << 1);} -# else - {__r_.first().__s.__size_ = (unsigned char)(__s);} -# endif - - _LIBCPP_INLINE_VISIBILITY - size_type __get_short_size() const _NOEXCEPT -# ifdef _LIBCPP_BIG_ENDIAN - {return __r_.first().__s.__size_ >> 1;} -# else - {return __r_.first().__s.__size_;} -# endif - -#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT - _LIBCPP_INLINE_VISIBILITY - void __set_short_size(size_type __s) _NOEXCEPT -# ifdef _LIBCPP_BIG_ENDIAN - {__r_.first().__s.__size_ = (unsigned char)(__s);} -# else - {__r_.first().__s.__size_ = (unsigned char)(__s << 1);} -# endif + void __set_short_size(size_type __s) _NOEXCEPT { + _LIBCPP_ASSERT(__s < __min_cap, "__s should never be greater or equal to the short string capacity"); + __r_.first().__s.__size_ = __s; + __r_.first().__s.__is_long_ = false; + } _LIBCPP_INLINE_VISIBILITY - size_type __get_short_size() const _NOEXCEPT -# ifdef _LIBCPP_BIG_ENDIAN - {return __r_.first().__s.__size_;} -# else - {return __r_.first().__s.__size_ >> 1;} -# endif - -#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT + size_type __get_short_size() const _NOEXCEPT { + _LIBCPP_ASSERT(!__r_.first().__s.__is_long_, "String has to be short when trying to get the short size"); + return __r_.first().__s.__size_; + } _LIBCPP_INLINE_VISIBILITY void __set_long_size(size_type __s) _NOEXCEPT @@ -1523,11 +1518,15 @@ {if (__is_long()) __set_long_size(__s); else __set_short_size(__s);} _LIBCPP_INLINE_VISIBILITY - void __set_long_cap(size_type __s) _NOEXCEPT - {__r_.first().__l.__cap_ = __long_mask | __s;} + void __set_long_cap(size_type __s) _NOEXCEPT { + __r_.first().__l.__cap_ = __s / __endian_factor; + __r_.first().__l.__is_long_ = true; + } + _LIBCPP_INLINE_VISIBILITY - size_type __get_long_cap() const _NOEXCEPT - {return __r_.first().__l.__cap_ & size_type(~__long_mask);} + size_type __get_long_cap() const _NOEXCEPT { + return __r_.first().__l.__cap_ * __endian_factor; + } _LIBCPP_INLINE_VISIBILITY void __set_long_pointer(pointer __p) _NOEXCEPT @@ -3225,11 +3224,12 @@ basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT { size_type __m = __alloc_traits::max_size(__alloc()); -#ifdef _LIBCPP_BIG_ENDIAN - return (__m <= ~__long_mask ? __m : __m/2) - __alignment; -#else - return __m - __alignment; -#endif + if (__m <= std::numeric_limits::max() / 2) { + return __m - __alignment; + } else { + bool __uses_lsb = __endian_factor == 2; + return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment; + } } template diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +#include +#include + +#include "test_macros.h" + +bool test() { + // alignment of the string heap buffer is hardcoded to 16 + const size_t alignment = 16; + + std::string str; +#ifndef TEST_HAS_NO_UNICODE_CHARS + std::u16string u16str; + std::u32string u32str; +#endif +#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT + +# ifdef _LIBCPP_BIG_ENDIAN + + assert(str.max_size() == std::numeric_limits::max() - alignment); +# ifndef TEST_HAS_NO_UNICODE_CHARS + assert(u16str.max_size() == std::numeric_limits::max() / 2 - alignment); + assert(u32str.max_size() == std::numeric_limits::max() / 4 - alignment); +# endif // TEST_HAS_NO_UNICODE_CHARS + +# else // _LIBCPP_BIG_ENDIAN + + assert(str.max_size() == std::numeric_limits::max() / 2 - alignment); +# ifndef TEST_HAS_NO_UNICODE_CHARS + assert(u16str.max_size() == std::numeric_limits::max() / 2 - alignment); + assert(u32str.max_size() == std::numeric_limits::max() / 4 - alignment); +# endif // TEST_HAS_NO_UNICODE_CHARS + +# endif // _LIBCPP_BIG_ENDIAN + +#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT + +# ifdef _LIBCPP_BIG_ENDIAN + + assert(str.max_size() == std::numeric_limits::max() / 2 - alignment); +# ifndef TEST_HAS_NO_UNICODE_CHARS + assert(u16str.max_size() == std::numeric_limits::max() / 2 - alignment); + assert(u32str.max_size() == std::numeric_limits::max() / 4 - alignment); +# endif // TEST_HAS_NO_UNICODE_CHARS + +# else + + assert(str.max_size() == std::numeric_limits::max() - alignment); +# ifndef TEST_HAS_NO_UNICODE_CHARS + assert(u16str.max_size() == std::numeric_limits::max() / 2 - alignment); + assert(u32str.max_size() == std::numeric_limits::max() / 4 - alignment); +# endif // TEST_HAS_NO_UNICODE_CHARS + +# endif // _LIBCPP_BIG_ENDIAN + +#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT + + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER > 17 + // static_assert(test()); +#endif + + return 0; +} diff --git a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp --- a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp @@ -8,6 +8,7 @@ // UNSUPPORTED: no-exceptions // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} +// XFAIL: use_system_cxx_lib && target=arm64{{.+}}-apple-macosx{{11.0|12.0}} // diff --git a/libcxx/utils/gdb/libcxx/printers.py b/libcxx/utils/gdb/libcxx/printers.py --- a/libcxx/utils/gdb/libcxx/printers.py +++ b/libcxx/utils/gdb/libcxx/printers.py @@ -192,26 +192,6 @@ class StdStringPrinter(object): """Print a std::string.""" - def _get_short_size(self, short_field, short_size): - """Short size depends on both endianness and a compile-time define.""" - - # If the padding field is present after all this indirection, then string - # was compiled with _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT defined. - field = short_field.type.fields()[1].type.fields()[0] - libcpp_abi_alternate_string_layout = field.name and "__padding" in field.name - - # This logical structure closely follows the original code (which is clearer - # in C++). Keep them parallel to make them easier to compare. - if libcpp_abi_alternate_string_layout: - if _libcpp_big_endian: - return short_size >> 1 - else: - return short_size - elif _libcpp_big_endian: - return short_size - else: - return short_size >> 1 - def __init__(self, val): self.val = val @@ -223,18 +203,13 @@ short_size = short_field["__size_"] if short_size == 0: return "" - short_mask = self.val["__short_mask"] - # Counter intuitive to compare the size and short_mask to see if the string - # is long, but that's the way the implementation does it. Note that - # __is_long() doesn't use get_short_size in C++. - is_long = short_size & short_mask - if is_long: + if short_field["__is_long_"]: long_field = value_field["__l"] data = long_field["__data_"] size = long_field["__size_"] else: data = short_field["__data_"] - size = self._get_short_size(short_field, short_size) + size = short_field["__size_"] return data.lazy_string(length=size) def display_hint(self):