diff --git a/libcxx/include/string b/libcxx/include/string --- a/libcxx/include/string +++ b/libcxx/include/string @@ -1173,8 +1173,8 @@ if (__m <= std::numeric_limits::max() / 2) { return __m - __alignment; } else { - bool __uses_lsb = __endian_factor == 2; - return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment; + bool __uses_lsb = __endian_factor == 2; + return (__uses_lsb ? __m - __alignment : (__m / 2) - __alignment); } } @@ -1872,7 +1872,17 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __align_it(size_type __s) _NOEXCEPT {return (__s + (__a-1)) & ~(__a-1);} - enum {__alignment = 16}; + // Round up allocations to 8 bytes. Previous versions of std::string used a + // a value of 16 based on malloc() returning 16 byte aligned allocations. + // The motivation likely being that the requested size is as close as is + // possible to the 'perfect fit'. However, glibc's malloc() uses 8 bytes + // of overhead for small allocations, which results in a usable size of + // `N * 16 + 8`. Rounding to 16 bytes here thus results in extra waste. + // Likewise, other allocators such as tcmalloc() have fine grained 8 byte + // alignments for various size classes, which also results in waste. + // Once the `size returning new` features is in the standard (P0901), we + // can leave all this rounding to the `allocate_at_least` function. + enum {__alignment = 8}; static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __recommend(size_type __s) _NOEXCEPT { @@ -2316,12 +2326,38 @@ size_type __n_copy, size_type __n_del, size_type __n_add, const value_type* __p_new_stuff) { size_type __ms = max_size(); - if (__delta_cap > __ms - __old_cap - 1) + if (__delta_cap > __ms - __old_cap) __throw_length_error(); - pointer __old_p = __get_pointer(); - size_type __cap = __old_cap < __ms / 2 - __alignment ? - __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : - __ms - 1; + pointer __old_p; + size_type __cap; + if (__is_long()) { + __old_p = __get_long_pointer(); + __cap = __old_cap < __ms / 2 - __alignment + ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) + : __ms - 1; + } else { + // The current string is inlined (SSO). In this case we do not want to + // double the capacity for amortized growth as that can be wasteful. + // Consider the following code: + // std::string s(""); + // s = "This is a string of 33 characters"; + // Doubling the capacity here (to guarantee amortized growth) results in + // a min cap of 44, which including terminating zero rounds to 48 bytes. + // Allocating 48 bytes in standard glibc malloc results in an allocation + // of 64 bytes including 8 bytes overhead: glibc malloc (ptmalloc) small + // allocations align on 16 bytes with 8 bytes overhead. The effective + // size then being N * 16 + 8. + // If we had allocated the exact size required, we come to 34 bytes + // which rounds to 40 bytes, which in glibc results in an allocation of + // 48 including overhead. Shorter: for all string lengths 23 - 39, we + // would allocate 16 bytes more than required, which adds up, especially + // if we consider that strings are predominantly short. + // We only need to guarantee amortized growth on continuous growth, so + // we can simply request the exact size here, and only apply the minimum + // doubling on subsequent growth on 'long' string values. + __old_p = __get_short_pointer(); + __cap = __recommend(__old_cap + __delta_cap); + } auto __allocation = std::__allocate_at_least(__alloc(), __cap + 1); pointer __p = __allocation.ptr; __begin_lifetime(__p, __allocation.count); @@ -2359,10 +2395,20 @@ size_type __ms = max_size(); if (__delta_cap > __ms - __old_cap) __throw_length_error(); - pointer __old_p = __get_pointer(); - size_type __cap = __old_cap < __ms / 2 - __alignment ? - __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : - __ms - 1; + pointer __old_p; + size_type __cap; + if (__is_long()) { + __old_p = __get_long_pointer(); + __cap = __old_cap < __ms / 2 - __alignment + ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) + : __ms - 1; + } else { + // The current string is inlined (SSO). In this case we do not want to + // double the capacity for amortized growth as that can be wasteful. + // See the comments in `__grow_by_and_replace` for more information. + __old_p = __get_short_pointer(); + __cap = __recommend(__old_cap + __delta_cap); + } auto __allocation = std::__allocate_at_least(__alloc(), __cap + 1); pointer __p = __allocation.ptr; __begin_lifetime(__p, __allocation.count); diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp --- a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp @@ -17,8 +17,8 @@ #include "test_macros.h" -// alignment of the string heap buffer is hardcoded to 16 -static const std::size_t alignment = 16; +// alignment of the string heap buffer is hard coded to 8. +static const std::size_t alignment = 8; template TEST_CONSTEXPR_CXX20 void full_size() {