diff --git a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst new file mode 100644 --- /dev/null +++ b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst @@ -0,0 +1,86 @@ +================================== +Unspecified Behavior Randomization +================================== + +Background +========== + +Consider the follow snippet which steadily happens in tests: + + +.. code-block:: cpp + + std::vector> v(SomeData()); + std::sort(v.begin(), v.end(), [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + +Under this assumption all elements in vector whose first elements are equal do +not guarantee any order. Unfortunately, this prevents libcxx introducing other +algorithms because tests might silently fail and the users might heavily depend +on the stability of algorithms and containers. + +Goal +=================== + +Provide functionality for randomizing the unspecified behavior so that the users +can test and migrate their components and libcxx can introduce new sorting +algorithms and optimizations to the containers. + +For example, as of llvm version 12, libcxx sorting algorithm takes +`O(n^2) worst case `_ but according +to the standard its worst case should be `O(n log n)`. This effort helps users +to gradually fix their tests while updating to new faster algorithms. + +Design +====== + +* Introduce new macro `_LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY` which should + be a part of the libcxx config. +* This macro randomizes the unspecified behavior of algorithms and containers. + For example, for sorting algorithm the input range is shuffled and then + sorted. +* This macro is off by default because users should turn it only for testing + purposes and/or migrations if they happen to libcxx. +* This feature is only available for C++11 and further because of + `std::shuffle` availability. +* We may use `ASLR `_ or + static `std::random_device` for seeding the random number generator. This + guarantees the same stability guarantee within run but not through different + runs, for example, for tests become flaky and eventually be seen as broken. + For platforms which do not support ASLR, the seed is fixed during build. +* The users can fix the seed of the random number generator by providing + `_LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY_SEED=seed` definition. + +This comes with some side effects if any of the flags is on: + +* Computation penalty, we think users are OK with that if they use this feature. +* Non reproducible results if they don't use the fixed seed. + + +Impact +------------------ + +Google has measured couple of thousands of tests to be dependent on the +stability of sorting and selection algorithms. As we also plan on updating +(or least, providing under flag more) sorting algorithms, this effort helps +doing it gradually and sustainably. This is also bad for users to depend on the +unspecified behavior in their tests, this effort helps to turn this flag in +debug mode. + +Potential breakages +------------------- + +None if the flag is off. If the flag is on, it may lead to some non-reproducible +results, for example, for caching. + +Currently supported randomization +--------------------------------- + +* `std::sort`, there is no guarantee on the order of equal elements +* `std::partial_sort`, there is no guarantee on the order of equal elements and + on the order of the remaining part +* `std::nth_element`, there is no guarantee on the order from both sides of the + partition + +Patches welcome. diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst --- a/libcxx/docs/UsingLibcxx.rst +++ b/libcxx/docs/UsingLibcxx.rst @@ -241,6 +241,14 @@ warning saying that `std::auto_ptr` is deprecated. If the macro is defined, no warning will be emitted. By default, this macro is not defined. +**_LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY**: + This macro enables the randomization of unspecified behavior in libcxx, for + example, for equal elements in sorting algorithm or randomizing both parts of + the partition after `std::nth_element` call. This effort helps you to migrate + to potential future faster versions of these algorithms and deflake your tests + which depend on such behavior. To fix the seed, use + `_LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY_SEED=seed` definition. + C++17 Specific Configuration Macros ----------------------------------- **_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES**: diff --git a/libcxx/include/__config b/libcxx/include/__config --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -888,6 +888,10 @@ # error Supported values for _LIBCPP_DEBUG are 0 and 1 #endif +#if defined(_LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY) && defined(_LIBCPP_CXX03_LANG) +# error Supported unspecified stability is only for C++11 and higher +#endif + // _LIBCPP_DEBUG_LEVEL is always defined to one of [0, 1, 2] at this point #if _LIBCPP_DEBUG_LEVEL >= 1 && !defined(_LIBCPP_DISABLE_EXTERN_TEMPLATE) # define _LIBCPP_EXTERN_TEMPLATE(...) diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -3107,6 +3107,57 @@ return static_cast(__u + __p.a()); } + +#ifdef _LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY + +template +class _LIBCPP_TYPE_VIS __random_unspecified_behavior_gen_ +{ +public: + __random_unspecified_behavior_gen_() { + __state = __seed(); + __inc = __state + 0xda3e39cb94b95bdbULL; + __inc = (__inc << 1) | 1; + } + typedef std::uint_fast32_t result_type; + + static const result_type _Min = 0; + static const result_type _Max = 0xFFFFFFFF; + + result_type operator()() { + std::uint_fast64_t __oldstate = __state; + __state = __oldstate * 6364136223846793005ULL + __inc; + std::uint_fast32_t __xorshifted = ((__oldstate >> 18u) ^ __oldstate) >> 27u; + std::uint_fast32_t __rot = __oldstate >> 59u; + return (__xorshifted >> __rot) | (__xorshifted << ((-__rot) & 31)); + } + + static _LIBCPP_CONSTEXPR result_type min() {return _Min;} + static _LIBCPP_CONSTEXPR result_type max() {return _Max;} +public: + static const void* const __seed_static; + std::uint_fast64_t __state; + std::uint_fast64_t __inc; + _LIBCPP_ALWAYS_INLINE result_type __seed() { +#ifdef _LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY_SEED + return static_cast( + _LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY_SEED); +#else + return static_cast( + reinterpret_cast(__seed_static)); +#endif + } +}; + +template +const void* const __random_unspecified_behavior_gen_<_Void>::__seed_static = + &__seed_static; + +using __random_unspecified_behavior_gen = + __random_unspecified_behavior_gen_; + +#endif + #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE) \ || defined(_LIBCPP_BUILDING_LIBRARY) class _LIBCPP_TYPE_VIS __rs_default; @@ -5188,6 +5239,10 @@ partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { +#ifdef _LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY + if (!__libcpp_is_constant_evaluated()) + _VSTD::shuffle(__first, __last, __random_unspecified_behavior_gen()); +#endif typedef typename __comp_ref_type<_Compare>::type _Comp_ref; _VSTD::__partial_sort<_Comp_ref>(__first, __middle, __last, __comp); } @@ -5448,6 +5503,10 @@ void nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) { +#ifdef _LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY + if (!__libcpp_is_constant_evaluated()) + _VSTD::shuffle(__first, __last, __random_unspecified_behavior_gen()); +#endif typedef typename __comp_ref_type<_Compare>::type _Comp_ref; _VSTD::__nth_element<_Comp_ref>(__first, __nth, __last, __comp); } @@ -5471,6 +5530,9 @@ if (__libcpp_is_constant_evaluated()) { _VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp)); } else { +#ifdef _LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY + _VSTD::shuffle(__first, __last, __random_unspecified_behavior_gen()); +#endif _VSTD::__sort<_Comp_ref>(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _Comp_ref(__comp)); } } diff --git a/libcxx/test/libcxx/algorithms/nth_element_stability.pass.cpp b/libcxx/test/libcxx/algorithms/nth_element_stability.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/libcxx/algorithms/nth_element_stability.pass.cpp @@ -0,0 +1,112 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Test std::nth_element stability randomization + +// UNSUPPORTED: c++03 + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY + +#include +#include +#include +#include +#include + +#include "test_macros.h" + +struct MyType { + int value = 0; + constexpr bool operator<(const MyType& other) const { + return value < other.value; + } +}; + +std::vector deterministic() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = (i % 2 ? i : kSize / 2 + i); + } + std::__nth_element(v.begin(), v.begin() + kSize / 2, v.end(), + std::less()); + return v; +} + +void test_randomization() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = (i % 2 ? i : kSize / 2 + i); + } + auto deterministic_v = deterministic(); + std::nth_element(v.begin(), v.begin() + kSize / 2, v.end()); + bool all_equal = true; + for (int i = 0; i < kSize; ++i) { + if (v[i].value != deterministic_v[i].value) { + all_equal = false; + } + } + assert(!all_equal); +} + +void test_same() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = (i % 2 ? i : kSize / 2 + i); + } + auto snapshot_v = v; + auto snapshot_custom_v = v; + std::nth_element(v.begin(), v.begin() + kSize / 2, v.end()); + std::nth_element(snapshot_v.begin(), snapshot_v.begin() + kSize / 2, + snapshot_v.end()); + std::nth_element( + snapshot_custom_v.begin(), snapshot_custom_v.begin() + kSize / 2, + snapshot_custom_v.end(), [](const MyType& lhs, const MyType& rhs) { + return lhs.value < rhs.value; + }); + bool all_equal = true; + for (int i = 0; i < kSize; ++i) { + if (v[i].value != snapshot_v[i].value || + v[i].value != snapshot_custom_v[i].value) { + all_equal = false; + } + if (i < kSize / 2) { + assert(v[i].value <= v[kSize / 2].value); + } + } + assert(all_equal); +} + +#if TEST_STD_VER > 17 +constexpr bool test_constexpr() { + std::array v; + for (int i = 9; i >= 0; --i) { + v[9 - i].value = i; + } + std::nth_element(v.begin(), v.begin() + 5, v.end()); + return std::is_partitioned(v.begin(), v.end(), [&](const MyType& m) { + return m.value <= v[5].value; + }); +} +#endif + +int main(int, char**) { + test_randomization(); + test_same(); +#if TEST_STD_VER > 17 + static_assert(test_constexpr(), ""); +#endif + return 0; +} diff --git a/libcxx/test/libcxx/algorithms/partial_sort_stability.pass.cpp b/libcxx/test/libcxx/algorithms/partial_sort_stability.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/libcxx/algorithms/partial_sort_stability.pass.cpp @@ -0,0 +1,110 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Test std::partial_sort stability randomization + +// UNSUPPORTED: c++03 + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY + +#include +#include +#include +#include +#include + +#include "test_macros.h" + +struct MyType { + int value = 0; + constexpr bool operator<(const MyType& other) const { + return value < other.value; + } +}; + +std::vector deterministic() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = (i % 2 ? 1 : kSize / 2 + i); + } + std::__partial_sort(v.begin(), v.begin() + kSize / 2, v.end(), + std::less()); + return v; +} + +void test_randomization() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = (i % 2 ? 1 : kSize / 2 + i); + } + auto deterministic_v = deterministic(); + std::partial_sort(v.begin(), v.begin() + kSize / 2, v.end()); + bool all_equal = true; + for (int i = 0; i < kSize; ++i) { + if (v[i].value != deterministic_v[i].value) { + all_equal = false; + } + } + assert(!all_equal); +} + +void test_same() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = (i % 2 ? 1 : kSize / 2 + i); + } + auto snapshot_v = v; + auto snapshot_custom_v = v; + std::partial_sort(v.begin(), v.begin() + kSize / 2, v.end()); + std::partial_sort(snapshot_v.begin(), snapshot_v.begin() + kSize / 2, + snapshot_v.end()); + std::partial_sort( + snapshot_custom_v.begin(), snapshot_custom_v.begin() + kSize / 2, + snapshot_custom_v.end(), [](const MyType& lhs, const MyType& rhs) { + return lhs.value < rhs.value; + }); + bool all_equal = true; + for (int i = 0; i < kSize; ++i) { + if (v[i].value != snapshot_v[i].value || + v[i].value != snapshot_custom_v[i].value) { + all_equal = false; + } + if (i < kSize / 2) { + assert(v[i].value == 1); + } + } + assert(all_equal); +} + +#if TEST_STD_VER > 17 +constexpr bool test_constexpr() { + std::array v; + for (int i = 9; i >= 0; --i) { + v[9 - i].value = i; + } + std::partial_sort(v.begin(), v.begin() + 5, v.end()); + return std::is_sorted(v.begin(), v.begin() + 5); +} +#endif + +int main(int, char**) { + test_randomization(); + test_same(); +#if TEST_STD_VER > 17 + static_assert(test_constexpr(), ""); +#endif + return 0; +} diff --git a/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp b/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp @@ -0,0 +1,100 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Test std::sort stability randomization + +// UNSUPPORTED: c++03 + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_RANDOMIZE_UNSPECIFIED_STABILITY + +#include +#include +#include +#include +#include + +#include "test_macros.h" + +struct EqualType { + int value = 0; + constexpr bool operator<(const EqualType&) const { return false; } +}; + +std::vector deterministic() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = kSize / 2 - i * (i % 2 ? -1 : 1); + } + std::__sort(v.begin(), v.end(), std::less()); + return v; +} + +void test_randomization() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = kSize / 2 - i * (i % 2 ? -1 : 1); + } + auto deterministic_v = deterministic(); + std::sort(v.begin(), v.end()); + bool all_equal = true; + for (int i = 0; i < kSize; ++i) { + if (v[i].value != deterministic_v[i].value) { + all_equal = false; + } + } + assert(!all_equal); +} + +void test_same() { + static constexpr int kSize = 100; + std::vector v; + v.resize(kSize); + for (int i = 0; i < kSize; ++i) { + v[i].value = kSize / 2 - i * (i % 2 ? -1 : 1); + } + auto snapshot_v = v; + auto snapshot_custom_v = v; + std::sort(v.begin(), v.end()); + std::sort(snapshot_v.begin(), snapshot_v.end()); + std::sort(snapshot_custom_v.begin(), snapshot_custom_v.end(), + [](const EqualType&, const EqualType&) { return false; }); + bool all_equal = true; + for (int i = 0; i < kSize; ++i) { + if (v[i].value != snapshot_v[i].value || + v[i].value != snapshot_custom_v[i].value) { + all_equal = false; + } + } + assert(all_equal); +} + +#if TEST_STD_VER > 17 +constexpr bool test_constexpr() { + std::array v; + for (int i = 9; i >= 0; --i) { + v[9 - i].value = i; + } + std::sort(v.begin(), v.end()); + return std::is_sorted(v.begin(), v.end()); +} +#endif + +int main(int, char**) { + test_randomization(); + test_same(); +#if TEST_STD_VER > 17 + static_assert(test_constexpr(), ""); +#endif + return 0; +}