Index: benchmarks/GenerateInput.hpp =================================================================== --- benchmarks/GenerateInput.hpp +++ benchmarks/GenerateInput.hpp @@ -138,5 +138,40 @@ return cinputs; } +template +inline std::vector make_killer(size_t N) { + std::vector inputs; + uint32_t candidate = 0; + uint32_t num_solid = 0; + uint32_t gas = N - 1; + + std::vector tmp(N); + inputs.resize(N); + + for (T i = 0; i < N; ++i) { + tmp[i] = i; + inputs[i] = gas; + } + + std::sort(tmp.begin(), tmp.end(), [&](T x, T y) { + if (inputs[x] == gas && inputs[y] == gas) { + if (x == candidate) inputs[x] = num_solid++; + else inputs[y] = num_solid++; + } + + if (inputs[x] == gas) candidate = x; + else if (inputs[y] == gas) candidate = y; + + return inputs[x] < inputs[y]; + }); + return inputs; +} + + +template +inline std::vector getQSortKiller(size_t N){ + std::vector inputs = make_killer(N); + return inputs; +} #endif // BENCHMARK_GENERATE_INPUT_HPP Index: benchmarks/algorithms.bench.cpp =================================================================== --- benchmarks/algorithms.bench.cpp +++ benchmarks/algorithms.bench.cpp @@ -5,7 +5,7 @@ #include "benchmark/benchmark_api.h" #include "GenerateInput.hpp" -constexpr std::size_t TestNumInputs = 1024; +constexpr std::size_t TestNumInputs = 1024*64; template void BM_Sort(benchmark::State& st, GenInputs gen) { @@ -58,5 +58,8 @@ BENCHMARK_CAPTURE(BM_Sort, single_element_strings, getDuplicateStringInputs)->Arg(TestNumInputs); +BENCHMARK_CAPTURE(BM_Sort, qsort_worst_uint32, + getQSortKiller)->Arg(TestNumInputs); + BENCHMARK_MAIN() Index: include/algorithm =================================================================== --- include/algorithm +++ include/algorithm @@ -642,6 +642,7 @@ #include // needed to provide swap_ranges. #include #include +#include #include #if defined(__IBMCPP__) @@ -3994,7 +3995,14 @@ template void -__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +__partial_sort(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + +// Using introsort algorithm for sorting +template +void +__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __depth_limit) { // _Compare is known to be a reference type typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -4029,6 +4037,12 @@ _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp); return; } + if (__depth_limit == 0) + { + __partial_sort<_Compare>(__first,__last,__last,__comp); + return; + } + // __len > 5 _RandomAccessIterator __m = __first; _RandomAccessIterator __lm1 = __last; @@ -4172,19 +4186,33 @@ // sort smaller range with recursive call and larger with tail recursion elimination if (__i - __first < __last - __i) { - _VSTD::__sort<_Compare>(__first, __i, __comp); - // _VSTD::__sort<_Compare>(__i+1, __last, __comp); + _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit); + // _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit); __first = ++__i; } else { - _VSTD::__sort<_Compare>(__i+1, __last, __comp); - // _VSTD::__sort<_Compare>(__first, __i, __comp); + _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit); + // _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit); __last = __i; } + --__depth_limit; } } +template +void +__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +{ + + // Threshold(or depth limit) for introsort is taken to be 2*log2(size) + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; + const difference_type __dp = __last - __first; + difference_type __depth_limit = __last == __first ? 0 : _VSTD::log2(__dp); + __depth_limit *= 2; + __intro_sort<_Compare>(__first, __last, __comp, __depth_limit); +} + // This forwarder keeps the top call and the recursive calls using the same instantiation, forcing a reference _Compare template inline _LIBCPP_INLINE_VISIBILITY