Index: benchmarks/algorithms.bench.cpp =================================================================== --- benchmarks/algorithms.bench.cpp +++ benchmarks/algorithms.bench.cpp @@ -4,9 +4,25 @@ #include "benchmark/benchmark_api.h" #include "GenerateInput.hpp" +#include "test_configs.h" +#include "test_utils.h" + +#include +#include +#include +#include +#include constexpr std::size_t TestNumInputs = 1024; +#define START_TIMER auto start = std::chrono::high_resolution_clock::now(); +#define STOP_TIMER auto end = std::chrono::high_resolution_clock::now();\ + auto elapsed_seconds =\ + std::chrono::duration_cast>(\ + end - start);\ + state.SetIterationTime(elapsed_seconds.count()); + + template void BM_Sort(benchmark::State& st, GenInputs gen) { using ValueType = typename decltype(gen(0))::value_type; @@ -31,6 +47,88 @@ } } +template +void BM_sort_std_common(benchmark::State& state) { + int N = state.range(0); + V v(N); + fill_random(v); + using T = typename V::value_type; + while (state.KeepRunning()) { + std::vector vec(v.begin(), v.end()); + START_TIMER + std::sort(vec.begin(), vec.end()); + STOP_TIMER + } + state.SetComplexityN(N); +} + +template +void BM_sort_std_list_with_vector(benchmark::State& state) { + int N = state.range(0); + V v(N); + fill_random(v); + using T = typename V::value_type; + // Copy the contents into a vector + while (state.KeepRunning()) { + std::vector vec(v.begin(), v.end()); + // Sort the vector + std::sort(vec.begin(), vec.end()); + // Put the item back in the list + v.assign(vec.begin(), vec.end()); + } + state.SetComplexityN(N); +} + +// Sort (a sequence in ascending order) in ascending order. +template +void BM_sort_std_ascending(benchmark::State& state) { + int N = state.range(0); + using T = typename V::value_type; + V v(N); + fill_seq(v); + while (state.KeepRunning()) { + std::vector vec(v.begin(), v.end()); + START_TIMER + std::sort(vec.begin(), vec.end(), std::less()); + STOP_TIMER + } + state.SetComplexityN(N); +} + +// Sort (a sequence in ascending order) in descending order. +template +void BM_sort_std_descending(benchmark::State& state) { + int N = state.range(0); + using T = typename V::value_type; + V v(N); + fill_seq(v); + while (state.KeepRunning()) { + std::vector vec(v.begin(), v.end()); + START_TIMER + std::sort(vec.begin(), vec.end(), std::greater()); + STOP_TIMER + } + state.SetComplexityN(N); +} + +template +void BM_sort_std_worst_quick(benchmark::State& state) { + int N = state.range(0); + using T = typename V::value_type; + V v; + make_killer(N, v); + while (state.KeepRunning()) { + std::vector vec(v.begin(), v.end()); + START_TIMER + std::sort(vec.begin(), vec.end()); + STOP_TIMER + } + state.SetComplexityN(N); +} + + + + BENCHMARK_CAPTURE(BM_Sort, random_uint32, getRandomIntegerInputs)->Arg(TestNumInputs); @@ -58,5 +156,17 @@ BENCHMARK_CAPTURE(BM_Sort, single_element_strings, getDuplicateStringInputs)->Arg(TestNumInputs); +#define COMPLEXITY_BENCHMARK_GEN_T(T) \ + COMPLEXITY_BENCHMARK_GEN(BM_sort_std_common, std::vector, MSize);\ + COMPLEXITY_BENCHMARK_GEN(BM_sort_std_ascending, std::vector, MSize);\ + COMPLEXITY_BENCHMARK_GEN(BM_sort_std_descending, std::vector, MSize);\ + +COMPLEXITY_BENCHMARK_GEN_T(int) +//COMPLEXITY_BENCHMARK_GEN_T(double) +COMPLEXITY_BENCHMARK_GEN_T(aggregate) + +COMPLEXITY_BENCHMARK_GEN(BM_sort_std_list_with_vector, std::list, MSize); +COMPLEXITY_BENCHMARK_GEN(BM_sort_std_list_with_vector, std::list, MSize); +COMPLEXITY_BENCHMARK_GEN(BM_sort_std_worst_quick, std::vector, MSize); BENCHMARK_MAIN() Index: include/algorithm =================================================================== --- include/algorithm +++ include/algorithm @@ -642,6 +642,7 @@ #include // needed to provide swap_ranges. #include #include +#include #include #if defined(__IBMCPP__) @@ -3994,7 +3995,14 @@ template void -__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +__partial_sort(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + +// Using introsort algorithm for sorting +template +void +__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __depth_limit) { // _Compare is known to be a reference type typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -4029,6 +4037,12 @@ _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp); return; } + if (__depth_limit == 0) + { + __partial_sort<_Compare>(__first,__last,__last,__comp); + return; + } + // __len > 5 _RandomAccessIterator __m = __first; _RandomAccessIterator __lm1 = __last; @@ -4172,19 +4186,33 @@ // sort smaller range with recursive call and larger with tail recursion elimination if (__i - __first < __last - __i) { - _VSTD::__sort<_Compare>(__first, __i, __comp); - // _VSTD::__sort<_Compare>(__i+1, __last, __comp); + _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit); + // _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit); __first = ++__i; } else { - _VSTD::__sort<_Compare>(__i+1, __last, __comp); - // _VSTD::__sort<_Compare>(__first, __i, __comp); + _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit); + // _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit); __last = __i; } + --__depth_limit; } } +template +void +__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +{ + + // Threshold(or depth limit) of 2*log2(size) gives the best performance for worst case + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; + const difference_type __dp = __last - __first; + difference_type __depth_limit = __last == __first ? 0 : _VSTD::log2(__dp); + __depth_limit *= 2; + __intro_sort<_Compare>(__first, __last, __comp, __depth_limit); +} + // This forwarder keeps the top call and the recursive calls using the same instantiation, forcing a reference _Compare template inline _LIBCPP_INLINE_VISIBILITY Index: include/rng_utils.h =================================================================== --- /dev/null +++ include/rng_utils.h @@ -0,0 +1,47 @@ +#ifndef RNG_UTILS_H +#define RNG_UTILS_H +#include + +template struct uniform_distribution { + typedef void type; // error +}; + +template<> struct uniform_distribution { + typedef std::uniform_int_distribution type; +}; + +template<> struct uniform_distribution { + typedef std::uniform_int_distribution type; +}; + +template<> struct uniform_distribution { + typedef std::uniform_int_distribution type; +}; + +template<> struct uniform_distribution { + typedef std::uniform_real_distribution type; +}; + +template<> struct uniform_distribution { + typedef std::uniform_real_distribution type; +}; + +template using gen = typename uniform_distribution::type; + +class random_device { +public: + template + T get_rand(T min, T max) { + std::mt19937 e(rd()); // seed the generator + gen d(min, max); // define the range + return d(e); + } + +private: + // TODO: Fix the seed to a constant. + std::random_device rd; // obtain a random number from hardware +}; + + +#endif // RNG_UTILS_H + Index: include/test_configs.h =================================================================== --- /dev/null +++ include/test_configs.h @@ -0,0 +1,59 @@ +#ifndef TEST_CONFIGS_H +#define TEST_CONFIGS_H + +#define KB << 10 +#define MB << 20 +#define GB << 30 + +#define i7_4770 + + +// Configurations for i7_4770 +#ifdef i7_4770 +// To benchmark data residing completely in L1 cache. +#ifdef ENABLE_TRAVIS_BUILD +#define L1 (32 KB) +// To benchmark data residing in L2 cache. +#define L2 (256 KB) +#else +// For the Travis CI to run the entire test. +#define L1 (16 KB) +#define L2 (32 KB) +#endif + +// To benchmark data residing in L3 cache. +#define L3 (8192 KB) +// To benchmark data residing in main memory. +#define MEMORY (12 GB) +#endif + +#define SINGLE_ARG(...) __VA_ARGS__ + +#define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->RangeMultiplier(2)\ + ->Range(L1, L2) + +#define COMPLEXITY_BENCHMARK(x, CACHE_TYPE) BENCHMARK(x)->RangeMultiplier(2)\ + ->Range(L1, CACHE_TYPE)->Complexity() + +#define COMPLEXITY_BENCHMARK_GEN(x, y, CACHE_TYPE) BENCHMARK_TEMPLATE(x, y)\ + ->RangeMultiplier(2)->Range(L1, CACHE_TYPE)\ + ->Complexity() +#endif // TEST_CONFIGS_H + +constexpr int MSize = L2; + +#if defined(__clang__) +#define COMPILER_CLANG +#elif defined(__GNUC__) +#define COMPILER_GCC +#elif defined(_MSC_VER) +#define COMPILER_MSVC +#endif + +#if defined(COMPILER_GCC) || defined(COMPILER_CLANG) +#define ATTR_NOINLINE __attribute__((noinline)) +#elif defined(COMPILER_MSVC) +#define ATTR_NOINLINE __declspec(noinline) +#else +#define ATTR_NOINLINE +#endif Index: include/test_utils.h =================================================================== --- /dev/null +++ include/test_utils.h @@ -0,0 +1,270 @@ +#ifndef TEST_UTILS_H +#define TEST_UTILS_H + +#include "rng_utils.h" +#include +#include + +// TODO: Add more aggregates. +struct aggregate { + int first; + int second; + int third; + int fourth; + aggregate() : first(0), second(0), third(0), fourth(0) + {} + // This is a hacky constructor for ::find on associative containers to work. + aggregate(int i) + : first(i), second(i), third(i), fourth(i) + {} + aggregate(int i, int j, int k, int l) + : first(i), second(j), third(k), fourth(l) + {} + + aggregate& operator++() { + ++first; + ++second; + ++third; + ++fourth; + return *this; + } + aggregate operator++(int) { + aggregate N(*this); + ++(*this); + return N; + } + + bool operator<(const aggregate& i) const { + return first < i.first; + } + + bool operator>(const aggregate& i) const { + return i < *this; + } + + bool operator==(const aggregate& i) const { + return first == i.first; + } + + bool operator!=(const aggregate& i) const { + return !(*this == i); + } +}; + +// Hasher for aggregate data type. +namespace std { + template <> + struct hash + { + std::size_t operator()(const aggregate& k) const + { + using std::hash; + // Hash and combine using bit-shift. + return ((hash()(k.first) + ^ (hash()(k.second) << 1)) >> 1) + ^ (hash()(k.third) << 1) + ^ (hash()(k.fourth) << 1); + } + }; +} + +template +struct remove_const { typedef T type; }; + +// value_type of a std::map is std::pair +template +struct remove_const> { typedef std::pair type; }; + +template +T get_rand(random_device &r, int max) { + return r.get_rand(T(0), T(max)); +} + +template<> +std::pair get_rand>(random_device &r, int max) { + return std::make_pair(r.get_rand(0, max), r.get_rand(0, max)); +} + +template<> +aggregate get_rand(random_device &r, int max) { + return aggregate(r.get_rand(0, max)); +} + +template<> +std::pair +get_rand>(random_device &r, int max) { + return std::make_pair(r.get_rand(0, max), r.get_rand(0, max)); +} + +template +T increment(T &i) { + return ++i; +} + +// value_type of a std::map is std::pair +template<> +std::pair increment>(std::pair &i) { + return std::make_pair(++i.first, i.second); +} + +template<> +std::pair +increment>(std::pair &i) { + return std::make_pair(++i.first, i.second); +} + +template +T init() { + return T(0); +} + +template<> +std::pair init>() { + return std::make_pair(0, 0); +} + +template<> +std::pair init>() { + return std::make_pair(0, 0); +} + +template