Index: benchmarks/algorithms.merge.bench.cpp =================================================================== --- /dev/null +++ benchmarks/algorithms.merge.bench.cpp @@ -0,0 +1,127 @@ +#include +#include + +#include "benchmark/benchmark.h" + +#include "CartesianBenchmarks.hpp" +#include "GenerateInput.hpp" + +namespace { + +constexpr size_t kSplitPointCount = 5; + +template +std::array pickSplitPoints(I f, I l) { + std::ptrdiff_t N = std::distance(f, l); + + return { + f + N / 4, + f + N / 3, + f + N / 2, + f + 2 * N / 3, + f + 3 * N / 4 + }; +} + +template +struct TestIntBase { + static std::vector generateInput(size_t size) { + std::vector Res(size); + std::generate(Res.begin(), Res.end(), + [] { return getRandomInteger(); }); + return Res; + } +}; + +struct TestInt32 : TestIntBase { + static constexpr const char* Name = "TestInt32"; +}; + +struct TestInt64 : TestIntBase { + static constexpr const char* Name = "TestInt64"; +}; + +struct TestUint32 : TestIntBase { + static constexpr const char* Name = "TestUint32"; +}; + +struct TestMediumString { + static constexpr const char* Name = "TestMediumString"; + static constexpr size_t StringSize = 32; + + static std::vector generateInput(size_t size) { + std::vector Res(size); + std::generate(Res.begin(), Res.end(), + [] { return getRandomString(StringSize); }); + return Res; + } +}; + +using AllTestTypes = + std::tuple; + +struct MergeAlg { + template + O operator()(I1 first1, I1 last1, I2 first2, I2 last2, O out) { + return std::merge(first1, last1, first2, last2, out); + } + + static constexpr const char* Name = "MergeAlg"; +}; + +using AllAlgs = std::tuple; + +} // namespace + +template +struct MergeBench { + size_t Quantity; + + std::string name() const { + return std::string("MergeBench_") + Alg::Name + "_" + TestType::Name + '/' + + std::to_string(Quantity); + } + + void run(benchmark::State& state) const { + auto Data = TestType::generateInput(Quantity); + using VecT = decltype(Data); + + const auto SplitPoints = pickSplitPoints(Data.begin(), Data.end()); + + std::array LhsArray{}; + std::array RhsArray{}; + + { + VecT* LhsIt = LhsArray.begin(); + VecT* RhsIt = RhsArray.begin(); + for (auto Split : SplitPoints) { + *LhsIt = VecT{Data.begin(), Split}; + *RhsIt = VecT{Split, Data.end()}; + std::sort(LhsIt->begin(), LhsIt->end()); + std::sort(RhsIt->begin(), RhsIt->end()); + ++LhsIt; + ++RhsIt; + } + } + + VecT output(Data.size()); + + for (auto _ : state) { + for (size_t i = 0; i < LhsArray.size(); ++i) { + Alg{}(LhsArray[i].begin(), LhsArray[i].end(), RhsArray[i].begin(), + RhsArray[i].end(), output.begin()); + benchmark::DoNotOptimize(output); + } + } + } +}; + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + if (benchmark::ReportUnrecognizedArguments(argc, argv)) + return 1; + + const std::vector Quantities = {1 << 9, 1 << 11, 1 << 21}; + makeCartesianProductBenchmark(Quantities); + benchmark::RunSpecifiedBenchmarks(); +} Index: include/algorithm =================================================================== --- include/algorithm +++ include/algorithm @@ -4350,22 +4350,30 @@ __merge(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) { - for (; __first1 != __last1; ++__result) - { - if (__first2 == __last2) - return _VSTD::copy(__first1, __last1, __result); - if (__comp(*__first2, *__first1)) - { - *__result = *__first2; - ++__first2; - } - else - { - *__result = *__first1; - ++__first1; - } + if (__first1 == __last1) goto __copySecond; + if (__first2 == __last2) goto __copyFirst; + + while (true) { + if (__comp(*__first2, *__first1)) goto __takeSecond; + *__result = *__first1; + ++__first1, (void)++__result; + if (__first1 == __last1) goto __copySecond; + goto __unrolledCheck; +__takeSecond: + *__result = *__first2; + ++__first2, (void)++__result; + if (__first2 == __last2) goto __copyFirst; +__unrolledCheck: + if (__comp(*__first2, *__first1)) goto __takeSecond; + *__result = *__first1; + ++__first1, (void)++__result; + if (__first1 == __last1) goto __copySecond; } + +__copySecond: return _VSTD::copy(__first2, __last2, __result); +__copyFirst: + return _VSTD::copy(__first1, __last1, __result); } template