Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -56,6 +56,8 @@
 option(LIBCXX_ENABLE_FILESYSTEM "Build filesystem as part of libc++experimental.a" ${LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY})
 option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS})
+option(LIBCXX_INCLUDE_BENCHMARKS "Build the libc++ benchmarks and their dependencies" OFF)
+option(LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB "Build the benchmarks against the native STL" OFF)
 option(LIBCXX_INCLUDE_DOCS "Build the libc++ documentation." ${LLVM_INCLUDE_DOCS})
 set(LIBCXX_LIBDIR_SUFFIX "${LLVM_LIBDIR_SUFFIX}" CACHE STRING "Define suffix of library directory name (32/64)")
@@ -426,6 +428,9 @@
 add_subdirectory(include)
 add_subdirectory(lib)
+if (LIBCXX_INCLUDE_BENCHMARKS)
+  add_subdirectory(benchmarks)
+endif()
 if (LIBCXX_INCLUDE_TESTS)
   add_subdirectory(test)
 endif()
Index: benchmarks/CMakeLists.txt
===================================================================
--- /dev/null
+++ benchmarks/CMakeLists.txt
@@ -0,0 +1,119 @@
+include(ExternalProject)
+include(CheckCXXCompilerFlag)
+
+#==============================================================================
+# Build Google Benchmark for libc++
+#==============================================================================
+check_cxx_compiler_flag(-stdlib=libc++ LIBCXX_HAS_NO_STDLIB_LIBCXX_FLAG)
+if (NOT LIBCXX_HAS_NO_STDLIB_LIBCXX_FLAG)
+  message(FATAL_ERROR "Benchmark requires support for the -stdlib=libc++ flag")
+endif()
+
+set(BENCHMARK_LIBCXX_COMPILE_FLAGS
+    -Wno-unused-command-line-argument
+    -nostdinc++
+    -cxx-isystem ${LIBCXX_SOURCE_DIR}/include
+    -stdlib=libc++)
+set(BENCHMARK_LIBCXX_LINK_FLAGS
+    -L${LIBCXX_LIBRARY_DIR}
+    -Wl,-rpath,${LIBCXX_LIBRARY_DIR})
+split_list(BENCHMARK_LIBCXX_COMPILE_FLAGS)
+split_list(BENCHMARK_LIBCXX_LINK_FLAGS)
+
+ExternalProject_Add(google-benchmark-libcxx
+        EXCLUDE_FROM_ALL ON
+        DEPENDS cxx
+        PREFIX benchmark-libcxx
+        SOURCE_DIR ${LIBCXX_SOURCE_DIR}/utils/google-benchmark
+        INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/benchmark-libcxx
+        CMAKE_CACHE_DEFAULT_ARGS
+          -DCMAKE_BUILD_TYPE:STRING=RELEASE
+          -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
+          -DCMAKE_CXX_FLAGS:STRING=${BENCHMARK_LIBCXX_COMPILE_FLAGS}
+          -DCMAKE_SHARED_LINKER_FLAGS:STRING=${BENCHMARK_LIBCXX_LINK_FLAGS}
+          -DCMAKE_EXE_LINKER_FLAGS:STRING=${BENCHMARK_LIBCXX_LINK_FLAGS}
+          -DBENCHMARK_ENABLE_TESTING:BOOL=OFF)
+
+#==============================================================================
+# Build Google Benchmark for the native stdlib
+#==============================================================================
+if (LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB)
+  ExternalProject_Add(google-benchmark-native
+        EXCLUDE_FROM_ALL ON
+        PREFIX benchmark-native
+        SOURCE_DIR ${LIBCXX_SOURCE_DIR}/utils/google-benchmark
+        INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/benchmark-native
+        CMAKE_CACHE_ARGS
+          -DBENCHMARK_ENABLE_TESTING:BOOL=OFF
+        CMAKE_CACHE_DEFAULT_ARGS
+          -DCMAKE_BUILD_TYPE:STRING=RELEASE
+          -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>)
+endif()
+
+#==============================================================================
+# Benchmark tests configuration
+#==============================================================================
+add_custom_target(libcxx-benchmarks)
+
+set(BENCHMARK_LIBCXX_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/benchmark-libcxx)
+set(BENCHMARK_NATIVE_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/benchmark-native)
+set(BENCHMARK_TEST_COMPILE_FLAGS
+    -std=c++14 -O2
+    -I${BENCHMARK_LIBCXX_INSTALL}/include
+)
+set(BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS
+    -nostdinc++
+    ${BENCHMARK_TEST_COMPILE_FLAGS}
+    -Wno-user-defined-literals
+)
+set(BENCHMARK_TEST_LIBCXX_LINK_FLAGS
+    -nodefaultlibs
+    -L${BENCHMARK_LIBCXX_INSTALL}/lib/
+)
+set(BENCHMARK_TEST_NATIVE_LINK_FLAGS
+    -L${BENCHMARK_NATIVE_INSTALL}/lib/
+)
+split_list(BENCHMARK_TEST_COMPILE_FLAGS)
+split_list(BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS)
+split_list(BENCHMARK_TEST_LIBCXX_LINK_FLAGS)
+split_list(BENCHMARK_TEST_NATIVE_LINK_FLAGS)
+macro(add_benchmark_test name source_file)
+  set(libcxx_target ${name}_libcxx)
+  add_executable(${libcxx_target} EXCLUDE_FROM_ALL ${source_file})
+  add_dependencies(${libcxx_target} cxx google-benchmark-libcxx)
+  add_dependencies(libcxx-benchmarks ${libcxx_target})
+  target_link_libraries(${libcxx_target} cxx -lbenchmark)
+  set_target_properties(${libcxx_target}
+    PROPERTIES
+      OUTPUT_NAME "${name}.libcxx.out"
+      COMPILE_FLAGS "${BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS}"
+      LINK_FLAGS "${BENCHMARK_TEST_LIBCXX_LINK_FLAGS}")
+  if (LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB)
+    set(native_target ${name}_native)
+    add_executable(${native_target} EXCLUDE_FROM_ALL ${source_file})
+    add_dependencies(${native_target} google-benchmark-native)
+    target_link_libraries(${native_target} -lbenchmark)
+    if (LIBCXX_HAS_PTHREAD_LIB)
+      target_link_libraries(${native_target} -pthread)
+    endif()
+    add_dependencies(libcxx-benchmarks ${native_target})
+    set_target_properties(${native_target}
+      PROPERTIES
+        OUTPUT_NAME "${name}.native.out"
+        INCLUDE_DIRECTORIES ""
+        COMPILE_FLAGS "${BENCHMARK_TEST_COMPILE_FLAGS}"
+        LINK_FLAGS "${BENCHMARK_TEST_NATIVE_LINK_FLAGS}")
+  endif()
+endmacro()
+
+
+#==============================================================================
+# Register Benchmark tests
+#==============================================================================
+file(GLOB BENCHMARK_TESTS "*.bench.cpp")
+foreach(test_path ${BENCHMARK_TESTS})
+  get_filename_component(test_file "${test_path}" NAME)
+  message(STATUS "TEST: ${test_file}")
+  string(REPLACE ".bench.cpp" "" test_name "${test_file}")
+  add_benchmark_test(${test_name} ${test_file})
+endforeach()
Index: benchmarks/ContainerBenchmarks.hpp
===================================================================
--- /dev/null
+++ benchmarks/ContainerBenchmarks.hpp
@@ -0,0 +1,69 @@
+#ifndef BENCHMARK_CONTAINER_BENCHMARKS_HPP
+#define BENCHMARK_CONTAINER_BENCHMARKS_HPP
+
+#include <cassert>
+
+#include "benchmark/benchmark_api.h"
+
+namespace ContainerBenchmarks {
+
+template <class Container, class GenInputs>
+void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
+    auto in = gen(st.range_x());
+    const auto end = in.end();
+    while (st.KeepRunning()) {
+        c.clear();
+        for (auto it = in.begin(); it != end; ++it) {
+            benchmark::DoNotOptimize(&(*c.insert(*it).first));
+        }
+        benchmark::ClobberMemory();
+    }
+}
+
+template <class Container, class GenInputs>
+void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
+    auto in = gen(st.range_x());
+    const auto end = in.end();
+    while (st.KeepRunning()) {
+        c.clear();
+        c.rehash(16);
+        for (auto it = in.begin(); it != end; ++it) {
+            benchmark::DoNotOptimize(&(*c.insert(*it).first));
+        }
+        benchmark::ClobberMemory();
+    }
+}
+
+template <class Container, class GenInputs>
+static void BM_Find(benchmark::State& st, Container c, GenInputs gen) {
+    auto in = gen(st.range_x());
+    c.insert(in.begin(), in.end());
+    benchmark::DoNotOptimize(&(*c.begin()));
+    const auto end = in.data() + in.size();
+    while (st.KeepRunning()) {
+        for (auto it = in.data(); it != end; ++it) {
+            benchmark::DoNotOptimize(&(*c.find(*it)));
+        }
+        benchmark::ClobberMemory();
+    }
+}
+
+template <class Container, class GenInputs>
+static void BM_FindRehash(benchmark::State& st, Container c, GenInputs gen) {
+    c.rehash(8);
+    auto in = gen(st.range_x());
+    c.insert(in.begin(), in.end());
+    benchmark::DoNotOptimize(&(*c.begin()));
+    const auto end = in.data() + in.size();
+    while (st.KeepRunning()) {
+        for (auto it = in.data(); it != end; ++it) {
+            benchmark::DoNotOptimize(&(*c.find(*it)));
+        }
+        benchmark::ClobberMemory();
+    }
+
+}
+
+} // end namespace ContainerBenchmarks
+
+#endif // BENCHMARK_CONTAINER_BENCHMARKS_HPP
Index: benchmarks/GenerateInput.hpp
===================================================================
--- /dev/null
+++ benchmarks/GenerateInput.hpp
@@ -0,0 +1,133 @@
+#ifndef BENCHMARK_GENERATE_INPUT_HPP
+#define BENCHMARK_GENERATE_INPUT_HPP
+
+#include <algorithm>
+#include <climits>
+#include <cstddef>
+#include <random>
+#include <string>
+#include <vector>
+
+static const char Letters[] = {
+    '0','1','2','3','4',
+    '5','6','7','8','9',
+    'A','B','C','D','E','F',
+    'G','H','I','J','K',
+    'L','M','N','O','P',
+    'Q','R','S','T','U',
+    'V','W','X','Y','Z',
+    'a','b','c','d','e','f',
+    'g','h','i','j','k',
+    'l','m','n','o','p',
+    'q','r','s','t','u',
+    'v','w','x','y','z'
+};
+static const std::size_t LettersSize = sizeof(Letters);
+
+inline std::default_random_engine& getRandomEngine() {
+    static std::default_random_engine RandEngine(std::random_device{}());
+    return RandEngine;
+}
+
+inline char getRandomChar() {
+    std::uniform_int_distribution<> LettersDist(0, LettersSize-1);
+    return Letters[LettersDist(getRandomEngine())];
+}
+
+template <class IntT>
+inline IntT getRandomInteger() {
+    std::uniform_int_distribution<IntT> dist;
+    return dist(getRandomEngine());
+}
+
+inline std::string getRandomString(std::size_t Len) {
+    std::string str(Len, 0);
+    std::generate_n(str.begin(), Len, &getRandomChar);
+    return str;
+}
+
+template <class IntT>
+inline std::vector<IntT> getDuplicateIntegerInputs(size_t N) {
+    std::vector<IntT> inputs(N, static_cast<IntT>(-1));
+    return inputs;
+}
+
+template <class IntT>
+inline std::vector<IntT> getSortedIntegerInputs(size_t N) {
+    std::vector<IntT> inputs;
+    for (size_t i=0; i < N; i += 1)
+        inputs.push_back(i);
+    return inputs;
+}
+
+template <class IntT>
+std::vector<IntT> getSortedLargeIntegerInputs(size_t N) {
+    std::vector<IntT> inputs;
+    for (size_t i=0; i < N; ++i) {
+        inputs.push_back(i + N);
+    }
+    return inputs;
+}
+
+template <class IntT>
+std::vector<IntT> getSortedTopBitsIntegerInputs(size_t N) {
+    std::vector<IntT> inputs = getSortedIntegerInputs<IntT>(N);
+    for (auto& E : inputs) E <<= ((sizeof(IntT) / 2) * CHAR_BIT);
+    return inputs;
+}
+
+template <class IntT>
+inline std::vector<IntT> getReverseSortedIntegerInputs(size_t N) {
+    std::vector<IntT> inputs;
+    std::size_t i = N;
+    while (i > 0) {
+        --i;
+        inputs.push_back(i);
+    }
+    return inputs;
+}
+
+template <class IntT>
+std::vector<IntT> getPipeOrganIntegerInputs(size_t N) {
+    std::vector<IntT> v; v.reserve(N);
+    for (size_t i = 0; i < N/2; ++i) v.push_back(i);
+    for (size_t i = N/2; i < N; ++i) v.push_back(N - i);
+    return v;
+}
+
+
+template <class IntT>
+std::vector<IntT> getRandomIntegerInputs(size_t N) {
+    std::vector<IntT> inputs;
+    for (size_t i=0; i < N; ++i) {
+        inputs.push_back(getRandomInteger<IntT>());
+    }
+    return inputs;
+}
+
+inline std::vector<std::string> getDuplicateStringInputs(size_t N) {
+    std::vector<std::string> inputs(N, getRandomString(1024));
+    return inputs;
+}
+
+inline std::vector<std::string> getRandomStringInputs(size_t N) {
+    std::vector<std::string> inputs;
+    for (int i=0; i < N; ++i) {
+        inputs.push_back(getRandomString(1024));
+    }
+    return inputs;
+}
+
+inline std::vector<std::string> getSortedStringInputs(size_t N) {
+    std::vector<std::string> inputs = getRandomStringInputs(N);
+    std::sort(inputs.begin(), inputs.end());
+    return inputs;
+}
+
+
+inline std::vector<std::string> getReverseSortedStringInputs(size_t N) {
+    std::vector<std::string> inputs = getSortedStringInputs(N);
+    std::reverse(inputs.begin(), inputs.end());
+    return inputs;
+}
+
+#endif // BENCHMARK_GENERATE_INPUT_HPP
Index: benchmarks/algorithms.bench.cpp
===================================================================
--- /dev/null
+++ benchmarks/algorithms.bench.cpp
@@ -0,0 +1,62 @@
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+
+#include "benchmark/benchmark_api.h"
+#include "GenerateInput.hpp"
+
+constexpr std::size_t TestNumInputs = 1024;
+
+template <class GenInputs>
+void BM_Sort(benchmark::State& st, GenInputs gen) {
+    using ValueType = typename decltype(gen(0))::value_type;
+    const auto in = gen(st.range_x());
+    std::vector<ValueType> inputs[5];
+    auto reset_inputs = [&]() {
+        for (auto& C : inputs) {
+            C = in;
+            benchmark::DoNotOptimize(C.data());
+        }
+    };
+    reset_inputs();
+    while (st.KeepRunning()) {
+        for (auto& I : inputs) {
+            std::sort(I.data(), I.data() + I.size());
+            benchmark::DoNotOptimize(I.data());
+        }
+        st.PauseTiming();
+        reset_inputs();
+        benchmark::ClobberMemory();
+        st.ResumeTiming();
+    }
+}
+
+BENCHMARK_CAPTURE(BM_Sort, random_uint32,
+    getRandomIntegerInputs<uint32_t>)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Sort, sorted_ascending_uint32,
+    getSortedIntegerInputs<uint32_t>)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Sort, sorted_descending_uint32,
+    getReverseSortedIntegerInputs<uint32_t>)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Sort, single_element_uint32,
+    getDuplicateIntegerInputs<uint32_t>)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Sort, pipe_organ_uint32,
+    getPipeOrganIntegerInputs<uint32_t>)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Sort, random_strings,
+    getRandomStringInputs)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Sort, sorted_ascending_strings,
+    getSortedStringInputs)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Sort, sorted_descending_strings,
+    getReverseSortedStringInputs)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
+    getDuplicateStringInputs)->Arg(TestNumInputs);
+
+
+BENCHMARK_MAIN()
Index: benchmarks/unordered_set_operations.bench.cpp
===================================================================
--- benchmarks/unordered_set_operations.bench.cpp
+++ benchmarks/unordered_set_operations.bench.cpp
@@ -1,44 +1,268 @@
 #include <unordered_set>
 #include <vector>
+#include <functional>
 #include <cstdint>
+#include <cstdlib>
+#include <cstring>
 #include "benchmark/benchmark_api.h"
-template <class IntT>
-std::vector<IntT> getInputs(size_t N) {
-    std::vector<IntT> inputs;
-    for (size_t i=0; i < N; ++i) {
-        inputs.push_back(i);
-    }
-    return inputs;
 }
-template <class Container, class Inputs>
-void BM_SetInsert(benchmark::State& st, Container c, Inputs const& in) {
-    const auto end = in.end();
-    while (st.KeepRunning()) {
-        c.clear();
-        for (auto it = in.begin(); it != end; ++it) {
-            benchmark::DoNotOptimize(c.insert(*it));
-        }
-        benchmark::DoNotOptimize(c);
-    }
 }
+#include "ContainerBenchmarks.hpp"
+#include "GenerateInput.hpp"
+
+using namespace ContainerBenchmarks;
+
+constexpr std::size_t TestNumInputs = 1024;
+
+template <class _Size>
+inline __attribute__((__always_inline__))
+_Size loadword(const void* __p) {
+    _Size __r;
+    std::memcpy(&__r, __p, sizeof(__r));
+    return __r;
 }
+
+inline __attribute__((__always_inline__))
+std::size_t rotate_by_at_least_1(std::size_t __val, int __shift) {
+    return (__val >> __shift) | (__val << (64 - __shift));
+}
+
+inline __attribute__((__always_inline__))
+std::size_t hash_len_16(std::size_t __u, std::size_t __v) {
+    const std::size_t __mul = 0x9ddfea08eb382d69ULL;
+    std::size_t __a = (__u ^ __v) * __mul;
+    __a ^= (__a >> 47);
+    std::size_t __b = (__v ^ __a) * __mul;
+    __b ^= (__b >> 47);
+    __b *= __mul;
+    return __b;
+}
+
+
+template <std::size_t _Len>
+inline __attribute__((__always_inline__))
+std::size_t hash_len_0_to_8(const char* __s) {
+    static_assert(_Len == 4 || _Len == 8, "");
+    const uint64_t __a = loadword<uint32_t>(__s);
+    const uint64_t __b = loadword<uint32_t>(__s + _Len - 4);
+    return hash_len_16(_Len + (__a << 3), __b);
 }
-BENCHMARK_CAPTURE(BM_SetInsert, uint32_insert,
-    std::unordered_set<uint32_t>{}, getInputs<uint32_t>(1024));
-template <class Container, class Inputs>
-void BM_SetFind(benchmark::State& st, Container c, Inputs const& in) {
-    c.insert(in.begin(), in.end());
-    const auto end = in.end();
+
+struct UInt32Hash {
+    UInt32Hash() = default;
+    inline __attribute__((__always_inline__))
+    std::size_t operator()(uint32_t data) const {
+        return hash_len_0_to_8<4>(reinterpret_cast<const char*>(&data));
+    }
+};
+
+struct UInt64Hash {
+    UInt64Hash() = default;
+    inline __attribute__((__always_inline__))
+    std::size_t operator()(uint64_t data) const {
+        return hash_len_0_to_8<8>(reinterpret_cast<const char*>(&data));
+    }
+};
+
+struct UInt128Hash {
+    UInt128Hash() = default;
+    inline __attribute__((__always_inline__))
+    std::size_t operator()(__uint128_t data) const {
+        const __uint128_t __mask = static_cast<std::size_t>(-1);
+        const std::size_t __a = (std::size_t)(data & __mask);
+        const std::size_t __b = (std::size_t)((data & (__mask << 64)) >> 64);
+        return hash_len_16(__a, rotate_by_at_least_1(__b + 16, 16)) ^ __b;
+    }
+};
+
+struct UInt32Hash2 {
+    UInt32Hash2() = default;
+    inline __attribute__((__always_inline__))
+    std::size_t operator()(uint32_t data) const {
+        const uint32_t __m = 0x5bd1e995;
+        const uint32_t __r = 24;
+        uint32_t __h = 4;
+        uint32_t __k = data;
+        __k *= __m;
+        __k ^= __k >> __r;
+        __k *= __m;
+        __h *= __m;
+        __h ^= __k;
+        __h ^= __h >> 13;
+        __h *= __m;
+        __h ^= __h >> 15;
+        return __h;
+    }
+};
+
+struct UInt64Hash2 {
+    UInt64Hash2() = default;
+    inline __attribute__((__always_inline__))
+    std::size_t operator()(uint64_t data) const {
+        return hash_len_0_to_8<8>(reinterpret_cast<const char*>(&data));
+    }
+};
+
+//----------------------------------------------------------------------------//
+//                               BM_Hash
+// ---------------------------------------------------------------------------//
+
+template <class HashFn, class GenInputs>
+void BM_Hash(benchmark::State& st, HashFn fn, GenInputs gen) {
+    auto in = gen(st.range_x());
+    const auto end = in.data() + in.size();
+    std::size_t last_hash = 0;
+    benchmark::DoNotOptimize(&last_hash);
     while (st.KeepRunning()) {
-        for (auto it = in.begin(); it != end; ++it) {
-            benchmark::DoNotOptimize(c.find(*it));
+        for (auto it = in.data(); it != end; ++it) {
+            benchmark::DoNotOptimize(last_hash += fn(*it));
         }
+        benchmark::ClobberMemory();
     }
 }
-BENCHMARK_CAPTURE(BM_SetFind, uint32_lookup,
-    std::unordered_set<uint32_t>{}, getInputs<uint32_t>(1024));
+BENCHMARK_CAPTURE(BM_Hash,
+    uint32_random_std_hash,
+    std::hash<uint32_t>{},
+    getRandomIntegerInputs<uint32_t>) -> Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Hash,
+    uint32_random_custom_hash,
+    UInt32Hash{},
+    getRandomIntegerInputs<uint32_t>) -> Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Hash,
+    uint32_top_std_hash,
+    std::hash<uint32_t>{},
+    getSortedTopBitsIntegerInputs<uint32_t>) -> Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_Hash,
+    uint32_top_custom_hash,
+    UInt32Hash{},
+    getSortedTopBitsIntegerInputs<uint32_t>) -> Arg(TestNumInputs);
+
+
+//----------------------------------------------------------------------------//
+//                          BM_InsertValue
+// ---------------------------------------------------------------------------//
+
+
+// Sorted Ascending //
+BENCHMARK_CAPTURE(BM_InsertValue,
+    unordered_set_uint32,
+    std::unordered_set<uint32_t>{},
+    getRandomIntegerInputs<uint32_t>)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_InsertValue, + unordered_set_uint32_sorted, + std::unordered_set{}, + getSortedIntegerInputs)->Arg(TestNumInputs); + +// Top Bytes // +BENCHMARK_CAPTURE(BM_InsertValue, + unordered_set_top_bits_uint32, + std::unordered_set{}, + getSortedTopBitsIntegerInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_InsertValueRehash, + unordered_set_top_bits_uint32, + std::unordered_set{}, + getSortedTopBitsIntegerInputs)->Arg(TestNumInputs); + +// String // +BENCHMARK_CAPTURE(BM_InsertValue, + unordered_set_string, + std::unordered_set{}, + getRandomStringInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_InsertValueRehash, + unordered_set_string, + std::unordered_set{}, + getRandomStringInputs)->Arg(TestNumInputs); + +//----------------------------------------------------------------------------// +// BM_Find +// ---------------------------------------------------------------------------// + +// Random // +BENCHMARK_CAPTURE(BM_Find, + unordered_set_random_uint64, + std::unordered_set{}, + getRandomIntegerInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_FindRehash, + unordered_set_random_uint64, + std::unordered_set{}, + getRandomIntegerInputs)->Arg(TestNumInputs); + +// Sorted // +BENCHMARK_CAPTURE(BM_Find, + unordered_set_sorted_uint64, + std::unordered_set{}, + getSortedIntegerInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_FindRehash, + unordered_set_sorted_uint64, + std::unordered_set{}, + getSortedIntegerInputs)->Arg(TestNumInputs); + + +// Sorted // +#if 1 +BENCHMARK_CAPTURE(BM_Find, + unordered_set_sorted_uint128, + std::unordered_set<__uint128_t, UInt128Hash>{}, + getSortedTopBitsIntegerInputs<__uint128_t>)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_FindRehash, + unordered_set_sorted_uint128, + std::unordered_set<__uint128_t, UInt128Hash>{}, + getSortedTopBitsIntegerInputs<__uint128_t>)->Arg(TestNumInputs); +#endif + +// Sorted // +BENCHMARK_CAPTURE(BM_Find, + unordered_set_sorted_uint32, + std::unordered_set{}, + getSortedIntegerInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_FindRehash, + unordered_set_sorted_uint32, + std::unordered_set{}, + getSortedIntegerInputs)->Arg(TestNumInputs); + +// Sorted Ascending // +BENCHMARK_CAPTURE(BM_Find, + unordered_set_sorted_large_uint64, + std::unordered_set{}, + getSortedLargeIntegerInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_FindRehash, + unordered_set_sorted_large_uint64, + std::unordered_set{}, + getSortedLargeIntegerInputs)->Arg(TestNumInputs); + + +// Top Bits // +BENCHMARK_CAPTURE(BM_Find, + unordered_set_top_bits_uint64, + std::unordered_set{}, + getSortedTopBitsIntegerInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_FindRehash, + unordered_set_top_bits_uint64, + std::unordered_set{}, + getSortedTopBitsIntegerInputs)->Arg(TestNumInputs); + +// String // +BENCHMARK_CAPTURE(BM_Find, + unordered_set_string, + std::unordered_set{}, + getRandomStringInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_FindRehash, + unordered_set_string, + std::unordered_set{}, + getRandomStringInputs)->Arg(TestNumInputs); BENCHMARK_MAIN() Index: docs/BuildingLibcxx.rst =================================================================== --- docs/BuildingLibcxx.rst +++ docs/BuildingLibcxx.rst @@ -227,7 +227,7 @@ libc++abi is the C++ ABI library used. -libc++ Feature options +libc++ Feature Options ---------------------- .. option:: LIBCXX_ENABLE_EXCEPTIONS:BOOL @@ -242,9 +242,25 @@ Build libc++ with run time type information. +.. 
option:: LIBCXX_INCLUDE_BENCHMARKS:BOOL
-libc++ Feature options
-----------------------
+  **Default**: ``OFF``
+
+  Build the libc++ benchmark tests and the Google Benchmark library needed
+  to support them.
+
+.. option:: LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB:BOOL
+
+  **Default**: ``OFF``
+
+  Build the libc++ benchmark tests and Google Benchmark library against the
+  native standard library on the platform. On Linux this can be used to compare
+  libc++ to libstdc++ by building the benchmark tests against both standard
+  libraries.
+
+
+libc++ ABI Feature Options
+--------------------------
 
 The following options allow building libc++ for a different ABI version.
Index: docs/TestingLibcxx.rst
===================================================================
--- docs/TestingLibcxx.rst
+++ docs/TestingLibcxx.rst
@@ -198,3 +198,62 @@
 If ``LIBCXX_COLOR_DIAGNOSTICS`` is defined then the test suite will attempt
 to use color diagnostic outputs from the compiler.
 Also see :option:`color_diagnostics`.
+
+Benchmarks
+==========
+
+Libc++ contains benchmark tests that are separate from the rest of the test suite.
+The benchmarks are written using the `Google Benchmark`_ library, a copy of which
+is stored in the libc++ repository.
+
+For more information about using the Google Benchmark library see the
+`official documentation `_.
+
+.. _`Google Benchmark`: https://github.com/google/benchmark
+
+Building Benchmarks
+-------------------
+
+The benchmark tests are not enabled by default. To build the benchmarks,
+libc++ must be configured using the CMake option ``-DLIBCXX_INCLUDE_BENCHMARKS=ON``.
+Then the benchmarks can be built using the ``libcxx-benchmarks`` target.
+
+An example build would look like:
+
+.. code-block:: bash
+
+  $ cd build
+  $ cmake [options] -DLIBCXX_INCLUDE_BENCHMARKS=ON
+  $ make libcxx-benchmarks
+
+This builds all of the benchmarks under ``/benchmarks`` against the
+just-built libc++. The compiled tests are output into ``build/benchmarks``.
+
+The benchmarks can also be built against the platform's native standard library
+using the ``-DLIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB=ON`` CMake option. This
+is useful for comparing the performance of libc++ to other standard libraries.
+The compiled benchmarks are named ``<name>.libcxx.out`` if they test libc++ and
+``<name>.native.out`` otherwise.
+
+Also see:
+
+  * :ref:`Building Libc++ <BuildingLibcxx>`
+  * :ref:`CMake Options`
+
+Running Benchmarks
+------------------
+
+The benchmarks must be run manually by the user. Currently there is no way
+to run them as part of the build.
+
+For example:
+
+.. code-block:: bash
+
+  $ cd build/benchmarks
+  $ make libcxx-benchmarks
+  $ ./algorithms.libcxx.out                               # Runs all the benchmarks
+  $ ./algorithms.libcxx.out --benchmark_filter=BM_Sort.*  # Only runs the sort benchmarks
+
+For more information about running benchmarks see `Google Benchmark`_.
Index: utils/google-benchmark/.gitignore
===================================================================
--- /dev/null
+++ utils/google-benchmark/.gitignore
@@ -0,0 +1,46 @@
+*.a
+*.so
+*.so.?*
+*.dll
+*.exe
+*.dylib
+*.cmake
+!/cmake/*.cmake
+*~
+*.pyc
+__pycache__
+
+# lcov
+*.lcov
+/lcov
+
+# cmake files.
+/Testing
+CMakeCache.txt
+CMakeFiles/
+cmake_install.cmake
+
+# makefiles.
+Makefile
+
+# in-source build.
+bin/
+lib/
+/test/*_test
+
+# exuberant ctags.
+tags
+
+# YouCompleteMe configuration.
+.ycm_extra_conf.pyc
+
+# ninja generated files.
+.ninja_deps
+.ninja_log
+build.ninja
+install_manifest.txt
+rules.ninja
+
+# out-of-source build top-level folders.
+build/ +_build/ Index: utils/google-benchmark/AUTHORS =================================================================== --- /dev/null +++ utils/google-benchmark/AUTHORS @@ -0,0 +1,33 @@ +# This is the official list of benchmark authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. +# +# Names should be added to this file as: +# Name or Organization +# The email address is not required for organizations. +# +# Please keep the list sorted. + +Albert Pretorius +Arne Beer +Christopher Seymour +David Coeurjolly +Dominic Hamon +Eugene Zhuk +Evgeny Safronov +Felix Homann +Google Inc. +Ismael Jimenez Martinez +JianXiong Zhou +Jussi Knuuttila +Kaito Udagawa +Lei Xu +Matt Clarkson +Oleksandr Sochka +Paul Redmond +Radoslav Yovchev +Shuo Chen +Yusuke Suzuki +Dirac Research +Zbigniew Skowron +Dominik Czarnota Index: utils/google-benchmark/CMakeLists.txt =================================================================== --- /dev/null +++ utils/google-benchmark/CMakeLists.txt @@ -0,0 +1,147 @@ +cmake_minimum_required (VERSION 2.8.11) +project (benchmark) + +foreach(p + CMP0054 # CMake 3.1 + CMP0056 # export EXE_LINKER_FLAGS to try_run + ) + if(POLICY ${p}) + cmake_policy(SET ${p} NEW) + endif() +endforeach() + +option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) +option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF) +# Make sure we can import out CMake functions +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +# Read the git tags to determine the project version +include(GetGitVersion) +get_git_version(GIT_VERSION) + +# Tell the user what versions we are using +string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION}) +message("-- Version: ${VERSION}") + +# The version of the libraries +set(GENERIC_LIB_VERSION ${VERSION}) +string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) + +# Import our CMake modules +include(CheckCXXCompilerFlag) +include(AddCXXCompilerFlag) +include(CXXFeatureCheck) + +if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + # Turn compiler warnings up to 11 + string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") + add_definitions(-D_CRT_SECURE_NO_WARNINGS) + + # Link time optimisation + if (BENCHMARK_ENABLE_LTO) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GL") + set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "${CMAKE_STATIC_LINKER_FLAGS_RELEASE} /LTCG") + set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /LTCG") + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG") + + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /GL") + string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO}") + set(CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") + string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO}") + set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") + string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO}") + set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") + + set(CMAKE_CXX_FLAGS_MINSIZEREL 
"${CMAKE_CXX_FLAGS_MINSIZEREL} /GL") + set(CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL "${CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL} /LTCG") + set(CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL "${CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL} /LTCG") + set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG") + endif() +else() + # Try and enable C++11. Don't use C++14 because it doesn't work in some + # configurations. + add_cxx_compiler_flag(-std=c++11) + if (NOT HAVE_CXX_FLAG_STD_CXX11) + add_cxx_compiler_flag(-std=c++0x) + endif() + + # Turn compiler warnings up to 11 + add_cxx_compiler_flag(-Wall) + + add_cxx_compiler_flag(-Wextra) + add_cxx_compiler_flag(-Wshadow) + add_cxx_compiler_flag(-Werror RELEASE) + add_cxx_compiler_flag(-Werror RELWITHDEBINFO) + add_cxx_compiler_flag(-Werror MINSIZEREL) + add_cxx_compiler_flag(-pedantic) + add_cxx_compiler_flag(-pedantic-errors) + add_cxx_compiler_flag(-Wshorten-64-to-32) + add_cxx_compiler_flag(-Wfloat-equal) + add_cxx_compiler_flag(-Wzero-as-null-pointer-constant) + add_cxx_compiler_flag(-fstrict-aliasing) + if (HAVE_CXX_FLAG_FSTRICT_ALIASING) + add_cxx_compiler_flag(-Wstrict-aliasing) + endif() + add_cxx_compiler_flag(-Wthread-safety) + if (HAVE_WTHREAD_SAFETY) + add_definitions(-DHAVE_WTHREAD_SAFETY) + cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) + endif() + + # Link time optimisation + if (BENCHMARK_ENABLE_LTO) + add_cxx_compiler_flag(-flto) + if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") + find_program(GCC_AR gcc-ar) + if (GCC_AR) + set(CMAKE_AR ${GCC_AR}) + endif() + find_program(GCC_RANLIB gcc-ranlib) + if (GCC_RANLIB) + set(CMAKE_RANLIB ${GCC_RANLIB}) + endif() + endif() + endif() + + # Coverage build type + set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING + "Flags used by the C++ compiler during coverage builds." + FORCE) + set(CMAKE_EXE_LINKER_FLAGS_COVERAGE + "${CMAKE_EXE_LINKER_FLAGS_DEBUG}" CACHE STRING + "Flags used for linking binaries during coverage builds." + FORCE) + set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE + "${CMAKE_SHARED_LINKER_FLAGS_DEBUG}" CACHE STRING + "Flags used by the shared libraries linker during coverage builds." + FORCE) + mark_as_advanced( + CMAKE_CXX_FLAGS_COVERAGE + CMAKE_EXE_LINKER_FLAGS_COVERAGE + CMAKE_SHARED_LINKER_FLAGS_COVERAGE) + set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING + "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel Coverage." + FORCE) + add_cxx_compiler_flag(--coverage COVERAGE) +endif() + +# C++ feature checks +cxx_feature_check(STD_REGEX) +cxx_feature_check(GNU_POSIX_REGEX) +cxx_feature_check(POSIX_REGEX) +cxx_feature_check(STEADY_CLOCK) + +# Ensure we have pthreads +find_package(Threads REQUIRED) + +# Set up directories +include_directories(${PROJECT_SOURCE_DIR}/include) + +# Build the targets +add_subdirectory(src) + +if (BENCHMARK_ENABLE_TESTING) + enable_testing() + add_subdirectory(test) +endif() Index: utils/google-benchmark/CONTRIBUTING.md =================================================================== --- /dev/null +++ utils/google-benchmark/CONTRIBUTING.md @@ -0,0 +1,58 @@ +# How to contribute # + +We'd love to accept your patches and contributions to this project. There are +a just a few small guidelines you need to follow. + + +## Contributor License Agreement ## + +Contributions to any Google project must be accompanied by a Contributor +License Agreement. This is not a copyright **assignment**, it simply gives +Google permission to use and redistribute your contributions as part of the +project. 
+ + * If you are an individual writing original source code and you're sure you + own the intellectual property, then you'll need to sign an [individual + CLA][]. + + * If you work for a company that wants to allow you to contribute your work, + then you'll need to sign a [corporate CLA][]. + +You generally only need to submit a CLA once, so if you've already submitted +one (even if it was for a different project), you probably don't need to do it +again. + +[individual CLA]: https://developers.google.com/open-source/cla/individual +[corporate CLA]: https://developers.google.com/open-source/cla/corporate + +Once your CLA is submitted (or if you already submitted one for +another Google project), make a commit adding yourself to the +[AUTHORS][] and [CONTRIBUTORS][] files. This commit can be part +of your first [pull request][]. + +[AUTHORS]: AUTHORS +[CONTRIBUTORS]: CONTRIBUTORS + + +## Submitting a patch ## + + 1. It's generally best to start by opening a new issue describing the bug or + feature you're intending to fix. Even if you think it's relatively minor, + it's helpful to know what people are working on. Mention in the initial + issue that you are planning to work on that bug or feature so that it can + be assigned to you. + + 1. Follow the normal process of [forking][] the project, and setup a new + branch to work in. It's important that each group of changes be done in + separate branches in order to ensure that a pull request only includes the + commits related to that bug or feature. + + 1. Do your best to have [well-formed commit messages][] for each change. + This provides consistency throughout the project, and ensures that commit + messages are able to be formatted properly by various git tools. + + 1. Finally, push the commits to your fork and submit a [pull request][]. + +[forking]: https://help.github.com/articles/fork-a-repo +[well-formed commit messages]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html +[pull request]: https://help.github.com/articles/creating-a-pull-request Index: utils/google-benchmark/CONTRIBUTORS =================================================================== --- /dev/null +++ utils/google-benchmark/CONTRIBUTORS @@ -0,0 +1,52 @@ +# People who have agreed to one of the CLAs and can contribute patches. +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright. +# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# https://developers.google.com/open-source/cla/individual +# https://developers.google.com/open-source/cla/corporate +# +# The agreement for individuals can be filled out on the web. +# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. +# +# Names should be added to this file as: +# Name +# +# Please keep the list sorted. 
+ +Albert Pretorius +Arne Beer +Billy Robert O'Neal III +Chris Kennelly +Christopher Seymour +David Coeurjolly +Dominic Hamon +Eric Fiselier +Eugene Zhuk +Evgeny Safronov +Felix Homann +Ismael Jimenez Martinez +JianXiong Zhou +Jussi Knuuttila +Kaito Udagawa +Kai Wolf +Lei Xu +Matt Clarkson +Oleksandr Sochka +Pascal Leroy +Paul Redmond +Pierre Phaneuf +Radoslav Yovchev +Shuo Chen +Yusuke Suzuki +Tobias Ulvgård +Zbigniew Skowron +Dominik Czarnota Index: utils/google-benchmark/LICENSE =================================================================== --- /dev/null +++ utils/google-benchmark/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. Index: utils/google-benchmark/README.LLVM =================================================================== --- /dev/null +++ utils/google-benchmark/README.LLVM @@ -0,0 +1,6 @@ +LLVM notes +---------- + +This directory contains the Google Benchmark source code with some unnecessary +files removed. Note that this directory is under a different license than +libc++. Index: utils/google-benchmark/README.md =================================================================== --- /dev/null +++ utils/google-benchmark/README.md @@ -0,0 +1,510 @@ +# benchmark +[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) +[![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master) +[![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) + +A library to support the benchmarking of functions, similar to unit-tests. + +Discussion group: https://groups.google.com/d/forum/benchmark-discuss + +IRC channel: https://freenode.net #googlebenchmark + +## Example usage +### Basic usage +Define a function that executes the code to be measured. + +```c++ +static void BM_StringCreation(benchmark::State& state) { + while (state.KeepRunning()) + std::string empty_string; +} +// Register the function as a benchmark +BENCHMARK(BM_StringCreation); + +// Define another benchmark +static void BM_StringCopy(benchmark::State& state) { + std::string x = "hello"; + while (state.KeepRunning()) + std::string copy(x); +} +BENCHMARK(BM_StringCopy); + +BENCHMARK_MAIN(); +``` + +### Passing arguments +Sometimes a family of benchmarks can be implemented with just one routine that +takes an extra argument to specify which one of the family of benchmarks to +run. 
For example, the following code defines a family of benchmarks for +measuring the speed of `memcpy()` calls of different lengths: + +```c++ +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range_x()]; + char* dst = new char[state.range_x()]; + memset(src, 'x', state.range_x()); + while (state.KeepRunning()) + memcpy(dst, src, state.range_x()); + state.SetBytesProcessed(int64_t(state.iterations()) * + int64_t(state.range_x())); + delete[] src; + delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following invocation will pick a few appropriate arguments in +the specified range and will generate a benchmark for each such argument. + +```c++ +BENCHMARK(BM_memcpy)->Range(8, 8<<10); +``` + +By default the arguments in the range are generated in multiples of eight and +the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the +range multiplier is changed to multiples of two. + +```c++ +BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); +``` +Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. + +You might have a benchmark that depends on two inputs. For example, the +following code defines a family of benchmarks for measuring the speed of set +insertion. + +```c++ +static void BM_SetInsert(benchmark::State& state) { + while (state.KeepRunning()) { + state.PauseTiming(); + std::set data = ConstructRandomSet(state.range_x()); + state.ResumeTiming(); + for (int j = 0; j < state.range_y(); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->ArgPair(1<<10, 1) + ->ArgPair(1<<10, 8) + ->ArgPair(1<<10, 64) + ->ArgPair(1<<10, 512) + ->ArgPair(8<<10, 1) + ->ArgPair(8<<10, 8) + ->ArgPair(8<<10, 64) + ->ArgPair(8<<10, 512); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following macro will pick a few appropriate arguments in the +product of the two specified ranges and will generate a benchmark for each such +pair. + +```c++ +BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512); +``` + +For more complex patterns of inputs, passing a custom function to `Apply` allows +programmatic specification of an arbitrary set of arguments on which to run the +benchmark. The following example enumerates a dense range on one parameter, +and a sparse range on the second. + +```c++ +static void CustomArguments(benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b->ArgPair(i, j); +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); +``` + +### Calculate asymptotic complexity (Big O) +Asymptotic complexity might be calculated for a family of benchmarks. The +following code will calculate the coefficient for the high-order term in the +running time and the normalized root-mean square error of string comparison. + +```c++ +static void BM_StringCompare(benchmark::State& state) { + std::string s1(state.range_x(), '-'); + std::string s2(state.range_x(), '-'); + while (state.KeepRunning()) { + benchmark::DoNotOptimize(s1.compare(s2)); + } + state.SetComplexityN(state.range_x()); +} +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); +``` + +As shown in the following invocation, asymptotic complexity might also be +calculated automatically. 
+ +```c++ +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); +``` + +The following code will specify asymptotic complexity with a lambda function, +that might be used to customize high-order term calculation. + +```c++ +BENCHMARK(BM_StringCompare)->RangeMultiplier(2) + ->Range(1<<10, 1<<18)->Complexity([](int n)->double{return n; }); +``` + +### Templated benchmarks +Templated benchmarks work the same way: This example produces and consumes +messages of size `sizeof(v)` `range_x` times. It also outputs throughput in the +absence of multiprogramming. + +```c++ +template int BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + while (state.KeepRunning()) { + for (int i = state.range_x(); i--; ) + q.push(v); + for (int e = state.range_x(); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast(state.iterations())*state.range_x()); +} +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); +``` + +Three macros are provided for adding benchmark templates. + +```c++ +#if __cplusplus >= 201103L // C++11 and greater. +#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters. +#else // C++ < C++11 +#define BENCHMARK_TEMPLATE(func, arg1) +#endif +#define BENCHMARK_TEMPLATE1(func, arg1) +#define BENCHMARK_TEMPLATE2(func, arg1, arg2) +``` + +## Passing arbitrary arguments to a benchmark +In C++11 it is possible to define a benchmark that takes an arbitrary number +of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` +macro creates a benchmark that invokes `func` with the `benchmark::State` as +the first argument followed by the specified `args...`. +The `test_case_name` is appended to the name of the benchmark and +should describe the values passed. + +```c++ +template ` +void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { + [...] +} +// Registers a benchmark named "BM_takes_args/int_string_test` that passes +// the specified values to `extra_args`. +BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); +``` +Note that elements of `...args` may refer to global variables. Users should +avoid modifying global state inside of a benchmark. + +### Multithreaded benchmarks +In a multithreaded test (benchmark invoked by multiple threads simultaneously), +it is guaranteed that none of the threads will start until all have called +`KeepRunning`, and all will have finished before KeepRunning returns false. As +such, any global setup or teardown can be wrapped in a check against the thread +index: + +```c++ +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index == 0) { + // Setup code here. + } + while (state.KeepRunning()) { + // Run the test as normal. + } + if (state.thread_index == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(2); +``` + +If the benchmarked code itself uses threads and you want to compare it to +single-threaded code, you may want to use real-time ("wallclock") measurements +for latency comparisons: + +```c++ +BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); +``` + +Without `UseRealTime`, CPU time is used by default. + + +## Manual timing +For benchmarking something for which neither CPU time nor real-time are +correct or accurate enough, completely manual timing is supported using +the `UseManualTime` function. 
+ +When `UseManualTime` is used, the benchmarked code must call +`SetIterationTime` once per iteration of the `KeepRunning` loop to +report the manually measured time. + +An example use case for this is benchmarking GPU execution (e.g. OpenCL +or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot +be accurately measured using CPU time or real-time. Instead, they can be +measured accurately using a dedicated API, and these measurement results +can be reported back with `SetIterationTime`. + +```c++ +static void BM_ManualTiming(benchmark::State& state) { + int microseconds = state.range_x(); + std::chrono::duration sleep_duration { + static_cast(microseconds) + }; + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(sleep_duration); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed_seconds = + std::chrono::duration_cast>( + end - start); + + state.SetIterationTime(elapsed_seconds.count()); + } +} +BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); +``` + +### Preventing optimisation +To prevent a value or expression from being optimized away by the compiler +the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` +functions can be used. + +```c++ +static void BM_test(benchmark::State& state) { + while (state.KeepRunning()) { + int x = 0; + for (int i=0; i < 64; ++i) { + benchmark::DoNotOptimize(x += i); + } + } +} +``` + +`DoNotOptimize()` forces the *result* of `` to be stored in either +memory or a register. For GNU based compilers it acts as read/write barrier +for global memory. More specifically it forces the compiler to flush pending +writes to memory and reload any other values as necessary. + +Note that `DoNotOptimize()` does not prevent optimizations on `` +in any way. `` may even be removed entirely when the result is already +known. For example: + +```c++ + /* Example 1: `` is removed entirely. */ + int foo(int x) { return x + 42; } + while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); + + /* Example 2: Result of '' is only reused */ + int bar(int) __attribute__((const)); + while (...) DoNotOptimize(bar(0)); // Optimized to: + // int __result__ = bar(0); + // while (...) DoNotOptimize(__result__); +``` + +The second tool for preventing optimizations is `ClobberMemory()`. In essence +`ClobberMemory()` forces the compiler to perform all pending writes to global +memory. Memory managed by block scope objects must be "escaped" using +`DoNotOptimize(...)` before it can be clobbered. In the below example +`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized +away. + +```c++ +static void BM_vector_push_back(benchmark::State& state) { + while (state.KeepRunning()) { + std::vector v; + v.reserve(1); + benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. + v.push_back(42); + benchmark::ClobberMemory(); // Force 42 to be written to memory. + } +} +``` + +Note that `ClobberMemory()` is only available for GNU based compilers. + +### Set time unit manually +If a benchmark runs a few milliseconds it may be hard to visually compare the +measured times, since the output data is given in nanoseconds per default. 
In order to set the time unit manually, specify it on the registered benchmark
+object:
+
+```c++
+BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
+```
+
+## Controlling number of iterations
+In all cases, the number of iterations for which the benchmark is run is
+governed by the amount of time the benchmark takes. Concretely, the benchmark
+is run for at least one and at most 1e9 iterations, until the CPU time exceeds
+the minimum time, or the wallclock time exceeds 5x the minimum time. The
+minimum time is set as a flag `--benchmark_min_time` or per benchmark by
+calling `MinTime` on the registered benchmark object.
+
+## Reporting the mean and standard deviation by repeated benchmarks
+By default each benchmark is run once and that single result is reported.
+However, benchmarks are often noisy and a single result may not be
+representative of the overall behavior. For this reason it is possible to run
+the benchmark repeatedly.
+
+The number of runs of each benchmark is specified globally by the
+`--benchmark_repetitions` flag or on a per-benchmark basis by calling
+`Repetitions` on the registered benchmark object. When a benchmark is run more
+than once, the mean and standard deviation of the runs will be reported.
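+
+As a quick sketch (`BM_test` is just a placeholder name), both settings can be
+applied to the registered benchmark object, and the calls may be chained:
+
+```c++
+// Run for at least 2 seconds instead of the default minimum time, and repeat
+// the whole measurement 10 times, reporting mean and standard deviation.
+BENCHMARK(BM_test)->MinTime(2.0)->Repetitions(10);
+```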
+
+## Fixtures
+Fixture tests are created by first defining a type that derives from
+`::benchmark::Fixture` and then creating/registering the tests using the
+following macros:
+
+* `BENCHMARK_F(ClassName, Method)`
+* `BENCHMARK_DEFINE_F(ClassName, Method)`
+* `BENCHMARK_REGISTER_F(ClassName, Method)`
+
+For example:
+
+```c++
+class MyFixture : public benchmark::Fixture {};
+
+BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
+  while (st.KeepRunning()) {
+    ...
+  }
+}
+
+BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) {
+  while (st.KeepRunning()) {
+    ...
+  }
+}
+/* BarTest is NOT registered */
+BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
+/* BarTest is now registered */
+```
+
+## Exiting Benchmarks in Error
+
+When errors caused by external influences, such as file I/O and network
+communication, occur within a benchmark, the
+`State::SkipWithError(const char* msg)` function can be used to skip that run
+of the benchmark and report the error. Note that only future iterations of the
+`KeepRunning()` loop are skipped. Users may explicitly `return` to exit the
+benchmark immediately.
+
+The `SkipWithError(...)` function may be used at any point within the benchmark,
+including before and after the `KeepRunning()` loop.
+
+For example:
+
+```c++
+static void BM_test(benchmark::State& state) {
+  auto resource = GetResource();
+  if (!resource.good()) {
+    state.SkipWithError("Resource is not good!");
+    // KeepRunning() loop will not be entered.
+  }
+  while (state.KeepRunning()) {
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break; // Needed to skip the rest of the iteration.
+    }
+    do_stuff(data);
+  }
+}
+```
+
+## Output Formats
+The library supports multiple output formats. Use the
+`--benchmark_format=<tabular|json|csv>` flag to set the format type. `tabular`
+is the default format.
+
+The tabular format is intended to be human readable. By default the format
+generates color output. Context is output on stderr and the tabular data on
+stdout. Example tabular output looks like:
+```
+Benchmark                               Time(ns)    CPU(ns) Iterations
+----------------------------------------------------------------------
+BM_SetInsert/1024/1                        28928      29349      23853  133.097kB/s   33.2742k items/s
+BM_SetInsert/1024/8                        32065      32913      21375  949.487kB/s   237.372k items/s
+BM_SetInsert/1024/10                       33157      33648      21431  1.13369MB/s   290.225k items/s
+```
+
+The JSON format outputs human-readable JSON split into two top-level attributes.
+The `context` attribute contains information about the run in general, including
+information about the CPU and the date.
+The `benchmarks` attribute contains a list of every benchmark run. Example JSON
+output looks like:
+``` json
+{
+  "context": {
+    "date": "2015/03/17-18:40:25",
+    "num_cpus": 40,
+    "mhz_per_cpu": 2801,
+    "cpu_scaling_enabled": false,
+    "build_type": "debug"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_SetInsert/1024/1",
+      "iterations": 94877,
+      "real_time": 29275,
+      "cpu_time": 29836,
+      "bytes_per_second": 134066,
+      "items_per_second": 33516
+    },
+    {
+      "name": "BM_SetInsert/1024/8",
+      "iterations": 21609,
+      "real_time": 32317,
+      "cpu_time": 32429,
+      "bytes_per_second": 986770,
+      "items_per_second": 246693
+    },
+    {
+      "name": "BM_SetInsert/1024/10",
+      "iterations": 21393,
+      "real_time": 32724,
+      "cpu_time": 33355,
+      "bytes_per_second": 1199226,
+      "items_per_second": 299807
+    }
+  ]
+}
+```
+
+The CSV format outputs comma-separated values. The `context` is output on stderr
+and the CSV itself on stdout. Example CSV output looks like:
+```
+name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label
+"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942,
+"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115,
+"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06,
+```
+
+## Debug vs Release
+By default, benchmark builds as a debug library. You will see a warning in the
+output when this is the case. To build it as a release library instead, use:
+
+```
+cmake -DCMAKE_BUILD_TYPE=Release
+```
+
+To enable link-time optimisation, use:
+
+```
+cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true
+```
+
+## Linking against the library
+When using gcc, it is necessary to link against pthread to avoid runtime
+exceptions. This is due to how gcc implements std::thread. See
+[issue #67](https://github.com/google/benchmark/issues/67) for more details.
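+
+As a minimal sketch of this (the file name, output name, and installation paths
+are placeholders; the library and headers are assumed to be on the default
+search paths), a benchmark translation unit and its gcc link line might look
+like:
+
+```c++
+// my_benchmark.cc -- build with something like:
+//   g++ -std=c++11 my_benchmark.cc -lbenchmark -lpthread -o my_benchmark
+#include <string>
+
+#include "benchmark/benchmark.h"
+
+// Measures the cost of constructing an empty std::string each iteration.
+static void BM_StringCreation(benchmark::State& state) {
+  while (state.KeepRunning())
+    std::string empty_string;
+}
+BENCHMARK(BM_StringCreation);
+
+// Expands to a main() that parses the benchmark flags and runs the benchmarks.
+BENCHMARK_MAIN();
+```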
Index: utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake =================================================================== --- /dev/null +++ utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake @@ -0,0 +1,37 @@ +# - Adds a compiler flag if it is supported by the compiler +# +# This function checks that the supplied compiler flag is supported and then +# adds it to the corresponding compiler flags +# +# add_cxx_compiler_flag( []) +# +# - Example +# +# include(AddCXXCompilerFlag) +# add_cxx_compiler_flag(-Wall) +# add_cxx_compiler_flag(-no-strict-aliasing RELEASE) +# Requires CMake 2.6+ + +if(__add_cxx_compiler_flag) + return() +endif() +set(__add_cxx_compiler_flag INCLUDED) + +include(CheckCXXCompilerFlag) + +function(add_cxx_compiler_flag FLAG) + string(TOUPPER "HAVE_CXX_FLAG_${FLAG}" SANITIZED_FLAG) + string(REPLACE "+" "X" SANITIZED_FLAG ${SANITIZED_FLAG}) + string(REGEX REPLACE "[^A-Za-z_0-9]" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) + string(REGEX REPLACE "_+" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) + set(CMAKE_REQUIRED_FLAGS "${FLAG}") + check_cxx_compiler_flag("" ${SANITIZED_FLAG}) + if(${SANITIZED_FLAG}) + set(VARIANT ${ARGV1}) + if(ARGV1) + string(TOUPPER "_${VARIANT}" VARIANT) + endif() + set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) + endif() +endfunction() + Index: utils/google-benchmark/cmake/CXXFeatureCheck.cmake =================================================================== --- /dev/null +++ utils/google-benchmark/cmake/CXXFeatureCheck.cmake @@ -0,0 +1,42 @@ +# - Compile and run code to check for C++ features +# +# This functions compiles a source file under the `cmake` folder +# and adds the corresponding `HAVE_[FILENAME]` flag to the CMake +# environment +# +# cxx_feature_check( []) +# +# - Example +# +# include(CXXFeatureCheck) +# cxx_feature_check(STD_REGEX) +# Requires CMake 2.6+ + +if(__cxx_feature_check) + return() +endif() +set(__cxx_feature_check INCLUDED) + +function(cxx_feature_check FILE) + string(TOLOWER ${FILE} FILE) + string(TOUPPER ${FILE} VAR) + string(TOUPPER "HAVE_${VAR}" FEATURE) + if (DEFINED HAVE_${VAR}) + return() + endif() + message("-- Performing Test ${FEATURE}") + try_run(RUN_${FEATURE} COMPILE_${FEATURE} + ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp) + if(RUN_${FEATURE} EQUAL 0) + message("-- Performing Test ${FEATURE} -- success") + set(HAVE_${VAR} 1 CACHE INTERNAL "Feature test for ${FILE}" PARENT_SCOPE) + add_definitions(-DHAVE_${VAR}) + else() + if(NOT COMPILE_${FEATURE}) + message("-- Performing Test ${FEATURE} -- failed to compile") + else() + message("-- Performing Test ${FEATURE} -- compiled but failed to run") + endif() + endif() +endfunction() + Index: utils/google-benchmark/cmake/GetGitVersion.cmake =================================================================== --- /dev/null +++ utils/google-benchmark/cmake/GetGitVersion.cmake @@ -0,0 +1,51 @@ +# - Returns a version string from Git tags +# +# This function inspects the annotated git tags for the project and returns a string +# into a CMake variable +# +# get_git_version() +# +# - Example +# +# include(GetGitVersion) +# get_git_version(GIT_VERSION) +# +# Requires CMake 2.8.11+ +find_package(Git) + +if(__get_git_version) + return() +endif() +set(__get_git_version INCLUDED) + +function(get_git_version var) + if(GIT_EXECUTABLE) + execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 + RESULT_VARIABLE status + OUTPUT_VARIABLE GIT_VERSION + ERROR_QUIET) + if(${status}) + 
set(GIT_VERSION "v0.0.0") + else() + string(STRIP ${GIT_VERSION} GIT_VERSION) + string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION}) + endif() + + # Work out if the repository is dirty + execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh + OUTPUT_QUIET + ERROR_QUIET) + execute_process(COMMAND ${GIT_EXECUTABLE} diff-index --name-only HEAD -- + OUTPUT_VARIABLE GIT_DIFF_INDEX + ERROR_QUIET) + string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY) + if (${GIT_DIRTY}) + set(GIT_VERSION "${GIT_VERSION}-dirty") + endif() + else() + set(GIT_VERSION "v0.0.0") + endif() + + message("-- git Version: ${GIT_VERSION}") + set(${var} ${GIT_VERSION} PARENT_SCOPE) +endfunction() Index: utils/google-benchmark/cmake/gnu_posix_regex.cpp =================================================================== --- /dev/null +++ utils/google-benchmark/cmake/gnu_posix_regex.cpp @@ -0,0 +1,12 @@ +#include +#include +int main() { + std::string str = "test0159"; + regex_t re; + int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + return ec; + } + return regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; +} + Index: utils/google-benchmark/cmake/posix_regex.cpp =================================================================== --- /dev/null +++ utils/google-benchmark/cmake/posix_regex.cpp @@ -0,0 +1,14 @@ +#include +#include +int main() { + std::string str = "test0159"; + regex_t re; + int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + return ec; + } + int ret = regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; + regfree(&re); + return ret; +} + Index: utils/google-benchmark/cmake/std_regex.cpp =================================================================== --- /dev/null +++ utils/google-benchmark/cmake/std_regex.cpp @@ -0,0 +1,10 @@ +#include +#include +int main() { + const std::string str = "test0159"; + std::regex re; + re = std::regex("^[a-z]+[0-9]+$", + std::regex_constants::extended | std::regex_constants::nosubs); + return std::regex_search(str, re) ? 0 : -1; +} + Index: utils/google-benchmark/cmake/steady_clock.cpp =================================================================== --- /dev/null +++ utils/google-benchmark/cmake/steady_clock.cpp @@ -0,0 +1,7 @@ +#include + +int main() { + typedef std::chrono::steady_clock Clock; + Clock::time_point tp = Clock::now(); + ((void)tp); +} Index: utils/google-benchmark/cmake/thread_safety_attributes.cpp =================================================================== --- /dev/null +++ utils/google-benchmark/cmake/thread_safety_attributes.cpp @@ -0,0 +1,4 @@ +#define HAVE_THREAD_SAFETY_ATTRIBUTES +#include "../src/mutex.h" + +int main() {} Index: utils/google-benchmark/include/benchmark/benchmark.h =================================================================== --- /dev/null +++ utils/google-benchmark/include/benchmark/benchmark.h @@ -0,0 +1,21 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#ifndef BENCHMARK_BENCHMARK_H_ +#define BENCHMARK_BENCHMARK_H_ + +#include "macros.h" +#include "benchmark_api.h" +#include "reporter.h" + +#endif // BENCHMARK_BENCHMARK_H_ Index: utils/google-benchmark/include/benchmark/benchmark_api.h =================================================================== --- /dev/null +++ utils/google-benchmark/include/benchmark/benchmark_api.h @@ -0,0 +1,747 @@ +// Support for registering benchmarks for functions. + +/* Example usage: +// Define a function that executes the code to be measured a +// specified number of times: +static void BM_StringCreation(benchmark::State& state) { + while (state.KeepRunning()) + std::string empty_string; +} + +// Register the function as a benchmark +BENCHMARK(BM_StringCreation); + +// Define another benchmark +static void BM_StringCopy(benchmark::State& state) { + std::string x = "hello"; + while (state.KeepRunning()) + std::string copy(x); +} +BENCHMARK(BM_StringCopy); + +// Augment the main() program to invoke benchmarks if specified +// via the --benchmarks command line flag. E.g., +// my_unittest --benchmark_filter=all +// my_unittest --benchmark_filter=BM_StringCreation +// my_unittest --benchmark_filter=String +// my_unittest --benchmark_filter='Copy|Creation' +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + return 0; +} + +// Sometimes a family of microbenchmarks can be implemented with +// just one routine that takes an extra argument to specify which +// one of the family of benchmarks to run. For example, the following +// code defines a family of microbenchmarks for measuring the speed +// of memcpy() calls of different lengths: + +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range_x()]; char* dst = new char[state.range_x()]; + memset(src, 'x', state.range_x()); + while (state.KeepRunning()) + memcpy(dst, src, state.range_x()); + state.SetBytesProcessed(int64_t(state.iterations()) * + int64_t(state.range_x())); + delete[] src; delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); + +// The preceding code is quite repetitive, and can be replaced with the +// following short-hand. The following invocation will pick a few +// appropriate arguments in the specified range and will generate a +// microbenchmark for each such argument. +BENCHMARK(BM_memcpy)->Range(8, 8<<10); + +// You might have a microbenchmark that depends on two inputs. For +// example, the following code defines a family of microbenchmarks for +// measuring the speed of set insertion. +static void BM_SetInsert(benchmark::State& state) { + while (state.KeepRunning()) { + state.PauseTiming(); + set data = ConstructRandomSet(state.range_x()); + state.ResumeTiming(); + for (int j = 0; j < state.range_y(); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->ArgPair(1<<10, 1) + ->ArgPair(1<<10, 8) + ->ArgPair(1<<10, 64) + ->ArgPair(1<<10, 512) + ->ArgPair(8<<10, 1) + ->ArgPair(8<<10, 8) + ->ArgPair(8<<10, 64) + ->ArgPair(8<<10, 512); + +// The preceding code is quite repetitive, and can be replaced with +// the following short-hand. The following macro will pick a few +// appropriate arguments in the product of the two specified ranges +// and will generate a microbenchmark for each such pair. 
+BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512); + +// For more complex patterns of inputs, passing a custom function +// to Apply allows programmatic specification of an +// arbitrary set of arguments to run the microbenchmark on. +// The following example enumerates a dense range on +// one parameter, and a sparse range on the second. +static void CustomArguments(benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b->ArgPair(i, j); +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); + +// Templated microbenchmarks work the same way: +// Produce then consume 'size' messages 'iters' times +// Measures throughput in the absence of multiprogramming. +template int BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + while (state.KeepRunning()) { + for (int i = state.range_x(); i--; ) + q.push(v); + for (int e = state.range_x(); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast(state.iterations())*state.range_x()); +} +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); + +Use `Benchmark::MinTime(double t)` to set the minimum time used to run the +benchmark. This option overrides the `benchmark_min_time` flag. + +void BM_test(benchmark::State& state) { + ... body ... +} +BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds. + +In a multithreaded test, it is guaranteed that none of the threads will start +until all have called KeepRunning, and all will have finished before KeepRunning +returns false. As such, any global setup or teardown you want to do can be +wrapped in a check against the thread index: + +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index == 0) { + // Setup code here. + } + while (state.KeepRunning()) { + // Run the test as normal. + } + if (state.thread_index == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(4); + + +If a benchmark runs a few milliseconds it may be hard to visually compare the +measured times, since the output data is given in nanoseconds per default. In +order to manually set the time unit, you can specify it manually: + +BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); +*/ + +#ifndef BENCHMARK_BENCHMARK_API_H_ +#define BENCHMARK_BENCHMARK_API_H_ + +#include +#include +#include + +#include "macros.h" + +namespace benchmark { +class BenchmarkReporter; + +void Initialize(int* argc, char** argv); + +// Generate a list of benchmarks matching the specified --benchmark_filter flag +// and if --benchmark_list_tests is specified return after printing the name +// of each matching benchmark. Otherwise run each matching benchmark and +// report the results. +// +// The second overload reports the results using the specified 'reporter'. +// +// RETURNS: The number of matching benchmarks. +size_t RunSpecifiedBenchmarks(); +size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter); + + +// If this routine is called, peak memory allocation past this point in the +// benchmark is reported at the end of the benchmark report line. (It is +// computed by running the benchmark once with a single iteration and a memory +// tracer.) 
+// TODO(dominic) +// void MemoryUsage(); + +namespace internal { +class Benchmark; +class BenchmarkImp; +class BenchmarkFamilies; + +template struct Voider { + typedef void type; +}; + +template +struct EnableIfString {}; + +template +struct EnableIfString::type> { + typedef int type; +}; + +void UseCharPointer(char const volatile*); + +// Take ownership of the pointer and register the benchmark. Return the +// registered benchmark. +Benchmark* RegisterBenchmarkInternal(Benchmark*); + +} // end namespace internal + + +// The DoNotOptimize(...) function can be used to prevent a value or +// expression from being optimized away by the compiler. This function is +// intended to add little to no overhead. +// See: https://youtu.be/nXaxk27zwlk?t=2441 +#if defined(__GNUC__) +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + asm volatile("" : : "g"(value) : "memory"); +} +// Force the compiler to flush pending writes to global memory. Acts as an +// effective read/write barrier +inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { + asm volatile("" : : : "memory"); +} +#else +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + internal::UseCharPointer(&reinterpret_cast(value)); +} +// FIXME Add ClobberMemory() for non-gnu compilers +#endif + +// TimeUnit is passed to a benchmark in order to specify the order of magnitude +// for the measured time. +enum TimeUnit { + kNanosecond, + kMicrosecond, + kMillisecond +}; + +// BigO is passed to a benchmark in order to specify the asymptotic computational +// complexity for the benchmark. In case oAuto is selected, complexity will be +// calculated automatically to the best fit. +enum BigO { + oNone, + o1, + oN, + oNSquared, + oNCubed, + oLogN, + oNLogN, + oAuto, + oLambda +}; + +// BigOFunc is passed to a benchmark in order to specify the asymptotic +// computational complexity for the benchmark. +typedef double(BigOFunc)(int); + +// State is passed to a running Benchmark and contains state for the +// benchmark to use. +class State { +public: + State(size_t max_iters, bool has_x, int x, bool has_y, int y, + int thread_i, int n_threads); + + // Returns true if the benchmark should continue through another iteration. + // NOTE: A benchmark may not return from the test until KeepRunning() has + // returned false. + bool KeepRunning() { + if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { + assert(!finished_); + started_ = true; + ResumeTiming(); + } + bool const res = total_iterations_++ < max_iterations; + if (BENCHMARK_BUILTIN_EXPECT(!res, false)) { + assert(started_ && (!finished_ || error_occurred_)); + if (!error_occurred_) { + PauseTiming(); + } + // Total iterations now is one greater than max iterations. Fix this. + total_iterations_ = max_iterations; + finished_ = true; + } + return res; + } + + // REQUIRES: timer is running and 'SkipWithError(...)' has not been called + // by the current thread. + // Stop the benchmark timer. If not called, the timer will be + // automatically stopped after KeepRunning() returns false for the first time. + // + // For threaded benchmarks the PauseTiming() function acts + // like a barrier. I.e., the ith call by a particular thread to this + // function will block until all active threads have made their ith call. + // The timer will stop when the last thread has called this function. 
+ // + // NOTE: PauseTiming()/ResumeTiming() are relatively + // heavyweight, and so their use should generally be avoided + // within each benchmark iteration, if possible. + void PauseTiming(); + + // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called + // by the current thread. + // Start the benchmark timer. The timer is NOT running on entrance to the + // benchmark function. It begins running after the first call to KeepRunning() + // + // For threaded benchmarks the ResumeTiming() function acts + // like a barrier. I.e., the ith call by a particular thread to this + // function will block until all active threads have made their ith call. + // The timer will start when the last thread has called this function. + // + // NOTE: PauseTiming()/ResumeTiming() are relatively + // heavyweight, and so their use should generally be avoided + // within each benchmark iteration, if possible. + void ResumeTiming(); + + // REQUIRES: 'SkipWithError(...)' has not been called previously by the + // current thread. + // Skip any future iterations of the 'KeepRunning()' loop in the current + // thread and report an error with the specified 'msg'. After this call + // the user may explicitly 'return' from the benchmark. + // + // For threaded benchmarks only the current thread stops executing. If + // multiple threads report an error only the first error message is used. + // The current thread is no longer considered 'active' by + // 'PauseTiming()' and 'ResumingTiming()'. + // + // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit + // the current scope immediately. If the function is called from within + // the 'KeepRunning()' loop the current iteration will finish. It is the users + // responsibility to exit the scope as needed. + void SkipWithError(const char* msg); + + // REQUIRES: called exactly once per iteration of the KeepRunning loop. + // Set the manually measured time for this benchmark iteration, which + // is used instead of automatically measured time if UseManualTime() was + // specified. + // + // For threaded benchmarks the SetIterationTime() function acts + // like a barrier. I.e., the ith call by a particular thread to this + // function will block until all threads have made their ith call. + // The time will be set by the last thread to call this function. + void SetIterationTime(double seconds); + + // Set the number of bytes processed by the current benchmark + // execution. This routine is typically called once at the end of a + // throughput oriented benchmark. If this routine is called with a + // value > 0, the report is printed in MB/sec instead of nanoseconds + // per iteration. + // + // REQUIRES: a benchmark has exited its KeepRunning loop. + BENCHMARK_ALWAYS_INLINE + void SetBytesProcessed(size_t bytes) { + bytes_processed_ = bytes; + } + + BENCHMARK_ALWAYS_INLINE + size_t bytes_processed() const { + return bytes_processed_; + } + + // If this routine is called with complexity_n > 0 and complexity report is requested for the + // family benchmark, then current benchmark will be part of the computation and complexity_n will + // represent the length of N. + BENCHMARK_ALWAYS_INLINE + void SetComplexityN(int complexity_n) { + complexity_n_ = complexity_n; + } + + BENCHMARK_ALWAYS_INLINE + size_t complexity_length_n() { + return complexity_n_; + } + + // If this routine is called with items > 0, then an items/s + // label is printed on the benchmark report line for the currently + // executing benchmark. 
It is typically called at the end of a processing + // benchmark where a processing items/second output is desired. + // + // REQUIRES: a benchmark has exited its KeepRunning loop. + BENCHMARK_ALWAYS_INLINE + void SetItemsProcessed(size_t items) { + items_processed_ = items; + } + + BENCHMARK_ALWAYS_INLINE + size_t items_processed() const { + return items_processed_; + } + + // If this routine is called, the specified label is printed at the + // end of the benchmark report line for the currently executing + // benchmark. Example: + // static void BM_Compress(benchmark::State& state) { + // ... + // double compress = input_size / output_size; + // state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression)); + // } + // Produces output that looks like: + // BM_Compress 50 50 14115038 compress:27.3% + // + // REQUIRES: a benchmark has exited its KeepRunning loop. + void SetLabel(const char* label); + + // Allow the use of std::string without actually including . + // This function does not participate in overload resolution unless StringType + // has the nested typename `basic_string`. This typename should be provided + // as an injected class name in the case of std::string. + template + void SetLabel(StringType const & str, + typename internal::EnableIfString::type = 1) { + this->SetLabel(str.c_str()); + } + + // Range arguments for this run. CHECKs if the argument has been set. + BENCHMARK_ALWAYS_INLINE + int range_x() const { + assert(has_range_x_); + ((void)has_range_x_); // Prevent unused warning. + return range_x_; + } + + BENCHMARK_ALWAYS_INLINE + int range_y() const { + assert(has_range_y_); + ((void)has_range_y_); // Prevent unused warning. + return range_y_; + } + + BENCHMARK_ALWAYS_INLINE + size_t iterations() const { return total_iterations_; } + +private: + bool started_; + bool finished_; + size_t total_iterations_; + + bool has_range_x_; + int range_x_; + + bool has_range_y_; + int range_y_; + + size_t bytes_processed_; + size_t items_processed_; + + int complexity_n_; + +public: + // FIXME: Make this private somehow. + bool error_occurred_; +public: + // Index of the executing thread. Values from [0, threads). + const int thread_index; + // Number of threads concurrently executing the benchmark. + const int threads; + const size_t max_iterations; + +private: + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State); +}; + +namespace internal { + +typedef void(Function)(State&); + +// ------------------------------------------------------ +// Benchmark registration object. The BENCHMARK() macro expands +// into an internal::Benchmark* object. Various methods can +// be called on this object to change the properties of the benchmark. +// Each method returns "this" so that multiple method calls can +// chained into one expression. +class Benchmark { +public: + virtual ~Benchmark(); + + // Note: the following methods all return "this" so that multiple + // method calls can be chained together in one expression. + + // Run this benchmark once with "x" as the extra argument passed + // to the function. + // REQUIRES: The function passed to the constructor must accept an arg1. + Benchmark* Arg(int x); + + // Run this benchmark with the given time unit for the generated output report + Benchmark* Unit(TimeUnit unit); + + // Run this benchmark once for a number of values picked from the + // range [start..limit]. (start and limit are always picked.) + // REQUIRES: The function passed to the constructor must accept an arg1. 
+ Benchmark* Range(int start, int limit); + + // Run this benchmark once for every value in the range [start..limit] + // REQUIRES: The function passed to the constructor must accept an arg1. + Benchmark* DenseRange(int start, int limit); + + // Run this benchmark once with "x,y" as the extra arguments passed + // to the function. + // REQUIRES: The function passed to the constructor must accept arg1,arg2. + Benchmark* ArgPair(int x, int y); + + // Pick a set of values A from the range [lo1..hi1] and a set + // of values B from the range [lo2..hi2]. Run the benchmark for + // every pair of values in the cartesian product of A and B + // (i.e., for all combinations of the values in A and B). + // REQUIRES: The function passed to the constructor must accept arg1,arg2. + Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2); + + // Pass this benchmark object to *func, which can customize + // the benchmark by calling various methods like Arg, ArgPair, + // Threads, etc. + Benchmark* Apply(void (*func)(Benchmark* benchmark)); + + // Set the range multiplier for non-dense range. If not called, the range multiplier + // kRangeMultiplier will be used. + Benchmark* RangeMultiplier(int multiplier); + + // Set the minimum amount of time to use when running this benchmark. This + // option overrides the `benchmark_min_time` flag. + // REQUIRES: `t > 0` + Benchmark* MinTime(double t); + + // Specify the amount of times to repeat this benchmark. This option overrides + // the `benchmark_repetitions` flag. + // REQUIRES: `n > 0` + Benchmark* Repetitions(int n); + + // If a particular benchmark is I/O bound, runs multiple threads internally or + // if for some reason CPU timings are not representative, call this method. If + // called, the elapsed time will be used to control how many iterations are + // run, and in the printing of items/second or MB/seconds values. If not + // called, the cpu time used by the benchmark will be used. + Benchmark* UseRealTime(); + + // If a benchmark must measure time manually (e.g. if GPU execution time is being + // measured), call this method. If called, each benchmark iteration should call + // SetIterationTime(seconds) to report the measured time, which will be used + // to control how many iterations are run, and in the printing of items/second + // or MB/second values. + Benchmark* UseManualTime(); + + // Set the asymptotic computational complexity for the benchmark. If called + // the asymptotic computational complexity will be shown on the output. + Benchmark* Complexity(BigO complexity = benchmark::oAuto); + + // Set the asymptotic computational complexity for the benchmark. If called + // the asymptotic computational complexity will be shown on the output. + Benchmark* Complexity(BigOFunc* complexity); + + // Support for running multiple copies of the same benchmark concurrently + // in multiple threads. This may be useful when measuring the scaling + // of some piece of code. + + // Run one instance of this benchmark concurrently in t threads. + Benchmark* Threads(int t); + + // Pick a set of values T from [min_threads,max_threads]. + // min_threads and max_threads are always included in T. Run this + // benchmark once for each value in T. The benchmark run for a + // particular value t consists of t threads running the benchmark + // function concurrently. 
For example, consider: + // BENCHMARK(Foo)->ThreadRange(1,16); + // This will run the following benchmarks: + // Foo in 1 thread + // Foo in 2 threads + // Foo in 4 threads + // Foo in 8 threads + // Foo in 16 threads + Benchmark* ThreadRange(int min_threads, int max_threads); + + // Equivalent to ThreadRange(NumCPUs(), NumCPUs()) + Benchmark* ThreadPerCpu(); + + virtual void Run(State& state) = 0; + + // Used inside the benchmark implementation + struct Instance; + +protected: + explicit Benchmark(const char* name); + Benchmark(Benchmark const&); + void SetName(const char* name); + +private: + friend class BenchmarkFamilies; + BenchmarkImp* imp_; + + Benchmark& operator=(Benchmark const&); +}; + +// The class used to hold all Benchmarks created from static function. +// (ie those created using the BENCHMARK(...) macros. +class FunctionBenchmark : public Benchmark { +public: + FunctionBenchmark(const char* name, Function* func) + : Benchmark(name), func_(func) + {} + + virtual void Run(State& st); +private: + Function* func_; +}; + +} // end namespace internal + +// The base class for all fixture tests. +class Fixture: public internal::Benchmark { +public: + Fixture() : internal::Benchmark("") {} + + virtual void Run(State& st) { + this->SetUp(st); + this->BenchmarkCase(st); + this->TearDown(st); + } + + virtual void SetUp(const State&) {} + virtual void TearDown(const State&) {} + +protected: + virtual void BenchmarkCase(State&) = 0; +}; + +} // end namespace benchmark + + +// ------------------------------------------------------ +// Macro to register benchmarks + +// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1 +// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be +// empty. If X is empty the expression becomes (+1 == +0). +#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0) +#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__ +#else +#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__ +#endif + +// Helpers for generating unique variable names +#define BENCHMARK_PRIVATE_NAME(n) \ + BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) +#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) +#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c + +#define BENCHMARK_PRIVATE_DECLARE(n) \ + static ::benchmark::internal::Benchmark* \ + BENCHMARK_PRIVATE_NAME(n) BENCHMARK_UNUSED + +#define BENCHMARK(n) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n, n))) + +// Old-style macros +#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) +#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2)) +#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t)) +#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi)) +#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \ + BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2)) + +#if __cplusplus >= 201103L + +// Register a benchmark which invokes the function specified by `func` +// with the additional arguments specified by `...`. +// +// For example: +// +// template ` +// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { +// [...] +//} +// /* Registers a benchmark named "BM_takes_args/int_string_test` */ +// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); +#define BENCHMARK_CAPTURE(func, test_case_name, ...) 
\ + BENCHMARK_PRIVATE_DECLARE(func) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #func "/" #test_case_name, \ + [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) + +#endif // __cplusplus >= 11 + +// This will register a benchmark for a templatized function. For example: +// +// template +// void BM_Foo(int iters); +// +// BENCHMARK_TEMPLATE(BM_Foo, 1); +// +// will register BM_Foo<1> as a benchmark. +#define BENCHMARK_TEMPLATE1(n, a) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n))) + +#define BENCHMARK_TEMPLATE2(n, a, b) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #n "<" #a "," #b ">", n))) + +#if __cplusplus >= 201103L +#define BENCHMARK_TEMPLATE(n, ...) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>))) +#else +#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) +#endif + + +#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ +class BaseClass##_##Method##_Benchmark : public BaseClass { \ +public: \ + BaseClass##_##Method##_Benchmark() : BaseClass() { \ + this->SetName(#BaseClass "/" #Method);} \ +protected: \ + virtual void BenchmarkCase(::benchmark::State&); \ +}; + +#define BENCHMARK_DEFINE_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + void BaseClass##_##Method##_Benchmark::BenchmarkCase + +#define BENCHMARK_REGISTER_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark) + +#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ + BENCHMARK_PRIVATE_DECLARE(TestName) = \ + (::benchmark::internal::RegisterBenchmarkInternal(new TestName())) + +// This macro will define and register a benchmark within a fixture class. +#define BENCHMARK_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + BENCHMARK_REGISTER_F(BaseClass, Method); \ + void BaseClass##_##Method##_Benchmark::BenchmarkCase + + +// Helper macro to create a main routine in a test that runs the benchmarks +#define BENCHMARK_MAIN() \ + int main(int argc, char** argv) { \ + ::benchmark::Initialize(&argc, argv); \ + ::benchmark::RunSpecifiedBenchmarks(); \ + } + +#endif // BENCHMARK_BENCHMARK_API_H_ Index: utils/google-benchmark/include/benchmark/macros.h =================================================================== --- /dev/null +++ utils/google-benchmark/include/benchmark/macros.h @@ -0,0 +1,56 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#ifndef BENCHMARK_MACROS_H_ +#define BENCHMARK_MACROS_H_ + +#if __cplusplus < 201103L +# define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + TypeName& operator=(const TypeName&) +#else +# define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&) = delete; \ + TypeName& operator=(const TypeName&) = delete +#endif + +#if defined(__GNUC__) +# define BENCHMARK_UNUSED __attribute__((unused)) +# define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) +# define BENCHMARK_NOEXCEPT noexcept +# define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#elif defined(_MSC_VER) && !defined(__clang__) +# define BENCHMARK_UNUSED +# define BENCHMARK_ALWAYS_INLINE __forceinline +# if _MSC_VER >= 1900 +# define BENCHMARK_NOEXCEPT noexcept +# define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +# else +# define BENCHMARK_NOEXCEPT +# define BENCHMARK_NOEXCEPT_OP(x) +# endif +# define __func__ __FUNCTION__ +#else +# define BENCHMARK_UNUSED +# define BENCHMARK_ALWAYS_INLINE +# define BENCHMARK_NOEXCEPT +# define BENCHMARK_NOEXCEPT_OP(x) +#endif + +#if defined(__GNUC__) +# define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) +#else +# define BENCHMARK_BUILTIN_EXPECT(x, y) x +#endif + +#endif // BENCHMARK_MACROS_H_ Index: utils/google-benchmark/include/benchmark/reporter.h =================================================================== --- /dev/null +++ utils/google-benchmark/include/benchmark/reporter.h @@ -0,0 +1,216 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef BENCHMARK_REPORTER_H_ +#define BENCHMARK_REPORTER_H_ + +#include +#include +#include +#include +#include + +#include "benchmark_api.h" // For forward declaration of BenchmarkReporter + +namespace benchmark { + +// Interface for custom benchmark result printers. +// By default, benchmark reports are printed to stdout. However an application +// can control the destination of the reports by calling +// RunSpecifiedBenchmarks and passing it a custom reporter object. +// The reporter object must implement the following interface. +class BenchmarkReporter { + public: + struct Context { + int num_cpus; + double mhz_per_cpu; + bool cpu_scaling_enabled; + + // The number of chars in the longest benchmark name. + size_t name_field_width; + }; + + struct Run { + Run() : + error_occurred(false), + iterations(1), + time_unit(kNanosecond), + real_accumulated_time(0), + cpu_accumulated_time(0), + bytes_per_second(0), + items_per_second(0), + max_heapbytes_used(0), + complexity(oNone), + complexity_n(0), + report_big_o(false), + report_rms(false) {} + + std::string benchmark_name; + std::string report_label; // Empty if not set by benchmark. + bool error_occurred; + std::string error_message; + + int64_t iterations; + TimeUnit time_unit; + double real_accumulated_time; + double cpu_accumulated_time; + + // Return a value representing the real time per iteration in the unit + // specified by 'time_unit'. 
+ // NOTE: If 'iterations' is zero the returned value represents the + // accumulated time. + double GetAdjustedRealTime() const; + + // Return a value representing the cpu time per iteration in the unit + // specified by 'time_unit'. + // NOTE: If 'iterations' is zero the returned value represents the + // accumulated time. + double GetAdjustedCPUTime() const; + + // Zero if not set by benchmark. + double bytes_per_second; + double items_per_second; + + // This is set to 0.0 if memory tracing is not enabled. + double max_heapbytes_used; + + // Keep track of arguments to compute asymptotic complexity + BigO complexity; + BigOFunc* complexity_lambda; + int complexity_n; + + // Inform print function whether the current run is a complexity report + bool report_big_o; + bool report_rms; + }; + + // Construct a BenchmarkReporter with the output stream set to 'std::cout' + // and the error stream set to 'std::cerr' + BenchmarkReporter(); + + // Called once for every suite of benchmarks run. + // The parameter "context" contains information that the + // reporter may wish to use when generating its report, for example the + // platform under which the benchmarks are running. The benchmark run is + // never started if this function returns false, allowing the reporter + // to skip runs based on the context information. + virtual bool ReportContext(const Context& context) = 0; + + // Called once for each group of benchmark runs, gives information about + // cpu-time and heap memory usage during the benchmark run. If the group + // of runs contained more than two entries then 'report' contains additional + // elements representing the mean and standard deviation of those runs. + // Additionally if this group of runs was the last in a family of benchmarks + // 'reports' contains additional entries representing the asymptotic + // complexity and RMS of that benchmark family. + virtual void ReportRuns(const std::vector& report) = 0; + + // Called once and only once after ever group of benchmarks is run and + // reported. + virtual void Finalize() {} + + // REQUIRES: The object referenced by 'out' is valid for the lifetime + // of the reporter. + void SetOutputStream(std::ostream* out) { + assert(out); + output_stream_ = out; + } + + // REQUIRES: The object referenced by 'err' is valid for the lifetime + // of the reporter. + void SetErrorStream(std::ostream* err) { + assert(err); + error_stream_ = err; + } + + std::ostream& GetOutputStream() const { + return *output_stream_; + } + + std::ostream& GetErrorStream() const { + return *error_stream_; + } + + virtual ~BenchmarkReporter(); + + // Write a human readable string to 'out' representing the specified + // 'context'. + // REQUIRES: 'out' is non-null. + static void PrintBasicContext(std::ostream* out, Context const& context); + + private: + std::ostream* output_stream_; + std::ostream* error_stream_; +}; + +// Simple reporter that outputs benchmark data to the console. This is the +// default reporter used by RunSpecifiedBenchmarks(). 
+class ConsoleReporter : public BenchmarkReporter { + public: + virtual bool ReportContext(const Context& context); + virtual void ReportRuns(const std::vector& reports); + + protected: + virtual void PrintRunData(const Run& report); + + size_t name_field_width_; +}; + +class JSONReporter : public BenchmarkReporter { + public: + JSONReporter() : first_report_(true) {} + virtual bool ReportContext(const Context& context); + virtual void ReportRuns(const std::vector& reports); + virtual void Finalize(); + + private: + void PrintRunData(const Run& report); + + bool first_report_; +}; + +class CSVReporter : public BenchmarkReporter { + public: + virtual bool ReportContext(const Context& context); + virtual void ReportRuns(const std::vector& reports); + + private: + void PrintRunData(const Run& report); +}; + +inline const char* GetTimeUnitString(TimeUnit unit) { + switch (unit) { + case kMillisecond: + return "ms"; + case kMicrosecond: + return "us"; + case kNanosecond: + default: + return "ns"; + } +} + +inline double GetTimeUnitMultiplier(TimeUnit unit) { + switch (unit) { + case kMillisecond: + return 1e3; + case kMicrosecond: + return 1e6; + case kNanosecond: + default: + return 1e9; + } +} + +} // end namespace benchmark +#endif // BENCHMARK_REPORTER_H_ Index: utils/google-benchmark/mingw.py =================================================================== --- /dev/null +++ utils/google-benchmark/mingw.py @@ -0,0 +1,320 @@ +#! /usr/bin/env python +# encoding: utf-8 + +import argparse +import errno +import logging +import os +import platform +import re +import sys +import subprocess +import tempfile + +try: + import winreg +except ImportError: + import _winreg as winreg +try: + import urllib.request as request +except ImportError: + import urllib as request +try: + import urllib.parse as parse +except ImportError: + import urlparse as parse + +class EmptyLogger(object): + ''' + Provides an implementation that performs no logging + ''' + def debug(self, *k, **kw): + pass + def info(self, *k, **kw): + pass + def warn(self, *k, **kw): + pass + def error(self, *k, **kw): + pass + def critical(self, *k, **kw): + pass + def setLevel(self, *k, **kw): + pass + +urls = ( + 'http://downloads.sourceforge.net/project/mingw-w64/Toolchains%20' + 'targetting%20Win32/Personal%20Builds/mingw-builds/installer/' + 'repository.txt', + 'http://downloads.sourceforge.net/project/mingwbuilds/host-windows/' + 'repository.txt' +) +''' +A list of mingw-build repositories +''' + +def repository(urls = urls, log = EmptyLogger()): + ''' + Downloads and parse mingw-build repository files and parses them + ''' + log.info('getting mingw-builds repository') + versions = {} + re_sourceforge = re.compile(r'http://sourceforge.net/projects/([^/]+)/files') + re_sub = r'http://downloads.sourceforge.net/project/\1' + for url in urls: + log.debug(' - requesting: %s', url) + socket = request.urlopen(url) + repo = socket.read() + if not isinstance(repo, str): + repo = repo.decode(); + socket.close() + for entry in repo.split('\n')[:-1]: + value = entry.split('|') + version = tuple([int(n) for n in value[0].strip().split('.')]) + version = versions.setdefault(version, {}) + arch = value[1].strip() + if arch == 'x32': + arch = 'i686' + elif arch == 'x64': + arch = 'x86_64' + arch = version.setdefault(arch, {}) + threading = arch.setdefault(value[2].strip(), {}) + exceptions = threading.setdefault(value[3].strip(), {}) + revision = exceptions.setdefault(int(value[4].strip()[3:]), + re_sourceforge.sub(re_sub, value[5].strip())) + 
return versions + +def find_in_path(file, path=None): + ''' + Attempts to find an executable in the path + ''' + if platform.system() == 'Windows': + file += '.exe' + if path is None: + path = os.environ.get('PATH', '') + if type(path) is type(''): + path = path.split(os.pathsep) + return list(filter(os.path.exists, + map(lambda dir, file=file: os.path.join(dir, file), path))) + +def find_7zip(log = EmptyLogger()): + ''' + Attempts to find 7zip for unpacking the mingw-build archives + ''' + log.info('finding 7zip') + path = find_in_path('7z') + if not path: + key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\7-Zip') + path, _ = winreg.QueryValueEx(key, 'Path') + path = [os.path.join(path, '7z.exe')] + log.debug('found \'%s\'', path[0]) + return path[0] + +find_7zip() + +def unpack(archive, location, log = EmptyLogger()): + ''' + Unpacks a mingw-builds archive + ''' + sevenzip = find_7zip(log) + log.info('unpacking %s', os.path.basename(archive)) + cmd = [sevenzip, 'x', archive, '-o' + location, '-y'] + log.debug(' - %r', cmd) + with open(os.devnull, 'w') as devnull: + subprocess.check_call(cmd, stdout = devnull) + +def download(url, location, log = EmptyLogger()): + ''' + Downloads and unpacks a mingw-builds archive + ''' + log.info('downloading MinGW') + log.debug(' - url: %s', url) + log.debug(' - location: %s', location) + + re_content = re.compile(r'attachment;[ \t]*filename=(")?([^"]*)(")?[\r\n]*') + + stream = request.urlopen(url) + try: + content = stream.getheader('Content-Disposition') or '' + except AttributeError: + content = stream.headers.getheader('Content-Disposition') or '' + matches = re_content.match(content) + if matches: + filename = matches.group(2) + else: + parsed = parse.urlparse(stream.geturl()) + filename = os.path.basename(parsed.path) + + try: + os.makedirs(location) + except OSError as e: + if e.errno == errno.EEXIST and os.path.isdir(location): + pass + else: + raise + + archive = os.path.join(location, filename) + with open(archive, 'wb') as out: + while True: + buf = stream.read(1024) + if not buf: + break + out.write(buf) + unpack(archive, location, log = log) + os.remove(archive) + + possible = os.path.join(location, 'mingw64') + if not os.path.exists(possible): + possible = os.path.join(location, 'mingw32') + if not os.path.exists(possible): + raise ValueError('Failed to find unpacked MinGW: ' + possible) + return possible + +def root(location = None, arch = None, version = None, threading = None, + exceptions = None, revision = None, log = EmptyLogger()): + ''' + Returns the root folder of a specific version of the mingw-builds variant + of gcc. 
Will download the compiler if needed + ''' + + # Get the repository if we don't have all the information + if not (arch and version and threading and exceptions and revision): + versions = repository(log = log) + + # Determine some defaults + version = version or max(versions.keys()) + if not arch: + arch = platform.machine().lower() + if arch == 'x86': + arch = 'i686' + elif arch == 'amd64': + arch = 'x86_64' + if not threading: + keys = versions[version][arch].keys() + if 'posix' in keys: + threading = 'posix' + elif 'win32' in keys: + threading = 'win32' + else: + threading = keys[0] + if not exceptions: + keys = versions[version][arch][threading].keys() + if 'seh' in keys: + exceptions = 'seh' + elif 'sjlj' in keys: + exceptions = 'sjlj' + else: + exceptions = keys[0] + if revision == None: + revision = max(versions[version][arch][threading][exceptions].keys()) + if not location: + location = os.path.join(tempfile.gettempdir(), 'mingw-builds') + + # Get the download url + url = versions[version][arch][threading][exceptions][revision] + + # Tell the user whatzzup + log.info('finding MinGW %s', '.'.join(str(v) for v in version)) + log.debug(' - arch: %s', arch) + log.debug(' - threading: %s', threading) + log.debug(' - exceptions: %s', exceptions) + log.debug(' - revision: %s', revision) + log.debug(' - url: %s', url) + + # Store each specific revision differently + slug = '{version}-{arch}-{threading}-{exceptions}-rev{revision}' + slug = slug.format( + version = '.'.join(str(v) for v in version), + arch = arch, + threading = threading, + exceptions = exceptions, + revision = revision + ) + if arch == 'x86_64': + root_dir = os.path.join(location, slug, 'mingw64') + elif arch == 'i686': + root_dir = os.path.join(location, slug, 'mingw32') + else: + raise ValueError('Unknown MinGW arch: ' + arch) + + # Download if needed + if not os.path.exists(root_dir): + downloaded = download(url, os.path.join(location, slug), log = log) + if downloaded != root_dir: + raise ValueError('The location of mingw did not match\n%s\n%s' + % (downloaded, root_dir)) + + return root_dir + +def str2ver(string): + ''' + Converts a version string into a tuple + ''' + try: + version = tuple(int(v) for v in string.split('.')) + if len(version) is not 3: + raise ValueError() + except ValueError: + raise argparse.ArgumentTypeError( + 'please provide a three digit version string') + return version + +def main(): + ''' + Invoked when the script is run directly by the python interpreter + ''' + parser = argparse.ArgumentParser( + description = 'Downloads a specific version of MinGW', + formatter_class = argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('--location', + help = 'the location to download the compiler to', + default = os.path.join(tempfile.gettempdir(), 'mingw-builds')) + parser.add_argument('--arch', required = True, choices = ['i686', 'x86_64'], + help = 'the target MinGW architecture string') + parser.add_argument('--version', type = str2ver, + help = 'the version of GCC to download') + parser.add_argument('--threading', choices = ['posix', 'win32'], + help = 'the threading type of the compiler') + parser.add_argument('--exceptions', choices = ['sjlj', 'seh', 'dwarf'], + help = 'the method to throw exceptions') + parser.add_argument('--revision', type=int, + help = 'the revision of the MinGW release') + group = parser.add_mutually_exclusive_group() + group.add_argument('-v', '--verbose', action='store_true', + help='increase the script output verbosity') + group.add_argument('-q', '--quiet', 
action='store_true', + help='only print errors and warning') + args = parser.parse_args() + + # Create the logger + logger = logging.getLogger('mingw') + handler = logging.StreamHandler() + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + if args.quiet: + logger.setLevel(logging.WARN) + if args.verbose: + logger.setLevel(logging.DEBUG) + + # Get MinGW + root_dir = root(location = args.location, arch = args.arch, + version = args.version, threading = args.threading, + exceptions = args.exceptions, revision = args.revision, + log = logger) + + sys.stdout.write('%s\n' % os.path.join(root_dir, 'bin')) + +if __name__ == '__main__': + try: + main() + except IOError as e: + sys.stderr.write('IO error: %s\n' % e) + sys.exit(1) + except OSError as e: + sys.stderr.write('OS error: %s\n' % e) + sys.exit(1) + except KeyboardInterrupt as e: + sys.stderr.write('Killed\n') + sys.exit(1) Index: utils/google-benchmark/src/CMakeLists.txt =================================================================== --- /dev/null +++ utils/google-benchmark/src/CMakeLists.txt @@ -0,0 +1,51 @@ +# Allow the source files to find headers in src/ +include_directories(${PROJECT_SOURCE_DIR}/src) + +# Define the source files +set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" + "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc" + "log.cc" "reporter.cc" "sleep.cc" "string_util.cc" + "sysinfo.cc" "walltime.cc" "complexity.cc") +# Determine the correct regular expression engine to use +if(HAVE_STD_REGEX) + set(RE_FILES "re_std.cc") +elseif(HAVE_GNU_POSIX_REGEX) + set(RE_FILES "re_posix.cc") +elseif(HAVE_POSIX_REGEX) + set(RE_FILES "re_posix.cc") +else() + message(FATAL_ERROR "Failed to determine the source files for the regular expression backend") +endif() + +add_library(benchmark ${SOURCE_FILES} ${RE_FILES}) + + +set_target_properties(benchmark PROPERTIES + OUTPUT_NAME "benchmark" + VERSION ${GENERIC_LIB_VERSION} + SOVERSION ${GENERIC_LIB_SOVERSION} +) + +# Link threads. +target_link_libraries(benchmark ${CMAKE_THREAD_LIBS_INIT}) + +# We need extra libraries on Windows +if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") + target_link_libraries(benchmark Shlwapi) +endif() + +# Expose public API +target_include_directories(benchmark PUBLIC ${PROJECT_SOURCE_DIR}/include) + +# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) +install( + TARGETS benchmark + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + COMPONENT library) + +install( + DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" + DESTINATION include + FILES_MATCHING PATTERN "*.*h") Index: utils/google-benchmark/src/arraysize.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/arraysize.h @@ -0,0 +1,34 @@ +#ifndef BENCHMARK_ARRAYSIZE_H_ +#define BENCHMARK_ARRAYSIZE_H_ + +#include "internal_macros.h" + +namespace benchmark { +namespace internal { +// The arraysize(arr) macro returns the # of elements in an array arr. +// The expression is a compile-time constant, and therefore can be +// used in defining new arrays, for example. If you use arraysize on +// a pointer by mistake, you will get a compile-time error. +// + + +// This template function declaration is used in defining arraysize. +// Note that the function doesn't need an implementation, as we only +// use its type. 
+template +char (&ArraySizeHelper(T (&array)[N]))[N]; + +// That gcc wants both of these prototypes seems mysterious. VC, for +// its part, can't decide which to use (another mystery). Matching of +// template overloads: the final frontier. +#ifndef COMPILER_MSVC +template +char (&ArraySizeHelper(const T (&array)[N]))[N]; +#endif + +#define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array))) + +} // end namespace internal +} // end namespace benchmark + +#endif // BENCHMARK_ARRAYSIZE_H_ Index: utils/google-benchmark/src/benchmark.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/benchmark.cc @@ -0,0 +1,1123 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/benchmark.h" +#include "internal_macros.h" + +#ifndef BENCHMARK_OS_WINDOWS +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "commandlineflags.h" +#include "complexity.h" +#include "log.h" +#include "mutex.h" +#include "re.h" +#include "stat.h" +#include "string_util.h" +#include "sysinfo.h" +#include "walltime.h" + +DEFINE_bool(benchmark_list_tests, false, + "Print a list of benchmarks. This option overrides all other " + "options."); + +DEFINE_string(benchmark_filter, ".", + "A regular expression that specifies the set of benchmarks " + "to execute. If this flag is empty, no benchmarks are run. " + "If this flag is the string \"all\", all benchmarks linked " + "into the process are run."); + +DEFINE_double(benchmark_min_time, 0.5, + "Minimum number of seconds we should run benchmark before " + "results are considered significant. For cpu-time based " + "tests, this is the lower bound on the total cpu time " + "used by all threads that make up the test. For real-time " + "based tests, this is the lower bound on the elapsed time " + "of the benchmark execution, regardless of number of " + "threads."); + +DEFINE_int32(benchmark_repetitions, 1, + "The number of runs of each benchmark. If greater than 1, the " + "mean and standard deviation of the runs will be reported."); + +DEFINE_string(benchmark_format, "console", + "The format to use for console output. Valid values are " + "'console', 'json', or 'csv'."); + +DEFINE_bool(color_print, true, "Enables colorized logging."); + +DEFINE_int32(v, 0, "The level of verbose logging to output"); + + +namespace benchmark { + +namespace internal { + +void UseCharPointer(char const volatile*) {} + +// NOTE: This is a dummy "mutex" type used to denote the actual mutex +// returned by GetBenchmarkLock(). This is only used to placate the thread +// safety warnings by giving the return of GetBenchmarkLock() a name. 
+struct CAPABILITY("mutex") BenchmarkLockType {}; +BenchmarkLockType BenchmarkLockVar; + +} // end namespace internal + +inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) +GetBenchmarkLock() +{ + static Mutex lock; + return lock; +} + +namespace { + +bool IsZero(double n) { + return std::abs(n) < std::numeric_limits::epsilon(); +} + +// For non-dense Range, intermediate values are powers of kRangeMultiplier. +static const int kRangeMultiplier = 8; +static const size_t kMaxIterations = 1000000000; + +bool running_benchmark = false; + +// Global variable so that a benchmark can cause a little extra printing +std::string* GetReportLabel() { + static std::string label GUARDED_BY(GetBenchmarkLock()); + return &label; +} + +// Global variable so that a benchmark can report an error as a human readable +// string. If error_message is null no error occurred. +#if defined(_MSC_VER) && _MSC_VER <= 1800 +typedef char* error_message_type; +#else +typedef const char* error_message_type; +#endif + +static std::atomic error_message = ATOMIC_VAR_INIT(nullptr); + +// TODO(ericwf): support MallocCounter. +//static benchmark::MallocCounter *benchmark_mc; + +struct ThreadStats { + ThreadStats() : bytes_processed(0), items_processed(0), complexity_n(0) {} + int64_t bytes_processed; + int64_t items_processed; + int complexity_n; +}; + +// Timer management class +class TimerManager { + public: + TimerManager(int num_threads, Notification* done) + : num_threads_(num_threads), + running_threads_(num_threads), + done_(done), + running_(false), + real_time_used_(0), + cpu_time_used_(0), + manual_time_used_(0), + num_finalized_(0), + phase_number_(0), + entered_(0) + { + } + + // Called by each thread + void StartTimer() EXCLUDES(lock_) { + bool last_thread = false; + { + MutexLock ml(lock_); + last_thread = Barrier(ml); + if (last_thread) { + CHECK(!running_) << "Called StartTimer when timer is already running"; + running_ = true; + start_real_time_ = walltime::Now(); + start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage(); + } + } + if (last_thread) { + phase_condition_.notify_all(); + } + } + + // Called by each thread + void StopTimer() EXCLUDES(lock_) { + bool last_thread = false; + { + MutexLock ml(lock_); + last_thread = Barrier(ml); + if (last_thread) { + CHECK(running_) << "Called StopTimer when timer is already stopped"; + InternalStop(); + } + } + if (last_thread) { + phase_condition_.notify_all(); + } + } + + // Called by each thread + void SetIterationTime(double seconds) EXCLUDES(lock_) { + bool last_thread = false; + { + MutexLock ml(lock_); + last_thread = Barrier(ml); + if (last_thread) { + manual_time_used_ += seconds; + } + } + if (last_thread) { + phase_condition_.notify_all(); + } + } + + // Called by each thread + void Finalize() EXCLUDES(lock_) { + MutexLock l(lock_); + num_finalized_++; + if (num_finalized_ == num_threads_) { + CHECK(!running_) << + "The timer should be stopped before the timer is finalized"; + done_->Notify(); + } + } + + void RemoveErroredThread() EXCLUDES(lock_) { + MutexLock ml(lock_); + int last_thread = --running_threads_ == 0; + if (last_thread && running_) + InternalStop(); + else if (!last_thread) + phase_condition_.notify_all(); + } + + // REQUIRES: timer is not running + double real_time_used() EXCLUDES(lock_) { + MutexLock l(lock_); + CHECK(!running_); + return real_time_used_; + } + + // REQUIRES: timer is not running + double cpu_time_used() EXCLUDES(lock_) { + MutexLock l(lock_); + CHECK(!running_); + return cpu_time_used_; + } + + // 
REQUIRES: timer is not running + double manual_time_used() EXCLUDES(lock_) { + MutexLock l(lock_); + CHECK(!running_); + return manual_time_used_; + } + + private: + Mutex lock_; + Condition phase_condition_; + int num_threads_; + int running_threads_; + Notification* done_; + + bool running_; // Is the timer running + double start_real_time_; // If running_ + double start_cpu_time_; // If running_ + + // Accumulated time so far (does not contain current slice if running_) + double real_time_used_; + double cpu_time_used_; + // Manually set iteration time. User sets this with SetIterationTime(seconds). + double manual_time_used_; + + // How many threads have called Finalize() + int num_finalized_; + + // State for barrier management + int phase_number_; + int entered_; // Number of threads that have entered this barrier + + void InternalStop() REQUIRES(lock_) { + CHECK(running_); + running_ = false; + real_time_used_ += walltime::Now() - start_real_time_; + cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage()) + - start_cpu_time_); + } + + // Enter the barrier and wait until all other threads have also + // entered the barrier. Returns iff this is the last thread to + // enter the barrier. + bool Barrier(MutexLock& ml) REQUIRES(lock_) { + CHECK_LT(entered_, running_threads_); + entered_++; + if (entered_ < running_threads_) { + // Wait for all threads to enter + int phase_number_cp = phase_number_; + auto cb = [this, phase_number_cp]() { + return this->phase_number_ > phase_number_cp || + entered_ == running_threads_; // A thread has aborted in error + }; + phase_condition_.wait(ml.native_handle(), cb); + if (phase_number_ > phase_number_cp) + return false; + // else (running_threads_ == entered_) and we are the last thread. + } + // Last thread has reached the barrier + phase_number_++; + entered_ = 0; + return true; + } +}; + +// TimerManager for current run. +static std::unique_ptr timer_manager = nullptr; + +} // end namespace + +namespace internal { + +// Information kept per benchmark we may want to run +struct Benchmark::Instance { + std::string name; + Benchmark* benchmark; + bool has_arg1; + int arg1; + bool has_arg2; + int arg2; + TimeUnit time_unit; + int range_multiplier; + bool use_real_time; + bool use_manual_time; + BigO complexity; + BigOFunc* complexity_lambda; + bool last_benchmark_instance; + int repetitions; + double min_time; + int threads; // Number of concurrent threads to use + bool multithreaded; // Is benchmark multi-threaded? +}; + +// Class for managing registered benchmarks. Note that each registered +// benchmark identifies a family of related benchmarks to run. +class BenchmarkFamilies { + public: + static BenchmarkFamilies* GetInstance(); + + // Registers a benchmark family and returns the index assigned to it. + size_t AddBenchmark(std::unique_ptr family); + + // Extract the list of benchmark instances that match the specified + // regular expression. 
+ bool FindBenchmarks(const std::string& re, + std::vector* benchmarks); + private: + BenchmarkFamilies() {} + + std::vector> families_; + Mutex mutex_; +}; + + +class BenchmarkImp { +public: + explicit BenchmarkImp(const char* name); + ~BenchmarkImp(); + + void Arg(int x); + void Unit(TimeUnit unit); + void Range(int start, int limit); + void DenseRange(int start, int limit); + void ArgPair(int start, int limit); + void RangePair(int lo1, int hi1, int lo2, int hi2); + void RangeMultiplier(int multiplier); + void MinTime(double n); + void Repetitions(int n); + void UseRealTime(); + void UseManualTime(); + void Complexity(BigO complexity); + void ComplexityLambda(BigOFunc* complexity); + void Threads(int t); + void ThreadRange(int min_threads, int max_threads); + void ThreadPerCpu(); + void SetName(const char* name); + + static void AddRange(std::vector* dst, int lo, int hi, int mult); + +private: + friend class BenchmarkFamilies; + + std::string name_; + int arg_count_; + std::vector< std::pair > args_; // Args for all benchmark runs + TimeUnit time_unit_; + int range_multiplier_; + double min_time_; + int repetitions_; + bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; + std::vector thread_counts_; + + BenchmarkImp& operator=(BenchmarkImp const&); +}; + +BenchmarkFamilies* BenchmarkFamilies::GetInstance() { + static BenchmarkFamilies instance; + return &instance; +} + + +size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr family) { + MutexLock l(mutex_); + size_t index = families_.size(); + families_.push_back(std::move(family)); + return index; +} + +bool BenchmarkFamilies::FindBenchmarks( + const std::string& spec, + std::vector* benchmarks) { + // Make regular expression out of command-line flag + std::string error_msg; + Regex re; + if (!re.Init(spec, &error_msg)) { + std::cerr << "Could not compile benchmark re: " << error_msg << std::endl; + return false; + } + + // Special list of thread counts to use when none are specified + std::vector one_thread; + one_thread.push_back(1); + + MutexLock l(mutex_); + for (std::unique_ptr& bench_family : families_) { + // Family was deleted or benchmark doesn't match + if (!bench_family) continue; + BenchmarkImp* family = bench_family->imp_; + + if (family->arg_count_ == -1) { + family->arg_count_ = 0; + family->args_.emplace_back(-1, -1); + } + for (auto const& args : family->args_) { + const std::vector* thread_counts = + (family->thread_counts_.empty() + ? 
&one_thread + : &family->thread_counts_); + for (int num_threads : *thread_counts) { + + Benchmark::Instance instance; + instance.name = family->name_; + instance.benchmark = bench_family.get(); + instance.has_arg1 = family->arg_count_ >= 1; + instance.arg1 = args.first; + instance.has_arg2 = family->arg_count_ == 2; + instance.arg2 = args.second; + instance.time_unit = family->time_unit_; + instance.range_multiplier = family->range_multiplier_; + instance.min_time = family->min_time_; + instance.repetitions = family->repetitions_; + instance.use_real_time = family->use_real_time_; + instance.use_manual_time = family->use_manual_time_; + instance.complexity = family->complexity_; + instance.complexity_lambda = family->complexity_lambda_; + instance.threads = num_threads; + instance.multithreaded = !(family->thread_counts_.empty()); + + // Add arguments to instance name + if (family->arg_count_ >= 1) { + AppendHumanReadable(instance.arg1, &instance.name); + } + if (family->arg_count_ >= 2) { + AppendHumanReadable(instance.arg2, &instance.name); + } + if (!IsZero(family->min_time_)) { + instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); + } + if (family->repetitions_ != 0) { + instance.name += StringPrintF("/repeats:%d", family->repetitions_); + } + if (family->use_manual_time_) { + instance.name += "/manual_time"; + } else if (family->use_real_time_) { + instance.name += "/real_time"; + } + + // Add the number of threads used to the name + if (!family->thread_counts_.empty()) { + instance.name += StringPrintF("/threads:%d", instance.threads); + } + + if (re.Match(instance.name)) { + instance.last_benchmark_instance = (args == family->args_.back()); + benchmarks->push_back(instance); + } + } + } + } + return true; +} + +BenchmarkImp::BenchmarkImp(const char* name) + : name_(name), arg_count_(-1), time_unit_(kNanosecond), + range_multiplier_(kRangeMultiplier), min_time_(0.0), repetitions_(0), + use_real_time_(false), use_manual_time_(false), + complexity_(oNone) { +} + +BenchmarkImp::~BenchmarkImp() { +} + +void BenchmarkImp::Arg(int x) { + CHECK(arg_count_ == -1 || arg_count_ == 1); + arg_count_ = 1; + args_.emplace_back(x, -1); +} + +void BenchmarkImp::Unit(TimeUnit unit) { + time_unit_ = unit; +} + +void BenchmarkImp::Range(int start, int limit) { + CHECK(arg_count_ == -1 || arg_count_ == 1); + arg_count_ = 1; + std::vector arglist; + AddRange(&arglist, start, limit, range_multiplier_); + + for (int i : arglist) { + args_.emplace_back(i, -1); + } +} + +void BenchmarkImp::DenseRange(int start, int limit) { + CHECK(arg_count_ == -1 || arg_count_ == 1); + arg_count_ = 1; + CHECK_GE(start, 0); + CHECK_LE(start, limit); + for (int arg = start; arg <= limit; arg++) { + args_.emplace_back(arg, -1); + } +} + +void BenchmarkImp::ArgPair(int x, int y) { + CHECK(arg_count_ == -1 || arg_count_ == 2); + arg_count_ = 2; + args_.emplace_back(x, y); +} + +void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) { + CHECK(arg_count_ == -1 || arg_count_ == 2); + arg_count_ = 2; + std::vector arglist1, arglist2; + AddRange(&arglist1, lo1, hi1, range_multiplier_); + AddRange(&arglist2, lo2, hi2, range_multiplier_); + + for (int i : arglist1) { + for (int j : arglist2) { + args_.emplace_back(i, j); + } + } +} + +void BenchmarkImp::RangeMultiplier(int multiplier) { + CHECK(multiplier > 1); + range_multiplier_ = multiplier; +} + +void BenchmarkImp::MinTime(double t) { + CHECK(t > 0.0); + min_time_ = t; +} + + +void BenchmarkImp::Repetitions(int n) { + CHECK(n > 0); + repetitions_ = n; 
+} + +void BenchmarkImp::UseRealTime() { + CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; + use_real_time_ = true; +} + +void BenchmarkImp::UseManualTime() { + CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; + use_manual_time_ = true; +} + +void BenchmarkImp::Complexity(BigO complexity){ + complexity_ = complexity; +} + +void BenchmarkImp::ComplexityLambda(BigOFunc* complexity) { + complexity_lambda_ = complexity; +} + +void BenchmarkImp::Threads(int t) { + CHECK_GT(t, 0); + thread_counts_.push_back(t); +} + +void BenchmarkImp::ThreadRange(int min_threads, int max_threads) { + CHECK_GT(min_threads, 0); + CHECK_GE(max_threads, min_threads); + + AddRange(&thread_counts_, min_threads, max_threads, 2); +} + +void BenchmarkImp::ThreadPerCpu() { + static int num_cpus = NumCPUs(); + thread_counts_.push_back(num_cpus); +} + +void BenchmarkImp::SetName(const char* name) { + name_ = name; +} + +void BenchmarkImp::AddRange(std::vector* dst, int lo, int hi, int mult) { + CHECK_GE(lo, 0); + CHECK_GE(hi, lo); + CHECK_GE(mult, 2); + + // Add "lo" + dst->push_back(lo); + + static const int kint32max = std::numeric_limits::max(); + + // Now space out the benchmarks in multiples of "mult" + for (int32_t i = 1; i < kint32max/mult; i *= mult) { + if (i >= hi) break; + if (i > lo) { + dst->push_back(i); + } + } + // Add "hi" (if different from "lo") + if (hi != lo) { + dst->push_back(hi); + } +} + +Benchmark::Benchmark(const char* name) + : imp_(new BenchmarkImp(name)) +{ +} + +Benchmark::~Benchmark() { + delete imp_; +} + +Benchmark::Benchmark(Benchmark const& other) + : imp_(new BenchmarkImp(*other.imp_)) +{ +} + +Benchmark* Benchmark::Arg(int x) { + imp_->Arg(x); + return this; +} + +Benchmark* Benchmark::Unit(TimeUnit unit) { + imp_->Unit(unit); + return this; +} + +Benchmark* Benchmark::Range(int start, int limit) { + imp_->Range(start, limit); + return this; +} + +Benchmark* Benchmark::DenseRange(int start, int limit) { + imp_->DenseRange(start, limit); + return this; +} + +Benchmark* Benchmark::ArgPair(int x, int y) { + imp_->ArgPair(x, y); + return this; +} + +Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) { + imp_->RangePair(lo1, hi1, lo2, hi2); + return this; +} + +Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { + custom_arguments(this); + return this; +} + +Benchmark* Benchmark::RangeMultiplier(int multiplier) { + imp_->RangeMultiplier(multiplier); + return this; +} + + +Benchmark* Benchmark::Repetitions(int t) { + imp_->Repetitions(t); + return this; +} + +Benchmark* Benchmark::MinTime(double t) { + imp_->MinTime(t); + return this; +} + +Benchmark* Benchmark::UseRealTime() { + imp_->UseRealTime(); + return this; +} + +Benchmark* Benchmark::UseManualTime() { + imp_->UseManualTime(); + return this; +} + +Benchmark* Benchmark::Complexity(BigO complexity) { + imp_->Complexity(complexity); + return this; +} + +Benchmark* Benchmark::Complexity(BigOFunc* complexity) { + imp_->Complexity(oLambda); + imp_->ComplexityLambda(complexity); + return this; +} + +Benchmark* Benchmark::Threads(int t) { + imp_->Threads(t); + return this; +} + +Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { + imp_->ThreadRange(min_threads, max_threads); + return this; +} + +Benchmark* Benchmark::ThreadPerCpu() { + imp_->ThreadPerCpu(); + return this; +} + +void Benchmark::SetName(const char* name) { + imp_->SetName(name); +} + +void FunctionBenchmark::Run(State& st) { + func_(st); +} 
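For context, a translation unit built against this library defines its benchmark bodies around State::KeepRunning() and registers them through the fluent Benchmark interface implemented above. A minimal sketch, with illustrative names only (it is not part of this patch):

#include "benchmark/benchmark.h"
#include <vector>

static void BM_VectorPushBack(benchmark::State& state) {
  while (state.KeepRunning()) {
    std::vector<int> v;
    v.reserve(state.range_x());
    for (int i = 0; i < state.range_x(); ++i)
      v.push_back(i);
    benchmark::DoNotOptimize(v.data());      // keep the work observable
    state.SetComplexityN(state.range_x());   // feeds ComputeBigO later on
  }
}
// Range(8, 1 << 12) with RangeMultiplier(2) expands, via AddRange above, to
// the arguments 8, 16, 32, ..., 4096; instances are named
// BM_VectorPushBack/8, BM_VectorPushBack/16, and so on.
BENCHMARK(BM_VectorPushBack)
    ->RangeMultiplier(2)
    ->Range(8, 1 << 12)
    ->Complexity(benchmark::oN);
BENCHMARK_MAIN()

At run time the flags declared at the top of this file select and format the output, for example --benchmark_filter=BM_VectorPushBack.* --benchmark_repetitions=3 --benchmark_format=json.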
+ +} // end namespace internal + +namespace { + +// Execute one thread of benchmark b for the specified number of iterations. +// Adds the stats collected for the thread into *total. +void RunInThread(const benchmark::internal::Benchmark::Instance* b, + size_t iters, int thread_id, + ThreadStats* total) EXCLUDES(GetBenchmarkLock()) { + State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id, b->threads); + b->benchmark->Run(st); + CHECK(st.iterations() == st.max_iterations) << + "Benchmark returned before State::KeepRunning() returned false!"; + { + MutexLock l(GetBenchmarkLock()); + total->bytes_processed += st.bytes_processed(); + total->items_processed += st.items_processed(); + total->complexity_n += st.complexity_length_n(); + } + + timer_manager->Finalize(); +} + +void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, + BenchmarkReporter* br, + std::vector& complexity_reports) + EXCLUDES(GetBenchmarkLock()) { + size_t iters = 1; + + std::vector reports; + + std::vector pool; + if (b.multithreaded) + pool.resize(b.threads); + + const int repeats = b.repetitions != 0 ? b.repetitions + : FLAGS_benchmark_repetitions; + for (int i = 0; i < repeats; i++) { + std::string mem; + for (;;) { + // Try benchmark + VLOG(2) << "Running " << b.name << " for " << iters << "\n"; + + { + MutexLock l(GetBenchmarkLock()); + GetReportLabel()->clear(); + } + error_message = nullptr; + + Notification done; + timer_manager = std::unique_ptr(new TimerManager(b.threads, &done)); + + ThreadStats total; + running_benchmark = true; + if (b.multithreaded) { + // If this is out first iteration of the while(true) loop then the + // threads haven't been started and can't be joined. Otherwise we need + // to join the thread before replacing them. + for (std::thread& thread : pool) { + if (thread.joinable()) + thread.join(); + } + for (std::size_t ti = 0; ti < pool.size(); ++ti) { + pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti), &total); + } + } else { + // Run directly in this thread + RunInThread(&b, iters, 0, &total); + } + done.WaitForNotification(); + running_benchmark = false; + + const double cpu_accumulated_time = timer_manager->cpu_time_used(); + const double real_accumulated_time = timer_manager->real_time_used(); + const double manual_accumulated_time = timer_manager->manual_time_used(); + timer_manager.reset(); + + VLOG(2) << "Ran in " << cpu_accumulated_time << "/" + << real_accumulated_time << "\n"; + + // Base decisions off of real time if requested by this benchmark. + double seconds = cpu_accumulated_time; + if (b.use_manual_time) { + seconds = manual_accumulated_time; + } else if (b.use_real_time) { + seconds = real_accumulated_time; + } + + std::string label; + { + MutexLock l(GetBenchmarkLock()); + label = *GetReportLabel(); + } + error_message_type error_msg = error_message; + + const double min_time = !IsZero(b.min_time) ? b.min_time + : FLAGS_benchmark_min_time; + + // If this was the first run, was elapsed time or cpu time large enough? + // If this is not the first run, go with the current value of iter. + if ((i > 0) || (error_msg != nullptr) || + (iters >= kMaxIterations) || + (seconds >= min_time) || + (real_accumulated_time >= 5*min_time)) { + + // Create report about this benchmark run. + BenchmarkReporter::Run report; + report.benchmark_name = b.name; + report.error_occurred = error_msg != nullptr; + report.error_message = error_msg != nullptr ? 
error_msg : ""; + report.report_label = label; + // Report the total iterations across all threads. + report.iterations = static_cast(iters) * b.threads; + report.time_unit = b.time_unit; + + if (!report.error_occurred) { + double bytes_per_second = 0; + if (total.bytes_processed > 0 && seconds > 0.0) { + bytes_per_second = (total.bytes_processed / seconds); + } + double items_per_second = 0; + if (total.items_processed > 0 && seconds > 0.0) { + items_per_second = (total.items_processed / seconds); + } + + if (b.use_manual_time) { + report.real_accumulated_time = manual_accumulated_time; + } else { + report.real_accumulated_time = real_accumulated_time; + } + report.cpu_accumulated_time = cpu_accumulated_time; + report.bytes_per_second = bytes_per_second; + report.items_per_second = items_per_second; + report.complexity_n = total.complexity_n; + report.complexity = b.complexity; + report.complexity_lambda = b.complexity_lambda; + if(report.complexity != oNone) + complexity_reports.push_back(report); + } + + reports.push_back(report); + break; + } + + // See how much iterations should be increased by + // Note: Avoid division by zero with max(seconds, 1ns). + double multiplier = min_time * 1.4 / std::max(seconds, 1e-9); + // If our last run was at least 10% of FLAGS_benchmark_min_time then we + // use the multiplier directly. Otherwise we use at most 10 times + // expansion. + // NOTE: When the last run was at least 10% of the min time the max + // expansion should be 14x. + bool is_significant = (seconds / min_time) > 0.1; + multiplier = is_significant ? multiplier : std::min(10.0, multiplier); + if (multiplier <= 1.0) multiplier = 2.0; + double next_iters = std::max(multiplier * iters, iters + 1.0); + if (next_iters > kMaxIterations) { + next_iters = kMaxIterations; + } + VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; + iters = static_cast(next_iters + 0.5); + } + } + std::vector additional_run_stats = ComputeStats(reports); + reports.insert(reports.end(), additional_run_stats.begin(), + additional_run_stats.end()); + + if((b.complexity != oNone) && b.last_benchmark_instance) { + additional_run_stats = ComputeBigO(complexity_reports); + reports.insert(reports.end(), additional_run_stats.begin(), + additional_run_stats.end()); + complexity_reports.clear(); + } + + br->ReportRuns(reports); + + if (b.multithreaded) { + for (std::thread& thread : pool) + thread.join(); + } +} + +} // namespace + +State::State(size_t max_iters, bool has_x, int x, bool has_y, int y, + int thread_i, int n_threads) + : started_(false), finished_(false), total_iterations_(0), + has_range_x_(has_x), range_x_(x), + has_range_y_(has_y), range_y_(y), + bytes_processed_(0), items_processed_(0), + complexity_n_(0), + error_occurred_(false), + thread_index(thread_i), + threads(n_threads), + max_iterations(max_iters) +{ + CHECK(max_iterations != 0) << "At least one iteration must be run"; + CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; +} + +void State::PauseTiming() { + // Add in time accumulated so far + CHECK(running_benchmark); + CHECK(started_ && !finished_ && !error_occurred_); + timer_manager->StopTimer(); +} + +void State::ResumeTiming() { + CHECK(running_benchmark); + CHECK(started_ && !finished_ && !error_occurred_); + timer_manager->StartTimer(); +} + +void State::SkipWithError(const char* msg) { + CHECK(msg); + error_occurred_ = true; + error_message_type expected_no_error_msg = nullptr; + error_message.compare_exchange_weak(expected_no_error_msg, + 
const_cast(msg)); + started_ = finished_ = true; + total_iterations_ = max_iterations; + timer_manager->RemoveErroredThread(); +} + +void State::SetIterationTime(double seconds) +{ + CHECK(running_benchmark); + timer_manager->SetIterationTime(seconds); +} + +void State::SetLabel(const char* label) { + CHECK(running_benchmark); + MutexLock l(GetBenchmarkLock()); + *GetReportLabel() = label; +} + +namespace internal { +namespace { + +void RunMatchingBenchmarks(const std::vector& benchmarks, + BenchmarkReporter* reporter) { + CHECK(reporter != nullptr); + + // Determine the width of the name field using a minimum width of 10. + bool has_repetitions = FLAGS_benchmark_repetitions > 1; + size_t name_field_width = 10; + for (const Benchmark::Instance& benchmark : benchmarks) { + name_field_width = + std::max(name_field_width, benchmark.name.size()); + has_repetitions |= benchmark.repetitions > 1; + } + if (has_repetitions) + name_field_width += std::strlen("_stddev"); + + // Print header here + BenchmarkReporter::Context context; + context.num_cpus = NumCPUs(); + context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f; + + context.cpu_scaling_enabled = CpuScalingEnabled(); + context.name_field_width = name_field_width; + + // Keep track of runing times of all instances of current benchmark + std::vector complexity_reports; + + if (reporter->ReportContext(context)) { + for (const auto& benchmark : benchmarks) { + RunBenchmark(benchmark, reporter, complexity_reports); + } + } +} + +std::unique_ptr GetDefaultReporter() { + typedef std::unique_ptr PtrType; + if (FLAGS_benchmark_format == "console") { + return PtrType(new ConsoleReporter); + } else if (FLAGS_benchmark_format == "json") { + return PtrType(new JSONReporter); + } else if (FLAGS_benchmark_format == "csv") { + return PtrType(new CSVReporter); + } else { + std::cerr << "Unexpected format: '" << FLAGS_benchmark_format << "'\n"; + std::exit(1); + } +} + +} // end namespace +} // end namespace internal + +size_t RunSpecifiedBenchmarks() { + return RunSpecifiedBenchmarks(nullptr); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter) { + std::string spec = FLAGS_benchmark_filter; + if (spec.empty() || spec == "all") + spec = "."; // Regexp that matches all benchmarks + + std::vector benchmarks; + auto families = internal::BenchmarkFamilies::GetInstance(); + if (!families->FindBenchmarks(spec, &benchmarks)) return 0; + + if (FLAGS_benchmark_list_tests) { + for (auto const& benchmark : benchmarks) + std::cout << benchmark.name << "\n"; + } else { + std::unique_ptr default_reporter; + if (!reporter) { + default_reporter = internal::GetDefaultReporter(); + reporter = default_reporter.get(); + } + internal::RunMatchingBenchmarks(benchmarks, reporter); + reporter->Finalize(); + } + return benchmarks.size(); +} + +namespace internal { + +void PrintUsageAndExit() { + fprintf(stdout, + "benchmark" + " [--benchmark_list_tests={true|false}]\n" + " [--benchmark_filter=]\n" + " [--benchmark_min_time=]\n" + " [--benchmark_repetitions=]\n" + " [--benchmark_format=]\n" + " [--color_print={true|false}]\n" + " [--v=]\n"); + exit(0); +} + +void ParseCommandLineFlags(int* argc, char** argv) { + using namespace benchmark; + for (int i = 1; i < *argc; ++i) { + if ( + ParseBoolFlag(argv[i], "benchmark_list_tests", + &FLAGS_benchmark_list_tests) || + ParseStringFlag(argv[i], "benchmark_filter", + &FLAGS_benchmark_filter) || + ParseDoubleFlag(argv[i], "benchmark_min_time", + &FLAGS_benchmark_min_time) || + ParseInt32Flag(argv[i], "benchmark_repetitions", + 
&FLAGS_benchmark_repetitions) || + ParseStringFlag(argv[i], "benchmark_format", + &FLAGS_benchmark_format) || + ParseBoolFlag(argv[i], "color_print", + &FLAGS_color_print) || + ParseInt32Flag(argv[i], "v", &FLAGS_v)) { + for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1]; + + --(*argc); + --i; + } else if (IsFlag(argv[i], "help")) { + PrintUsageAndExit(); + } + } + + if (FLAGS_benchmark_format != "console" && + FLAGS_benchmark_format != "json" && + FLAGS_benchmark_format != "csv") { + PrintUsageAndExit(); + } +} + +Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { + std::unique_ptr bench_ptr(bench); + BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); + families->AddBenchmark(std::move(bench_ptr)); + return bench; +} + +} // end namespace internal + +void Initialize(int* argc, char** argv) { + internal::ParseCommandLineFlags(argc, argv); + internal::SetLogLevel(FLAGS_v); + // TODO remove this. It prints some output the first time it is called. + // We don't want to have this ouput printed during benchmarking. + MyCPUUsage(); + // The first call to walltime::Now initialized it. Call it once to + // prevent the initialization from happening in a benchmark. + walltime::Now(); +} + +} // end namespace benchmark Index: utils/google-benchmark/src/check.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/check.h @@ -0,0 +1,72 @@ +#ifndef CHECK_H_ +#define CHECK_H_ + +#include +#include + +#include "internal_macros.h" +#include "log.h" + +namespace benchmark { +namespace internal { + +typedef void(AbortHandlerT)(); + +inline AbortHandlerT*& GetAbortHandler() { + static AbortHandlerT* handler = &std::abort; + return handler; +} + +BENCHMARK_NORETURN inline void CallAbortHandler() { + GetAbortHandler()(); + std::abort(); // fallback to enforce noreturn +} + +// CheckHandler is the class constructed by failing CHECK macros. CheckHandler +// will log information about the failures and abort when it is destructed. +class CheckHandler { +public: + CheckHandler(const char* check, const char* file, const char* func, int line) + : log_(GetErrorLogInstance()) + { + log_ << file << ":" << line << ": " << func << ": Check `" + << check << "' failed. "; + } + + std::ostream& GetLog() { + return log_; + } + + BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { + log_ << std::endl; + CallAbortHandler(); + } + + CheckHandler & operator=(const CheckHandler&) = delete; + CheckHandler(const CheckHandler&) = delete; + CheckHandler() = delete; +private: + std::ostream& log_; +}; + +} // end namespace internal +} // end namespace benchmark + +// The CHECK macro returns a std::ostream object that can have extra information +// written to it. +#ifndef NDEBUG +# define CHECK(b) (b ? 
::benchmark::internal::GetNullLogInstance() \ + : ::benchmark::internal::CheckHandler( \ + #b, __FILE__, __func__, __LINE__).GetLog()) +#else +# define CHECK(b) ::benchmark::internal::GetNullLogInstance() +#endif + +#define CHECK_EQ(a, b) CHECK((a) == (b)) +#define CHECK_NE(a, b) CHECK((a) != (b)) +#define CHECK_GE(a, b) CHECK((a) >= (b)) +#define CHECK_LE(a, b) CHECK((a) <= (b)) +#define CHECK_GT(a, b) CHECK((a) > (b)) +#define CHECK_LT(a, b) CHECK((a) < (b)) + +#endif // CHECK_H_ Index: utils/google-benchmark/src/colorprint.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/colorprint.h @@ -0,0 +1,27 @@ +#ifndef BENCHMARK_COLORPRINT_H_ +#define BENCHMARK_COLORPRINT_H_ + +#include +#include +#include + +namespace benchmark { +enum LogColor { + COLOR_DEFAULT, + COLOR_RED, + COLOR_GREEN, + COLOR_YELLOW, + COLOR_BLUE, + COLOR_MAGENTA, + COLOR_CYAN, + COLOR_WHITE +}; + +std::string FormatString(const char* msg, va_list args); +std::string FormatString(const char* msg, ...); + +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...); + +} // end namespace benchmark + +#endif // BENCHMARK_COLORPRINT_H_ Index: utils/google-benchmark/src/colorprint.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/colorprint.cc @@ -0,0 +1,158 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
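+// A sketch of how the interface declared in colorprint.h above is driven;
+// the format string and labels are illustrative, not taken from this patch:
+//
+//   benchmark::ColorPrintf(std::cout, benchmark::COLOR_GREEN,
+//                          "%-20s %10d\n", "BM_Example", 1000);
+//
+// With --color_print=false the call reduces to the plain FormatString()
+// path implemented below.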
+ +#include "colorprint.h" + +#include +#include +#include +#include +#include + +#include "commandlineflags.h" +#include "check.h" +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#endif + +DECLARE_bool(color_print); + +namespace benchmark { +namespace { +#ifdef BENCHMARK_OS_WINDOWS +typedef WORD PlatformColorCode; +#else +typedef const char* PlatformColorCode; +#endif + +PlatformColorCode GetPlatformColorCode(LogColor color) { +#ifdef BENCHMARK_OS_WINDOWS + switch (color) { + case COLOR_RED: + return FOREGROUND_RED; + case COLOR_GREEN: + return FOREGROUND_GREEN; + case COLOR_YELLOW: + return FOREGROUND_RED | FOREGROUND_GREEN; + case COLOR_BLUE: + return FOREGROUND_BLUE; + case COLOR_MAGENTA: + return FOREGROUND_BLUE | FOREGROUND_RED; + case COLOR_CYAN: + return FOREGROUND_BLUE | FOREGROUND_GREEN; + case COLOR_WHITE: // fall through to default + default: + return 0; + } +#else + switch (color) { + case COLOR_RED: + return "1"; + case COLOR_GREEN: + return "2"; + case COLOR_YELLOW: + return "3"; + case COLOR_BLUE: + return "4"; + case COLOR_MAGENTA: + return "5"; + case COLOR_CYAN: + return "6"; + case COLOR_WHITE: + return "7"; + default: + return nullptr; + }; +#endif +} + +} // end namespace + +std::string FormatString(const char *msg, va_list args) { + // we might need a second shot at this, so pre-emptivly make a copy + va_list args_cp; + va_copy(args_cp, args); + + std::size_t size = 256; + char local_buff[256]; + auto ret = std::vsnprintf(local_buff, size, msg, args_cp); + + va_end(args_cp); + + // currently there is no error handling for failure, so this is hack. + CHECK(ret >= 0); + + if (ret == 0) // handle empty expansion + return {}; + else if (static_cast(ret) < size) + return local_buff; + else { + // we did not provide a long enough buffer on our first attempt. + size = (size_t)ret + 1; // + 1 for the null byte + std::unique_ptr buff(new char[size]); + ret = std::vsnprintf(buff.get(), size, msg, args); + CHECK(ret > 0 && ((size_t)ret) < size); + return buff.get(); + } +} + +std::string FormatString(const char *msg, ...) { + va_list args; + va_start(args, msg); + auto tmp = FormatString(msg, args); + va_end(args); + return tmp; +} + +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + + if (!FLAGS_color_print) { + out << FormatString(fmt, args); + va_end(args); + return; + } + +#ifdef BENCHMARK_OS_WINDOWS + const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); + + // Gets the current text color. + CONSOLE_SCREEN_BUFFER_INFO buffer_info; + GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); + const WORD old_color_attrs = buffer_info.wAttributes; + + // We need to flush the stream buffers into the console before each + // SetConsoleTextAttribute call lest it affect the text that is already + // printed but has not yet reached the console. + fflush(stdout); + SetConsoleTextAttribute(stdout_handle, + GetPlatformColorCode(color) | FOREGROUND_INTENSITY); + vprintf(fmt, args); + + fflush(stdout); + // Restores the text color. 
+ SetConsoleTextAttribute(stdout_handle, old_color_attrs); +#else + const char* color_code = GetPlatformColorCode(color); + if (color_code) out << FormatString("\033[0;3%sm", color_code); + out << FormatString(fmt, args) << "\033[m"; +#endif + + va_end(args); +} + +} // end namespace benchmark Index: utils/google-benchmark/src/commandlineflags.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/commandlineflags.h @@ -0,0 +1,76 @@ +#ifndef BENCHMARK_COMMANDLINEFLAGS_H_ +#define BENCHMARK_COMMANDLINEFLAGS_H_ + +#include +#include + +// Macro for referencing flags. +#define FLAG(name) FLAGS_##name + +// Macros for declaring flags. +#define DECLARE_bool(name) extern bool FLAG(name) +#define DECLARE_int32(name) extern int32_t FLAG(name) +#define DECLARE_int64(name) extern int64_t FLAG(name) +#define DECLARE_double(name) extern double FLAG(name) +#define DECLARE_string(name) extern std::string FLAG(name) + +// Macros for defining flags. +#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val) +#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val) +#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val) +#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val) +#define DEFINE_string(name, default_val, doc) \ + std::string FLAG(name) = (default_val) + +namespace benchmark { +// Parses 'str' for a 32-bit signed integer. If successful, writes the result +// to *value and returns true; otherwise leaves *value unchanged and returns +// false. +bool ParseInt32(const std::string& src_text, const char* str, int32_t* value); + +// Parses a bool/Int32/string from the environment variable +// corresponding to the given Google Test flag. +bool BoolFromEnv(const char* flag, bool default_val); +int32_t Int32FromEnv(const char* flag, int32_t default_val); +double DoubleFromEnv(const char* flag, double default_val); +const char* StringFromEnv(const char* flag, const char* default_val); + +// Parses a string for a bool flag, in the form of either +// "--flag=value" or "--flag". +// +// In the former case, the value is taken as true as long as it does +// not start with '0', 'f', or 'F'. +// +// In the latter case, the value is taken as true. +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseBoolFlag(const char* str, const char* flag, bool* value); + +// Parses a string for an Int32 flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); + +// Parses a string for a Double flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseDoubleFlag(const char* str, const char* flag, double* value); + +// Parses a string for a string flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseStringFlag(const char* str, const char* flag, std::string* value); + +// Returns true if the string matches the flag. 
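+// Illustrative calls for the helpers above (the argv strings are hypothetical):
+//
+//   bool color = true;
+//   ParseBoolFlag("--color_print=0", "color_print", &color);
+//   // returns true; color is now false ('0' parses as false)
+//
+//   int32_t reps = 1;
+//   ParseInt32Flag("--benchmark_repetitions=5", "benchmark_repetitions", &reps);
+//   // returns true; reps == 5
+//
+//   ParseBoolFlag("--benchmark_filter=.*", "color_print", &color);
+//   // wrong flag name: returns false and leaves color untouched
+//
+//   BoolFromEnv("color_print", true);
+//   // reads BENCHMARK_COLOR_PRINT; any value other than "0" counts as true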
+bool IsFlag(const char* str, const char* flag); + +} // end namespace benchmark + +#endif // BENCHMARK_COMMANDLINEFLAGS_H_ Index: utils/google-benchmark/src/commandlineflags.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/commandlineflags.cc @@ -0,0 +1,220 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "commandlineflags.h" + +#include +#include +#include +#include + +namespace benchmark { +// Parses 'str' for a 32-bit signed integer. If successful, writes +// the result to *value and returns true; otherwise leaves *value +// unchanged and returns false. +bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) { + // Parses the environment variable as a decimal integer. + char* end = nullptr; + const long long_value = strtol(str, &end, 10); // NOLINT + + // Has strtol() consumed all characters in the string? + if (*end != '\0') { + // No - an invalid character was encountered. + std::cerr << src_text << " is expected to be a 32-bit integer, " + << "but actually has value \"" << str << "\".\n"; + return false; + } + + // Is the parsed value in the range of an Int32? + const int32_t result = static_cast(long_value); + if (long_value == std::numeric_limits::max() || + long_value == std::numeric_limits::min() || + // The parsed value overflows as a long. (strtol() returns + // LONG_MAX or LONG_MIN when the input overflows.) + result != long_value + // The parsed value overflows as an Int32. + ) { + std::cerr << src_text << " is expected to be a 32-bit integer, " + << "but actually has value \"" << str << "\", " + << "which overflows.\n"; + return false; + } + + *value = result; + return true; +} + +// Parses 'str' for a double. If successful, writes the result to *value and +// returns true; otherwise leaves *value unchanged and returns false. +bool ParseDouble(const std::string& src_text, const char* str, double* value) { + // Parses the environment variable as a decimal integer. + char* end = nullptr; + const double double_value = strtod(str, &end); // NOLINT + + // Has strtol() consumed all characters in the string? + if (*end != '\0') { + // No - an invalid character was encountered. + std::cerr << src_text << " is expected to be a double, " + << "but actually has value \"" << str << "\".\n"; + return false; + } + + *value = double_value; + return true; +} + +inline const char* GetEnv(const char* name) { +#if defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9) + // Environment variables which we programmatically clear will be set to the + // empty string rather than unset (nullptr). Handle that case. + const char* const env = getenv(name); + return (env != nullptr && env[0] != '\0') ? env : nullptr; +#else + return getenv(name); +#endif +} + +// Returns the name of the environment variable corresponding to the +// given flag. 
For example, FlagToEnvVar("foo") will return +// "BENCHMARK_FOO" in the open-source version. +static std::string FlagToEnvVar(const char* flag) { + const std::string flag_str(flag); + + std::string env_var; + for (size_t i = 0; i != flag_str.length(); ++i) + env_var += static_cast(::toupper(flag_str.c_str()[i])); + + return "BENCHMARK_" + env_var; +} + +// Reads and returns the Boolean environment variable corresponding to +// the given flag; if it's not set, returns default_value. +// +// The value is considered true iff it's not "0". +bool BoolFromEnv(const char* flag, bool default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = GetEnv(env_var.c_str()); + return string_value == nullptr ? default_value : strcmp(string_value, "0") != 0; +} + +// Reads and returns a 32-bit integer stored in the environment +// variable corresponding to the given flag; if it isn't set or +// doesn't represent a valid 32-bit integer, returns default_value. +int32_t Int32FromEnv(const char* flag, int32_t default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = GetEnv(env_var.c_str()); + if (string_value == nullptr) { + // The environment variable is not set. + return default_value; + } + + int32_t result = default_value; + if (!ParseInt32(std::string("Environment variable ") + env_var, string_value, + &result)) { + std::cout << "The default value " << default_value << " is used.\n"; + return default_value; + } + + return result; +} + +// Reads and returns the string environment variable corresponding to +// the given flag; if it's not set, returns default_value. +const char* StringFromEnv(const char* flag, const char* default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value = GetEnv(env_var.c_str()); + return value == nullptr ? default_value : value; +} + +// Parses a string as a command line flag. The string should have +// the format "--flag=value". When def_optional is true, the "=value" +// part can be omitted. +// +// Returns the value of the flag, or nullptr if the parsing failed. +const char* ParseFlagValue(const char* str, const char* flag, + bool def_optional) { + // str and flag must not be nullptr. + if (str == nullptr || flag == nullptr) return nullptr; + + // The flag must start with "--". + const std::string flag_str = std::string("--") + std::string(flag); + const size_t flag_len = flag_str.length(); + if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr; + + // Skips the flag name. + const char* flag_end = str + flag_len; + + // When def_optional is true, it's OK to not have a "=value" part. + if (def_optional && (flag_end[0] == '\0')) return flag_end; + + // If def_optional is true and there are more characters after the + // flag name, or if def_optional is false, there must be a '=' after + // the flag name. + if (flag_end[0] != '=') return nullptr; + + // Returns the string after "=". + return flag_end + 1; +} + +bool ParseBoolFlag(const char* str, const char* flag, bool* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, true); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + // Converts the string value to a bool. + *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); + return true; +} + +bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { + // Gets the value of the flag as a string. 
+ const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + // Sets *value to the value of the flag. + return ParseInt32(std::string("The value of flag --") + flag, value_str, + value); +} + +bool ParseDoubleFlag(const char* str, const char* flag, double* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + // Sets *value to the value of the flag. + return ParseDouble(std::string("The value of flag --") + flag, value_str, + value); +} + +bool ParseStringFlag(const char* str, const char* flag, std::string* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + *value = value_str; + return true; +} + +bool IsFlag(const char* str, const char* flag) { + return (ParseFlagValue(str, flag, true) != nullptr); +} +} // end namespace benchmark Index: utils/google-benchmark/src/complexity.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/complexity.h @@ -0,0 +1,64 @@ +// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Source project : https://github.com/ismaelJimenez/cpp.leastsq +// Adapted to be used with google benchmark + +#ifndef COMPLEXITY_H_ +#define COMPLEXITY_H_ + +#include +#include + +#include "benchmark/benchmark_api.h" +#include "benchmark/reporter.h" + +namespace benchmark { + +// Return a vector containing the mean and standard devation information for +// the specified list of reports. If 'reports' contains less than two +// non-errored runs an empty vector is returned +std::vector ComputeStats( + const std::vector& reports); + +// Return a vector containing the bigO and RMS information for the specified +// list of reports. If 'reports.size() < 2' an empty vector is returned. +std::vector ComputeBigO( + const std::vector& reports); + +// This data structure will contain the result returned by MinimalLeastSq +// - coef : Estimated coeficient for the high-order term as +// interpolated from data. +// - rms : Normalized Root Mean Squared Error. +// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability +// form has been provided to MinimalLeastSq this will return +// the same value. In case BigO::oAuto has been selected, this +// parameter will return the best fitting curve detected. 
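+// Concretely, and matching the implementation in complexity.cc later in this
+// patch: for run sizes n_i with measured times t_i and a fitting curve g,
+// coef is the c that minimizes sum_i (t_i - c * g(n_i))^2, i.e.
+//
+//   coef = ( sum_i t_i * g(n_i) ) / ( sum_i g(n_i)^2 )
+//
+// and rms is sqrt( sum_i (t_i - coef * g(n_i))^2 / N ) divided by the mean of
+// the t_i, so the reported error is normalized and dimensionless.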
+ +struct LeastSq { + LeastSq() : + coef(0.0), + rms(0.0), + complexity(oNone) {} + + double coef; + double rms; + BigO complexity; +}; + +// Function to return an string for the calculated complexity +std::string GetBigOString(BigO complexity); + +} // end namespace benchmark +#endif // COMPLEXITY_H_ Index: utils/google-benchmark/src/complexity.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/complexity.cc @@ -0,0 +1,283 @@ +// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Source project : https://github.com/ismaelJimenez/cpp.leastsq +// Adapted to be used with google benchmark + +#include "benchmark/benchmark_api.h" + +#include +#include +#include "check.h" +#include "complexity.h" +#include "stat.h" + +namespace benchmark { + +// Internal function to calculate the different scalability forms +BigOFunc* FittingCurve(BigO complexity) { + switch (complexity) { + case oN: + return [](int n) -> double { return n; }; + case oNSquared: + return [](int n) -> double { return n * n; }; + case oNCubed: + return [](int n) -> double { return n * n * n; }; + case oLogN: + return [](int n) { return std::log2(n); }; + case oNLogN: + return [](int n) { return n * std::log2(n); }; + case o1: + default: + return [](int) { return 1.0; }; + } +} + +// Function to return an string for the calculated complexity +std::string GetBigOString(BigO complexity) { + switch (complexity) { + case oN: + return "N"; + case oNSquared: + return "N^2"; + case oNCubed: + return "N^3"; + case oLogN: + return "lgN"; + case oNLogN: + return "NlgN"; + case o1: + return "(1)"; + default: + return "f(N)"; + } +} + +// Find the coefficient for the high-order term in the running time, by +// minimizing the sum of squares of relative error, for the fitting curve +// given by the lambda expresion. +// - n : Vector containing the size of the benchmark tests. +// - time : Vector containing the times for the benchmark tests. +// - fitting_curve : lambda expresion (e.g. [](int n) {return n; };). + +// For a deeper explanation on the algorithm logic, look the README file at +// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit + +LeastSq MinimalLeastSq(const std::vector& n, + const std::vector& time, + BigOFunc* fitting_curve) { + double sigma_gn = 0.0; + double sigma_gn_squared = 0.0; + double sigma_time = 0.0; + double sigma_time_gn = 0.0; + + // Calculate least square fitting parameter + for (size_t i = 0; i < n.size(); ++i) { + double gn_i = fitting_curve(n[i]); + sigma_gn += gn_i; + sigma_gn_squared += gn_i * gn_i; + sigma_time += time[i]; + sigma_time_gn += time[i] * gn_i; + } + + LeastSq result; + result.complexity = oLambda; + + // Calculate complexity. 
+ result.coef = sigma_time_gn / sigma_gn_squared; + + // Calculate RMS + double rms = 0.0; + for (size_t i = 0; i < n.size(); ++i) { + double fit = result.coef * fitting_curve(n[i]); + rms += pow((time[i] - fit), 2); + } + + // Normalized RMS by the mean of the observed values + double mean = sigma_time / n.size(); + result.rms = sqrt(rms / n.size()) / mean; + + return result; +} + +// Find the coefficient for the high-order term in the running time, by +// minimizing the sum of squares of relative error. +// - n : Vector containing the size of the benchmark tests. +// - time : Vector containing the times for the benchmark tests. +// - complexity : If different than oAuto, the fitting curve will stick to +// this one. If it is oAuto, it will be calculated the best +// fitting curve. +LeastSq MinimalLeastSq(const std::vector& n, + const std::vector& time, + const BigO complexity) { + CHECK_EQ(n.size(), time.size()); + CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two + // benchmark runs are given + CHECK_NE(complexity, oNone); + + LeastSq best_fit; + + if (complexity == oAuto) { + std::vector fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed}; + + // Take o1 as default best fitting curve + best_fit = MinimalLeastSq(n, time, FittingCurve(o1)); + best_fit.complexity = o1; + + // Compute all possible fitting curves and stick to the best one + for (const auto& fit : fit_curves) { + LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit)); + if (current_fit.rms < best_fit.rms) { + best_fit = current_fit; + best_fit.complexity = fit; + } + } + } else { + best_fit = MinimalLeastSq(n, time, FittingCurve(complexity)); + best_fit.complexity = complexity; + } + + return best_fit; +} + +std::vector ComputeStats( + const std::vector& reports) { + typedef BenchmarkReporter::Run Run; + std::vector results; + + auto error_count = + std::count_if(reports.begin(), reports.end(), + [](Run const& run) { return run.error_occurred; }); + + if (reports.size() - error_count < 2) { + // We don't report aggregated data if there was a single run. + return results; + } + // Accumulators. + Stat1_d real_accumulated_time_stat; + Stat1_d cpu_accumulated_time_stat; + Stat1_d bytes_per_second_stat; + Stat1_d items_per_second_stat; + // All repetitions should be run with the same number of iterations so we + // can take this information from the first benchmark. + int64_t const run_iterations = reports.front().iterations; + + // Populate the accumulators. + for (Run const& run : reports) { + CHECK_EQ(reports[0].benchmark_name, run.benchmark_name); + CHECK_EQ(run_iterations, run.iterations); + if (run.error_occurred) continue; + real_accumulated_time_stat += + Stat1_d(run.real_accumulated_time / run.iterations, run.iterations); + cpu_accumulated_time_stat += + Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations); + items_per_second_stat += Stat1_d(run.items_per_second, run.iterations); + bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations); + } + + // Get the data from the accumulator to BenchmarkReporter::Run's. 
+ Run mean_data; + mean_data.benchmark_name = reports[0].benchmark_name + "_mean"; + mean_data.iterations = run_iterations; + mean_data.real_accumulated_time = + real_accumulated_time_stat.Mean() * run_iterations; + mean_data.cpu_accumulated_time = + cpu_accumulated_time_stat.Mean() * run_iterations; + mean_data.bytes_per_second = bytes_per_second_stat.Mean(); + mean_data.items_per_second = items_per_second_stat.Mean(); + + // Only add label to mean/stddev if it is same for all runs + mean_data.report_label = reports[0].report_label; + for (std::size_t i = 1; i < reports.size(); i++) { + if (reports[i].report_label != reports[0].report_label) { + mean_data.report_label = ""; + break; + } + } + + Run stddev_data; + stddev_data.benchmark_name = reports[0].benchmark_name + "_stddev"; + stddev_data.report_label = mean_data.report_label; + stddev_data.iterations = 0; + stddev_data.real_accumulated_time = real_accumulated_time_stat.StdDev(); + stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev(); + stddev_data.bytes_per_second = bytes_per_second_stat.StdDev(); + stddev_data.items_per_second = items_per_second_stat.StdDev(); + + results.push_back(mean_data); + results.push_back(stddev_data); + return results; +} + +std::vector ComputeBigO( + const std::vector& reports) { + typedef BenchmarkReporter::Run Run; + std::vector results; + + if (reports.size() < 2) return results; + + // Accumulators. + std::vector n; + std::vector real_time; + std::vector cpu_time; + + // Populate the accumulators. + for (const Run& run : reports) { + CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; + n.push_back(run.complexity_n); + real_time.push_back(run.real_accumulated_time / run.iterations); + cpu_time.push_back(run.cpu_accumulated_time / run.iterations); + } + + LeastSq result_cpu; + LeastSq result_real; + + if (reports[0].complexity == oLambda) { + result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda); + result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda); + } else { + result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity); + result_real = MinimalLeastSq(n, real_time, result_cpu.complexity); + } + std::string benchmark_name = + reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/')); + + // Get the data from the accumulator to BenchmarkReporter::Run's. + Run big_o; + big_o.benchmark_name = benchmark_name + "_BigO"; + big_o.iterations = 0; + big_o.real_accumulated_time = result_real.coef; + big_o.cpu_accumulated_time = result_cpu.coef; + big_o.report_big_o = true; + big_o.complexity = result_cpu.complexity; + + double multiplier = GetTimeUnitMultiplier(reports[0].time_unit); + + // Only add label to mean/stddev if it is same for all runs + Run rms; + big_o.report_label = reports[0].report_label; + rms.benchmark_name = benchmark_name + "_RMS"; + rms.report_label = big_o.report_label; + rms.iterations = 0; + rms.real_accumulated_time = result_real.rms / multiplier; + rms.cpu_accumulated_time = result_cpu.rms / multiplier; + rms.report_rms = true; + rms.complexity = result_cpu.complexity; + + results.push_back(big_o); + results.push_back(rms); + return results; +} + +} // end namespace benchmark Index: utils/google-benchmark/src/console_reporter.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/console_reporter.cc @@ -0,0 +1,124 @@ +// Copyright 2015 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/reporter.h" +#include "complexity.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "colorprint.h" +#include "commandlineflags.h" +#include "internal_macros.h" +#include "string_util.h" +#include "walltime.h" + +DECLARE_bool(color_print); + +namespace benchmark { + +bool ConsoleReporter::ReportContext(const Context& context) { + name_field_width_ = context.name_field_width; + + PrintBasicContext(&GetErrorStream(), context); + +#ifdef BENCHMARK_OS_WINDOWS + if (FLAGS_color_print && &std::cout != &GetOutputStream()) { + GetErrorStream() << "Color printing is only supported for stdout on windows." + " Disabling color printing\n"; + FLAGS_color_print = false; + } +#endif + std::string str = FormatString("%-*s %13s %13s %10s\n", + static_cast(name_field_width_), "Benchmark", + "Time", "CPU", "Iterations"); + GetOutputStream() << str << std::string(str.length() - 1, '-') << "\n"; + + return true; +} + +void ConsoleReporter::ReportRuns(const std::vector& reports) { + for (const auto& run : reports) + PrintRunData(run); +} + +void ConsoleReporter::PrintRunData(const Run& result) { + auto& Out = GetOutputStream(); + + auto name_color = + (result.report_big_o || result.report_rms) ? 
COLOR_BLUE : COLOR_GREEN; + ColorPrintf(Out, name_color, "%-*s ", name_field_width_, + result.benchmark_name.c_str()); + + if (result.error_occurred) { + ColorPrintf(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", + result.error_message.c_str()); + ColorPrintf(Out, COLOR_DEFAULT, "\n"); + return; + } + // Format bytes per second + std::string rate; + if (result.bytes_per_second > 0) { + rate = StrCat(" ", HumanReadableNumber(result.bytes_per_second), "B/s"); + } + + // Format items per second + std::string items; + if (result.items_per_second > 0) { + items = StrCat(" ", HumanReadableNumber(result.items_per_second), + " items/s"); + } + + const double real_time = result.GetAdjustedRealTime(); + const double cpu_time = result.GetAdjustedCPUTime(); + + if (result.report_big_o) { + std::string big_o = GetBigOString(result.complexity); + ColorPrintf(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time, + big_o.c_str(), cpu_time, big_o.c_str()); + } else if (result.report_rms) { + ColorPrintf(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100, + cpu_time * 100); + } else { + const char* timeLabel = GetTimeUnitString(result.time_unit); + ColorPrintf(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel, + cpu_time, timeLabel); + } + + if (!result.report_big_o && !result.report_rms) { + ColorPrintf(Out, COLOR_CYAN, "%10lld", result.iterations); + } + + if (!rate.empty()) { + ColorPrintf(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str()); + } + + if (!items.empty()) { + ColorPrintf(Out, COLOR_DEFAULT, " %*s", 18, items.c_str()); + } + + if (!result.report_label.empty()) { + ColorPrintf(Out, COLOR_DEFAULT, " %s", result.report_label.c_str()); + } + + ColorPrintf(Out, COLOR_DEFAULT, "\n"); +} + +} // end namespace benchmark Index: utils/google-benchmark/src/csv_reporter.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/csv_reporter.cc @@ -0,0 +1,118 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/reporter.h" +#include "complexity.h" + +#include +#include +#include +#include +#include +#include + +#include "string_util.h" +#include "walltime.h" + +// File format reference: http://edoceo.com/utilitas/csv-file-format. 
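+//
+// A field containing a double-quote character is written with the quote
+// doubled and the whole field wrapped in double quotes (see PrintRunData
+// below); e.g. the label  say "hi"  is emitted as  "say ""hi""".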
+ +namespace benchmark { + +namespace { +std::vector elements = { + "name", + "iterations", + "real_time", + "cpu_time", + "time_unit", + "bytes_per_second", + "items_per_second", + "label", + "error_occurred", + "error_message" +}; +} + +bool CSVReporter::ReportContext(const Context& context) { + PrintBasicContext(&GetErrorStream(), context); + + std::ostream& Out = GetOutputStream(); + for (auto B = elements.begin(); B != elements.end(); ) { + Out << *B++; + if (B != elements.end()) + Out << ","; + } + Out << "\n"; + return true; +} + +void CSVReporter::ReportRuns(const std::vector & reports) { + for (const auto& run : reports) + PrintRunData(run); +} + +void CSVReporter::PrintRunData(const Run & run) { + std::ostream& Out = GetOutputStream(); + + // Field with embedded double-quote characters must be doubled and the field + // delimited with double-quotes. + std::string name = run.benchmark_name; + ReplaceAll(&name, "\"", "\"\""); + Out << '"' << name << "\","; + if (run.error_occurred) { + Out << std::string(elements.size() - 3, ','); + Out << "true,"; + std::string msg = run.error_message; + ReplaceAll(&msg, "\"", "\"\""); + Out << '"' << msg << "\"\n"; + return; + } + + // Do not print iteration on bigO and RMS report + if (!run.report_big_o && !run.report_rms) { + Out << run.iterations; + } + Out << ","; + + Out << run.GetAdjustedRealTime() << ","; + Out << run.GetAdjustedCPUTime() << ","; + + // Do not print timeLabel on bigO and RMS report + if (run.report_big_o) { + Out << GetBigOString(run.complexity); + } else if (!run.report_rms) { + Out << GetTimeUnitString(run.time_unit); + } + Out << ","; + + if (run.bytes_per_second > 0.0) { + Out << run.bytes_per_second; + } + Out << ","; + if (run.items_per_second > 0.0) { + Out << run.items_per_second; + } + Out << ","; + if (!run.report_label.empty()) { + // Field with embedded double-quote characters must be doubled and the field + // delimited with double-quotes. + std::string label = run.report_label; + ReplaceAll(&label, "\"", "\"\""); + Out << "\"" << label << "\""; + } + Out << ",,"; // for error_occurred and error_message + Out << '\n'; +} + +} // end namespace benchmark Index: utils/google-benchmark/src/cycleclock.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/cycleclock.h @@ -0,0 +1,145 @@ +// ---------------------------------------------------------------------- +// CycleClock +// A CycleClock tells you the current time in Cycles. The "time" +// is actually time since power-on. This is like time() but doesn't +// involve a system call and is much more precise. +// +// NOTE: Not all cpu/platform/kernel combinations guarantee that this +// clock increments at a constant rate or is synchronized across all logical +// cpus in a system. +// +// If you need the above guarantees, please consider using a different +// API. There are efforts to provide an interface which provides a millisecond +// granularity and implemented as a memory read. A memory read is generally +// cheaper than the CycleClock for many architectures. +// +// Also, in some out of order CPU implementations, the CycleClock is not +// serializing. So if you're trying to count at cycles granularity, your +// data might be inaccurate due to out of order instruction execution. 
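+//
+// Illustrative use (internal helper, not part of the public benchmark API):
+// record cycleclock::Now() before and after a region and subtract the two
+// values to obtain an elapsed count in "cycles" (mach time units on Mac OS X).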
+// ---------------------------------------------------------------------- + +#ifndef BENCHMARK_CYCLECLOCK_H_ +#define BENCHMARK_CYCLECLOCK_H_ + +#include + +#include "benchmark/macros.h" +#include "internal_macros.h" + +#if defined(BENCHMARK_OS_MACOSX) +#include +#endif +// For MSVC, we want to use '_asm rdtsc' when possible (since it works +// with even ancient MSVC compilers), and when not possible the +// __rdtsc intrinsic, declared in . Unfortunately, in some +// environments, and have conflicting +// declarations of some other intrinsics, breaking compilation. +// Therefore, we simply declare __rdtsc ourselves. See also +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +#if defined(COMPILER_MSVC) && !defined(_M_IX86) +extern "C" uint64_t __rdtsc(); +#pragma intrinsic(__rdtsc) +#endif + +#ifndef BENCHMARK_OS_WINDOWS +#include +#endif + +namespace benchmark { +// NOTE: only i386 and x86_64 have been well tested. +// PPC, sparc, alpha, and ia64 are based on +// http://peter.kuscsik.com/wordpress/?p=14 +// with modifications by m3b. See also +// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h +namespace cycleclock { +// This should return the number of cycles since power-on. Thread-safe. +inline BENCHMARK_ALWAYS_INLINE int64_t Now() { +#if defined(BENCHMARK_OS_MACOSX) + // this goes at the top because we need ALL Macs, regardless of + // architecture, to return the number of "mach time units" that + // have passed since startup. See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of + // macs to the number of mach time units per second, not actual + // CPU clock frequency (which can change in the face of CPU + // frequency scaling). Also note that when the Mac sleeps, this + // counter pauses; it does not continue counting, nor does it + // reset to zero. + return mach_absolute_time(); +#elif defined(__i386__) + int64_t ret; + __asm__ volatile("rdtsc" : "=A"(ret)); + return ret; +#elif defined(__x86_64__) || defined(__amd64__) + uint64_t low, high; + __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); + return (high << 32) | low; +#elif defined(__powerpc__) || defined(__ppc__) + // This returns a time-base, which is not always precisely a cycle-count. + int64_t tbl, tbu0, tbu1; + asm("mftbu %0" : "=r"(tbu0)); + asm("mftb %0" : "=r"(tbl)); + asm("mftbu %0" : "=r"(tbu1)); + tbl &= -static_cast(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) + return (tbu1 << 32) | tbl; +#elif defined(__sparc__) + int64_t tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r"(tick)); + return tick; +#elif defined(__ia64__) + int64_t itc; + asm("mov %0 = ar.itc" : "=r"(itc)); + return itc; +#elif defined(COMPILER_MSVC) && defined(_M_IX86) + // Older MSVC compilers (like 7.x) don't seem to support the + // __rdtsc intrinsic properly, so I prefer to use _asm instead + // when I know it will work. Otherwise, I'll use __rdtsc and hope + // the code is being compiled with a non-ancient compiler. + _asm rdtsc +#elif defined(COMPILER_MSVC) + return __rdtsc(); +#elif defined(__aarch64__) + // System timer of ARMv8 runs at a different frequency than the CPU's. + // The frequency is fixed, typically in the range 1-50MHz. It can be + // read at CNTFRQ special register. We assume the OS has set up + // the virtual timer properly. 
+ int64_t virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; +#elif defined(__ARM_ARCH) +#if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount + uint32_t pmccntr; + uint32_t pmuseren; + uint32_t pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? + asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast(pmccntr) * 64; // Should optimize to << 6 + } + } +#endif + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__mips__) + // mips apparently only allows rdtsc for superusers, so we fall + // back to gettimeofday. It's possible clock_gettime would be better. + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#else +// The soft failover to a generic implementation is automatic only for ARM. +// For other platforms the developer is expected to make an attempt to create +// a fast implementation and use generic version if nothing better is available. +#error You need to define CycleTimer for your OS and CPU +#endif +} +} // end namespace cycleclock +} // end namespace benchmark + +#endif // BENCHMARK_CYCLECLOCK_H_ Index: utils/google-benchmark/src/internal_macros.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/internal_macros.h @@ -0,0 +1,40 @@ +#ifndef BENCHMARK_INTERNAL_MACROS_H_ +#define BENCHMARK_INTERNAL_MACROS_H_ + +#include "benchmark/macros.h" + +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +#if __has_feature(cxx_attributes) +# define BENCHMARK_NORETURN [[noreturn]] +#elif defined(__GNUC__) +# define BENCHMARK_NORETURN __attribute__((noreturn)) +#else +# define BENCHMARK_NORETURN +#endif + +#if defined(__CYGWIN__) +# define BENCHMARK_OS_CYGWIN 1 +#elif defined(_WIN32) +# define BENCHMARK_OS_WINDOWS 1 +#elif defined(__APPLE__) +// TODO(ericwf) This doesn't actually check that it is a Mac OSX system. Just +// that it is an apple system. +# define BENCHMARK_OS_MACOSX 1 +#elif defined(__FreeBSD__) +# define BENCHMARK_OS_FREEBSD 1 +#elif defined(__linux__) +# define BENCHMARK_OS_LINUX 1 +#endif + +#if defined(__clang__) +# define COMPILER_CLANG +#elif defined(_MSC_VER) +# define COMPILER_MSVC +#elif defined(__GNUC__) +# define COMPILER_GCC +#endif + +#endif // BENCHMARK_INTERNAL_MACROS_H_ Index: utils/google-benchmark/src/json_reporter.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/json_reporter.cc @@ -0,0 +1,178 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/reporter.h" +#include "complexity.h" + +#include +#include +#include +#include +#include +#include + +#include "string_util.h" +#include "walltime.h" + +namespace benchmark { + +namespace { + +std::string FormatKV(std::string const& key, std::string const& value) { + return StringPrintF("\"%s\": \"%s\"", key.c_str(), value.c_str()); +} + +std::string FormatKV(std::string const& key, const char* value) { + return StringPrintF("\"%s\": \"%s\"", key.c_str(), value); +} + +std::string FormatKV(std::string const& key, bool value) { + return StringPrintF("\"%s\": %s", key.c_str(), value ? "true" : "false"); +} + +std::string FormatKV(std::string const& key, int64_t value) { + std::stringstream ss; + ss << '"' << key << "\": " << value; + return ss.str(); +} + +int64_t RoundDouble(double v) { + return static_cast(v + 0.5); +} + +} // end namespace + +bool JSONReporter::ReportContext(const Context& context) { + std::ostream& out = GetOutputStream(); + + out << "{\n"; + std::string inner_indent(2, ' '); + + // Open context block and print context information. + out << inner_indent << "\"context\": {\n"; + std::string indent(4, ' '); + + std::string walltime_value = LocalDateTimeString(); + out << indent << FormatKV("date", walltime_value) << ",\n"; + + out << indent + << FormatKV("num_cpus", static_cast(context.num_cpus)) + << ",\n"; + out << indent + << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu)) + << ",\n"; + out << indent + << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled) + << ",\n"; + +#if defined(NDEBUG) + const char build_type[] = "release"; +#else + const char build_type[] = "debug"; +#endif + out << indent << FormatKV("library_build_type", build_type) << "\n"; + // Close context block and open the list of benchmarks. + out << inner_indent << "},\n"; + out << inner_indent << "\"benchmarks\": [\n"; + return true; +} + +void JSONReporter::ReportRuns(std::vector const& reports) { + if (reports.empty()) { + return; + } + std::string indent(4, ' '); + std::ostream& out = GetOutputStream(); + if (!first_report_) { + out << ",\n"; + } + first_report_ = false; + + for (auto it = reports.begin(); it != reports.end(); ++it) { + out << indent << "{\n"; + PrintRunData(*it); + out << indent << '}'; + auto it_cp = it; + if (++it_cp != reports.end()) { + out << ",\n"; + } + } +} + +void JSONReporter::Finalize() { + // Close the list of benchmarks and the top level object. 
+ GetOutputStream() << "\n ]\n}\n"; +} + +void JSONReporter::PrintRunData(Run const& run) { + std::string indent(6, ' '); + std::ostream& out = GetOutputStream(); + out << indent + << FormatKV("name", run.benchmark_name) + << ",\n"; + if (run.error_occurred) { + out << indent + << FormatKV("error_occurred", run.error_occurred) + << ",\n"; + out << indent + << FormatKV("error_message", run.error_message) + << ",\n"; + } + if (!run.report_big_o && !run.report_rms) { + out << indent + << FormatKV("iterations", run.iterations) + << ",\n"; + out << indent + << FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime())) + << ",\n"; + out << indent + << FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime())); + out << ",\n" << indent + << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); + } else if (run.report_big_o) { + out << indent + << FormatKV("cpu_coefficient", RoundDouble(run.GetAdjustedCPUTime())) + << ",\n"; + out << indent + << FormatKV("real_coefficient", RoundDouble(run.GetAdjustedRealTime())) + << ",\n"; + out << indent + << FormatKV("big_o", GetBigOString(run.complexity)) + << ",\n"; + out << indent + << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); + } else if(run.report_rms) { + out << indent + << FormatKV("rms", RoundDouble(run.GetAdjustedCPUTime()*100)) + << '%'; + } + if (run.bytes_per_second > 0.0) { + out << ",\n" + << indent + << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second)); + } + if (run.items_per_second > 0.0) { + out << ",\n" + << indent + << FormatKV("items_per_second", RoundDouble(run.items_per_second)); + } + if (!run.report_label.empty()) { + out << ",\n" + << indent + << FormatKV("label", run.report_label); + } + out << '\n'; +} + +} // end namespace benchmark Index: utils/google-benchmark/src/log.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/log.h @@ -0,0 +1,28 @@ +#ifndef BENCHMARK_LOG_H_ +#define BENCHMARK_LOG_H_ + +#include + +namespace benchmark { +namespace internal { + +int GetLogLevel(); +void SetLogLevel(int level); + +std::ostream& GetNullLogInstance(); +std::ostream& GetErrorLogInstance(); + +inline std::ostream& GetLogInstanceForLevel(int level) { + if (level <= GetLogLevel()) { + return GetErrorLogInstance(); + } + return GetNullLogInstance(); +} + +} // end namespace internal +} // end namespace benchmark + +#define VLOG(x) (::benchmark::internal::GetLogInstanceForLevel(x) \ + << "-- LOG(" << x << "): ") + +#endif \ No newline at end of file Index: utils/google-benchmark/src/log.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/log.cc @@ -0,0 +1,40 @@ +#include "log.h" + +#include + +namespace benchmark { +namespace internal { + +int& LoggingLevelImp() { + static int level = 0; + return level; +} + +void SetLogLevel(int value) { + LoggingLevelImp() = value; +} + +int GetLogLevel() { + return LoggingLevelImp(); +} + +class NullLogBuffer : public std::streambuf +{ +public: + int overflow(int c) { + return c; + } +}; + +std::ostream& GetNullLogInstance() { + static NullLogBuffer log_buff; + static std::ostream null_log(&log_buff); + return null_log; +} + +std::ostream& GetErrorLogInstance() { + return std::clog; +} + +} // end namespace internal +} // end namespace benchmark \ No newline at end of file Index: utils/google-benchmark/src/mutex.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/mutex.h @@ -0,0 +1,142 @@ 
+#ifndef BENCHMARK_MUTEX_H_ +#define BENCHMARK_MUTEX_H_ + +#include +#include + +// Enable thread safety attributes only with clang. +// The attributes can be safely erased when compiling with other compilers. +#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) +#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) +#else +#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op +#endif + +#define CAPABILITY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(capability(x)) + +#define SCOPED_CAPABILITY \ + THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) + +#define GUARDED_BY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) + +#define PT_GUARDED_BY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x)) + +#define ACQUIRED_BEFORE(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__)) + +#define ACQUIRED_AFTER(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__)) + +#define REQUIRES(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__)) + +#define REQUIRES_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__)) + +#define ACQUIRE(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__)) + +#define ACQUIRE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__)) + +#define RELEASE(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__)) + +#define RELEASE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__)) + +#define TRY_ACQUIRE(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__)) + +#define TRY_ACQUIRE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__)) + +#define EXCLUDES(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__)) + +#define ASSERT_CAPABILITY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x)) + +#define ASSERT_SHARED_CAPABILITY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x)) + +#define RETURN_CAPABILITY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) + +#define NO_THREAD_SAFETY_ANALYSIS \ + THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) + + +namespace benchmark { + +typedef std::condition_variable Condition; + +// NOTE: Wrappers for std::mutex and std::unique_lock are provided so that +// we can annotate them with thread safety attributes and use the +// -Wthread-safety warning with clang. The standard library types cannot be +// used directly because they do not provided the required annotations. 
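+//
+// Illustrative (hypothetical) use of the annotations defined above:
+//   Mutex mu_;
+//   int value_ GUARDED_BY(mu_);        // data protected by mu_
+//   void Set(int v) EXCLUDES(mu_) {    // caller must not already hold mu_
+//     MutexLock lock(mu_);
+//     value_ = v;
+//   }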
+class CAPABILITY("mutex") Mutex +{ +public: + Mutex() {} + + void lock() ACQUIRE() { mut_.lock(); } + void unlock() RELEASE() { mut_.unlock(); } + std::mutex& native_handle() { + return mut_; + } +private: + std::mutex mut_; +}; + + +class SCOPED_CAPABILITY MutexLock +{ + typedef std::unique_lock MutexLockImp; +public: + MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle()) + { } + ~MutexLock() RELEASE() {} + MutexLockImp& native_handle() { return ml_; } +private: + MutexLockImp ml_; +}; + + +class Notification +{ +public: + Notification() : notified_yet_(false) { } + + void WaitForNotification() const EXCLUDES(mutex_) { + MutexLock m_lock(mutex_); + auto notified_fn = [this]() REQUIRES(mutex_) { + return this->HasBeenNotified(); + }; + cv_.wait(m_lock.native_handle(), notified_fn); + } + + void Notify() EXCLUDES(mutex_) { + { + MutexLock lock(mutex_); + notified_yet_ = 1; + } + cv_.notify_all(); + } + +private: + bool HasBeenNotified() const REQUIRES(mutex_) { + return notified_yet_; + } + + mutable Mutex mutex_; + mutable std::condition_variable cv_; + bool notified_yet_ GUARDED_BY(mutex_); +}; + +} // end namespace benchmark + +#endif // BENCHMARK_MUTEX_H_ Index: utils/google-benchmark/src/re.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/re.h @@ -0,0 +1,60 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BENCHMARK_RE_H_ +#define BENCHMARK_RE_H_ + +#if defined(HAVE_STD_REGEX) +#include +#elif defined(HAVE_GNU_POSIX_REGEX) +#include +#elif defined(HAVE_POSIX_REGEX) +#include +#else +#error No regular expression backend was found! +#endif +#include + +namespace benchmark { + +// A wrapper around the POSIX regular expression API that provides automatic +// cleanup +class Regex { + public: + Regex(); + ~Regex(); + + // Compile a regular expression matcher from spec. Returns true on success. + // + // On failure (and if error is not nullptr), error is populated with a human + // readable error message if an error occurs. + bool Init(const std::string& spec, std::string* error); + + // Returns whether str matches the compiled regular expression. + bool Match(const std::string& str); + private: + bool init_; + // Underlying regular expression object +#if defined(HAVE_STD_REGEX) + std::regex re_; +#elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX) + regex_t re_; +#else +# error No regular expression backend implementation available +#endif +}; + +} // end namespace benchmark + +#endif // BENCHMARK_RE_H_ Index: utils/google-benchmark/src/re_posix.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/re_posix.cc @@ -0,0 +1,59 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "check.h" +#include "re.h" + +namespace benchmark { + +Regex::Regex() : init_(false) { } + +bool Regex::Init(const std::string& spec, std::string* error) { + int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + if (error) { + size_t needed = regerror(ec, &re_, nullptr, 0); + char* errbuf = new char[needed]; + regerror(ec, &re_, errbuf, needed); + + // regerror returns the number of bytes necessary to null terminate + // the string, so we move that when assigning to error. + CHECK_NE(needed, 0); + error->assign(errbuf, needed - 1); + + delete[] errbuf; + } + + return false; + } + + init_ = true; + return true; +} + +Regex::~Regex() { + if (init_) { + regfree(&re_); + } +} + +bool Regex::Match(const std::string& str) { + if (!init_) { + return false; + } + + return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0; +} + +} // end namespace benchmark Index: utils/google-benchmark/src/re_std.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/re_std.cc @@ -0,0 +1,44 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "re.h" + +namespace benchmark { + +Regex::Regex() : init_(false) { } + +bool Regex::Init(const std::string& spec, std::string* error) { + try { + re_ = std::regex(spec, std::regex_constants::extended); + + init_ = true; + } catch (const std::regex_error& e) { + if (error) { + *error = e.what(); + } + } + return init_; +} + +Regex::~Regex() { } + +bool Regex::Match(const std::string& str) { + if (!init_) { + return false; + } + + return std::regex_search(str, re_); +} + +} // end namespace benchmark Index: utils/google-benchmark/src/reporter.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/reporter.cc @@ -0,0 +1,75 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "benchmark/reporter.h" +#include "walltime.h" + +#include + +#include +#include +#include + +#include "check.h" +#include "stat.h" + +namespace benchmark { + +BenchmarkReporter::BenchmarkReporter() + : output_stream_(&std::cout), error_stream_(&std::cerr) +{ +} + +BenchmarkReporter::~BenchmarkReporter() { +} + +void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr, + Context const &context) { + CHECK(out_ptr) << "cannot be null"; + auto& Out = *out_ptr; + + Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu + << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; + + Out << LocalDateTimeString() << "\n"; + + if (context.cpu_scaling_enabled) { + Out << "***WARNING*** CPU scaling is enabled, the benchmark " + "real time measurements may be noisy and will incur extra " + "overhead.\n"; + } + +#ifndef NDEBUG + Out << "***WARNING*** Library was built as DEBUG. Timings may be " + "affected.\n"; +#endif +} + +double BenchmarkReporter::Run::GetAdjustedRealTime() const { + double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); + if (iterations != 0) + new_time /= static_cast(iterations); + return new_time; +} + +double BenchmarkReporter::Run::GetAdjustedCPUTime() const { + double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit); + if (iterations != 0) + new_time /= static_cast(iterations); + return new_time; +} + + + +} // end namespace benchmark Index: utils/google-benchmark/src/sleep.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/sleep.h @@ -0,0 +1,17 @@ +#ifndef BENCHMARK_SLEEP_H_ +#define BENCHMARK_SLEEP_H_ + +#include + +namespace benchmark { +const int64_t kNumMillisPerSecond = 1000LL; +const int64_t kNumMicrosPerMilli = 1000LL; +const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL; +const int64_t kNumNanosPerMicro = 1000LL; +const int64_t kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond; + +void SleepForMilliseconds(int milliseconds); +void SleepForSeconds(double seconds); +} // end namespace benchmark + +#endif // BENCHMARK_SLEEP_H_ Index: utils/google-benchmark/src/sleep.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/sleep.cc @@ -0,0 +1,50 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sleep.h" + +#include +#include + +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#endif + +namespace benchmark { +#ifdef BENCHMARK_OS_WINDOWS +// Window's Sleep takes milliseconds argument. 
+void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } +void SleepForSeconds(double seconds) { + SleepForMilliseconds(static_cast(kNumMillisPerSecond * seconds)); +} +#else // BENCHMARK_OS_WINDOWS +void SleepForMicroseconds(int microseconds) { + struct timespec sleep_time; + sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; + sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; + while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) + ; // Ignore signals and wait for the full interval to elapse. +} + +void SleepForMilliseconds(int milliseconds) { + SleepForMicroseconds(static_cast(milliseconds) * kNumMicrosPerMilli); +} + +void SleepForSeconds(double seconds) { + SleepForMicroseconds(static_cast(seconds * kNumMicrosPerSecond)); +} +#endif // BENCHMARK_OS_WINDOWS +} // end namespace benchmark Index: utils/google-benchmark/src/stat.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/stat.h @@ -0,0 +1,307 @@ +#ifndef BENCHMARK_STAT_H_ +#define BENCHMARK_STAT_H_ + +#include +#include +#include +#include + + +namespace benchmark { + +template +class Stat1; + +template +class Stat1MinMax; + +typedef Stat1 Stat1_f; +typedef Stat1 Stat1_d; +typedef Stat1MinMax Stat1MinMax_f; +typedef Stat1MinMax Stat1MinMax_d; + +template +class Vector2; +template +class Vector3; +template +class Vector4; + +template +class Stat1 { + public: + typedef Stat1 Self; + + Stat1() { Clear(); } + // Create a sample of value dat and weight 1 + explicit Stat1(const VType &dat) { + sum_ = dat; + sum_squares_ = Sqr(dat); + numsamples_ = 1; + } + // Create statistics for all the samples between begin (included) + // and end(excluded) + explicit Stat1(const VType *begin, const VType *end) { + Clear(); + for (const VType *item = begin; item < end; ++item) { + (*this) += Stat1(*item); + } + } + // Create a sample of value dat and weight w + Stat1(const VType &dat, const NumType &w) { + sum_ = w * dat; + sum_squares_ = w * Sqr(dat); + numsamples_ = w; + } + // Copy operator + Stat1(const Self &stat) { + sum_ = stat.sum_; + sum_squares_ = stat.sum_squares_; + numsamples_ = stat.numsamples_; + } + + void Clear() { + numsamples_ = NumType(); + sum_squares_ = sum_ = VType(); + } + + Self &operator=(const Self &stat) { + sum_ = stat.sum_; + sum_squares_ = stat.sum_squares_; + numsamples_ = stat.numsamples_; + return (*this); + } + // Merge statistics from two sample sets. + Self &operator+=(const Self &stat) { + sum_ += stat.sum_; + sum_squares_ += stat.sum_squares_; + numsamples_ += stat.numsamples_; + return (*this); + } + // The operation opposite to += + Self &operator-=(const Self &stat) { + sum_ -= stat.sum_; + sum_squares_ -= stat.sum_squares_; + numsamples_ -= stat.numsamples_; + return (*this); + } + // Multiply the weight of the set of samples by a factor k + Self &operator*=(const VType &k) { + sum_ *= k; + sum_squares_ *= k; + numsamples_ *= k; + return (*this); + } + + // Merge statistics from two sample sets. 
+ Self operator+(const Self &stat) const { return Self(*this) += stat; } + + // The operation opposite to + + Self operator-(const Self &stat) const { return Self(*this) -= stat; } + + // Multiply the weight of the set of samples by a factor k + Self operator*(const VType &k) const { return Self(*this) *= k; } + + // Return the total weight of this sample set + NumType numSamples() const { return numsamples_; } + + // Return the sum of this sample set + VType Sum() const { return sum_; } + + // Return the mean of this sample set + VType Mean() const { + if (numsamples_ == 0) return VType(); + return sum_ * (1.0 / numsamples_); + } + + // Return the mean of this sample set and compute the standard deviation at + // the same time. + VType Mean(VType *stddev) const { + if (numsamples_ == 0) return VType(); + VType mean = sum_ * (1.0 / numsamples_); + if (stddev) { + VType avg_squares = sum_squares_ * (1.0 / numsamples_); + *stddev = Sqrt(avg_squares - Sqr(mean)); + } + return mean; + } + + // Return the standard deviation of the sample set + VType StdDev() const { + if (numsamples_ == 0) return VType(); + VType mean = Mean(); + VType avg_squares = sum_squares_ * (1.0 / numsamples_); + return Sqrt(avg_squares - Sqr(mean)); + } + + private: + static_assert(std::is_integral::value && + !std::is_same::value, + "NumType must be an integral type that is not bool."); + // Let i be the index of the samples provided (using +=) + // and weight[i],value[i] be the data of sample #i + // then the variables have the following meaning: + NumType numsamples_; // sum of weight[i]; + VType sum_; // sum of weight[i]*value[i]; + VType sum_squares_; // sum of weight[i]*value[i]^2; + + // Template function used to square a number. + // For a vector we square all components + template + static inline SType Sqr(const SType &dat) { + return dat * dat; + } + + template + static inline Vector2 Sqr(const Vector2 &dat) { + return dat.MulComponents(dat); + } + + template + static inline Vector3 Sqr(const Vector3 &dat) { + return dat.MulComponents(dat); + } + + template + static inline Vector4 Sqr(const Vector4 &dat) { + return dat.MulComponents(dat); + } + + // Template function used to take the square root of a number. 
+ // For a vector we square all components + template + static inline SType Sqrt(const SType &dat) { + // Avoid NaN due to imprecision in the calculations + if (dat < 0) return 0; + return sqrt(dat); + } + + template + static inline Vector2 Sqrt(const Vector2 &dat) { + // Avoid NaN due to imprecision in the calculations + return Max(dat, Vector2()).Sqrt(); + } + + template + static inline Vector3 Sqrt(const Vector3 &dat) { + // Avoid NaN due to imprecision in the calculations + return Max(dat, Vector3()).Sqrt(); + } + + template + static inline Vector4 Sqrt(const Vector4 &dat) { + // Avoid NaN due to imprecision in the calculations + return Max(dat, Vector4()).Sqrt(); + } +}; + +// Useful printing function +template +std::ostream &operator<<(std::ostream &out, const Stat1 &s) { + out << "{ avg = " << s.Mean() << " std = " << s.StdDev() + << " nsamples = " << s.NumSamples() << "}"; + return out; +} + +// Stat1MinMax: same as Stat1, but it also +// keeps the Min and Max values; the "-" +// operator is disabled because it cannot be implemented +// efficiently +template +class Stat1MinMax : public Stat1 { + public: + typedef Stat1MinMax Self; + + Stat1MinMax() { Clear(); } + // Create a sample of value dat and weight 1 + explicit Stat1MinMax(const VType &dat) : Stat1(dat) { + max_ = dat; + min_ = dat; + } + // Create statistics for all the samples between begin (included) + // and end(excluded) + explicit Stat1MinMax(const VType *begin, const VType *end) { + Clear(); + for (const VType *item = begin; item < end; ++item) { + (*this) += Stat1MinMax(*item); + } + } + // Create a sample of value dat and weight w + Stat1MinMax(const VType &dat, const NumType &w) + : Stat1(dat, w) { + max_ = dat; + min_ = dat; + } + // Copy operator + Stat1MinMax(const Self &stat) : Stat1(stat) { + max_ = stat.max_; + min_ = stat.min_; + } + + void Clear() { + Stat1::Clear(); + if (std::numeric_limits::has_infinity) { + min_ = std::numeric_limits::infinity(); + max_ = -std::numeric_limits::infinity(); + } else { + min_ = std::numeric_limits::max(); + max_ = std::numeric_limits::min(); + } + } + + Self &operator=(const Self &stat) { + this->Stat1::operator=(stat); + max_ = stat.max_; + min_ = stat.min_; + return (*this); + } + // Merge statistics from two sample sets. + Self &operator+=(const Self &stat) { + this->Stat1::operator+=(stat); + if (stat.max_ > max_) max_ = stat.max_; + if (stat.min_ < min_) min_ = stat.min_; + return (*this); + } + // Multiply the weight of the set of samples by a factor k + Self &operator*=(const VType &stat) { + this->Stat1::operator*=(stat); + return (*this); + } + // Merge statistics from two sample sets. + Self operator+(const Self &stat) const { return Self(*this) += stat; } + // Multiply the weight of the set of samples by a factor k + Self operator*(const VType &k) const { return Self(*this) *= k; } + + // Return the maximal value in this sample set + VType Max() const { return max_; } + // Return the minimal value in this sample set + VType Min() const { return min_; } + + private: + // The - operation makes no sense with Min/Max + // unless we keep the full list of values (but we don't) + // make it private, and let it undefined so nobody can call it + Self &operator-=(const Self &stat); // senseless. let it undefined. + + // The operation opposite to - + Self operator-(const Self &stat) const; // senseless. let it undefined. 
+ + // Let i be the index of the samples provided (using +=) + // and weight[i],value[i] be the data of sample #i + // then the variables have the following meaning: + VType max_; // max of value[i] + VType min_; // min of value[i] +}; + +// Useful printing function +template +std::ostream &operator<<(std::ostream &out, + const Stat1MinMax &s) { + out << "{ avg = " << s.Mean() << " std = " << s.StdDev() + << " nsamples = " << s.NumSamples() << " min = " << s.Min() + << " max = " << s.Max() << "}"; + return out; +} +} // end namespace benchmark + +#endif // BENCHMARK_STAT_H_ Index: utils/google-benchmark/src/string_util.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/string_util.h @@ -0,0 +1,44 @@ +#ifndef BENCHMARK_STRING_UTIL_H_ +#define BENCHMARK_STRING_UTIL_H_ + +#include +#include +#include +#include "internal_macros.h" + +namespace benchmark { + +void AppendHumanReadable(int n, std::string* str); + +std::string HumanReadableNumber(double n); + +std::string StringPrintF(const char* format, ...); + +inline std::ostream& +StringCatImp(std::ostream& out) BENCHMARK_NOEXCEPT +{ + return out; +} + +template +inline std::ostream& +StringCatImp(std::ostream& out, First&& f, Rest&&... rest) +{ + out << std::forward(f); + return StringCatImp(out, std::forward(rest)...); +} + +template +inline std::string StrCat(Args&&... args) +{ + std::ostringstream ss; + StringCatImp(ss, std::forward(args)...); + return ss.str(); +} + +void ReplaceAll(std::string* str, const std::string& from, + const std::string& to); + +} // end namespace benchmark + +#endif // BENCHMARK_STRING_UTIL_H_ Index: utils/google-benchmark/src/string_util.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/string_util.cc @@ -0,0 +1,169 @@ +#include "string_util.h" + +#include +#include +#include +#include +#include +#include + +#include "arraysize.h" + +namespace benchmark { +namespace { + +// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. +const char kBigSIUnits[] = "kMGTPEZY"; +// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. +const char kBigIECUnits[] = "KMGTPEZY"; +// milli, micro, nano, pico, femto, atto, zepto, yocto. +const char kSmallSIUnits[] = "munpfazy"; + +// We require that all three arrays have the same size. +static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), + "SI and IEC unit arrays must be the same size"); +static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits), + "Small SI and Big SI unit arrays must be the same size"); + +static const int64_t kUnitsSize = arraysize(kBigSIUnits); + +} // end anonymous namespace + +void ToExponentAndMantissa(double val, double thresh, int precision, + double one_k, std::string* mantissa, + int64_t* exponent) { + std::stringstream mantissa_stream; + + if (val < 0) { + mantissa_stream << "-"; + val = -val; + } + + // Adjust threshold so that it never excludes things which can't be rendered + // in 'precision' digits. 
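+  // e.g. with precision == 1 the floor below is 1.0 / 10^1 = 0.1, so any
+  // threshold smaller than 0.1 is raised to 0.1.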
+ const double adjusted_threshold = + std::max(thresh, 1.0 / std::pow(10.0, precision)); + const double big_threshold = adjusted_threshold * one_k; + const double small_threshold = adjusted_threshold; + + if (val > big_threshold) { + // Positive powers + double scaled = val; + for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) { + scaled /= one_k; + if (scaled <= big_threshold) { + mantissa_stream << scaled; + *exponent = i + 1; + *mantissa = mantissa_stream.str(); + return; + } + } + mantissa_stream << val; + *exponent = 0; + } else if (val < small_threshold) { + // Negative powers + double scaled = val; + for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) { + scaled *= one_k; + if (scaled >= small_threshold) { + mantissa_stream << scaled; + *exponent = -static_cast(i + 1); + *mantissa = mantissa_stream.str(); + return; + } + } + mantissa_stream << val; + *exponent = 0; + } else { + mantissa_stream << val; + *exponent = 0; + } + *mantissa = mantissa_stream.str(); +} + +std::string ExponentToPrefix(int64_t exponent, bool iec) { + if (exponent == 0) return ""; + + const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1); + if (index >= kUnitsSize) return ""; + + const char* array = + (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); + if (iec) + return array[index] + std::string("i"); + else + return std::string(1, array[index]); +} + +std::string ToBinaryStringFullySpecified(double value, double threshold, + int precision) { + std::string mantissa; + int64_t exponent; + ToExponentAndMantissa(value, threshold, precision, 1024.0, &mantissa, + &exponent); + return mantissa + ExponentToPrefix(exponent, false); +} + +void AppendHumanReadable(int n, std::string* str) { + std::stringstream ss; + // Round down to the nearest SI prefix. + ss << "/" << ToBinaryStringFullySpecified(n, 1.0, 0); + *str += ss.str(); +} + +std::string HumanReadableNumber(double n) { + // 1.1 means that figures up to 1.1k should be shown with the next unit down; + // this softens edge effects. + // 1 means that we should show one decimal place of precision. + return ToBinaryStringFullySpecified(n, 1.1, 1); +} + +std::string StringPrintFImp(const char *msg, va_list args) +{ + // we might need a second shot at this, so pre-emptivly make a copy + va_list args_cp; + va_copy(args_cp, args); + + // TODO(ericwf): use std::array for first attempt to avoid one memory + // allocation guess what the size might be + std::array local_buff; + std::size_t size = local_buff.size(); + // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation in the android-ndk + auto ret = vsnprintf(local_buff.data(), size, msg, args_cp); + + va_end(args_cp); + + // handle empty expansion + if (ret == 0) + return std::string{}; + if (static_cast(ret) < size) + return std::string(local_buff.data()); + + // we did not provide a long enough buffer on our first attempt. + // add 1 to size to account for null-byte in size cast to prevent overflow + size = static_cast(ret) + 1; + auto buff_ptr = std::unique_ptr(new char[size]); + // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation in the android-ndk + ret = vsnprintf(buff_ptr.get(), size, msg, args); + return std::string(buff_ptr.get()); +} + +std::string StringPrintF(const char* format, ...) 
+{ + va_list args; + va_start(args, format); + std::string tmp = StringPrintFImp(format, args); + va_end(args); + return tmp; +} + +void ReplaceAll(std::string* str, const std::string& from, + const std::string& to) { + std::size_t start = 0; + while((start = str->find(from, start)) != std::string::npos) { + str->replace(start, from.length(), to); + start += to.length(); + } +} + +} // end namespace benchmark Index: utils/google-benchmark/src/sysinfo.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/sysinfo.h @@ -0,0 +1,12 @@ +#ifndef BENCHMARK_SYSINFO_H_ +#define BENCHMARK_SYSINFO_H_ + +namespace benchmark { +double MyCPUUsage(); +double ChildrenCPUUsage(); +int NumCPUs(); +double CyclesPerSecond(); +bool CpuScalingEnabled(); +} // end namespace benchmark + +#endif // BENCHMARK_SYSINFO_H_ Index: utils/google-benchmark/src/sysinfo.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/sysinfo.cc @@ -0,0 +1,420 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sysinfo.h" +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#include +#include +#else +#include +#include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD +#include +#include +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX +#include +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arraysize.h" +#include "check.h" +#include "cycleclock.h" +#include "internal_macros.h" +#include "log.h" +#include "sleep.h" +#include "string_util.h" + +namespace benchmark { +namespace { +std::once_flag cpuinfo_init; +double cpuinfo_cycles_per_second = 1.0; +int cpuinfo_num_cpus = 1; // Conservative guess +std::mutex cputimens_mutex; + +#if !defined BENCHMARK_OS_MACOSX +const int64_t estimate_time_ms = 1000; + +// Helper function estimates cycles/sec by observing cycles elapsed during +// sleep(). Using small sleep time decreases accuracy significantly. +int64_t EstimateCyclesPerSecond() { + const int64_t start_ticks = cycleclock::Now(); + SleepForMilliseconds(estimate_time_ms); + return cycleclock::Now() - start_ticks; +} +#endif + +#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN +// Helper function for reading an int from a file. Returns true if successful +// and the memory location pointed to by value is set to the value read. 
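+// Example (as used later in this file):
+//   long freq;
+//   ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+//                   &freq);  // on success, freq holds the value read (in kHz)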
+bool ReadIntFromFile(const char* file, long* value) { + bool ret = false; + int fd = open(file, O_RDONLY); + if (fd != -1) { + char line[1024]; + char* err; + memset(line, '\0', sizeof(line)); + CHECK(read(fd, line, sizeof(line) - 1)); + const long temp_value = strtol(line, &err, 10); + if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { + *value = temp_value; + ret = true; + } + close(fd); + } + return ret; +} +#endif + +void InitializeSystemInfo() { +#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN + char line[1024]; + char* err; + long freq; + + bool saw_mhz = false; + + // If the kernel is exporting the tsc frequency use that. There are issues + // where cpuinfo_max_freq cannot be relied on because the BIOS may be + // exporintg an invalid p-state (on x86) or p-states may be used to put the + // processor in a new mode (turbo mode). Essentially, those frequencies + // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as + // well. + if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { + // The value is in kHz (as the file name suggests). For example, on a + // 2GHz warpstation, the file contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } + + // If CPU scaling is in effect, we want to use the *maximum* frequency, + // not whatever CPU speed some random processor happens to be using now. + if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &freq)) { + // The value is in kHz. For example, on a 2GHz warpstation, the file + // contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } + + // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. + const char* pname = "/proc/cpuinfo"; + int fd = open(pname, O_RDONLY); + if (fd == -1) { + perror(pname); + if (!saw_mhz) { + cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); + } + return; + } + + double bogo_clock = 1.0; + bool saw_bogo = false; + long max_cpu_id = 0; + int num_cpus = 0; + line[0] = line[1] = '\0'; + size_t chars_read = 0; + do { // we'll exit when the last read didn't read anything + // Move the next line to the beginning of the buffer + const size_t oldlinelen = strlen(line); + if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line + line[0] = '\0'; + else // still other lines left to save + memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); + // Terminate the new line, reading more if we can't find the newline + char* newline = strchr(line, '\n'); + if (newline == nullptr) { + const size_t linelen = strlen(line); + const size_t bytes_to_read = sizeof(line) - 1 - linelen; + CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes + chars_read = read(fd, line + linelen, bytes_to_read); + line[linelen + chars_read] = '\0'; + newline = strchr(line, '\n'); + } + if (newline != nullptr) *newline = '\0'; + + // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only + // accept postive values. Some environments (virtual machines) report zero, + // which would cause infinite looping in WallTime_Init. 
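+    // Illustrative /proc/cpuinfo entry (exact format varies by kernel/arch):
+    //   "cpu MHz         : 2593.905"  -> parsed below as 2.593905e9 cycles/sec.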
+ if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) + saw_mhz = true; + } + } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) + saw_bogo = true; + } + } else if (strncmp(line, "processor", sizeof("processor") - 1) == 0) { + // The above comparison is case-sensitive because ARM kernels often + // include a "Processor" line that tells you about the CPU, distinct + // from the usual "processor" lines that give you CPU ids. No current + // Linux architecture is using "Processor" for CPU ids. + num_cpus++; // count up every time we see an "processor :" entry + const char* id_str = strchr(line, ':'); + if (id_str) { + const long cpu_id = strtol(id_str + 1, &err, 10); + if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id) + max_cpu_id = cpu_id; + } + } + } while (chars_read > 0); + close(fd); + + if (!saw_mhz) { + if (saw_bogo) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } else { + // If we don't even have bogomips, we'll use the slow estimation. + cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); + } + } + if (num_cpus == 0) { + fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n"); + } else { + if ((max_cpu_id + 1) != num_cpus) { + fprintf(stderr, + "CPU ID assignments in /proc/cpuinfo seem messed up." + " This is usually caused by a bad BIOS.\n"); + } + cpuinfo_num_cpus = num_cpus; + } + +#elif defined BENCHMARK_OS_FREEBSD +// For this sysctl to work, the machine must be configured without +// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 +// and later. Before that, it's a 32-bit quantity (and gives the +// wrong answer on machines faster than 2^32 Hz). See +// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html +// But also compare FreeBSD 7.0: +// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 +// 231 error = sysctl_handle_quad(oidp, &freq, 0, req); +// To FreeBSD 6.3 (it's the same in 6-STABLE): +// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 +// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); +#if __FreeBSD__ >= 7 + uint64_t hz = 0; +#else + unsigned int hz = 0; +#endif + size_t sz = sizeof(hz); + const char* sysctl_path = "machdep.tsc_freq"; + if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) { + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", + sysctl_path, strerror(errno)); + cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); + } else { + cpuinfo_cycles_per_second = hz; + } +// TODO: also figure out cpuinfo_num_cpus + +#elif defined BENCHMARK_OS_WINDOWS + // In NT, read MHz from the registry. If we fail to do so or we're in win9x + // then make a crude estimate. 
+ DWORD data, data_size = sizeof(data); + if (IsWindowsXPOrGreater() && + SUCCEEDED( + SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", nullptr, &data, &data_size))) + cpuinfo_cycles_per_second = static_cast((int64_t)data * (int64_t)(1000 * 1000)); // was mhz + else + cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); +// TODO: also figure out cpuinfo_num_cpus + +#elif defined BENCHMARK_OS_MACOSX + // returning "mach time units" per second. the current number of elapsed + // mach time units can be found by calling uint64 mach_absolute_time(); + // while not as precise as actual CPU cycles, it is accurate in the face + // of CPU frequency scaling and multi-cpu/core machines. + // Our mac users have these types of machines, and accuracy + // (i.e. correctness) trumps precision. + // See cycleclock.h: CycleClock::Now(), which returns number of mach time + // units on Mac OS X. + mach_timebase_info_data_t timebase_info; + mach_timebase_info(&timebase_info); + double mach_time_units_per_nanosecond = + static_cast(timebase_info.denom) / + static_cast(timebase_info.numer); + cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; + + int num_cpus = 0; + size_t size = sizeof(num_cpus); + int numcpus_name[] = {CTL_HW, HW_NCPU}; + if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) == + 0 && + (size == sizeof(num_cpus))) + cpuinfo_num_cpus = num_cpus; + +#else + // Generic cycles per second counter + cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); +#endif +} +} // end namespace + +// getrusage() based implementation of MyCPUUsage +static double MyCPUUsageRUsage() { +#ifndef BENCHMARK_OS_WINDOWS + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) == 0) { + return (static_cast(ru.ru_utime.tv_sec) + + static_cast(ru.ru_utime.tv_usec) * 1e-6 + + static_cast(ru.ru_stime.tv_sec) + + static_cast(ru.ru_stime.tv_usec) * 1e-6); + } else { + return 0.0; + } +#else + HANDLE proc = GetCurrentProcess(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + ULARGE_INTEGER kernel; + ULARGE_INTEGER user; + GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time); + kernel.HighPart = kernel_time.dwHighDateTime; + kernel.LowPart = kernel_time.dwLowDateTime; + user.HighPart = user_time.dwHighDateTime; + user.LowPart = user_time.dwLowDateTime; + return (static_cast(kernel.QuadPart) + + static_cast(user.QuadPart)) * 1e-7; +#endif // OS_WINDOWS +} + +#ifndef BENCHMARK_OS_WINDOWS +static bool MyCPUUsageCPUTimeNsLocked(double* cputime) { + static int cputime_fd = -1; + if (cputime_fd == -1) { + cputime_fd = open("/proc/self/cputime_ns", O_RDONLY); + if (cputime_fd < 0) { + cputime_fd = -1; + return false; + } + } + char buff[64]; + memset(buff, 0, sizeof(buff)); + if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) { + close(cputime_fd); + cputime_fd = -1; + return false; + } + unsigned long long result = strtoull(buff, nullptr, 0); + if (result == (std::numeric_limits::max)()) { + close(cputime_fd); + cputime_fd = -1; + return false; + } + *cputime = static_cast(result) / 1e9; + return true; +} +#endif // OS_WINDOWS + +double MyCPUUsage() { +#ifndef BENCHMARK_OS_WINDOWS + { + std::lock_guard l(cputimens_mutex); + static bool use_cputime_ns = true; + if (use_cputime_ns) { + double value; + if (MyCPUUsageCPUTimeNsLocked(&value)) { + return value; + } + // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage(). 
+ VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n"; + use_cputime_ns = false; + } + } +#endif // OS_WINDOWS + return MyCPUUsageRUsage(); +} + +double ChildrenCPUUsage() { +#ifndef BENCHMARK_OS_WINDOWS + struct rusage ru; + if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { + return (static_cast(ru.ru_utime.tv_sec) + + static_cast(ru.ru_utime.tv_usec) * 1e-6 + + static_cast(ru.ru_stime.tv_sec) + + static_cast(ru.ru_stime.tv_usec) * 1e-6); + } else { + return 0.0; + } +#else + // TODO: Not sure what this even means on Windows + return 0.0; +#endif // OS_WINDOWS +} + +double CyclesPerSecond(void) { + std::call_once(cpuinfo_init, InitializeSystemInfo); + return cpuinfo_cycles_per_second; +} + +int NumCPUs(void) { + std::call_once(cpuinfo_init, InitializeSystemInfo); + return cpuinfo_num_cpus; +} + +// The ""'s catch people who don't pass in a literal for "str" +#define strliterallen(str) (sizeof("" str "") - 1) + +// Must use a string literal for prefix. +#define memprefix(str, len, prefix) \ + ((((len) >= strliterallen(prefix)) && \ + std::memcmp(str, prefix, strliterallen(prefix)) == 0) \ + ? str + strliterallen(prefix) \ + : nullptr) + +bool CpuScalingEnabled() { +#ifndef BENCHMARK_OS_WINDOWS + // On Linux, the CPUfreq subsystem exposes CPU information as files on the + // local file system. If reading the exported files fails, then we may not be + // running on Linux, so we silently ignore all the read errors. + for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { + std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, + "/cpufreq/scaling_governor"); + FILE* file = fopen(governor_file.c_str(), "r"); + if (!file) break; + char buff[16]; + size_t bytes_read = fread(buff, 1, sizeof(buff), file); + fclose(file); + if (memprefix(buff, bytes_read, "performance") == nullptr) return true; + } +#endif + return false; +} + +} // end namespace benchmark Index: utils/google-benchmark/src/walltime.h =================================================================== --- /dev/null +++ utils/google-benchmark/src/walltime.h @@ -0,0 +1,17 @@ +#ifndef BENCHMARK_WALLTIME_H_ +#define BENCHMARK_WALLTIME_H_ + +#include + +namespace benchmark { +typedef double WallTime; + +namespace walltime { +WallTime Now(); +} // end namespace walltime + +std::string LocalDateTimeString(); + +} // end namespace benchmark + +#endif // BENCHMARK_WALLTIME_H_ Index: utils/google-benchmark/src/walltime.cc =================================================================== --- /dev/null +++ utils/google-benchmark/src/walltime.cc @@ -0,0 +1,263 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
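+
+// walltime::Now() below prefers the cycle-counter based WallTimeImp fast path
+// when CPU frequency scaling is disabled, and otherwise falls back to a
+// std::chrono clock (see UseCpuCycleClock()).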
+ +#include "benchmark/macros.h" +#include "internal_macros.h" +#include "walltime.h" + +#if defined(BENCHMARK_OS_WINDOWS) +#include +#include // for timeval +#else +#include +#endif + +#include +#include +#include +#include + +#include +#include +#include + +#include "arraysize.h" +#include "check.h" +#include "cycleclock.h" +#include "log.h" +#include "sysinfo.h" + +namespace benchmark { +namespace walltime { + +namespace { + +#if defined(HAVE_STEADY_CLOCK) +template +struct ChooseSteadyClock { + typedef std::chrono::high_resolution_clock type; +}; + +template <> +struct ChooseSteadyClock { + typedef std::chrono::steady_clock type; +}; +#endif + +struct ChooseClockType { +#if defined(HAVE_STEADY_CLOCK) + typedef ChooseSteadyClock<>::type type; +#else + typedef std::chrono::high_resolution_clock type; +#endif +}; + +class WallTimeImp +{ +public: + WallTime Now(); + + static WallTimeImp& GetWallTimeImp() { + static WallTimeImp* imp = new WallTimeImp(); + return *imp; + } + +private: + WallTimeImp(); + // Helper routines to load/store a float from an AtomicWord. Required because + // g++ < 4.7 doesn't support std::atomic correctly. I cannot wait to + // get rid of this horror show. + void SetDrift(float f) { + int32_t w; + memcpy(&w, &f, sizeof(f)); + std::atomic_store(&drift_adjust_, w); + } + + float GetDrift() const { + float f; + int32_t w = std::atomic_load(&drift_adjust_); + memcpy(&f, &w, sizeof(f)); + return f; + } + + WallTime Slow() const { + struct timeval tv; +#if defined(BENCHMARK_OS_WINDOWS) + FILETIME file_time; + SYSTEMTIME system_time; + ULARGE_INTEGER ularge; + const unsigned __int64 epoch = 116444736000000000LL; + + GetSystemTime(&system_time); + SystemTimeToFileTime(&system_time, &file_time); + ularge.LowPart = file_time.dwLowDateTime; + ularge.HighPart = file_time.dwHighDateTime; + + tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000)); + tv.tv_usec = (long)(system_time.wMilliseconds * 1000); +#else + gettimeofday(&tv, nullptr); +#endif + return tv.tv_sec + tv.tv_usec * 1e-6; + } + +private: + static_assert(sizeof(float) <= sizeof(int32_t), + "type sizes don't allow the drift_adjust hack"); + + WallTime base_walltime_; + int64_t base_cycletime_; + int64_t cycles_per_second_; + double seconds_per_cycle_; + uint32_t last_adjust_time_; + std::atomic drift_adjust_; + int64_t max_interval_cycles_; + + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp); +}; + + +WallTime WallTimeImp::Now() { + WallTime now = 0.0; + WallTime result = 0.0; + int64_t ct = 0; + uint32_t top_bits = 0; + do { + ct = cycleclock::Now(); + int64_t cycle_delta = ct - base_cycletime_; + result = base_walltime_ + cycle_delta * seconds_per_cycle_; + + top_bits = static_cast(uint64_t(ct) >> 32); + // Recompute drift no more often than every 2^32 cycles. + // I.e., @2GHz, ~ every two seconds + if (top_bits == last_adjust_time_) { // don't need to recompute drift + return result + GetDrift(); + } + + now = Slow(); + } while (cycleclock::Now() - ct > max_interval_cycles_); + // We are now sure that "now" and "result" were produced within + // kMaxErrorInterval of one another. 
+ + SetDrift(static_cast(now - result)); + last_adjust_time_ = top_bits; + return now; +} + + +WallTimeImp::WallTimeImp() + : base_walltime_(0.0), base_cycletime_(0), + cycles_per_second_(0), seconds_per_cycle_(0.0), + last_adjust_time_(0), drift_adjust_(0), + max_interval_cycles_(0) { + const double kMaxErrorInterval = 100e-6; + cycles_per_second_ = static_cast(CyclesPerSecond()); + CHECK(cycles_per_second_ != 0); + seconds_per_cycle_ = 1.0 / cycles_per_second_; + max_interval_cycles_ = + static_cast(cycles_per_second_ * kMaxErrorInterval); + do { + base_cycletime_ = cycleclock::Now(); + base_walltime_ = Slow(); + } while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_); + // We are now sure that "base_walltime" and "base_cycletime" were produced + // within kMaxErrorInterval of one another. + + SetDrift(0.0); + last_adjust_time_ = static_cast(uint64_t(base_cycletime_) >> 32); +} + +WallTime CPUWalltimeNow() { + static WallTimeImp& imp = WallTimeImp::GetWallTimeImp(); + return imp.Now(); +} + +WallTime ChronoWalltimeNow() { + typedef ChooseClockType::type Clock; + typedef std::chrono::duration + FPSeconds; + static_assert(std::chrono::treat_as_floating_point::value, + "This type must be treated as a floating point type."); + auto now = Clock::now().time_since_epoch(); + return std::chrono::duration_cast(now).count(); +} + +bool UseCpuCycleClock() { + bool useWallTime = !CpuScalingEnabled(); + if (useWallTime) { + VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n"; + } else { + VLOG(1) << "Using std::chrono to provide walltime::Now().\n"; + } + return useWallTime; +} + + +} // end anonymous namespace + +// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is +// enabled at the start of the program then std::chrono::system_clock is used +// instead. +WallTime Now() +{ + static bool useCPUClock = UseCpuCycleClock(); + if (useCPUClock) { + return CPUWalltimeNow(); + } else { + return ChronoWalltimeNow(); + } +} + +} // end namespace walltime + + +namespace { + +std::string DateTimeString(bool local) { + typedef std::chrono::system_clock Clock; + std::time_t now = Clock::to_time_t(Clock::now()); + char storage[128]; + std::size_t written; + + if (local) { +#if defined(BENCHMARK_OS_WINDOWS) + written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); +#else + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + ::localtime_r(&now, &timeinfo); + written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); +#endif + } else { +#if defined(BENCHMARK_OS_WINDOWS) + written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); +#else + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + ::gmtime_r(&now, &timeinfo); + written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); +#endif + } + CHECK(written < arraysize(storage)); + ((void)written); // prevent unused variable in optimized mode. 
+ return std::string(storage); +} + +} // end namespace + +std::string LocalDateTimeString() { + return DateTimeString(true); +} + +} // end namespace benchmark Index: utils/google-benchmark/test/CMakeLists.txt =================================================================== --- /dev/null +++ utils/google-benchmark/test/CMakeLists.txt @@ -0,0 +1,111 @@ +# Enable the tests + +find_package(Threads REQUIRED) + +macro(compile_benchmark_test name) + add_executable(${name} "${name}.cc") + target_link_libraries(${name} benchmark ${CMAKE_THREAD_LIBS_INIT}) +endmacro(compile_benchmark_test) + +# Demonstration executable +compile_benchmark_test(benchmark_test) +add_test(benchmark benchmark_test --benchmark_min_time=0.01) + +compile_benchmark_test(filter_test) +macro(add_filter_test name filter expect) + add_test(${name} filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) + add_test(${name}_list_only filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) +endmacro(add_filter_test) + +add_filter_test(filter_simple "Foo" 3) +add_filter_test(filter_suffix "BM_.*" 4) +add_filter_test(filter_regex_all ".*" 5) +add_filter_test(filter_regex_blank "" 5) +add_filter_test(filter_regex_none "monkey" 0) +add_filter_test(filter_regex_wildcard ".*Foo.*" 3) +add_filter_test(filter_regex_begin "^BM_.*" 4) +add_filter_test(filter_regex_begin2 "^N" 1) +add_filter_test(filter_regex_end ".*Ba$" 1) + +compile_benchmark_test(options_test) +add_test(options_benchmarks options_test --benchmark_min_time=0.01) + +compile_benchmark_test(basic_test) +add_test(basic_benchmark basic_test --benchmark_min_time=0.01) + +compile_benchmark_test(diagnostics_test) +add_test(diagnostics_test diagnostics_test --benchmark_min_time=0.01) + +compile_benchmark_test(skip_with_error_test) +add_test(skip_with_error_test skip_with_error_test --benchmark_min_time=0.01) + +compile_benchmark_test(donotoptimize_test) +add_test(donotoptimize_test donotoptimize_test --benchmark_min_time=0.01) + +compile_benchmark_test(fixture_test) +add_test(fixture_test fixture_test --benchmark_min_time=0.01) + +compile_benchmark_test(map_test) +add_test(map_test map_test --benchmark_min_time=0.01) + +compile_benchmark_test(reporter_output_test) +add_test(reporter_output_test reporter_output_test --benchmark_min_time=0.01) + +check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG) +if (BENCHMARK_HAS_CXX03_FLAG) + set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-std=c++11" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}") + string(REPLACE "-std=c++0x" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}") + + compile_benchmark_test(cxx03_test) + set_target_properties(cxx03_test + PROPERTIES COMPILE_FLAGS "${CXX03_FLAGS}") + add_test(cxx03 cxx03_test --benchmark_min_time=0.01) +endif() + +compile_benchmark_test(complexity_test) +add_test(complexity_benchmark complexity_test --benchmark_min_time=0.01) + +# Add the coverage command(s) +if(CMAKE_BUILD_TYPE) + string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER) +endif() +if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage") + find_program(GCOV gcov) + find_program(LCOV lcov) + find_program(GENHTML genhtml) + find_program(CTEST ctest) + if (GCOV AND LCOV AND GENHTML AND CTEST AND HAVE_CXX_FLAG_COVERAGE) + add_custom_command( + OUTPUT ${CMAKE_BINARY_DIR}/lcov/index.html + COMMAND ${LCOV} -q -z -d . + COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . 
-o before.lcov -i + COMMAND ${CTEST} --force-new-ctest-process + COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o after.lcov + COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov + COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov + COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark + DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test complexity_test + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Running LCOV" + ) + add_custom_target(coverage + DEPENDS ${CMAKE_BINARY_DIR}/lcov/index.html + COMMENT "LCOV report at lcov/index.html" + ) + message(STATUS "Coverage command added") + else() + if (HAVE_CXX_FLAG_COVERAGE) + set(CXX_FLAG_COVERAGE_MESSAGE supported) + else() + set(CXX_FLAG_COVERAGE_MESSAGE unavailable) + endif() + message(WARNING + "Coverage not available:\n" + " gcov: ${GCOV}\n" + " lcov: ${LCOV}\n" + " genhtml: ${GENHTML}\n" + " ctest: ${CTEST}\n" + " --coverage flag: ${CXX_FLAG_COVERAGE_MESSAGE}") + endif() +endif() Index: utils/google-benchmark/test/basic_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/basic_test.cc @@ -0,0 +1,102 @@ + +#include "benchmark/benchmark_api.h" + +#define BASIC_BENCHMARK_TEST(x) \ + BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192) + +void BM_empty(benchmark::State& state) { + while (state.KeepRunning()) { + benchmark::DoNotOptimize(state.iterations()); + } +} +BENCHMARK(BM_empty); +BENCHMARK(BM_empty)->ThreadPerCpu(); + +void BM_spin_empty(benchmark::State& state) { + while (state.KeepRunning()) { + for (int x = 0; x < state.range_x(); ++x) { + benchmark::DoNotOptimize(x); + } + } +} +BASIC_BENCHMARK_TEST(BM_spin_empty); +BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu(); + +void BM_spin_pause_before(benchmark::State& state) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + while(state.KeepRunning()) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_before); +BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu(); + + +void BM_spin_pause_during(benchmark::State& state) { + while(state.KeepRunning()) { + state.PauseTiming(); + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + state.ResumeTiming(); + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_during); +BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu(); + +void BM_pause_during(benchmark::State& state) { + while(state.KeepRunning()) { + state.PauseTiming(); + state.ResumeTiming(); + } +} +BENCHMARK(BM_pause_during); +BENCHMARK(BM_pause_during)->ThreadPerCpu(); +BENCHMARK(BM_pause_during)->UseRealTime(); +BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); + +void BM_spin_pause_after(benchmark::State& state) { + while(state.KeepRunning()) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + } + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_after); +BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); + + +void BM_spin_pause_before_and_after(benchmark::State& state) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + while(state.KeepRunning()) { + for (int i = 0; i < state.range_x(); ++i) { + 
benchmark::DoNotOptimize(i); + } + } + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after); +BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu(); + + +void BM_empty_stop_start(benchmark::State& state) { + while (state.KeepRunning()) { } +} +BENCHMARK(BM_empty_stop_start); +BENCHMARK(BM_empty_stop_start)->ThreadPerCpu(); + +BENCHMARK_MAIN() Index: utils/google-benchmark/test/benchmark_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/benchmark_test.cc @@ -0,0 +1,224 @@ +#include "benchmark/benchmark.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__GNUC__) +# define BENCHMARK_NOINLINE __attribute__((noinline)) +#else +# define BENCHMARK_NOINLINE +#endif + +namespace { + +int BENCHMARK_NOINLINE Factorial(uint32_t n) { + return (n == 1) ? 1 : n * Factorial(n - 1); +} + +double CalculatePi(int depth) { + double pi = 0.0; + for (int i = 0; i < depth; ++i) { + double numerator = static_cast(((i % 2) * 2) - 1); + double denominator = static_cast((2 * i) - 1); + pi += numerator / denominator; + } + return (pi - 1.0) * 4; +} + +std::set ConstructRandomSet(int size) { + std::set s; + for (int i = 0; i < size; ++i) + s.insert(i); + return s; +} + +std::mutex test_vector_mu; +std::vector* test_vector = nullptr; + +} // end namespace + +static void BM_Factorial(benchmark::State& state) { + int fac_42 = 0; + while (state.KeepRunning()) + fac_42 = Factorial(8); + // Prevent compiler optimizations + std::stringstream ss; + ss << fac_42; + state.SetLabel(ss.str()); +} +BENCHMARK(BM_Factorial); +BENCHMARK(BM_Factorial)->UseRealTime(); + +static void BM_CalculatePiRange(benchmark::State& state) { + double pi = 0.0; + while (state.KeepRunning()) + pi = CalculatePi(state.range_x()); + std::stringstream ss; + ss << pi; + state.SetLabel(ss.str()); +} +BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); + +static void BM_CalculatePi(benchmark::State& state) { + static const int depth = 1024; + while (state.KeepRunning()) { + benchmark::DoNotOptimize(CalculatePi(depth)); + } +} +BENCHMARK(BM_CalculatePi)->Threads(8); +BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32); +BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); + +static void BM_SetInsert(benchmark::State& state) { + while (state.KeepRunning()) { + state.PauseTiming(); + std::set data = ConstructRandomSet(state.range_x()); + state.ResumeTiming(); + for (int j = 0; j < state.range_y(); ++j) + data.insert(rand()); + } + state.SetItemsProcessed(state.iterations() * state.range_y()); + state.SetBytesProcessed(state.iterations() * state.range_y() * sizeof(int)); +} +BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10); + +template +static void BM_Sequential(benchmark::State& state) { + ValueType v = 42; + while (state.KeepRunning()) { + Container c; + for (int i = state.range_x(); --i; ) + c.push_back(v); + } + const size_t items_processed = state.iterations() * state.range_x(); + state.SetItemsProcessed(items_processed); + state.SetBytesProcessed(items_processed * sizeof(v)); +} +BENCHMARK_TEMPLATE2(BM_Sequential, std::vector, int)->Range(1 << 0, 1 << 10); +BENCHMARK_TEMPLATE(BM_Sequential, std::list)->Range(1 << 0, 1 << 10); +// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond. 
+#if __cplusplus >= 201103L +BENCHMARK_TEMPLATE(BM_Sequential, std::vector, int)->Arg(512); +#endif + +static void BM_StringCompare(benchmark::State& state) { + std::string s1(state.range_x(), '-'); + std::string s2(state.range_x(), '-'); + while (state.KeepRunning()) + benchmark::DoNotOptimize(s1.compare(s2)); +} +BENCHMARK(BM_StringCompare)->Range(1, 1<<20); + +static void BM_SetupTeardown(benchmark::State& state) { + if (state.thread_index == 0) { + // No need to lock test_vector_mu here as this is running single-threaded. + test_vector = new std::vector(); + } + int i = 0; + while (state.KeepRunning()) { + std::lock_guard l(test_vector_mu); + if (i%2 == 0) + test_vector->push_back(i); + else + test_vector->pop_back(); + ++i; + } + if (state.thread_index == 0) { + delete test_vector; + } +} +BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); + +static void BM_LongTest(benchmark::State& state) { + double tracker = 0.0; + while (state.KeepRunning()) { + for (int i = 0; i < state.range_x(); ++i) + benchmark::DoNotOptimize(tracker += i); + } +} +BENCHMARK(BM_LongTest)->Range(1<<16,1<<28); + +static void BM_ParallelMemset(benchmark::State& state) { + int size = state.range_x() / sizeof(int); + int thread_size = size / state.threads; + int from = thread_size * state.thread_index; + int to = from + thread_size; + + if (state.thread_index == 0) { + test_vector = new std::vector(size); + } + + while (state.KeepRunning()) { + for (int i = from; i < to; i++) { + // No need to lock test_vector_mu as ranges + // do not overlap between threads. + benchmark::DoNotOptimize(test_vector->at(i) = 1); + } + } + + if (state.thread_index == 0) { + delete test_vector; + } +} +BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); + +static void BM_ManualTiming(benchmark::State& state) { + size_t slept_for = 0; + int microseconds = state.range_x(); + std::chrono::duration sleep_duration { + static_cast(microseconds) + }; + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(std::chrono::duration_cast< + std::chrono::nanoseconds>(sleep_duration)); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed = + std::chrono::duration_cast>( + end - start); + + state.SetIterationTime(elapsed.count()); + slept_for += microseconds; + } + state.SetItemsProcessed(slept_for); +} +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime(); +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime(); + +#if __cplusplus >= 201103L + +template +void BM_with_args(benchmark::State& state, Args&&...) { + while (state.KeepRunning()) {} +} +BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); +BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, + std::string("abc"), std::pair(42, 3.8)); + +void BM_non_template_args(benchmark::State& state, int, double) { + while(state.KeepRunning()) {} +} +BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); + +#endif // __cplusplus >= 201103L + +BENCHMARK_MAIN() + Index: utils/google-benchmark/test/complexity_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/complexity_test.cc @@ -0,0 +1,297 @@ + +#undef NDEBUG +#include "benchmark/benchmark.h" +#include "../src/check.h" // NOTE: check.h is for internal use only! 
+#include "../src/re.h" // NOTE: re.h is for internal use only +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +// ========================================================================= // +// -------------------------- Testing Case --------------------------------- // +// ========================================================================= // + +enum MatchRules { + MR_Default, // Skip non-matching lines until a match is found. + MR_Next // Match must occur on the next line. +}; + +struct TestCase { + std::string regex; + int match_rule; + + TestCase(std::string re, int rule = MR_Default) : regex(re), match_rule(rule) {} + + void Check(std::stringstream& remaining_output) const { + benchmark::Regex r; + std::string err_str; + r.Init(regex, &err_str); + CHECK(err_str.empty()) << "Could not construct regex \"" << regex << "\"" + << " got Error: " << err_str; + + std::string line; + while (remaining_output.eof() == false) { + CHECK(remaining_output.good()); + std::getline(remaining_output, line); + if (r.Match(line)) return; + CHECK(match_rule != MR_Next) << "Expected line \"" << line + << "\" to match regex \"" << regex << "\""; + } + + CHECK(remaining_output.eof() == false) + << "End of output reached before match for regex \"" << regex + << "\" was found"; + } +}; + +std::vector ConsoleOutputTests; +std::vector JSONOutputTests; +std::vector CSVOutputTests; + +// ========================================================================= // +// -------------------------- Test Helpers --------------------------------- // +// ========================================================================= // + +class TestReporter : public benchmark::BenchmarkReporter { +public: + TestReporter(std::vector reps) + : reporters_(reps) {} + + virtual bool ReportContext(const Context& context) { + bool last_ret = false; + bool first = true; + for (auto rep : reporters_) { + bool new_ret = rep->ReportContext(context); + CHECK(first || new_ret == last_ret) + << "Reports return different values for ReportContext"; + first = false; + last_ret = new_ret; + } + return last_ret; + } + + virtual void ReportRuns(const std::vector& report) { + for (auto rep : reporters_) + rep->ReportRuns(report); + } + + virtual void Finalize() { + for (auto rep : reporters_) + rep->Finalize(); + } + +private: + std::vector reporters_; +}; + + +#define CONCAT2(x, y) x##y +#define CONCAT(x, y) CONCAT2(x, y) + +#define ADD_CASES(...) \ + int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__) + +int AddCases(std::vector* out, std::initializer_list const& v) { + for (auto const& TC : v) + out->push_back(TC); + return 0; +} + +template +std::string join(First f) { return f; } + +template +std::string join(First f, Args&&... args) { + return std::string(std::move(f)) + "[ ]+" + join(std::forward(args)...); +} + +std::string dec_re = "[0-9]+\\.[0-9]+"; + +#define ADD_COMPLEXITY_CASES(...) 
\ + int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) + +int AddComplexityTest(std::vector* console_out, std::vector* json_out, + std::vector* csv_out, std::string big_o_test_name, + std::string rms_test_name, std::string big_o) { + std::string big_o_str = dec_re + " " + big_o; + AddCases(console_out, { + {join("^" + big_o_test_name + "", big_o_str, big_o_str) + "[ ]*$"}, + {join("^" + rms_test_name + "", "[0-9]+ %", "[0-9]+ %") + "[ ]*$"} + }); + AddCases(json_out, { + {"\"name\": \"" + big_o_test_name + "\",$"}, + {"\"cpu_coefficient\": [0-9]+,$", MR_Next}, + {"\"real_coefficient\": [0-9]{1,5},$", MR_Next}, + {"\"big_o\": \"" + big_o + "\",$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}, + {"\"name\": \"" + rms_test_name + "\",$"}, + {"\"rms\": [0-9]+%$", MR_Next}, + {"}", MR_Next} + }); + AddCases(csv_out, { + {"^\"" + big_o_test_name + "\",," + dec_re + "," + dec_re + "," + big_o + ",,,,,$"}, + {"^\"" + rms_test_name + "\",," + dec_re + "," + dec_re + ",,,,,,$"} + }); + return 0; +} + +} // end namespace + +// ========================================================================= // +// --------------------------- Testing BigO O(1) --------------------------- // +// ========================================================================= // + +void BM_Complexity_O1(benchmark::State& state) { + while (state.KeepRunning()) { + } + state.SetComplexityN(state.range_x()); +} +BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity(benchmark::o1); +BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity([](int){return 1.0; }); +BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity(); + +const char* big_o_1_test_name = "BM_Complexity_O1_BigO"; +const char* rms_o_1_test_name = "BM_Complexity_O1_RMS"; +const char* enum_auto_big_o_1 = "\\([0-9]+\\)"; +const char* lambda_big_o_1 = "f\\(N\\)"; + +// Add enum tests +ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, + big_o_1_test_name, rms_o_1_test_name, enum_auto_big_o_1); + +// Add lambda tests +ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, + big_o_1_test_name, rms_o_1_test_name, lambda_big_o_1); + +// ========================================================================= // +// --------------------------- Testing BigO O(N) --------------------------- // +// ========================================================================= // + +std::vector ConstructRandomVector(int size) { + std::vector v; + v.reserve(size); + for (int i = 0; i < size; ++i) { + v.push_back(rand() % size); + } + return v; +} + +void BM_Complexity_O_N(benchmark::State& state) { + auto v = ConstructRandomVector(state.range_x()); + const int item_not_in_vector = state.range_x()*2; // Test worst case scenario (item not in vector) + while (state.KeepRunning()) { + benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector)); + } + state.SetComplexityN(state.range_x()); +} +BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oN); +BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) -> double{return n; }); +BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(); + +const char* big_o_n_test_name = "BM_Complexity_O_N_BigO"; +const char* rms_o_n_test_name = "BM_Complexity_O_N_RMS"; +const char* enum_auto_big_o_n = "N"; +const char* lambda_big_o_n = "f\\(N\\)"; + +// Add enum tests +ADD_COMPLEXITY_CASES(&ConsoleOutputTests, 
&JSONOutputTests, &CSVOutputTests, + big_o_n_test_name, rms_o_n_test_name, enum_auto_big_o_n); + +// Add lambda tests +ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, + big_o_n_test_name, rms_o_n_test_name, lambda_big_o_n); + +// ========================================================================= // +// ------------------------- Testing BigO O(N*lgN) ------------------------- // +// ========================================================================= // + +static void BM_Complexity_O_N_log_N(benchmark::State& state) { + auto v = ConstructRandomVector(state.range_x()); + while (state.KeepRunning()) { + std::sort(v.begin(), v.end()); + } + state.SetComplexityN(state.range_x()); +} +BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oNLogN); +BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) {return n * std::log2(n); }); +BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(); + +const char* big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO"; +const char* rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS"; +const char* enum_auto_big_o_n_lg_n = "NlgN"; +const char* lambda_big_o_n_lg_n = "f\\(N\\)"; + +// Add enum tests +ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, + big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n); + +// Add lambda tests +ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, + big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n); + + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + + +int main(int argc, char* argv[]) { + // Add --color_print=false to argv since we don't want to match color codes. + char new_arg[64]; + char* new_argv[64]; + std::copy(argv, argv + argc, new_argv); + new_argv[argc++] = std::strcpy(new_arg, "--color_print=false"); + benchmark::Initialize(&argc, new_argv); + + benchmark::ConsoleReporter CR; + benchmark::JSONReporter JR; + benchmark::CSVReporter CSVR; + struct ReporterTest { + const char* name; + std::vector& output_cases; + benchmark::BenchmarkReporter& reporter; + std::stringstream out_stream; + std::stringstream err_stream; + + ReporterTest(const char* n, + std::vector& out_tc, + benchmark::BenchmarkReporter& br) + : name(n), output_cases(out_tc), reporter(br) { + reporter.SetOutputStream(&out_stream); + reporter.SetErrorStream(&err_stream); + } + } TestCases[] = { + {"ConsoleReporter", ConsoleOutputTests, CR}, + {"JSONReporter", JSONOutputTests, JR}, + {"CSVReporter", CSVOutputTests, CSVR} + }; + + // Create the test reporter and run the benchmarks. 
+ std::cout << "Running benchmarks...\n"; + TestReporter test_rep({&CR, &JR, &CSVR}); + benchmark::RunSpecifiedBenchmarks(&test_rep); + + for (auto& rep_test : TestCases) { + std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n"; + std::string banner(msg.size() - 1, '-'); + std::cout << banner << msg << banner << "\n"; + + std::cerr << rep_test.err_stream.str(); + std::cout << rep_test.out_stream.str(); + + for (const auto& TC : rep_test.output_cases) + TC.Check(rep_test.out_stream); + + std::cout << "\n"; + } + return 0; +} + Index: utils/google-benchmark/test/cxx03_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/cxx03_test.cc @@ -0,0 +1,31 @@ + +#include + +#include "benchmark/benchmark.h" + +#if __cplusplus >= 201103L +#error C++11 or greater detected. Should be C++03. +#endif + +void BM_empty(benchmark::State& state) { + while (state.KeepRunning()) { + volatile std::size_t x = state.iterations(); + ((void)x); + } +} +BENCHMARK(BM_empty); + +template +void BM_template2(benchmark::State& state) { + BM_empty(state); +} +BENCHMARK_TEMPLATE2(BM_template2, int, long); + +template +void BM_template1(benchmark::State& state) { + BM_empty(state); +} +BENCHMARK_TEMPLATE(BM_template1, long); +BENCHMARK_TEMPLATE1(BM_template1, int); + +BENCHMARK_MAIN() Index: utils/google-benchmark/test/diagnostics_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/diagnostics_test.cc @@ -0,0 +1,61 @@ +// Testing: +// State::PauseTiming() +// State::ResumeTiming() +// Test that CHECK's within these function diagnose when they are called +// outside of the KeepRunning() loop. +// +// NOTE: Users should NOT include or use src/check.h. This is only done in +// order to test library internals. 
+ +#include "benchmark/benchmark_api.h" +#include "../src/check.h" +#include +#include + +#if defined(__GNUC__) && !defined(__EXCEPTIONS) +#define TEST_HAS_NO_EXCEPTIONS +#endif + +void TestHandler() { +#ifndef TEST_HAS_NO_EXCEPTIONS + throw std::logic_error(""); +#else + std::abort(); +#endif +} + +void try_invalid_pause_resume(benchmark::State& state) { +#if !defined(NDEBUG) && !defined(TEST_HAS_NO_EXCEPTIONS) + try { + state.PauseTiming(); + std::abort(); + } catch (std::logic_error const&) {} + try { + state.ResumeTiming(); + std::abort(); + } catch (std::logic_error const&) {} +#else + (void)state; // avoid unused warning +#endif +} + +void BM_diagnostic_test(benchmark::State& state) { + static bool called_once = false; + + if (called_once == false) try_invalid_pause_resume(state); + + while (state.KeepRunning()) { + benchmark::DoNotOptimize(state.iterations()); + } + + if (called_once == false) try_invalid_pause_resume(state); + + called_once = true; +} +BENCHMARK(BM_diagnostic_test); + +int main(int argc, char** argv) { + benchmark::internal::GetAbortHandler() = &TestHandler; + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); +} Index: utils/google-benchmark/test/donotoptimize_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/donotoptimize_test.cc @@ -0,0 +1,36 @@ +#include "benchmark/benchmark.h" + +#include + +namespace { +#if defined(__GNUC__) + std::uint64_t double_up(const std::uint64_t x) __attribute__ ((const)); +#endif + std::uint64_t double_up(const std::uint64_t x) { + return x * 2; + } +} + +int main(int, char*[]) { + + // this test verifies compilation of DoNotOptimize() for some types + + char buffer8[8]; + benchmark::DoNotOptimize(buffer8); + + char buffer20[20]; + benchmark::DoNotOptimize(buffer20); + + char buffer1024[1024]; + benchmark::DoNotOptimize(buffer1024); + benchmark::DoNotOptimize(&buffer1024[0]); + + int x = 123; + benchmark::DoNotOptimize(x); + benchmark::DoNotOptimize(&x); + benchmark::DoNotOptimize(x += 42); + + benchmark::DoNotOptimize(double_up(x)); + + return 0; +} Index: utils/google-benchmark/test/filter_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/filter_test.cc @@ -0,0 +1,105 @@ +#include "benchmark/benchmark.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) { + ++count_; + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() : count_(0) {} + + virtual ~TestReporter() {} + + size_t GetCount() const { + return count_; + } + + private: + mutable size_t count_; +}; + +} // end namespace + + +static void NoPrefix(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(NoPrefix); + +static void BM_Foo(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_Foo); + + +static void BM_Bar(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_Bar); + + +static void BM_FooBar(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_FooBar); + + +static void BM_FooBa(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_FooBa); + + + +int main(int argc, char** argv) { + bool list_only = false; 
+ for (int i=0; i < argc; ++i) + list_only |= std::string(argv[i]).find("--benchmark_list_tests") != std::string::npos; + + benchmark::Initialize(&argc, argv); + + TestReporter test_reporter; + const size_t returned_count = benchmark::RunSpecifiedBenchmarks(&test_reporter); + + if (argc == 2) { + // Make sure we ran all of the tests + std::stringstream ss(argv[1]); + size_t expected_return; + ss >> expected_return; + + if (returned_count != expected_return) { + std::cerr << "ERROR: Expected " << expected_return + << " tests to match the filter but returned_count = " + << returned_count << std::endl; + return -1; + } + + const size_t expected_reports = list_only ? 0 : expected_return; + const size_t reports_count = test_reporter.GetCount(); + if (reports_count != expected_reports) { + std::cerr << "ERROR: Expected " << expected_reports + << " tests to be run but reported_count = " << reports_count + << std::endl; + return -1; + } + } + + return 0; +} Index: utils/google-benchmark/test/fixture_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/fixture_test.cc @@ -0,0 +1,52 @@ + +#include "benchmark/benchmark.h" + +#include +#include + +class MyFixture : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State& state) { + if (state.thread_index == 0) { + assert(data.get() == nullptr); + data.reset(new int(42)); + } + } + + void TearDown(const ::benchmark::State& state) { + if (state.thread_index == 0) { + assert(data.get() != nullptr); + data.reset(); + } + } + + ~MyFixture() { + assert(data == nullptr); + } + + std::unique_ptr data; +}; + + +BENCHMARK_F(MyFixture, Foo)(benchmark::State& st) { + assert(data.get() != nullptr); + assert(*data == 42); + while (st.KeepRunning()) { + } +} + +BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) { + if (st.thread_index == 0) { + assert(data.get() != nullptr); + assert(*data == 42); + } + while (st.KeepRunning()) { + assert(data.get() != nullptr); + assert(*data == 42); + } + st.SetItemsProcessed(st.range_x()); +} +BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42); +BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42)->ThreadPerCpu(); + +BENCHMARK_MAIN() Index: utils/google-benchmark/test/map_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/map_test.cc @@ -0,0 +1,58 @@ +#include "benchmark/benchmark.h" + +#include +#include + +namespace { + +std::map ConstructRandomMap(int size) { + std::map m; + for (int i = 0; i < size; ++i) { + m.insert(std::make_pair(rand() % size, rand() % size)); + } + return m; +} + +} // namespace + +// Basic version. +static void BM_MapLookup(benchmark::State& state) { + const int size = state.range_x(); + while (state.KeepRunning()) { + state.PauseTiming(); + std::map m = ConstructRandomMap(size); + state.ResumeTiming(); + for (int i = 0; i < size; ++i) { + benchmark::DoNotOptimize(m.find(rand() % size)); + } + } + state.SetItemsProcessed(state.iterations() * size); +} +BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12); + +// Using fixtures. 
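+// Unlike BM_MapLookup above, the fixture builds the map once in SetUp(), so
+// the timed loop measures only the lookups.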
+class MapFixture : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State& st) { + m = ConstructRandomMap(st.range_x()); + } + + void TearDown(const ::benchmark::State&) { + m.clear(); + } + + std::map m; +}; + +BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) { + const int size = state.range_x(); + while (state.KeepRunning()) { + for (int i = 0; i < size; ++i) { + benchmark::DoNotOptimize(m.find(rand() % size)); + } + } + state.SetItemsProcessed(state.iterations() * size); +} +BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(1<<3, 1<<12); + +BENCHMARK_MAIN() Index: utils/google-benchmark/test/options_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/options_test.cc @@ -0,0 +1,44 @@ +#include "benchmark/benchmark_api.h" + +#include +#include + +void BM_basic(benchmark::State& state) { + while (state.KeepRunning()) { + } +} + +void BM_basic_slow(benchmark::State& state) { + std::chrono::milliseconds sleep_duration(state.range_x()); + while (state.KeepRunning()) { + std::this_thread::sleep_for( + std::chrono::duration_cast(sleep_duration) + ); + } +} + +BENCHMARK(BM_basic); +BENCHMARK(BM_basic)->Arg(42); +BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond); +BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_basic)->Range(1, 8); +BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8); +BENCHMARK(BM_basic)->DenseRange(10, 15); +BENCHMARK(BM_basic)->ArgPair(42, 42); +BENCHMARK(BM_basic)->RangePair(64, 512, 64, 512); +BENCHMARK(BM_basic)->MinTime(0.7); +BENCHMARK(BM_basic)->UseRealTime(); +BENCHMARK(BM_basic)->ThreadRange(2, 4); +BENCHMARK(BM_basic)->ThreadPerCpu(); +BENCHMARK(BM_basic)->Repetitions(3); + +void CustomArgs(benchmark::internal::Benchmark* b) { + for (int i = 0; i < 10; ++i) { + b->Arg(i); + } +} + +BENCHMARK(BM_basic)->Apply(CustomArgs); + +BENCHMARK_MAIN() Index: utils/google-benchmark/test/reporter_output_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/reporter_output_test.cc @@ -0,0 +1,259 @@ + +#undef NDEBUG +#include "benchmark/benchmark.h" +#include "../src/check.h" // NOTE: check.h is for internal use only! +#include "../src/re.h" // NOTE: re.h is for internal use only +#include +#include +#include +#include +#include +#include + +namespace { + +// ========================================================================= // +// -------------------------- Testing Case --------------------------------- // +// ========================================================================= // + +enum MatchRules { + MR_Default, // Skip non-matching lines until a match is found. + MR_Next // Match must occur on the next line. 
+}; + +struct TestCase { + std::string regex; + int match_rule; + + TestCase(std::string re, int rule = MR_Default) : regex(re), match_rule(rule) {} + + void Check(std::stringstream& remaining_output) const { + benchmark::Regex r; + std::string err_str; + r.Init(regex, &err_str); + CHECK(err_str.empty()) << "Could not construct regex \"" << regex << "\"" + << " got Error: " << err_str; + + std::string line; + while (remaining_output.eof() == false) { + CHECK(remaining_output.good()); + std::getline(remaining_output, line); + if (r.Match(line)) return; + CHECK(match_rule != MR_Next) << "Expected line \"" << line + << "\" to match regex \"" << regex << "\""; + } + + CHECK(remaining_output.eof() == false) + << "End of output reached before match for regex \"" << regex + << "\" was found"; + } +}; + +std::vector ConsoleOutputTests; +std::vector JSONOutputTests; +std::vector CSVOutputTests; + +std::vector ConsoleErrorTests; +std::vector JSONErrorTests; +std::vector CSVErrorTests; + +// ========================================================================= // +// -------------------------- Test Helpers --------------------------------- // +// ========================================================================= // + +class TestReporter : public benchmark::BenchmarkReporter { +public: + TestReporter(std::vector reps) + : reporters_(reps) {} + + virtual bool ReportContext(const Context& context) { + bool last_ret = false; + bool first = true; + for (auto rep : reporters_) { + bool new_ret = rep->ReportContext(context); + CHECK(first || new_ret == last_ret) + << "Reports return different values for ReportContext"; + first = false; + last_ret = new_ret; + } + return last_ret; + } + + virtual void ReportRuns(const std::vector& report) { + for (auto rep : reporters_) + rep->ReportRuns(report); + } + + virtual void Finalize() { + for (auto rep : reporters_) + rep->Finalize(); + } + +private: + std::vector reporters_; +}; + + +#define CONCAT2(x, y) x##y +#define CONCAT(x, y) CONCAT2(x, y) + +#define ADD_CASES(...) \ + int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__) + +int AddCases(std::vector* out, std::initializer_list const& v) { + for (auto const& TC : v) + out->push_back(TC); + return 0; +} + +template +std::string join(First f) { return f; } + +template +std::string join(First f, Args&&... 
args) { + return std::string(std::move(f)) + "[ ]+" + join(std::forward(args)...); +} + +std::string dec_re = "[0-9]+\\.[0-9]+"; + +} // end namespace + +// ========================================================================= // +// ---------------------- Testing Prologue Output -------------------------- // +// ========================================================================= // + +ADD_CASES(&ConsoleOutputTests, { + {join("^Benchmark", "Time", "CPU", "Iterations$"), MR_Next}, + {"^[-]+$", MR_Next} +}); +ADD_CASES(&CSVOutputTests, { + {"name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second," + "label,error_occurred,error_message"} +}); + +// ========================================================================= // +// ------------------------ Testing Basic Output --------------------------- // +// ========================================================================= // + +void BM_basic(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_basic); + +ADD_CASES(&ConsoleOutputTests, { + {"^BM_basic[ ]+[0-9]{1,5} ns[ ]+[0-9]{1,5} ns[ ]+[0-9]+$"} +}); +ADD_CASES(&JSONOutputTests, { + {"\"name\": \"BM_basic\",$"}, + {"\"iterations\": [0-9]+,$", MR_Next}, + {"\"real_time\": [0-9]{1,5},$", MR_Next}, + {"\"cpu_time\": [0-9]{1,5},$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next} +}); +ADD_CASES(&CSVOutputTests, { + {"^\"BM_basic\",[0-9]+," + dec_re + "," + dec_re + ",ns,,,,,$"} +}); + +// ========================================================================= // +// ------------------------ Testing Error Output --------------------------- // +// ========================================================================= // + +void BM_error(benchmark::State& state) { + state.SkipWithError("message"); + while(state.KeepRunning()) {} +} +BENCHMARK(BM_error); +ADD_CASES(&ConsoleOutputTests, { + {"^BM_error[ ]+ERROR OCCURRED: 'message'$"} +}); +ADD_CASES(&JSONOutputTests, { + {"\"name\": \"BM_error\",$"}, + {"\"error_occurred\": true,$", MR_Next}, + {"\"error_message\": \"message\",$", MR_Next} +}); + +ADD_CASES(&CSVOutputTests, { + {"^\"BM_error\",,,,,,,,true,\"message\"$"} +}); + + +// ========================================================================= // +// ----------------------- Testing Complexity Output ----------------------- // +// ========================================================================= // + +void BM_Complexity_O1(benchmark::State& state) { + while (state.KeepRunning()) { + } + state.SetComplexityN(state.range_x()); +} +BENCHMARK(BM_Complexity_O1)->Range(1, 1<<18)->Complexity(benchmark::o1); + +std::string bigOStr = "[0-9]+\\.[0-9]+ \\([0-9]+\\)"; + +ADD_CASES(&ConsoleOutputTests, { + {join("^BM_Complexity_O1_BigO", bigOStr, bigOStr) + "[ ]*$"}, + {join("^BM_Complexity_O1_RMS", "[0-9]+ %", "[0-9]+ %") + "[ ]*$"} +}); + + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + + +int main(int argc, char* argv[]) { + // Add --color_print=false to argv since we don't want to match color codes. 
+ char new_arg[64]; + char* new_argv[64]; + std::copy(argv, argv + argc, new_argv); + new_argv[argc++] = std::strcpy(new_arg, "--color_print=false"); + benchmark::Initialize(&argc, new_argv); + + benchmark::ConsoleReporter CR; + benchmark::JSONReporter JR; + benchmark::CSVReporter CSVR; + struct ReporterTest { + const char* name; + std::vector& output_cases; + std::vector& error_cases; + benchmark::BenchmarkReporter& reporter; + std::stringstream out_stream; + std::stringstream err_stream; + + ReporterTest(const char* n, + std::vector& out_tc, + std::vector& err_tc, + benchmark::BenchmarkReporter& br) + : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) { + reporter.SetOutputStream(&out_stream); + reporter.SetErrorStream(&err_stream); + } + } TestCases[] = { + {"ConsoleReporter", ConsoleOutputTests, ConsoleErrorTests, CR}, + {"JSONReporter", JSONOutputTests, JSONErrorTests, JR}, + {"CSVReporter", CSVOutputTests, CSVErrorTests, CSVR} + }; + + // Create the test reporter and run the benchmarks. + std::cout << "Running benchmarks...\n"; + TestReporter test_rep({&CR, &JR, &CSVR}); + benchmark::RunSpecifiedBenchmarks(&test_rep); + + for (auto& rep_test : TestCases) { + std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n"; + std::string banner(msg.size() - 1, '-'); + std::cout << banner << msg << banner << "\n"; + + std::cerr << rep_test.err_stream.str(); + std::cout << rep_test.out_stream.str(); + + for (const auto& TC : rep_test.error_cases) + TC.Check(rep_test.err_stream); + for (const auto& TC : rep_test.output_cases) + TC.Check(rep_test.out_stream); + + std::cout << "\n"; + } + return 0; +} Index: utils/google-benchmark/test/skip_with_error_test.cc =================================================================== --- /dev/null +++ utils/google-benchmark/test/skip_with_error_test.cc @@ -0,0 +1,161 @@ + +#undef NDEBUG +#include "benchmark/benchmark.h" +#include "../src/check.h" // NOTE: check.h is for internal use only! +#include +#include + +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) { + all_runs_.insert(all_runs_.end(), begin(report), end(report)); + ConsoleReporter::ReportRuns(report); + } + + TestReporter() {} + virtual ~TestReporter() {} + + mutable std::vector all_runs_; +}; + +struct TestCase { + std::string name; + bool error_occurred; + std::string error_message; + + typedef benchmark::BenchmarkReporter::Run Run; + + void CheckRun(Run const& run) const { + CHECK(name == run.benchmark_name) << "expected " << name << " got " << run.benchmark_name; + CHECK(error_occurred == run.error_occurred); + CHECK(error_message == run.error_message); + if (error_occurred) { + //CHECK(run.iterations == 0); + } else { + CHECK(run.iterations != 0); + } + } +}; + +std::vector ExpectedResults; + +int AddCases(const char* base_name, std::initializer_list const& v) { + for (auto TC : v) { + TC.name = base_name + TC.name; + ExpectedResults.push_back(std::move(TC)); + } + return 0; +} + +#define CONCAT(x, y) CONCAT2(x, y) +#define CONCAT2(x, y) x##y +#define ADD_CASES(...) 
\ +int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__) + +} // end namespace + + +void BM_error_before_running(benchmark::State& state) { + state.SkipWithError("error message"); + while (state.KeepRunning()) { + assert(false); + } +} +BENCHMARK(BM_error_before_running); +ADD_CASES("BM_error_before_running", + {{"", true, "error message"}}); + +void BM_error_during_running(benchmark::State& state) { + int first_iter = true; + while (state.KeepRunning()) { + if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) { + assert(first_iter); + first_iter = false; + state.SkipWithError("error message"); + } else { + state.PauseTiming(); + state.ResumeTiming(); + } + } +} +BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8); +ADD_CASES( + "BM_error_during_running", + {{"/1/threads:1", true, "error message"}, + {"/1/threads:2", true, "error message"}, + {"/1/threads:4", true, "error message"}, + {"/1/threads:8", true, "error message"}, + {"/2/threads:1", false, ""}, + {"/2/threads:2", false, ""}, + {"/2/threads:4", false, ""}, + {"/2/threads:8", false, ""}} +); + +void BM_error_after_running(benchmark::State& state) { + while (state.KeepRunning()) { + benchmark::DoNotOptimize(state.iterations()); + } + if (state.thread_index <= (state.threads / 2)) + state.SkipWithError("error message"); +} +BENCHMARK(BM_error_after_running)->ThreadRange(1, 8); +ADD_CASES( + "BM_error_after_running", + {{"/threads:1", true, "error message"}, + {"/threads:2", true, "error message"}, + {"/threads:4", true, "error message"}, + {"/threads:8", true, "error message"}} +); + +void BM_error_while_paused(benchmark::State& state) { + bool first_iter = true; + while (state.KeepRunning()) { + if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) { + assert(first_iter); + first_iter = false; + state.PauseTiming(); + state.SkipWithError("error message"); + } else { + state.PauseTiming(); + state.ResumeTiming(); + } + } +} +BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8); +ADD_CASES( + "BM_error_while_paused", + {{"/1/threads:1", true, "error message"}, + {"/1/threads:2", true, "error message"}, + {"/1/threads:4", true, "error message"}, + {"/1/threads:8", true, "error message"}, + {"/2/threads:1", false, ""}, + {"/2/threads:2", false, ""}, + {"/2/threads:4", false, ""}, + {"/2/threads:8", false, ""}} +); + + +int main(int argc, char* argv[]) { + benchmark::Initialize(&argc, argv); + + TestReporter test_reporter; + benchmark::RunSpecifiedBenchmarks(&test_reporter); + + typedef benchmark::BenchmarkReporter::Run Run; + auto EB = ExpectedResults.begin(); + + for (Run const& run : test_reporter.all_runs_) { + assert(EB != ExpectedResults.end()); + EB->CheckRun(run); + ++EB; + } + assert(EB == ExpectedResults.end()); + + return 0; +}