diff --git a/third-party/benchmark/.ycm_extra_conf.py b/third-party/benchmark/.ycm_extra_conf.py --- a/third-party/benchmark/.ycm_extra_conf.py +++ b/third-party/benchmark/.ycm_extra_conf.py @@ -5,25 +5,21 @@ # compilation database set (by default, one is not set). # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. flags = [ - "-Wall", - "-Werror", - "-pedantic-errors", - "-std=c++0x", - "-fno-strict-aliasing", - "-O3", - "-DNDEBUG", - # ...and the same thing goes for the magic -x option which specifies the - # language that the files to be compiled are written in. This is mostly - # relevant for c++ headers. - # For a C project, you would set this to 'c' instead of 'c++'. - "-x", - "c++", - "-I", - "include", - "-isystem", - "/usr/include", - "-isystem", - "/usr/local/include", +'-Wall', +'-Werror', +'-pedantic-errors', +'-std=c++0x', +'-fno-strict-aliasing', +'-O3', +'-DNDEBUG', +# ...and the same thing goes for the magic -x option which specifies the +# language that the files to be compiled are written in. This is mostly +# relevant for c++ headers. +# For a C project, you would set this to 'c' instead of 'c++'. +'-x', 'c++', +'-I', 'include', +'-isystem', '/usr/include', +'-isystem', '/usr/local/include', ] @@ -33,84 +29,87 @@ # # Most projects will NOT need to set this to anything; you can just change the # 'flags' list of compilation flags. Notice that YCM itself uses that approach. -compilation_database_folder = "" +compilation_database_folder = '' -if os.path.exists(compilation_database_folder): - database = ycm_core.CompilationDatabase(compilation_database_folder) +if os.path.exists( compilation_database_folder ): + database = ycm_core.CompilationDatabase( compilation_database_folder ) else: - database = None - -SOURCE_EXTENSIONS = [".cc"] + database = None +SOURCE_EXTENSIONS = [ '.cc' ] def DirectoryOfThisScript(): - return os.path.dirname(os.path.abspath(__file__)) - - -def MakeRelativePathsInFlagsAbsolute(flags, working_directory): - if not working_directory: - return list(flags) - new_flags = [] - make_next_absolute = False - path_flags = ["-isystem", "-I", "-iquote", "--sysroot="] - for flag in flags: - new_flag = flag - - if make_next_absolute: - make_next_absolute = False - if not flag.startswith("/"): - new_flag = os.path.join(working_directory, flag) - - for path_flag in path_flags: - if flag == path_flag: - make_next_absolute = True - break - - if flag.startswith(path_flag): - path = flag[len(path_flag) :] - new_flag = path_flag + os.path.join(working_directory, path) - break - - if new_flag: - new_flags.append(new_flag) - return new_flags - - -def IsHeaderFile(filename): - extension = os.path.splitext(filename)[1] - return extension in [".h", ".hxx", ".hpp", ".hh"] - - -def GetCompilationInfoForFile(filename): - # The compilation_commands.json file generated by CMake does not have entries - # for header files. So we do our best by asking the db for flags for a - # corresponding source file, if any. If one exists, the flags for that file - # should be good enough. 
- if IsHeaderFile(filename): - basename = os.path.splitext(filename)[0] - for extension in SOURCE_EXTENSIONS: - replacement_file = basename + extension - if os.path.exists(replacement_file): - compilation_info = database.GetCompilationInfoForFile(replacement_file) - if compilation_info.compiler_flags_: - return compilation_info - return None - return database.GetCompilationInfoForFile(filename) - - -def FlagsForFile(filename, **kwargs): - if database: - # Bear in mind that compilation_info.compiler_flags_ does NOT return a - # python list, but a "list-like" StringVec object - compilation_info = GetCompilationInfoForFile(filename) - if not compilation_info: - return None - - final_flags = MakeRelativePathsInFlagsAbsolute( - compilation_info.compiler_flags_, compilation_info.compiler_working_dir_ - ) - else: - relative_to = DirectoryOfThisScript() - final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to) - - return {"flags": final_flags, "do_cache": True} + return os.path.dirname( os.path.abspath( __file__ ) ) + + +def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): + if not working_directory: + return list( flags ) + new_flags = [] + make_next_absolute = False + path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] + for flag in flags: + new_flag = flag + + if make_next_absolute: + make_next_absolute = False + if not flag.startswith( '/' ): + new_flag = os.path.join( working_directory, flag ) + + for path_flag in path_flags: + if flag == path_flag: + make_next_absolute = True + break + + if flag.startswith( path_flag ): + path = flag[ len( path_flag ): ] + new_flag = path_flag + os.path.join( working_directory, path ) + break + + if new_flag: + new_flags.append( new_flag ) + return new_flags + + +def IsHeaderFile( filename ): + extension = os.path.splitext( filename )[ 1 ] + return extension in [ '.h', '.hxx', '.hpp', '.hh' ] + + +def GetCompilationInfoForFile( filename ): + # The compilation_commands.json file generated by CMake does not have entries + # for header files. So we do our best by asking the db for flags for a + # corresponding source file, if any. If one exists, the flags for that file + # should be good enough. 
+ if IsHeaderFile( filename ): + basename = os.path.splitext( filename )[ 0 ] + for extension in SOURCE_EXTENSIONS: + replacement_file = basename + extension + if os.path.exists( replacement_file ): + compilation_info = database.GetCompilationInfoForFile( + replacement_file ) + if compilation_info.compiler_flags_: + return compilation_info + return None + return database.GetCompilationInfoForFile( filename ) + + +def FlagsForFile( filename, **kwargs ): + if database: + # Bear in mind that compilation_info.compiler_flags_ does NOT return a + # python list, but a "list-like" StringVec object + compilation_info = GetCompilationInfoForFile( filename ) + if not compilation_info: + return None + + final_flags = MakeRelativePathsInFlagsAbsolute( + compilation_info.compiler_flags_, + compilation_info.compiler_working_dir_ ) + else: + relative_to = DirectoryOfThisScript() + final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) + + return { + 'flags': final_flags, + 'do_cache': True + } diff --git a/third-party/benchmark/AUTHORS b/third-party/benchmark/AUTHORS --- a/third-party/benchmark/AUTHORS +++ b/third-party/benchmark/AUTHORS @@ -13,6 +13,7 @@ Andriy Berestovskyy Arne Beer Carto +Cezary Skrzyński Christian Wassermann Christopher Seymour Colin Braley @@ -27,10 +28,12 @@ Eric Fiselier Eugene Zhuk Evgeny Safronov +Fabien Pichot Federico Ficarelli Felix Homann Gergő Szitár Google Inc. +Henrique Bucher International Business Machines Corporation Ismael Jimenez Martinez Jern-Kuan Leong @@ -41,8 +44,11 @@ Kaito Udagawa Kishan Kumar Lei Xu +Marcel Jacobse Matt Clarkson Maxim Vafin +Mike Apodaca +Min-Yih Hsu MongoDB Inc. Nick Hutchinson Norman Heino @@ -50,13 +56,16 @@ Ori Livneh Paul Redmond Radoslav Yovchev +Raghu Raja +Rainer Orth Roman Lebedev Sayan Bhattacharjee +Shapr3D Shuo Chen +Staffan Tjernstrom Steinar H. Gunderson Stripe, Inc. Tobias Schmidt Yixuan Qiu Yusuke Suzuki Zbigniew Skowron -Min-Yih Hsu diff --git a/third-party/benchmark/CMakeLists.txt b/third-party/benchmark/CMakeLists.txt --- a/third-party/benchmark/CMakeLists.txt +++ b/third-party/benchmark/CMakeLists.txt @@ -1,19 +1,7 @@ -cmake_minimum_required (VERSION 3.5.1) - -foreach(p - CMP0048 # OK to clear PROJECT_VERSION on project() - CMP0054 # CMake 3.1 - CMP0056 # export EXE_LINKER_FLAGS to try_run - CMP0057 # Support no if() IN_LIST operator - CMP0063 # Honor visibility properties for all targets - CMP0077 # Allow option() overrides in importing projects - ) - if(POLICY ${p}) - cmake_policy(SET ${p} NEW) - endif() -endforeach() +# Require CMake 3.10. If available, use the policies up to CMake 3.22. +cmake_minimum_required (VERSION 3.10...3.22) -project (benchmark VERSION 1.6.0 LANGUAGES CXX) +project (benchmark VERSION 1.8.2 LANGUAGES CXX) option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) @@ -26,6 +14,9 @@ # PGC++ maybe reporting false positives. 
set(BENCHMARK_ENABLE_WERROR OFF) endif() +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC") + set(BENCHMARK_ENABLE_WERROR OFF) +endif() if(BENCHMARK_FORCE_WERROR) set(BENCHMARK_ENABLE_WERROR ON) endif(BENCHMARK_FORCE_WERROR) @@ -50,7 +41,10 @@ option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF) -set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +# Export only public symbols +set(CMAKE_CXX_VISIBILITY_PRESET hidden) +set(CMAKE_VISIBILITY_INLINES_HIDDEN ON) + if(MSVC) # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and # cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the @@ -116,17 +110,17 @@ set(VERSION "${GIT_VERSION}") endif() # Tell the user what versions we are using -message(STATUS "Version: ${VERSION}") +message(STATUS "Google Benchmark version: ${VERSION}") # The version of the libraries set(GENERIC_LIB_VERSION ${VERSION}) string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) # Import our CMake modules -include(CheckCXXCompilerFlag) include(AddCXXCompilerFlag) -include(CXXFeatureCheck) +include(CheckCXXCompilerFlag) include(CheckLibraryExists) +include(CXXFeatureCheck) check_library_exists(rt shm_open "" HAVE_LIB_RT) @@ -134,6 +128,16 @@ add_required_cxx_compiler_flag(-m32) endif() +if (MSVC) + set(BENCHMARK_CXX_STANDARD 14) +else() + set(BENCHMARK_CXX_STANDARD 11) +endif() + +set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD}) +set(CMAKE_CXX_STANDARD_REQUIRED YES) +set(CMAKE_CXX_EXTENSIONS OFF) + if (MSVC) # Turn compiler warnings up to 11 string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") @@ -166,21 +170,14 @@ set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG") endif() else() - # Try and enable C++11. Don't use C++14 because it doesn't work in some - # configurations. - add_cxx_compiler_flag(-std=c++11) - if (NOT HAVE_CXX_FLAG_STD_CXX11) - add_cxx_compiler_flag(-std=c++0x) - endif() - # Turn compiler warnings up to 11 add_cxx_compiler_flag(-Wall) add_cxx_compiler_flag(-Wextra) add_cxx_compiler_flag(-Wshadow) + add_cxx_compiler_flag(-Wfloat-equal) + add_cxx_compiler_flag(-Wold-style-cast) if(BENCHMARK_ENABLE_WERROR) - add_cxx_compiler_flag(-Werror RELEASE) - add_cxx_compiler_flag(-Werror RELWITHDEBINFO) - add_cxx_compiler_flag(-Werror MINSIZEREL) + add_cxx_compiler_flag(-Werror) endif() if (NOT BENCHMARK_ENABLE_TESTING) # Disable warning when compiling tests as gtest does not use 'override'. @@ -201,9 +198,7 @@ endif() # Disable deprecation warnings for release builds (when -Werror is enabled). if(BENCHMARK_ENABLE_WERROR) - add_cxx_compiler_flag(-Wno-deprecated RELEASE) - add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO) - add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL) + add_cxx_compiler_flag(-Wno-deprecated) endif() if (NOT BENCHMARK_ENABLE_EXCEPTIONS) add_cxx_compiler_flag(-fno-exceptions) @@ -219,12 +214,12 @@ add_cxx_compiler_flag(-wd654) add_cxx_compiler_flag(-Wthread-safety) if (HAVE_CXX_FLAG_WTHREAD_SAFETY) - cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) + cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include") endif() # On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a # predefined macro, which turns on all of the wonderful libc extensions. - # However g++ doesn't do this in Cygwin so we have to define it ourselfs + # However g++ doesn't do this in Cygwin so we have to define it ourselves # since we depend on GNU/POSIX/BSD extensions. 
if (CYGWIN) add_definitions(-D_GNU_SOURCE=1) @@ -312,6 +307,7 @@ # Ensure we have pthreads set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +cxx_feature_check(PTHREAD_AFFINITY) if (BENCHMARK_ENABLE_LIBPFM) find_package(PFM) diff --git a/third-party/benchmark/CONTRIBUTORS b/third-party/benchmark/CONTRIBUTORS --- a/third-party/benchmark/CONTRIBUTORS +++ b/third-party/benchmark/CONTRIBUTORS @@ -27,7 +27,9 @@ Alex Steele Andriy Berestovskyy Arne Beer +Bátor Tallér Billy Robert O'Neal III +Cezary Skrzyński Chris Kennelly Christian Wassermann Christopher Seymour @@ -44,12 +46,14 @@ Eric Fiselier Eugene Zhuk Evgeny Safronov +Fabien Pichot Fanbo Meng Federico Ficarelli Felix Homann Geoffrey Martin-Noble Gergő Szitár Hannes Hauswedell +Henrique Bucher Ismael Jimenez Martinez Jern-Kuan Leong JianXiong Zhou @@ -57,12 +61,15 @@ John Millikin Jordan Williams Jussi Knuuttila -Kai Wolf Kaito Udagawa +Kai Wolf Kishan Kumar Lei Xu +Marcel Jacobse Matt Clarkson Maxim Vafin +Mike Apodaca +Min-Yih Hsu Nick Hutchinson Norman Heino Oleksandr Sochka @@ -71,6 +78,8 @@ Paul Redmond Pierre Phaneuf Radoslav Yovchev +Raghu Raja +Rainer Orth Raul Marin Ray Glover Robert Guo @@ -84,4 +93,3 @@ Yixuan Qiu Yusuke Suzuki Zbigniew Skowron -Min-Yih Hsu diff --git a/third-party/benchmark/MODULE.bazel b/third-party/benchmark/MODULE.bazel new file mode 100644 --- /dev/null +++ b/third-party/benchmark/MODULE.bazel @@ -0,0 +1,24 @@ +module(name = "google_benchmark", version="1.8.2") + +bazel_dep(name = "bazel_skylib", version = "1.4.1") +bazel_dep(name = "platforms", version = "0.0.6") +bazel_dep(name = "rules_foreign_cc", version = "0.9.0") +bazel_dep(name = "rules_cc", version = "0.0.6") +bazel_dep(name = "rules_python", version = "0.24.0", dev_dependency = True) +bazel_dep(name = "googletest", version = "1.12.1", repo_name = "com_google_googletest", dev_dependency = True) +bazel_dep(name = "libpfm", version = "4.11.0") + +# Register a toolchain for Python 3.9 to be able to build numpy. Python +# versions >=3.10 are problematic. +# A second reason for this is to be able to build Python hermetically instead +# of relying on the changing default version from rules_python. 
+ +python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True) +python.toolchain(python_version = "3.9") + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True) +pip.parse( + hub_name="tools_pip_deps", + python_version = "3.9", + requirements_lock="//tools:requirements.txt") +use_repo(pip, "tools_pip_deps") diff --git a/third-party/benchmark/README.md b/third-party/benchmark/README.md --- a/third-party/benchmark/README.md +++ b/third-party/benchmark/README.md @@ -4,10 +4,9 @@ [![bazel](https://github.com/google/benchmark/actions/workflows/bazel.yml/badge.svg)](https://github.com/google/benchmark/actions/workflows/bazel.yml) [![pylint](https://github.com/google/benchmark/workflows/pylint/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Apylint) [![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings) - -[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) [![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) +[![Discord](https://discordapp.com/api/guilds/1125694995928719494/widget.png?style=shield)](https://discord.gg/cz7UX7wKC2) A library to benchmark code snippets, similar to unit tests. Example: @@ -33,7 +32,7 @@ [Installation](#installation). See [Usage](#usage) for a full example and the [User Guide](docs/user_guide.md) for a more comprehensive feature overview. -It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md) +It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/main/docs/primer.md) as some of the structural aspects of the APIs are similar. ## Resources @@ -47,6 +46,8 @@ [Assembly Testing Documentation](docs/AssemblyTests.md) +[Building and installing Python bindings](docs/python_bindings.md) + ## Requirements The library can be used with C++03. However, it requires C++11 to build, @@ -137,6 +138,12 @@ If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, `LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables. 
+To enable sanitizer checks (eg., `asan` and `tsan`), add: +``` + -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all" + -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all " +``` + ### Stable and Experimental Library Versions The main branch contains the latest stable version of the benchmarking library; diff --git a/third-party/benchmark/WORKSPACE b/third-party/benchmark/WORKSPACE --- a/third-party/benchmark/WORKSPACE +++ b/third-party/benchmark/WORKSPACE @@ -1,44 +1,22 @@ workspace(name = "com_github_google_benchmark") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") +load("//:bazel/benchmark_deps.bzl", "benchmark_deps") -http_archive( - name = "com_google_absl", - sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111", - strip_prefix = "abseil-cpp-20200225.2", - urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], -) +benchmark_deps() -git_repository( - name = "com_google_googletest", - remote = "https://github.com/google/googletest.git", - tag = "release-1.11.0", -) +load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies") -http_archive( - name = "pybind11", - build_file = "@//bindings/python:pybind11.BUILD", - sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d", - strip_prefix = "pybind11-2.4.3", - urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"], +rules_foreign_cc_dependencies() + +load("@rules_python//python:pip.bzl", pip3_install="pip_install") + +pip3_install( + name = "tools_pip_deps", + requirements = "//tools:requirements.txt", ) new_local_repository( name = "python_headers", build_file = "@//bindings/python:python_headers.BUILD", - path = "/usr/include/python3.6", # May be overwritten by setup.py. -) - -http_archive( - name = "rules_python", - url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", - sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", -) - -load("@rules_python//python:pip.bzl", pip3_install="pip_install") - -pip3_install( - name = "py_deps", - requirements = "//:requirements.txt", + path = "", # May be overwritten by setup.py. ) diff --git a/third-party/benchmark/WORKSPACE.bzlmod b/third-party/benchmark/WORKSPACE.bzlmod new file mode 100644 --- /dev/null +++ b/third-party/benchmark/WORKSPACE.bzlmod @@ -0,0 +1,2 @@ +# This file marks the root of the Bazel workspace. +# See MODULE.bazel for dependencies and setup. 
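The README changes above reference the library's quickstart pattern: a benchmark is a function taking `benchmark::State&`, registered with `BENCHMARK` and driven by `BENCHMARK_MAIN()`. A minimal sketch of that pattern (the function name and the 1 KiB payload are illustrative, not taken from this patch):

```c++
#include <string>

#include "benchmark/benchmark.h"

// Time how long it takes to copy a 1 KiB std::string.
static void BM_StringCopy(benchmark::State& state) {
  std::string src(1024, 'x');  // illustrative payload size
  for (auto _ : state) {
    std::string copy(src);
    benchmark::DoNotOptimize(copy);
  }
}
BENCHMARK(BM_StringCopy);

BENCHMARK_MAIN();
```

Built against the headers this patch imports, such a file links against the benchmark library (for example via the CMake `benchmark::benchmark` target).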
diff --git a/third-party/benchmark/bazel/benchmark_deps.bzl b/third-party/benchmark/bazel/benchmark_deps.bzl new file mode 100644 --- /dev/null +++ b/third-party/benchmark/bazel/benchmark_deps.bzl @@ -0,0 +1,65 @@ +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") + +def benchmark_deps(): + """Loads dependencies required to build Google Benchmark.""" + + if "bazel_skylib" not in native.existing_rules(): + http_archive( + name = "bazel_skylib", + sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz", + ], + ) + + if "rules_foreign_cc" not in native.existing_rules(): + http_archive( + name = "rules_foreign_cc", + sha256 = "bcd0c5f46a49b85b384906daae41d277b3dc0ff27c7c752cc51e43048a58ec83", + strip_prefix = "rules_foreign_cc-0.7.1", + url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.7.1.tar.gz", + ) + + if "rules_python" not in native.existing_rules(): + http_archive( + name = "rules_python", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", + sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", + ) + + if "com_google_absl" not in native.existing_rules(): + http_archive( + name = "com_google_absl", + sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111", + strip_prefix = "abseil-cpp-20200225.2", + urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], + ) + + if "com_google_googletest" not in native.existing_rules(): + new_git_repository( + name = "com_google_googletest", + remote = "https://github.com/google/googletest.git", + tag = "release-1.11.0", + ) + + if "nanobind" not in native.existing_rules(): + new_git_repository( + name = "nanobind", + remote = "https://github.com/wjakob/nanobind.git", + tag = "v1.4.0", + build_file = "@//bindings/python:nanobind.BUILD", + recursive_init_submodules = True, + ) + + if "libpfm" not in native.existing_rules(): + # Downloaded from v4.9.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/ + http_archive( + name = "libpfm", + build_file = str(Label("//tools:libpfm.BUILD.bazel")), + sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc", + type = "tar.gz", + strip_prefix = "libpfm-4.11.0", + urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"], + ) diff --git a/third-party/benchmark/bindings/python/build_defs.bzl b/third-party/benchmark/bindings/python/build_defs.bzl --- a/third-party/benchmark/bindings/python/build_defs.bzl +++ b/third-party/benchmark/bindings/python/build_defs.bzl @@ -8,8 +8,8 @@ shared_lib_name = name + shared_lib_suffix native.cc_binary( name = shared_lib_name, - linkshared = 1, - linkstatic = 1, + linkshared = True, + linkstatic = True, srcs = srcs + hdrs, copts = copts, features = features, diff --git a/third-party/benchmark/bindings/python/google_benchmark/__init__.py b/third-party/benchmark/bindings/python/google_benchmark/__init__.py --- a/third-party/benchmark/bindings/python/google_benchmark/__init__.py +++ b/third-party/benchmark/bindings/python/google_benchmark/__init__.py @@ -26,6 +26,7 @@ if __name__ == '__main__': benchmark.main() """ +import atexit from absl import app from 
google_benchmark import _benchmark @@ -44,6 +45,7 @@ oNLogN, oAuto, oLambda, + State, ) @@ -64,9 +66,10 @@ "oNLogN", "oAuto", "oLambda", + "State", ] -__version__ = "0.2.0" +__version__ = "1.8.2" class __OptionMaker: @@ -101,7 +104,7 @@ options = self.make(func_or_options) options.builder_calls.append((builder_name, args, kwargs)) # The decorator returns Options so it is not technically a decorator - # and needs a final call to @regiser + # and needs a final call to @register return options return __decorator @@ -156,3 +159,4 @@ # Methods for use with custom main function. initialize = _benchmark.Initialize run_benchmarks = _benchmark.RunSpecifiedBenchmarks +atexit.register(_benchmark.ClearRegisteredBenchmarks) diff --git a/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc b/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc --- a/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc +++ b/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc @@ -1,20 +1,17 @@ // Benchmark for Python. -#include -#include -#include - -#include "pybind11/operators.h" -#include "pybind11/pybind11.h" -#include "pybind11/stl.h" -#include "pybind11/stl_bind.h" - #include "benchmark/benchmark.h" -PYBIND11_MAKE_OPAQUE(benchmark::UserCounters); +#include "nanobind/nanobind.h" +#include "nanobind/operators.h" +#include "nanobind/stl/bind_map.h" +#include "nanobind/stl/string.h" +#include "nanobind/stl/vector.h" + +NB_MAKE_OPAQUE(benchmark::UserCounters); namespace { -namespace py = ::pybind11; +namespace nb = nanobind; std::vector Initialize(const std::vector& argv) { // The `argv` pointers here become invalid when this function returns, but @@ -37,15 +34,16 @@ return remaining_argv; } -benchmark::internal::Benchmark* RegisterBenchmark(const char* name, - py::function f) { +benchmark::internal::Benchmark* RegisterBenchmark(const std::string& name, + nb::callable f) { return benchmark::RegisterBenchmark( name, [f](benchmark::State& state) { f(&state); }); } -PYBIND11_MODULE(_benchmark, m) { +NB_MODULE(_benchmark, m) { + using benchmark::TimeUnit; - py::enum_(m, "TimeUnit") + nb::enum_(m, "TimeUnit") .value("kNanosecond", TimeUnit::kNanosecond) .value("kMicrosecond", TimeUnit::kMicrosecond) .value("kMillisecond", TimeUnit::kMillisecond) @@ -53,72 +51,74 @@ .export_values(); using benchmark::BigO; - py::enum_(m, "BigO") + nb::enum_(m, "BigO") .value("oNone", BigO::oNone) .value("o1", BigO::o1) .value("oN", BigO::oN) .value("oNSquared", BigO::oNSquared) .value("oNCubed", BigO::oNCubed) .value("oLogN", BigO::oLogN) - .value("oNLogN", BigO::oLogN) + .value("oNLogN", BigO::oNLogN) .value("oAuto", BigO::oAuto) .value("oLambda", BigO::oLambda) .export_values(); using benchmark::internal::Benchmark; - py::class_(m, "Benchmark") - // For methods returning a pointer tor the current object, reference - // return policy is used to ask pybind not to take ownership oof the + nb::class_(m, "Benchmark") + // For methods returning a pointer to the current object, reference + // return policy is used to ask nanobind not to take ownership of the // returned object and avoid calling delete on it. // https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies // // For methods taking a const std::vector<...>&, a copy is created // because a it is bound to a Python list. 
// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html - .def("unit", &Benchmark::Unit, py::return_value_policy::reference) - .def("arg", &Benchmark::Arg, py::return_value_policy::reference) - .def("args", &Benchmark::Args, py::return_value_policy::reference) - .def("range", &Benchmark::Range, py::return_value_policy::reference, - py::arg("start"), py::arg("limit")) + .def("unit", &Benchmark::Unit, nb::rv_policy::reference) + .def("arg", &Benchmark::Arg, nb::rv_policy::reference) + .def("args", &Benchmark::Args, nb::rv_policy::reference) + .def("range", &Benchmark::Range, nb::rv_policy::reference, + nb::arg("start"), nb::arg("limit")) .def("dense_range", &Benchmark::DenseRange, - py::return_value_policy::reference, py::arg("start"), - py::arg("limit"), py::arg("step") = 1) - .def("ranges", &Benchmark::Ranges, py::return_value_policy::reference) + nb::rv_policy::reference, nb::arg("start"), + nb::arg("limit"), nb::arg("step") = 1) + .def("ranges", &Benchmark::Ranges, nb::rv_policy::reference) .def("args_product", &Benchmark::ArgsProduct, - py::return_value_policy::reference) - .def("arg_name", &Benchmark::ArgName, py::return_value_policy::reference) + nb::rv_policy::reference) + .def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference) .def("arg_names", &Benchmark::ArgNames, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("range_pair", &Benchmark::RangePair, - py::return_value_policy::reference, py::arg("lo1"), py::arg("hi1"), - py::arg("lo2"), py::arg("hi2")) + nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"), + nb::arg("lo2"), nb::arg("hi2")) .def("range_multiplier", &Benchmark::RangeMultiplier, - py::return_value_policy::reference) - .def("min_time", &Benchmark::MinTime, py::return_value_policy::reference) + nb::rv_policy::reference) + .def("min_time", &Benchmark::MinTime, nb::rv_policy::reference) + .def("min_warmup_time", &Benchmark::MinWarmUpTime, + nb::rv_policy::reference) .def("iterations", &Benchmark::Iterations, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("repetitions", &Benchmark::Repetitions, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("report_aggregates_only", &Benchmark::ReportAggregatesOnly, - py::return_value_policy::reference, py::arg("value") = true) + nb::rv_policy::reference, nb::arg("value") = true) .def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly, - py::return_value_policy::reference, py::arg("value") = true) + nb::rv_policy::reference, nb::arg("value") = true) .def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("use_real_time", &Benchmark::UseRealTime, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("use_manual_time", &Benchmark::UseManualTime, - py::return_value_policy::reference) + nb::rv_policy::reference) .def( "complexity", (Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity, - py::return_value_policy::reference, - py::arg("complexity") = benchmark::oAuto); + nb::rv_policy::reference, + nb::arg("complexity") = benchmark::oAuto); using benchmark::Counter; - py::class_ py_counter(m, "Counter"); + nb::class_ py_counter(m, "Counter"); - py::enum_(py_counter, "Flags") + nb::enum_(py_counter, "Flags") .value("kDefaults", Counter::Flags::kDefaults) .value("kIsRate", Counter::Flags::kIsRate) .value("kAvgThreads", Counter::Flags::kAvgThreads) @@ -130,52 +130,55 @@ .value("kAvgIterationsRate", 
Counter::Flags::kAvgIterationsRate) .value("kInvert", Counter::Flags::kInvert) .export_values() - .def(py::self | py::self); + .def(nb::self | nb::self); - py::enum_(py_counter, "OneK") + nb::enum_(py_counter, "OneK") .value("kIs1000", Counter::OneK::kIs1000) .value("kIs1024", Counter::OneK::kIs1024) .export_values(); py_counter - .def(py::init(), - py::arg("value") = 0., py::arg("flags") = Counter::kDefaults, - py::arg("k") = Counter::kIs1000) - .def(py::init([](double value) { return Counter(value); })) - .def_readwrite("value", &Counter::value) - .def_readwrite("flags", &Counter::flags) - .def_readwrite("oneK", &Counter::oneK); - py::implicitly_convertible(); - py::implicitly_convertible(); - - py::bind_map(m, "UserCounters"); + .def(nb::init(), + nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults, + nb::arg("k") = Counter::kIs1000) + .def("__init__", ([](Counter *c, double value) { new (c) Counter(value); })) + .def_rw("value", &Counter::value) + .def_rw("flags", &Counter::flags) + .def_rw("oneK", &Counter::oneK) + .def(nb::init_implicit()); + + nb::implicitly_convertible(); + + nb::bind_map(m, "UserCounters"); using benchmark::State; - py::class_(m, "State") + nb::class_(m, "State") .def("__bool__", &State::KeepRunning) - .def_property_readonly("keep_running", &State::KeepRunning) + .def_prop_ro("keep_running", &State::KeepRunning) .def("pause_timing", &State::PauseTiming) .def("resume_timing", &State::ResumeTiming) .def("skip_with_error", &State::SkipWithError) - .def_property_readonly("error_occurred", &State::error_occurred) + .def_prop_ro("error_occurred", &State::error_occurred) .def("set_iteration_time", &State::SetIterationTime) - .def_property("bytes_processed", &State::bytes_processed, + .def_prop_rw("bytes_processed", &State::bytes_processed, &State::SetBytesProcessed) - .def_property("complexity_n", &State::complexity_length_n, + .def_prop_rw("complexity_n", &State::complexity_length_n, &State::SetComplexityN) - .def_property("items_processed", &State::items_processed, - &State::SetItemsProcessed) - .def("set_label", (void(State::*)(const char*)) & State::SetLabel) - .def("range", &State::range, py::arg("pos") = 0) - .def_property_readonly("iterations", &State::iterations) - .def_readwrite("counters", &State::counters) - .def_property_readonly("thread_index", &State::thread_index) - .def_property_readonly("threads", &State::threads); + .def_prop_rw("items_processed", &State::items_processed, + &State::SetItemsProcessed) + .def("set_label", &State::SetLabel) + .def("range", &State::range, nb::arg("pos") = 0) + .def_prop_ro("iterations", &State::iterations) + .def_prop_ro("name", &State::name) + .def_rw("counters", &State::counters) + .def_prop_ro("thread_index", &State::thread_index) + .def_prop_ro("threads", &State::threads); m.def("Initialize", Initialize); m.def("RegisterBenchmark", RegisterBenchmark, - py::return_value_policy::reference); + nb::rv_policy::reference); m.def("RunSpecifiedBenchmarks", []() { benchmark::RunSpecifiedBenchmarks(); }); + m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks); }; } // namespace diff --git a/third-party/benchmark/bindings/python/google_benchmark/example.py b/third-party/benchmark/bindings/python/google_benchmark/example.py --- a/third-party/benchmark/bindings/python/google_benchmark/example.py +++ b/third-party/benchmark/bindings/python/google_benchmark/example.py @@ -38,7 +38,6 @@ while state: sum(range(1_000_000)) - @benchmark.register def pause_timing(state): """Pause timing every iteration.""" @@ 
-73,7 +72,7 @@ @benchmark.register def custom_counters(state): - """Collect cutom metric using benchmark.Counter.""" + """Collect custom metric using benchmark.Counter.""" num_foo = 0.0 while state: # Benchmark some code here diff --git a/third-party/benchmark/bindings/python/nanobind.BUILD b/third-party/benchmark/bindings/python/nanobind.BUILD new file mode 100644 --- /dev/null +++ b/third-party/benchmark/bindings/python/nanobind.BUILD @@ -0,0 +1,17 @@ +cc_library( + name = "nanobind", + srcs = glob([ + "src/*.cpp" + ]), + copts = ["-fexceptions"], + includes = ["include", "ext/robin_map/include"], + textual_hdrs = glob( + [ + "include/**/*.h", + "src/*.h", + "ext/robin_map/include/tsl/*.h", + ], + ), + deps = ["@python_headers"], + visibility = ["//visibility:public"], +) diff --git a/third-party/benchmark/bindings/python/pybind11.BUILD b/third-party/benchmark/bindings/python/pybind11.BUILD deleted file mode 100644 --- a/third-party/benchmark/bindings/python/pybind11.BUILD +++ /dev/null @@ -1,20 +0,0 @@ -cc_library( - name = "pybind11", - hdrs = glob( - include = [ - "include/pybind11/*.h", - "include/pybind11/detail/*.h", - ], - exclude = [ - "include/pybind11/common.h", - "include/pybind11/eigen.h", - ], - ), - copts = [ - "-fexceptions", - "-Wno-undefined-inline", - "-Wno-pragma-once-outside-header", - ], - includes = ["include"], - visibility = ["//visibility:public"], -) diff --git a/third-party/benchmark/bindings/python/requirements.txt b/third-party/benchmark/bindings/python/requirements.txt deleted file mode 100644 --- a/third-party/benchmark/bindings/python/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -absl-py>=0.7.1 - diff --git a/third-party/benchmark/cmake/CXXFeatureCheck.cmake b/third-party/benchmark/cmake/CXXFeatureCheck.cmake --- a/third-party/benchmark/cmake/CXXFeatureCheck.cmake +++ b/third-party/benchmark/cmake/CXXFeatureCheck.cmake @@ -17,6 +17,8 @@ endif() set(__cxx_feature_check INCLUDED) +option(CXXFEATURECHECK_DEBUG OFF) + function(cxx_feature_check FILE) string(TOLOWER ${FILE} FILE) string(TOUPPER ${FILE} VAR) @@ -27,18 +29,22 @@ return() endif() + set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) if (ARGC GREATER 1) message(STATUS "Enabling additional flags: ${ARGV1}") - list(APPEND BENCHMARK_CXX_LINKER_FLAGS ${ARGV1}) + list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1}) endif() if (NOT DEFINED COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE}") if(CMAKE_CROSSCOMPILING) + message(STATUS "Cross-compiling to test ${FEATURE}") try_compile(COMPILE_${FEATURE} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp - CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} - LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED ON + CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS} + LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES} + OUTPUT_VARIABLE COMPILE_OUTPUT_VAR) if(COMPILE_${FEATURE}) message(WARNING "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0") @@ -47,11 +53,14 @@ set(RUN_${FEATURE} 1 CACHE INTERNAL "") endif() else() - message(STATUS "Performing Test ${FEATURE}") + message(STATUS "Compiling and running to test ${FEATURE}") try_run(RUN_${FEATURE} COMPILE_${FEATURE} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp - CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} - LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED ON + CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS} + LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES} + COMPILE_OUTPUT_VARIABLE 
COMPILE_OUTPUT_VAR) endif() endif() @@ -61,7 +70,11 @@ add_definitions(-DHAVE_${VAR}) else() if(NOT COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE} -- failed to compile") + if(CXXFEATURECHECK_DEBUG) + message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}") + else() + message(STATUS "Performing Test ${FEATURE} -- failed to compile") + endif() else() message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run") endif() diff --git a/third-party/benchmark/cmake/GoogleTest.cmake b/third-party/benchmark/cmake/GoogleTest.cmake --- a/third-party/benchmark/cmake/GoogleTest.cmake +++ b/third-party/benchmark/cmake/GoogleTest.cmake @@ -29,15 +29,25 @@ include(${GOOGLETEST_PREFIX}/googletest-paths.cmake) -# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves. -add_compile_options(-w) - # Add googletest directly to our build. This defines # the gtest and gtest_main targets. add_subdirectory(${GOOGLETEST_SOURCE_DIR} ${GOOGLETEST_BINARY_DIR} EXCLUDE_FROM_ALL) +# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves. +if (MSVC) + target_compile_options(gtest PRIVATE "/wd4244" "/wd4722") + target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722") + target_compile_options(gmock PRIVATE "/wd4244" "/wd4722") + target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722") +else() + target_compile_options(gtest PRIVATE "-w") + target_compile_options(gtest_main PRIVATE "-w") + target_compile_options(gmock PRIVATE "-w") + target_compile_options(gmock_main PRIVATE "-w") +endif() + if(NOT DEFINED GTEST_COMPILE_COMMANDS) set(GTEST_COMPILE_COMMANDS ON) endif() diff --git a/third-party/benchmark/cmake/Modules/FindPFM.cmake b/third-party/benchmark/cmake/Modules/FindPFM.cmake --- a/third-party/benchmark/cmake/Modules/FindPFM.cmake +++ b/third-party/benchmark/cmake/Modules/FindPFM.cmake @@ -1,26 +1,28 @@ # If successful, the following variables will be defined: -# HAVE_LIBPFM. -# Set BENCHMARK_ENABLE_LIBPFM to 0 to disable, regardless of libpfm presence. -include(CheckIncludeFile) -include(CheckLibraryExists) +# PFM_FOUND. 
+# PFM_LIBRARIES +# PFM_INCLUDE_DIRS +# the following target will be defined: +# PFM::libpfm + include(FeatureSummary) -enable_language(C) +include(FindPackageHandleStandardArgs) set_package_properties(PFM PROPERTIES URL http://perfmon2.sourceforge.net/ - DESCRIPTION "a helper library to develop monitoring tools" + DESCRIPTION "A helper library to develop monitoring tools" PURPOSE "Used to program specific performance monitoring events") -check_library_exists(libpfm.a pfm_initialize "" HAVE_LIBPFM_INITIALIZE) -if(HAVE_LIBPFM_INITIALIZE) - check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H) - check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H) - check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H) - if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H) - message("Using Perf Counters.") - set(HAVE_LIBPFM 1) - set(PFM_FOUND 1) - endif() -else() - message("Perf Counters support requested, but was unable to find libpfm.") +find_library(PFM_LIBRARY NAMES pfm) +find_path(PFM_INCLUDE_DIR NAMES perfmon/pfmlib.h) + +find_package_handle_standard_args(PFM REQUIRED_VARS PFM_LIBRARY PFM_INCLUDE_DIR) + +if (PFM_FOUND AND NOT TARGET PFM::libpfm) + add_library(PFM::libpfm UNKNOWN IMPORTED) + set_target_properties(PFM::libpfm PROPERTIES + IMPORTED_LOCATION "${PFM_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${PFM_INCLUDE_DIR}") endif() + +mark_as_advanced(PFM_LIBRARY PFM_INCLUDE_DIR) diff --git a/third-party/benchmark/cmake/benchmark.pc.in b/third-party/benchmark/cmake/benchmark.pc.in --- a/third-party/benchmark/cmake/benchmark.pc.in +++ b/third-party/benchmark/cmake/benchmark.pc.in @@ -1,7 +1,7 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix} -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: @PROJECT_NAME@ Description: Google microbenchmark framework diff --git a/third-party/benchmark/cmake/pthread_affinity.cpp b/third-party/benchmark/cmake/pthread_affinity.cpp new file mode 100644 --- /dev/null +++ b/third-party/benchmark/cmake/pthread_affinity.cpp @@ -0,0 +1,16 @@ +#include +int main() { + cpu_set_t set; + CPU_ZERO(&set); + for (int i = 0; i < CPU_SETSIZE; ++i) { + CPU_SET(i, &set); + CPU_CLR(i, &set); + } + pthread_t self = pthread_self(); + int ret; + ret = pthread_getaffinity_np(self, sizeof(set), &set); + if (ret != 0) return ret; + ret = pthread_setaffinity_np(self, sizeof(set), &set); + if (ret != 0) return ret; + return 0; +} diff --git a/third-party/benchmark/docs/AssemblyTests.md b/third-party/benchmark/docs/AssemblyTests.md --- a/third-party/benchmark/docs/AssemblyTests.md +++ b/third-party/benchmark/docs/AssemblyTests.md @@ -111,6 +111,7 @@ is matching stack frame addresses. In this case regular expressions can be used to match the differing bits of output. 
For example: + ```c++ int ExternInt; struct Point { int x, y, z; }; @@ -127,6 +128,7 @@ // CHECK: ret } ``` + ## Current Requirements and Limitations diff --git a/third-party/benchmark/docs/_config.yml b/third-party/benchmark/docs/_config.yml --- a/third-party/benchmark/docs/_config.yml +++ b/third-party/benchmark/docs/_config.yml @@ -1 +1 @@ -theme: jekyll-theme-minimal \ No newline at end of file +theme: jekyll-theme-minimal diff --git a/third-party/benchmark/docs/dependencies.md b/third-party/benchmark/docs/dependencies.md --- a/third-party/benchmark/docs/dependencies.md +++ b/third-party/benchmark/docs/dependencies.md @@ -1,19 +1,13 @@ # Build tool dependency policy -To ensure the broadest compatibility when building the benchmark library, but -still allow forward progress, we require any build tooling to be available for: +We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In +particular the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems). -* Debian stable _and_ -* The last two Ubuntu LTS releases +## CMake -Currently, this means using build tool versions that are available for Ubuntu -18.04 (Bionic Beaver), Ubuntu 20.04 (Focal Fossa), and Debian 11 (bullseye). +The current supported version is CMake 3.10 as of 2023-08-10. Most modern +distributions include newer versions, for example: -_Note, CI also runs ubuntu-16.04 and ubuntu-14.04 to ensure best effort support -for older versions._ - -## cmake -The current supported version is cmake 3.5.1 as of 2018-06-06. - -_Note, this version is also available for Ubuntu 14.04, an older Ubuntu LTS -release, as `cmake3`._ +* Ubuntu 20.04 provides CMake 3.16.3 +* Debian 11.4 provides CMake 3.18.4 +* Ubuntu 22.04 provides CMake 3.22.1 diff --git a/third-party/benchmark/docs/index.md b/third-party/benchmark/docs/index.md --- a/third-party/benchmark/docs/index.md +++ b/third-party/benchmark/docs/index.md @@ -4,7 +4,9 @@ * [Dependencies](dependencies.md) * [Perf Counters](perf_counters.md) * [Platform Specific Build Instructions](platform_specific_build_instructions.md) +* [Python Bindings](python_bindings.md) * [Random Interleaving](random_interleaving.md) +* [Reducing Variance](reducing_variance.md) * [Releasing](releasing.md) * [Tools](tools.md) -* [User Guide](user_guide.md) \ No newline at end of file +* [User Guide](user_guide.md) diff --git a/third-party/benchmark/docs/perf_counters.md b/third-party/benchmark/docs/perf_counters.md --- a/third-party/benchmark/docs/perf_counters.md +++ b/third-party/benchmark/docs/perf_counters.md @@ -12,16 +12,17 @@ * The benchmark is run on an architecture featuring a Performance Monitoring Unit (PMU), * The benchmark is compiled with support for collecting counters. Currently, - this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build - time + this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a + dependency via Bazel. The feature does not require modifying benchmark code. Counter collection is handled at the boundaries where timer collection is also handled. To opt-in: - -* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. -* Enable the cmake flag BENCHMARK_ENABLE_LIBPFM. +* If using a Bazel build, add `--define pfm=1` to your build flags +* If using CMake: + * Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. + * Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`. 
To use, pass a comma-separated list of counter names through the `--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning, @@ -31,4 +32,4 @@ The counter values are reported back through the [User Counters](../README.md#custom-counters) mechanism, meaning, they are available in all the formats (e.g. JSON) supported -by User Counters. \ No newline at end of file +by User Counters. diff --git a/third-party/benchmark/docs/python_bindings.md b/third-party/benchmark/docs/python_bindings.md new file mode 100644 --- /dev/null +++ b/third-party/benchmark/docs/python_bindings.md @@ -0,0 +1,34 @@ +# Building and installing Python bindings + +Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and +using Google Benchmark directly in Python. +Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows. +Supported Python versions are Python 3.7 - 3.10. + +To install Google Benchmark's Python bindings, run: + +```bash +python -m pip install --upgrade pip # for manylinux2014 support +python -m pip install google-benchmark +``` + +In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual +environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html) +on how to create virtual environments. + +To build a wheel directly from source, you can follow these steps: +```bash +git clone https://github.com/google/benchmark.git +cd benchmark +# create a virtual environment and activate it +python3 -m venv venv --system-site-packages +source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows + +# upgrade Python's system-wide packages +python -m pip install --upgrade pip setuptools wheel +# builds the wheel and stores it in the directory "wheelhouse". +python -m pip wheel . -w wheelhouse +``` + +NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel, +refer to the [Bazel installation docs](https://bazel.build/install). diff --git a/third-party/benchmark/docs/reducing_variance.md b/third-party/benchmark/docs/reducing_variance.md new file mode 100644 --- /dev/null +++ b/third-party/benchmark/docs/reducing_variance.md @@ -0,0 +1,100 @@ +# Reducing Variance + + + +## Disabling CPU Frequency Scaling + +If you see this error: + +``` +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +``` + +you might want to disable the CPU frequency scaling while running the +benchmark, as well as consider other ways to stabilize the performance of +your system while benchmarking. + +See [Reducing Variance](reducing_variance.md) for more information. + +Exactly how to do this depends on the Linux distribution, +desktop environment, and installed programs. Specific details are a moving +target, so we will not attempt to exhaustively document them here. + +One simple option is to use the `cpupower` program to change the +performance governor to "performance". This tool is maintained along with +the Linux kernel and provided by your distribution. + +It must be run as root, like this: + +```bash +sudo cpupower frequency-set --governor performance +``` + +After this you can verify that all CPUs are using the performance governor +by running this command: + +```bash +cpupower frequency-info -o proc +``` + +The benchmarks you subsequently run will have less variance. 
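One way to check that such tuning actually reduced variance is to let the library report spread statistics across repetitions. A short sketch using the existing `Repetitions` and `ReportAggregatesOnly` APIs; the workload body is a placeholder, not part of this patch:

```c++
#include <vector>

#include "benchmark/benchmark.h"

// Placeholder workload; substitute the code whose stability you care about.
static void BM_Workload(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<int> v(4096, 42);
    auto data = v.data();
    benchmark::DoNotOptimize(data);
    benchmark::ClobberMemory();
  }
}
// 20 repetitions; only the mean, median, stddev and cv aggregates are
// reported, which makes run-to-run spread easy to compare.
BENCHMARK(BM_Workload)->Repetitions(20)->ReportAggregatesOnly(true);

BENCHMARK_MAIN();
```

Comparing the reported `stddev` and `cv` rows before and after applying the steps in the next section gives a quick sanity check that the tuning helped.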
+ + +## Reducing Variance in Benchmarks + +The Linux CPU frequency governor [discussed +above](user_guide#disabling-cpu-frequency-scaling) is not the only source +of noise in benchmarks. Some, but not all, of the sources of variance +include: + +1. On multi-core machines not all CPUs/CPU cores/CPU threads run the same + speed, so running a benchmark one time and then again may give a + different result depending on which CPU it ran on. +2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and + AMD Turbo Core and Precision Boost, can temporarily change the CPU + frequency even when using the "performance" governor on Linux. +3. Context switching between CPUs, or scheduling competition on the CPU the + benchmark is running on. +4. Intel Hyperthreading or AMD SMT causing the same issue as above. +5. Cache effects caused by code running on other CPUs. +6. Non-uniform memory architectures (NUMA). + +These can cause variance in benchmark results within a single run +(`--benchmark_repetitions=N`) or across multiple runs of the benchmark +program. + +Reducing sources of variance is OS and architecture dependent, which is one +reason some companies maintain machines dedicated to performance testing. + +Some of the easier and more effective ways of reducing variance on a typical +Linux workstation are: + +1. Use the performance governor as [discussed +above](user_guide#disabling-cpu-frequency-scaling). +1. Disable processor boosting by: + ```sh + echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost + ``` + See the Linux kernel's + [boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt) + for more information. +2. Set the benchmark program's task affinity to a fixed CPU. For example: + ```sh + taskset -c 0 ./mybenchmark + ``` +3. Disabling Hyperthreading/SMT. This can be done in the BIOS or using the + `/sys` file system (see the LLVM project's [Benchmarking + tips](https://llvm.org/docs/Benchmarking.html)). +4. Close other programs that do non-trivial things based on timers, such as + your web browser, desktop environment, etc. +5. Reduce the working set of your benchmark to fit within the L1 cache, but + do be aware that this may lead you to optimize for an unrealistic + situation. + +Further resources on this topic: + +1. The LLVM project's [Benchmarking + tips](https://llvm.org/docs/Benchmarking.html). +1. The Arch Wiki [CPU frequency +scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page. diff --git a/third-party/benchmark/docs/releasing.md b/third-party/benchmark/docs/releasing.md --- a/third-party/benchmark/docs/releasing.md +++ b/third-party/benchmark/docs/releasing.md @@ -1,20 +1,24 @@ # How to release * Make sure you're on main and synced to HEAD -* Ensure the project builds and tests run (sanity check only, obviously) +* Ensure the project builds and tests run * `parallel -j0 exec ::: test/*_test` can help ensure everything at least passes * Prepare release notes * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of commits between the last annotated tag and HEAD * Pick the most interesting. -* Create one last commit that updates the version saved in `CMakeLists.txt` and the - `__version__` variable in `bindings/python/google_benchmark/__init__.py`to the release - version you're creating. (This version will be used if benchmark is installed from the - archive you'll be creating in the next step.)
+* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel` + and the `__version__` variable in `bindings/python/google_benchmark/__init__.py`to the + release version you're creating. (This version will be used if benchmark is installed + from the archive you'll be creating in the next step.) ``` -project (benchmark VERSION 1.6.0 LANGUAGES CXX) +project (benchmark VERSION 1.8.0 LANGUAGES CXX) +``` + +``` +module(name = "com_github_google_benchmark", version="1.8.0") ``` ```python @@ -22,7 +26,7 @@ # ... -__version__ = "1.6.0" # <-- change this to the release version you are creating +__version__ = "1.8.0" # <-- change this to the release version you are creating # ... ``` @@ -33,3 +37,5 @@ * `git pull --tags` * `git tag -a -f ` * `git push --force --tags origin` +* Confirm that the "Build and upload Python wheels" action runs to completion + * run it manually if it hasn't run diff --git a/third-party/benchmark/docs/tools.md b/third-party/benchmark/docs/tools.md --- a/third-party/benchmark/docs/tools.md +++ b/third-party/benchmark/docs/tools.md @@ -186,6 +186,146 @@ This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one. As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. +### Note: Interpreting the output + +Performance measurements are an art, and performance comparisons are doubly so. +Results are often noisy and don't necessarily have large absolute differences to +them, so just by visual inspection, it is not at all apparent if two +measurements are actually showing a performance change or not. It is even more +confusing with multiple benchmark repetitions. + +Thankfully, what we can do, is use statistical tests on the results to determine +whether the performance has statistically-significantly changed. `compare.py` +uses [Mann–Whitney U +test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with a null +hypothesis being that there's no difference in performance. + +**The below output is a summary of a benchmark comparison with statistics +provided for a multi-threaded process.** +``` +Benchmark Time CPU Time Old Time New CPU Old CPU New +----------------------------------------------------------------------------------------------------------------------------- +benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27 +benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77 +benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77 +benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0 +benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0 +OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0 +``` +-------------------------------------------- +Here's a breakdown of each row: + +**benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for +the statistical test comparing the performance of the process running with one +thread. A value of 0.0000 suggests a statistically significant difference in +performance. The comparison was conducted using the U Test (Mann-Whitney +U Test) with 27 repetitions for each case. + +**benchmark/threads:1/process_time/real_time_mean**: This shows the relative +difference in mean execution time between two different cases. The negative +value (-0.1442) implies that the new process is faster by about 14.42%. 
The old +time was 90 units, while the new time is 77 units. + +**benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the +relative difference in the median execution time. Again, the new process is +faster by 14.44%. + +**benchmark/threads:1/process_time/real_time_stddev**: This is the relative +difference in the standard deviation of the execution time, which is a measure +of how much variation or dispersion there is from the mean. A positive value +(+0.3974) implies there is more variance in the execution time in the new +process. + +**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of +Variation. It is the ratio of the standard deviation to the mean. It provides a +standardized measure of dispersion. An increase (+0.6329) indicates more +relative variability in the new process. + +**OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is +less influenced by outliers. The negative value indicates a general improvement +in the new process. However, given the values are all zero for the old and new +times, this seems to be a mistake or placeholder in the output. + +----------------------------------------- + + + +Let's first try to see what the different columns represent in the above +`compare.py` benchmarking output: + + 1. **Benchmark:** The name of the function being benchmarked, along with the + size of the input (after the slash). + + 2. **Time:** The average time per operation, across all iterations. + + 3. **CPU:** The average CPU time per operation, across all iterations. + + 4. **Iterations:** The number of iterations the benchmark was run to get a + stable estimate. + + 5. **Time Old and Time New:** These represent the average time it takes for a + function to run in two different scenarios or versions. For example, you + might be comparing how fast a function runs before and after you make some + changes to it. + + 6. **CPU Old and CPU New:** These show the average amount of CPU time that the + function uses in two different scenarios or versions. This is similar to + Time Old and Time New, but focuses on CPU usage instead of overall time. + +In the comparison section, the relative differences in both time and CPU time +are displayed for each input size. + + +A statistically-significant difference is determined by a **p-value**, which is +a measure of the probability that the observed difference could have occurred +just by random chance. A smaller p-value indicates stronger evidence against the +null hypothesis. + +**Therefore:** + 1. If the p-value is less than the chosen significance level (alpha), we + reject the null hypothesis and conclude the benchmarks are significantly + different. + 2. If the p-value is greater than or equal to alpha, we fail to reject the + null hypothesis and treat the two benchmarks as similar. + + + +The result of said the statistical test is additionally communicated through color coding: +```diff ++ Green: +``` + The benchmarks are _**statistically different**_. This could mean the + performance has either **significantly improved** or **significantly + deteriorated**. You should look at the actual performance numbers to see which + is the case. +```diff +- Red: +``` + The benchmarks are _**statistically similar**_. This means the performance + **hasn't significantly changed**. + +In statistical terms, **'green'** means we reject the null hypothesis that +there's no difference in performance, and **'red'** means we fail to reject the +null hypothesis. 
This might seem counter-intuitive if you're expecting 'green' +to mean 'improved performance' and 'red' to mean 'worsened performance'. +```bash + But remember, in this context: + + 'Success' means 'successfully finding a difference'. + 'Failure' means 'failing to find a difference'. +``` + + +Also, please note that **even if** we determine that there **is** a +statistically-significant difference between the two measurements, it does not +_necessarily_ mean that the actual benchmarks that were measured **are** +different, or vice versa, even if we determine that there is **no** +statistically-significant difference between the two measurements, it does not +necessarily mean that the actual benchmarks that were measured **are not** +different. + + + ### U test If there is a sufficient repetition count of the benchmarks, the tool can do diff --git a/third-party/benchmark/docs/user_guide.md b/third-party/benchmark/docs/user_guide.md --- a/third-party/benchmark/docs/user_guide.md +++ b/third-party/benchmark/docs/user_guide.md @@ -50,14 +50,19 @@ [Custom Statistics](#custom-statistics) +[Memory Usage](#memory-usage) + [Using RegisterBenchmark](#using-register-benchmark) [Exiting with an Error](#exiting-with-an-error) -[A Faster KeepRunning Loop](#a-faster-keep-running-loop) +[A Faster `KeepRunning` Loop](#a-faster-keep-running-loop) + +## Benchmarking Tips [Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) +[Reducing Variance in Benchmarks](reducing_variance.md) @@ -180,6 +185,12 @@ BM_memcpy/32k 1834 ns 1837 ns 357143 ``` +## Disabling Benchmarks + +It is possible to temporarily disable benchmarks by renaming the benchmark +function to have the prefix "DISABLED_". This will cause the benchmark to +be skipped at runtime. + ## Result comparison @@ -232,6 +243,19 @@ the minimum time, or the wallclock time is 5x minimum time. The minimum time is set per benchmark by calling `MinTime` on the registered benchmark object. +Furthermore warming up a benchmark might be necessary in order to get +stable results because of e.g caching effects of the code under benchmark. +Warming up means running the benchmark a given amount of time, before +results are actually taken into account. The amount of time for which +the warmup should be run can be set per benchmark by calling +`MinWarmUpTime` on the registered benchmark object or for all benchmarks +using the `--benchmark_min_warmup_time` command-line option. Note that +`MinWarmUpTime` will overwrite the value of `--benchmark_min_warmup_time` +for the single benchmark. How many iterations the warmup run of each +benchmark takes is determined the same way as described in the paragraph +above. Per default the warmup phase is set to 0 seconds and is therefore +disabled. + Average timings are then reported over the iterations run. If multiple repetitions are requested using the `--benchmark_repetitions` command-line option, or at registration time, the benchmark function will be run several @@ -247,10 +271,12 @@ Global setup/teardown specific to each benchmark can be done by passing a callback to Setup/Teardown: -The setup/teardown callbacks will be invoked once for each benchmark. -If the benchmark is multi-threaded (will run in k threads), they will be invoked exactly once before -each run with k threads. -If the benchmark uses different size groups of threads, the above will be true for each size group. +The setup/teardown callbacks will be invoked once for each benchmark. 
If the +benchmark is multi-threaded (will run in k threads), they will be invoked +exactly once before each run with k threads. + +If the benchmark uses different size groups of threads, the above will be true +for each size group. Eg., @@ -293,7 +319,7 @@ delete[] src; delete[] dst; } -BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(8<<10); ``` The preceding code is quite repetitive, and can be replaced with the following @@ -322,7 +348,8 @@ static void BM_DenseRange(benchmark::State& state) { for(auto _ : state) { std::vector v(state.range(0), state.range(0)); - benchmark::DoNotOptimize(v.data()); + auto data = v.data(); + benchmark::DoNotOptimize(data); benchmark::ClobberMemory(); } } @@ -362,17 +389,17 @@ product of the two specified ranges and will generate a benchmark for each such pair. -{% raw %} + ```c++ BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); ``` -{% endraw %} + Some benchmarks may require specific argument values that cannot be expressed with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a benchmark input for each combination in the product of the supplied vectors. -{% raw %} + ```c++ BENCHMARK(BM_SetInsert) ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) @@ -391,7 +418,7 @@ ->Args({3<<10, 80}) ->Args({8<<10, 80}); ``` -{% endraw %} + For the most common scenarios, helper methods for creating a list of integers for a given sparse or dense range are provided. @@ -434,13 +461,22 @@ should describe the values passed. ```c++ -template -void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { - [...] +template +void BM_takes_args(benchmark::State& state, Args&&... args) { + auto args_tuple = std::make_tuple(std::move(args)...); + for (auto _ : state) { + std::cout << std::get<0>(args_tuple) << ": " << std::get<1>(args_tuple) + << '\n'; + [...] + } } // Registers a benchmark named "BM_takes_args/int_string_test" that passes -// the specified values to `extra_args`. +// the specified values to `args`. BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); + +// Registers the same benchmark "BM_takes_args/int_test" that passes +// the specified values to `args`. +BENCHMARK_CAPTURE(BM_takes_args, int_test, 42, 43); ``` Note that elements of `...args` may refer to global variables. Users should @@ -459,7 +495,8 @@ std::string s1(state.range(0), '-'); std::string s2(state.range(0), '-'); for (auto _ : state) { - benchmark::DoNotOptimize(s1.compare(s2)); + auto comparison_result = s1.compare(s2); + benchmark::DoNotOptimize(comparison_result); } state.SetComplexityN(state.range(0)); } @@ -668,7 +705,7 @@ When you're compiling in C++11 mode or later you can use `insert()` with `std::initializer_list`: -{% raw %} + ```c++ // With C++11, this can be done: state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); @@ -677,7 +714,7 @@ state.counters["Bar"] = numBars; state.counters["Baz"] = numBazs; ``` -{% endraw %} + ### Counter Reporting @@ -773,6 +810,16 @@ BENCHMARK(BM_MultiThreaded)->Threads(2); ``` +To run the benchmark across a range of thread counts, instead of `Threads`, use +`ThreadRange`. This takes two parameters (`min_threads` and `max_threads`) and +runs the benchmark once for values in the inclusive range. For example: + +```c++ +BENCHMARK(BM_MultiThreaded)->ThreadRange(1, 8); +``` + +will run `BM_MultiThreaded` with thread counts 1, 2, 4, and 8. 
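+
+For reference, a minimal sketch of what the body of such a multi-threaded
+benchmark might look like (an illustration only, assuming the
+`State::thread_index()` and `State::threads()` accessors; it is not the
+library's canonical example):
+
+```c++
+static void BM_MultiThreaded(benchmark::State& state) {
+  if (state.thread_index() == 0) {
+    // One-time setup shared by all threads of this run could go here.
+  }
+  for (auto _ : state) {
+    // This loop body runs concurrently in each of state.threads() threads.
+  }
+  if (state.thread_index() == 0) {
+    // Matching one-time teardown.
+  }
+}
+```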
+ If the benchmarked code itself uses threads and you want to compare it to single-threaded code, you may want to use real-time ("wallclock") measurements for latency comparisons: @@ -814,7 +861,7 @@ // Measure the user-visible time, the wall clock (literally, the time that // has passed on the clock on the wall), use it to decide for how long to -// run the benchmark loop. This will always be meaningful, an will match the +// run the benchmark loop. This will always be meaningful, and will match the // time spent by the main thread in single-threaded case, in general decreasing // with the number of internal threads doing the work. BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); @@ -836,7 +883,7 @@ that loop, every iteration, but without counting that time to the benchmark time. That is possible, although it is not recommended, since it has high overhead. -{% raw %} + ```c++ static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { std::set data; @@ -851,7 +898,7 @@ } BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); ``` -{% endraw %} + @@ -906,6 +953,10 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); ``` +Additionally the default time unit can be set globally with the +`--benchmark_time_unit={ns|us|ms|s}` command line argument. The argument only +affects benchmarks where the time unit is not set explicitly. + ## Preventing Optimization @@ -958,7 +1009,8 @@ for (auto _ : state) { std::vector v; v.reserve(1); - benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. + auto data = v.data(); // Allow v.data() to be clobbered. Pass as non-const + benchmark::DoNotOptimize(data); // lvalue to avoid undesired compiler optimizations v.push_back(42); benchmark::ClobberMemory(); // Force 42 to be written to memory. } @@ -1037,10 +1089,25 @@ BENCHMARK(BM_spin_empty) ->ComputeStatistics("ratio", [](const std::vector& v) -> double { return std::begin(v) / std::end(v); - }, benchmark::StatisticUnit::Percentage) + }, benchmark::StatisticUnit::kPercentage) ->Arg(512); ``` + + +## Memory Usage + +It's often useful to also track memory usage for benchmarks, alongside CPU +performance. For this reason, benchmark offers the `RegisterMemoryManager` +method that allows a custom `MemoryManager` to be injected. + +If set, the `MemoryManager::Start` and `MemoryManager::Stop` methods will be +called at the start and end of benchmark runs to allow user code to fill out +a report on the number of allocations, bytes used, etc. + +This data will then be reported alongside other performance data, currently +only when using JSON output. + ## Using RegisterBenchmark(name, fn, args...) @@ -1077,7 +1144,7 @@ When errors caused by external influences, such as file I/O and network communication, occur within a benchmark the -`State::SkipWithError(const char* msg)` function can be used to skip that run +`State::SkipWithError(const std::string& msg)` function can be used to skip that run of benchmark and report the error. Note that only future iterations of the `KeepRunning()` are skipped. For the ranged-for version of the benchmark loop Users must explicitly exit the loop, otherwise all iterations will be performed. @@ -1188,13 +1255,12 @@ If you see this error: ``` -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may +be noisy and will incur extra overhead. 
``` -you might want to disable the CPU frequency scaling while running the benchmark: +you might want to disable the CPU frequency scaling while running the +benchmark, as well as consider other ways to stabilize the performance of +your system while benchmarking. -```bash -sudo cpupower frequency-set --governor performance -./mybench -sudo cpupower frequency-set --governor powersave -``` +See [Reducing Variance](reducing_variance.md) for more information. diff --git a/third-party/benchmark/include/benchmark/benchmark.h b/third-party/benchmark/include/benchmark/benchmark.h --- a/third-party/benchmark/include/benchmark/benchmark.h +++ b/third-party/benchmark/include/benchmark/benchmark.h @@ -187,6 +187,8 @@ #include #include +#include "benchmark/export.h" + #if defined(BENCHMARK_HAS_CXX11) #include #include @@ -216,37 +218,45 @@ #define BENCHMARK_UNUSED #endif +// Used to annotate functions, methods and classes so they +// are not optimized by the compiler. Useful for tests +// where you expect loops to stay in place churning cycles +#if defined(__clang__) +#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone)) +#elif defined(__GNUC__) || defined(__GNUG__) +#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0))) +#else +// MSVC & Intel do not have a no-optimize attribute, only line pragmas +#define BENCHMARK_DONT_OPTIMIZE +#endif + #if defined(__GNUC__) || defined(__clang__) #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) #elif defined(_MSC_VER) && !defined(__clang__) #define BENCHMARK_ALWAYS_INLINE __forceinline -#if _MSC_VER >= 1900 -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#else -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) -#endif #define __func__ __FUNCTION__ #else #define BENCHMARK_ALWAYS_INLINE -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) #endif #define BENCHMARK_INTERNAL_TOSTRING2(x) #x #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) // clang-format off -#if defined(__GNUC__) || defined(__clang__) +#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__) #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) #define BENCHMARK_DISABLE_DEPRECATED_WARNING \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") #define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop") +#elif defined(__NVCOMPILER) +#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) +#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING \ + _Pragma("diagnostic push") \ + _Pragma("diag_suppress deprecated_entity_with_custom_message") +#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop") #else #define BENCHMARK_BUILTIN_EXPECT(x, y) x #define BENCHMARK_DEPRECATED_MSG(msg) @@ -280,18 +290,44 @@ #define BENCHMARK_OVERRIDE #endif +#if defined(_MSC_VER) +#pragma warning(push) +// C4251: needs to have dll-interface to be used by clients of class +#pragma warning(disable : 4251) +#endif + namespace benchmark { class BenchmarkReporter; -void Initialize(int* argc, char** argv); -void Shutdown(); +// Default number of minimum benchmark running time in seconds. 
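+// The string follows the `--benchmark_min_time` format described in
+// src/benchmark.cc later in this patch. Illustrative values (not exhaustive):
+// `--benchmark_min_time=2s` requests at least two seconds of running time,
+// while `--benchmark_min_time=100x` requests exactly 100 iterations.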
+const char kDefaultMinTimeStr[] = "0.5s"; + +BENCHMARK_EXPORT void PrintDefaultHelp(); + +BENCHMARK_EXPORT void Initialize(int* argc, char** argv, + void (*HelperPrinterf)() = PrintDefaultHelp); +BENCHMARK_EXPORT void Shutdown(); // Report to stdout all arguments in 'argv' as unrecognized except the first. // Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). -bool ReportUnrecognizedArguments(int argc, char** argv); +BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv); // Returns the current value of --benchmark_filter. -std::string GetBenchmarkFilter(); +BENCHMARK_EXPORT std::string GetBenchmarkFilter(); + +// Sets a new value to --benchmark_filter. (This will override this flag's +// current value). +// Should be called after `benchmark::Initialize()`, as +// `benchmark::Initialize()` will override the flag's value. +BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value); + +// Returns the current value of --v (command line value for verbosity). +BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity(); + +// Creates a default display reporter. Used by the library when no display +// reporter is provided, but also made available for external use in case a +// custom reporter should respect the `--benchmark_format` flag as a fallback +BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter(); // Generate a list of benchmarks matching the specified --benchmark_filter flag // and if --benchmark_list_tests is specified return after printing the name @@ -309,18 +345,29 @@ // 'file_reporter' is ignored. // // RETURNS: The number of matching benchmarks. -size_t RunSpecifiedBenchmarks(); -size_t RunSpecifiedBenchmarks(std::string spec); +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(); +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec); + +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - std::string spec); +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks( + BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter); +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, std::string spec); + +// TimeUnit is passed to a benchmark in order to specify the order of magnitude +// for the measured time. +enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter, - std::string spec); +BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit(); + +// Sets the default time unit the benchmarks use +// Has to be called before the benchmark loop to take effect +BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit); // If a MemoryManager is registered (via RegisterMemoryManager()), // it can be used to collect and report allocation metrics for a run of the @@ -358,20 +405,16 @@ virtual void Start() = 0; // Implement this to stop recording and fill out the given Result structure. - BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead") - virtual void Stop(Result* result) = 0; - - // FIXME(vyng): Make this pure virtual once we've migrated current users. 
- BENCHMARK_DISABLE_DEPRECATED_WARNING - virtual void Stop(Result& result) { Stop(&result); } - BENCHMARK_RESTORE_DEPRECATED_WARNING + virtual void Stop(Result& result) = 0; }; // Register a MemoryManager instance that will be used to collect and report // allocation measurements for benchmark runs. +BENCHMARK_EXPORT void RegisterMemoryManager(MemoryManager* memory_manager); // Add a key-value pair to output as part of the context stanza in the report. +BENCHMARK_EXPORT void AddCustomContext(const std::string& key, const std::string& value); namespace internal { @@ -379,14 +422,17 @@ class BenchmarkImp; class BenchmarkFamilies; +BENCHMARK_EXPORT std::map*& GetGlobalContext(); + +BENCHMARK_EXPORT void UseCharPointer(char const volatile*); // Take ownership of the pointer and register the benchmark. Return the // registered benchmark. -Benchmark* RegisterBenchmarkInternal(Benchmark*); +BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(Benchmark*); // Ensure that the standard streams are properly initialized in every TU. -int InitializeStreams(); +BENCHMARK_EXPORT int InitializeStreams(); BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); } // namespace internal @@ -409,7 +455,11 @@ // intended to add little to no overhead. // See: https://youtu.be/nXaxk27zwlk?t=2441 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY +#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER) template +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { asm volatile("" : : "r,m"(value) : "memory"); } @@ -423,6 +473,98 @@ #endif } +#ifdef BENCHMARK_HAS_CXX11 +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) { +#if defined(__clang__) + asm volatile("" : "+r,m"(value) : : "memory"); +#else + asm volatile("" : "+m,r"(value) : : "memory"); +#endif +} +#endif +#elif defined(BENCHMARK_HAS_CXX11) && (__GNUC__ >= 5) +// Workaround for a bug with full argument copy overhead with GCC. 
+// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519 +template +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value && + (sizeof(Tp) <= sizeof(Tp*))>::type + DoNotOptimize(Tp const& value) { + asm volatile("" : : "r,m"(value) : "memory"); +} + +template +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value || + (sizeof(Tp) > sizeof(Tp*))>::type + DoNotOptimize(Tp const& value) { + asm volatile("" : : "m"(value) : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value && + (sizeof(Tp) <= sizeof(Tp*))>::type + DoNotOptimize(Tp& value) { + asm volatile("" : "+m,r"(value) : : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value || + (sizeof(Tp) > sizeof(Tp*))>::type + DoNotOptimize(Tp& value) { + asm volatile("" : "+m"(value) : : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value && + (sizeof(Tp) <= sizeof(Tp*))>::type + DoNotOptimize(Tp&& value) { + asm volatile("" : "+m,r"(value) : : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value || + (sizeof(Tp) > sizeof(Tp*))>::type + DoNotOptimize(Tp&& value) { + asm volatile("" : "+m"(value) : : "memory"); +} + +#else +// Fallback for GCC < 5. Can add some overhead because the compiler is forced +// to use memory operations instead of operations with registers. +// TODO: Remove if GCC < 5 will be unsupported. +template +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + asm volatile("" : : "m"(value) : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { + asm volatile("" : "+m"(value) : : "memory"); +} + +#ifdef BENCHMARK_HAS_CXX11 +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) { + asm volatile("" : "+m"(value) : : "memory"); +} +#endif +#endif + #ifndef BENCHMARK_HAS_CXX11 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { asm volatile("" : : : "memory"); @@ -430,6 +572,9 @@ #endif #elif defined(_MSC_VER) template +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast(value)); _ReadWriteBarrier(); @@ -440,6 +585,9 @@ #endif #else template +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast(value)); } @@ -506,17 +654,13 @@ // This is the container for the user-defined counters. typedef std::map UserCounters; -// TimeUnit is passed to a benchmark in order to specify the order of magnitude -// for the measured time. -enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; - // BigO is passed to a benchmark in order to specify the asymptotic // computational // complexity for the benchmark. In case oAuto is selected, complexity will be // calculated automatically to the best fit. 
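+// Example (an illustrative sketch with a hypothetical benchmark name):
+//   BENCHMARK(BM_StringCompare)
+//       ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oNLogN);
+// fits the measured times against an N*log(N) curve, whereas passing
+// benchmark::oAuto lets the library choose the best-fitting curve itself.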
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; -typedef uint64_t IterationCount; +typedef int64_t IterationCount; enum StatisticUnit { kTime, kPercentage }; @@ -564,11 +708,21 @@ ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly }; +enum Skipped +#if defined(BENCHMARK_HAS_CXX11) + : unsigned +#endif +{ + NotSkipped = 0, + SkippedWithMessage, + SkippedWithError +}; + } // namespace internal // State is passed to a running Benchmark and contains state for the // benchmark to use. -class State { +class BENCHMARK_EXPORT State { public: struct StateIterator; friend struct StateIterator; @@ -600,8 +754,8 @@ // } bool KeepRunningBatch(IterationCount n); - // REQUIRES: timer is running and 'SkipWithError(...)' has not been called - // by the current thread. + // REQUIRES: timer is running and 'SkipWithMessage(...)' or + // 'SkipWithError(...)' has not been called by the current thread. // Stop the benchmark timer. If not called, the timer will be // automatically stopped after the last iteration of the benchmark loop. // @@ -616,8 +770,8 @@ // within each benchmark iteration, if possible. void PauseTiming(); - // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called - // by the current thread. + // REQUIRES: timer is not running and 'SkipWithMessage(...)' or + // 'SkipWithError(...)' has not been called by the current thread. // Start the benchmark timer. The timer is NOT running on entrance to the // benchmark function. It begins running after control flow enters the // benchmark loop. @@ -627,8 +781,30 @@ // within each benchmark iteration, if possible. void ResumeTiming(); - // REQUIRES: 'SkipWithError(...)' has not been called previously by the - // current thread. + // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been + // called previously by the current thread. + // Report the benchmark as resulting in being skipped with the specified + // 'msg'. + // After this call the user may explicitly 'return' from the benchmark. + // + // If the ranged-for style of benchmark loop is used, the user must explicitly + // break from the loop, otherwise all future iterations will be run. + // If the 'KeepRunning()' loop is used the current thread will automatically + // exit the loop at the end of the current iteration. + // + // For threaded benchmarks only the current thread stops executing and future + // calls to `KeepRunning()` will block until all threads have completed + // the `KeepRunning()` loop. If multiple threads report being skipped only the + // first skip message is used. + // + // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit + // the current scope immediately. If the function is called from within + // the 'KeepRunning()' loop the current iteration will finish. It is the users + // responsibility to exit the scope as needed. + void SkipWithMessage(const std::string& msg); + + // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been + // called previously by the current thread. // Report the benchmark as resulting in an error with the specified 'msg'. // After this call the user may explicitly 'return' from the benchmark. // @@ -646,10 +822,13 @@ // the current scope immediately. If the function is called from within // the 'KeepRunning()' loop the current iteration will finish. It is the users // responsibility to exit the scope as needed. 
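+  //
+  // Illustrative sketch only (hypothetical benchmark and file name, not taken
+  // from the library's documentation):
+  //
+  //   static void BM_ReadFile(benchmark::State& state) {
+  //     std::ifstream in("data.bin");
+  //     for (auto _ : state) {
+  //       if (!in.is_open()) {
+  //         state.SkipWithError("could not open data.bin");
+  //         break;  // REQUIRED: otherwise the remaining iterations still run.
+  //       }
+  //       // ... timed work on the open file ...
+  //     }
+  //   }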
- void SkipWithError(const char* msg); + void SkipWithError(const std::string& msg); + + // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called. + bool skipped() const { return internal::NotSkipped != skipped_; } // Returns true if an error has been reported with 'SkipWithError(...)'. - bool error_occurred() const { return error_occurred_; } + bool error_occurred() const { return internal::SkippedWithError == skipped_; } // REQUIRES: called exactly once per iteration of the benchmarking loop. // Set the manually measured time for this benchmark iteration, which @@ -720,11 +899,7 @@ // BM_Compress 50 50 14115038 compress:27.3% // // REQUIRES: a benchmark has exited its benchmarking loop. - void SetLabel(const char* label); - - void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) { - this->SetLabel(str.c_str()); - } + void SetLabel(const std::string& label); // Range arguments for this run. CHECKs if the argument has been set. BENCHMARK_ALWAYS_INLINE @@ -755,6 +930,9 @@ return max_iterations - total_iterations_ + batch_leftover_; } + BENCHMARK_ALWAYS_INLINE + std::string name() const { return name_; } + private: // items we expect on the first cache line (ie 64 bytes of the struct) // When total_iterations_ is 0, KeepRunning() and friends will return false. @@ -772,7 +950,7 @@ private: bool started_; bool finished_; - bool error_occurred_; + internal::Skipped skipped_; // items we don't need on the first cache line std::vector range_; @@ -784,9 +962,9 @@ UserCounters counters; private: - State(IterationCount max_iters, const std::vector& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager, + State(std::string name, IterationCount max_iters, + const std::vector& ranges, int thread_i, int n_threads, + internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement); void StartKeepRunning(); @@ -795,6 +973,7 @@ bool KeepRunningInternal(IterationCount n, bool is_batch); void FinishKeepRunning(); + const std::string name_; const int thread_index_; const int threads_; @@ -826,7 +1005,7 @@ } if (!started_) { StartKeepRunning(); - if (!error_occurred_ && total_iterations_ >= n) { + if (!skipped() && total_iterations_ >= n) { total_iterations_ -= n; return true; } @@ -856,7 +1035,7 @@ BENCHMARK_ALWAYS_INLINE explicit StateIterator(State* st) - : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {} + : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {} public: BENCHMARK_ALWAYS_INLINE @@ -899,7 +1078,7 @@ // be called on this object to change the properties of the benchmark. // Each method returns "this" so that multiple method calls can // chained into one expression. -class Benchmark { +class BENCHMARK_EXPORT Benchmark { public: virtual ~Benchmark(); @@ -1000,12 +1179,19 @@ // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. Benchmark* MinTime(double t); + // Set the minimum amount of time to run the benchmark before taking runtimes + // of this benchmark into account. This + // option overrides the `benchmark_min_warmup_time` flag. + // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark. + Benchmark* MinWarmUpTime(double t); + // Specify the amount of iterations that should be run by this benchmark. + // This option overrides the `benchmark_min_time` flag. // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark. 
// // NOTE: This function should only be used when *exact* iteration control is // needed and never to control or limit how long a benchmark runs, where - // `--benchmark_min_time=N` or `MinTime(...)` should be used instead. + // `--benchmark_min_time=s` or `MinTime(...)` should be used instead. Benchmark* Iterations(IterationCount n); // Specify the amount of times to repeat this benchmark. This option overrides @@ -1025,7 +1211,7 @@ // By default, the CPU time is measured only for the main thread, which may // be unrepresentative if the benchmark uses threads internally. If called, // the total CPU time spent by all the threads will be measured instead. - // By default, the only the main thread CPU time will be measured. + // By default, only the main thread CPU time will be measured. Benchmark* MeasureProcessCPUTime(); // If a particular benchmark should use the Wall clock instead of the CPU time @@ -1090,12 +1276,16 @@ virtual void Run(State& state) = 0; + TimeUnit GetTimeUnit() const; + protected: - explicit Benchmark(const char* name); - Benchmark(Benchmark const&); - void SetName(const char* name); + explicit Benchmark(const std::string& name); + void SetName(const std::string& name); + public: + const char* GetName() const; int ArgsCnt() const; + const char* GetArgName(int arg) const; private: friend class BenchmarkFamilies; @@ -1105,9 +1295,13 @@ AggregationReportMode aggregation_report_mode_; std::vector arg_names_; // Args for all benchmark runs std::vector > args_; // Args for all benchmark runs + TimeUnit time_unit_; + bool use_default_time_unit_; + int range_multiplier_; double min_time_; + double min_warmup_time_; IterationCount iterations_; int repetitions_; bool measure_process_cpu_time_; @@ -1122,7 +1316,17 @@ callback_function setup_; callback_function teardown_; - Benchmark& operator=(Benchmark const&); + Benchmark(Benchmark const&) +#if defined(BENCHMARK_HAS_CXX11) + = delete +#endif + ; + + Benchmark& operator=(Benchmark const&) +#if defined(BENCHMARK_HAS_CXX11) + = delete +#endif + ; }; } // namespace internal @@ -1131,27 +1335,27 @@ // the specified functor 'fn'. // // RETURNS: A pointer to the registered benchmark. -internal::Benchmark* RegisterBenchmark(const char* name, +internal::Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn); #if defined(BENCHMARK_HAS_CXX11) template -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn); +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn); #endif // Remove all registered benchmarks. All pointers to previously registered // benchmarks are invalidated. -void ClearRegisteredBenchmarks(); +BENCHMARK_EXPORT void ClearRegisteredBenchmarks(); namespace internal { // The class used to hold all Benchmarks created from static function. // (ie those created using the BENCHMARK(...) macros. 
-class FunctionBenchmark : public Benchmark { +class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark { public: - FunctionBenchmark(const char* name, Function* func) + FunctionBenchmark(const std::string& name, Function* func) : Benchmark(name), func_(func) {} - virtual void Run(State& st) BENCHMARK_OVERRIDE; + void Run(State& st) BENCHMARK_OVERRIDE; private: Function* func_; @@ -1161,35 +1365,38 @@ template class LambdaBenchmark : public Benchmark { public: - virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } + void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } private: template - LambdaBenchmark(const char* name, OLambda&& lam) + LambdaBenchmark(const std::string& name, OLambda&& lam) : Benchmark(name), lambda_(std::forward(lam)) {} LambdaBenchmark(LambdaBenchmark const&) = delete; template // NOLINTNEXTLINE(readability-redundant-declaration) - friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&); + friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&); Lambda lambda_; }; #endif - } // namespace internal -inline internal::Benchmark* RegisterBenchmark(const char* name, +inline internal::Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn) { + // FIXME: this should be a `std::make_unique<>()` but we don't have C++14. + // codechecker_intentional [cplusplus.NewDeleteLeaks] return internal::RegisterBenchmarkInternal( ::new internal::FunctionBenchmark(name, fn)); } #ifdef BENCHMARK_HAS_CXX11 template -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) { using BenchType = internal::LambdaBenchmark::type>; + // FIXME: this should be a `std::make_unique<>()` but we don't have C++14. + // codechecker_intentional [cplusplus.NewDeleteLeaks] return internal::RegisterBenchmarkInternal( ::new BenchType(name, std::forward(fn))); } @@ -1198,7 +1405,7 @@ #if defined(BENCHMARK_HAS_CXX11) && \ (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409) template -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn, +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn, Args&&... 
args) { return benchmark::RegisterBenchmark( name, [=](benchmark::State& st) { fn(st, args...); }); @@ -1212,7 +1419,7 @@ public: Fixture() : internal::Benchmark("") {} - virtual void Run(State& st) BENCHMARK_OVERRIDE { + void Run(State& st) BENCHMARK_OVERRIDE { this->SetUp(st); this->BenchmarkCase(st); this->TearDown(st); @@ -1228,7 +1435,6 @@ protected: virtual void BenchmarkCase(State&) = 0; }; - } // namespace benchmark // ------------------------------------------------------ @@ -1268,7 +1474,7 @@ BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \ - &__VA_ARGS__))) + __VA_ARGS__))) #else #define BENCHMARK(n) \ BENCHMARK_PRIVATE_DECLARE(n) = \ @@ -1335,37 +1541,37 @@ #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) #endif -#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() { \ - this->SetName(#BaseClass "/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ +#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "/" #Method); \ + } \ + \ + protected: \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; -#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() { \ - this->SetName(#BaseClass "<" #a ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ +#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "<" #a ">/" #Method); \ + } \ + \ + protected: \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; -#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() { \ - this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ +#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ + } \ + \ + protected: \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #ifdef BENCHMARK_HAS_CXX11 @@ -1377,7 +1583,7 @@ } \ \ protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #else #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \ @@ -1439,8 +1645,15 @@ #endif // Helper macro to create a main routine in a test that runs the benchmarks +// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL. 
#define BENCHMARK_MAIN() \ int main(int argc, char** argv) { \ + char arg0_default[] = "benchmark"; \ + char* args_default = arg0_default; \ + if (!argv) { \ + argc = 1; \ + argv = &args_default; \ + } \ ::benchmark::Initialize(&argc, argv); \ if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ ::benchmark::RunSpecifiedBenchmarks(); \ @@ -1454,7 +1667,7 @@ namespace benchmark { -struct CPUInfo { +struct BENCHMARK_EXPORT CPUInfo { struct CacheInfo { std::string type; int level; @@ -1478,7 +1691,7 @@ }; // Adding Struct for System Information -struct SystemInfo { +struct BENCHMARK_EXPORT SystemInfo { std::string name; static const SystemInfo& Get(); @@ -1490,10 +1703,11 @@ // BenchmarkName contains the components of the Benchmark's name // which allows individual fields to be modified or cleared before // building the final name using 'str()'. -struct BenchmarkName { +struct BENCHMARK_EXPORT BenchmarkName { std::string function_name; std::string args; std::string min_time; + std::string min_warmup_time; std::string iterations; std::string repetitions; std::string time_type; @@ -1509,7 +1723,7 @@ // can control the destination of the reports by calling // RunSpecifiedBenchmarks and passing it a custom reporter object. // The reporter object must implement the following interface. -class BenchmarkReporter { +class BENCHMARK_EXPORT BenchmarkReporter { public: struct Context { CPUInfo const& cpu_info; @@ -1520,17 +1734,17 @@ Context(); }; - struct Run { + struct BENCHMARK_EXPORT Run { static const int64_t no_repetition_index = -1; enum RunType { RT_Iteration, RT_Aggregate }; Run() : run_type(RT_Iteration), aggregate_unit(kTime), - error_occurred(false), + skipped(internal::NotSkipped), iterations(1), threads(1), - time_unit(kNanosecond), + time_unit(GetDefaultTimeUnit()), real_accumulated_time(0), cpu_accumulated_time(0), max_heapbytes_used(0), @@ -1550,8 +1764,8 @@ std::string aggregate_name; StatisticUnit aggregate_unit; std::string report_label; // Empty if not set by benchmark. - bool error_occurred; - std::string error_message; + internal::Skipped skipped; + std::string skip_message; IterationCount iterations; int64_t threads; @@ -1620,6 +1834,12 @@ // to skip runs based on the context information. virtual bool ReportContext(const Context& context) = 0; + // Called once for each group of benchmark runs, gives information about + // the configurations of the runs. + virtual void ReportRunsConfig(double /*min_time*/, + bool /*has_explicit_iters*/, + IterationCount /*iters*/) {} + // Called once for each group of benchmark runs, gives information about // cpu-time and heap memory usage during the benchmark run. If the group // of runs contained more than two entries then 'report' contains additional @@ -1665,7 +1885,7 @@ // Simple reporter that outputs benchmark data to the console. This is the // default reporter used by RunSpecifiedBenchmarks(). 
-class ConsoleReporter : public BenchmarkReporter { +class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter { public: enum OutputOptions { OO_None = 0, @@ -1677,8 +1897,8 @@ explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) : output_options_(opts_), name_field_width_(0), printed_header_(false) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; protected: virtual void PrintRunData(const Run& report); @@ -1690,12 +1910,12 @@ bool printed_header_; }; -class JSONReporter : public BenchmarkReporter { +class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter { public: JSONReporter() : first_report_(true) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; - virtual void Finalize() BENCHMARK_OVERRIDE; + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; + void Finalize() BENCHMARK_OVERRIDE; private: void PrintRunData(const Run& report); @@ -1703,13 +1923,13 @@ bool first_report_; }; -class BENCHMARK_DEPRECATED_MSG( +class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG( "The CSV Reporter will be removed in a future release") CSVReporter : public BenchmarkReporter { public: CSVReporter() : printed_header_(false) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; private: void PrintRunData(const Run& report); @@ -1748,18 +1968,24 @@ // Creates a list of integer values for the given range and multiplier. // This can be used together with ArgsProduct() to allow multiple ranges -// with different multiplers. +// with different multipliers. // Example: // ArgsProduct({ // CreateRange(0, 1024, /*multi=*/32), // CreateRange(0, 100, /*multi=*/4), // CreateDenseRange(0, 4, /*step=*/1), // }); +BENCHMARK_EXPORT std::vector CreateRange(int64_t lo, int64_t hi, int multi); // Creates a list of integer values for the given range and step. 
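+// Example (illustrative): CreateDenseRange(0, 4, /*step=*/1) produces the
+// argument values {0, 1, 2, 3, 4}.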
+BENCHMARK_EXPORT std::vector CreateDenseRange(int64_t start, int64_t limit, int step); } // namespace benchmark +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + #endif // BENCHMARK_BENCHMARK_H_ diff --git a/third-party/benchmark/include/benchmark/export.h b/third-party/benchmark/include/benchmark/export.h new file mode 100644 --- /dev/null +++ b/third-party/benchmark/include/benchmark/export.h @@ -0,0 +1,47 @@ +#ifndef BENCHMARK_EXPORT_H +#define BENCHMARK_EXPORT_H + +#if defined(_WIN32) +#define EXPORT_ATTR __declspec(dllexport) +#define IMPORT_ATTR __declspec(dllimport) +#define NO_EXPORT_ATTR +#define DEPRECATED_ATTR __declspec(deprecated) +#else // _WIN32 +#define EXPORT_ATTR __attribute__((visibility("default"))) +#define IMPORT_ATTR __attribute__((visibility("default"))) +#define NO_EXPORT_ATTR __attribute__((visibility("hidden"))) +#define DEPRECATE_ATTR __attribute__((__deprecated__)) +#endif // _WIN32 + +#ifdef BENCHMARK_STATIC_DEFINE +#define BENCHMARK_EXPORT +#define BENCHMARK_NO_EXPORT +#else // BENCHMARK_STATIC_DEFINE +#ifndef BENCHMARK_EXPORT +#ifdef benchmark_EXPORTS +/* We are building this library */ +#define BENCHMARK_EXPORT EXPORT_ATTR +#else // benchmark_EXPORTS +/* We are using this library */ +#define BENCHMARK_EXPORT IMPORT_ATTR +#endif // benchmark_EXPORTS +#endif // !BENCHMARK_EXPORT + +#ifndef BENCHMARK_NO_EXPORT +#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR +#endif // !BENCHMARK_NO_EXPORT +#endif // BENCHMARK_STATIC_DEFINE + +#ifndef BENCHMARK_DEPRECATED +#define BENCHMARK_DEPRECATED DEPRECATE_ATTR +#endif // BENCHMARK_DEPRECATED + +#ifndef BENCHMARK_DEPRECATED_EXPORT +#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED +#endif // BENCHMARK_DEPRECATED_EXPORT + +#ifndef BENCHMARK_DEPRECATED_NO_EXPORT +#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED +#endif // BENCHMARK_DEPRECATED_EXPORT + +#endif /* BENCHMARK_EXPORT_H */ diff --git a/third-party/benchmark/pyproject.toml b/third-party/benchmark/pyproject.toml new file mode 100644 --- /dev/null +++ b/third-party/benchmark/pyproject.toml @@ -0,0 +1,50 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "google_benchmark" +description = "A library to benchmark code snippets." 
+requires-python = ">=3.8" +license = {file = "LICENSE"} +keywords = ["benchmark"] + +authors = [ + {name = "Google", email = "benchmark-discuss@googlegroups.com"}, +] + +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Software Development :: Testing", + "Topic :: System :: Benchmark", +] + +dynamic = ["readme", "version"] + +dependencies = [ + "absl-py>=0.7.1", +] + +[project.urls] +Homepage = "https://github.com/google/benchmark" +Documentation = "https://github.com/google/benchmark/tree/main/docs" +Repository = "https://github.com/google/benchmark.git" +Discord = "https://discord.gg/cz7UX7wKC2" + +[tool.setuptools] +package-dir = {"" = "bindings/python"} +zip-safe = false + +[tool.setuptools.packages.find] +where = ["bindings/python"] + +[tool.setuptools.dynamic] +version = { attr = "google_benchmark.__version__" } +readme = { file = "README.md", content-type = "text/markdown" } diff --git a/third-party/benchmark/requirements.txt b/third-party/benchmark/requirements.txt deleted file mode 100644 --- a/third-party/benchmark/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy == 1.19.4 -scipy == 1.5.4 -pandas == 1.1.5 diff --git a/third-party/benchmark/setup.py b/third-party/benchmark/setup.py --- a/third-party/benchmark/setup.py +++ b/third-party/benchmark/setup.py @@ -1,56 +1,50 @@ +import contextlib import os -import posixpath import platform -import re import shutil -import sys +import sysconfig +from pathlib import Path -from distutils import sysconfig import setuptools from setuptools.command import build_ext -HERE = os.path.dirname(os.path.abspath(__file__)) +PYTHON_INCLUDE_PATH_PLACEHOLDER = "" +IS_WINDOWS = platform.system() == "Windows" +IS_MAC = platform.system() == "Darwin" -IS_WINDOWS = sys.platform.startswith("win") - -def _get_version(): - """Parse the version string from __init__.py.""" - with open( - os.path.join(HERE, "bindings", "python", "google_benchmark", "__init__.py") - ) as init_file: +@contextlib.contextmanager +def temp_fill_include_path(fp: str): + """Temporarily set the Python include path in a file.""" + with open(fp, "r+") as f: try: - version_line = next( - line for line in init_file if line.startswith("__version__") + content = f.read() + replaced = content.replace( + PYTHON_INCLUDE_PATH_PLACEHOLDER, + Path(sysconfig.get_paths()['include']).as_posix(), ) - except StopIteration: - raise ValueError("__version__ not defined in __init__.py") - else: - namespace = {} - exec(version_line, namespace) # pylint: disable=exec-used - return namespace["__version__"] - - -def _parse_requirements(path): - with open(os.path.join(HERE, path)) as requirements: - return [ - line.rstrip() - for line in requirements - if not (line.isspace() or line.startswith("#")) - ] + f.seek(0) + f.write(replaced) + f.truncate() + yield + finally: + # revert to the original content after exit + f.seek(0) + f.write(content) + f.truncate() class BazelExtension(setuptools.Extension): """A C/C++ extension that is defined as a Bazel BUILD target.""" - def __init__(self, name, bazel_target): + def __init__(self, name: str, bazel_target: str): + super().__init__(name=name, sources=[]) + self.bazel_target = bazel_target - self.relpath, self.target_name = 
posixpath.relpath(bazel_target, "//").split( - ":" - ) - setuptools.Extension.__init__(self, name, sources=[]) + stripped_target = bazel_target.split("//")[-1] + self.relpath, self.target_name = stripped_target.split(":") class BuildBazelExtension(build_ext.build_ext): @@ -61,86 +55,59 @@ self.bazel_build(ext) build_ext.build_ext.run(self) - def bazel_build(self, ext): + def bazel_build(self, ext: BazelExtension): """Runs the bazel build to create the package.""" - with open("WORKSPACE", "r") as workspace: - workspace_contents = workspace.read() - - with open("WORKSPACE", "w") as workspace: - workspace.write( - re.sub( - r'(?<=path = ").*(?=", # May be overwritten by setup\.py\.)', - sysconfig.get_python_inc().replace(os.path.sep, posixpath.sep), - workspace_contents, - ) - ) - - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) - - bazel_argv = [ - "bazel", - "build", - ext.bazel_target, - "--symlink_prefix=" + os.path.join(self.build_temp, "bazel-"), - "--compilation_mode=" + ("dbg" if self.debug else "opt"), - ] - - if IS_WINDOWS: - # Link with python*.lib. - for library_dir in self.library_dirs: - bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) - elif sys.platform == "darwin" and platform.machine() == "x86_64": - bazel_argv.append("--macos_minimum_os=10.9") - - self.spawn(bazel_argv) - - shared_lib_suffix = ".dll" if IS_WINDOWS else ".so" - ext_bazel_bin_path = os.path.join( - self.build_temp, - "bazel-bin", - ext.relpath, - ext.target_name + shared_lib_suffix, - ) - - ext_dest_path = self.get_ext_fullpath(ext.name) - ext_dest_dir = os.path.dirname(ext_dest_path) - if not os.path.exists(ext_dest_dir): - os.makedirs(ext_dest_dir) - shutil.copyfile(ext_bazel_bin_path, ext_dest_path) + with temp_fill_include_path("WORKSPACE"): + temp_path = Path(self.build_temp) + + bazel_argv = [ + "bazel", + "build", + ext.bazel_target, + f"--symlink_prefix={temp_path / 'bazel-'}", + f"--compilation_mode={'dbg' if self.debug else 'opt'}", + # C++17 is required by nanobind + f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}", + ] + + if IS_WINDOWS: + # Link with python*.lib. + for library_dir in self.library_dirs: + bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) + elif IS_MAC: + if platform.machine() == "x86_64": + # C++17 needs macOS 10.14 at minimum + bazel_argv.append("--macos_minimum_os=10.14") + + # cross-compilation for Mac ARM64 on GitHub Mac x86 runners. + # ARCHFLAGS is set by cibuildwheel before macOS wheel builds. + archflags = os.getenv("ARCHFLAGS", "") + if "arm64" in archflags: + bazel_argv.append("--cpu=darwin_arm64") + bazel_argv.append("--macos_cpus=arm64") + + elif platform.machine() == "arm64": + bazel_argv.append("--macos_minimum_os=11.0") + + self.spawn(bazel_argv) + + shared_lib_suffix = '.dll' if IS_WINDOWS else '.so' + ext_name = ext.target_name + shared_lib_suffix + ext_bazel_bin_path = temp_path / 'bazel-bin' / ext.relpath / ext_name + + ext_dest_path = Path(self.get_ext_fullpath(ext.name)) + shutil.copyfile(ext_bazel_bin_path, ext_dest_path) + + # explicitly call `bazel shutdown` for graceful exit + self.spawn(["bazel", "shutdown"]) setuptools.setup( - name="google_benchmark", - version=_get_version(), - url="https://github.com/google/benchmark", - description="A library to benchmark code snippets.", - author="Google", - author_email="benchmark-py@google.com", - # Contained modules and scripts. 
- package_dir={"": "bindings/python"}, - packages=setuptools.find_packages("bindings/python"), - install_requires=_parse_requirements("bindings/python/requirements.txt"), cmdclass=dict(build_ext=BuildBazelExtension), ext_modules=[ BazelExtension( - "google_benchmark._benchmark", - "//bindings/python/google_benchmark:_benchmark", + name="google_benchmark._benchmark", + bazel_target="//bindings/python/google_benchmark:_benchmark", ) ], - zip_safe=False, - # PyPI package information. - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Topic :: Software Development :: Testing", - "Topic :: System :: Benchmark", - ], - license="Apache 2.0", - keywords="benchmark", ) diff --git a/third-party/benchmark/src/CMakeLists.txt b/third-party/benchmark/src/CMakeLists.txt --- a/third-party/benchmark/src/CMakeLists.txt +++ b/third-party/benchmark/src/CMakeLists.txt @@ -25,12 +25,18 @@ SOVERSION ${GENERIC_LIB_SOVERSION} ) target_include_directories(benchmark PUBLIC - $) + $ +) # libpfm, if available -if (HAVE_LIBPFM) - target_link_libraries(benchmark PRIVATE pfm) - add_definitions(-DHAVE_LIBPFM) +if (PFM_FOUND) + target_link_libraries(benchmark PRIVATE PFM::libpfm) + target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM) +endif() + +# pthread affinity, if available +if(HAVE_PTHREAD_AFFINITY) + target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY) endif() # Link threads. @@ -53,6 +59,10 @@ target_link_libraries(benchmark PRIVATE kstat) endif() +if (NOT BUILD_SHARED_LIBS) + target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE) +endif() + # Benchmark main library add_library(benchmark_main "benchmark_main.cc") add_library(benchmark::benchmark_main ALIAS benchmark_main) @@ -60,10 +70,10 @@ OUTPUT_NAME "benchmark_main" VERSION ${GENERIC_LIB_VERSION} SOVERSION ${GENERIC_LIB_SOVERSION} + DEFINE_SYMBOL benchmark_EXPORTS ) target_link_libraries(benchmark_main PUBLIC benchmark::benchmark) - set(generated_dir "${PROJECT_BINARY_DIR}") set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") @@ -107,6 +117,7 @@ install( DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" + "${PROJECT_BINARY_DIR}/include/benchmark" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.*h") diff --git a/third-party/benchmark/src/benchmark.cc b/third-party/benchmark/src/benchmark.cc --- a/third-party/benchmark/src/benchmark.cc +++ b/third-party/benchmark/src/benchmark.cc @@ -19,7 +19,7 @@ #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -65,12 +65,28 @@ // linked into the binary are run. BM_DEFINE_string(benchmark_filter, ""); -// Minimum number of seconds we should run benchmark before results are -// considered significant. For cpu-time based tests, this is the lower bound +// Specification of how long to run the benchmark. +// +// It can be either an exact number of iterations (specified as `x`), +// or a minimum number of seconds (specified as `s`). If the latter +// format (ie., min seconds) is used, the system may run the benchmark longer +// until the results are considered significant. 
+// +// For backward compatibility, the `s` suffix may be omitted, in which case, +// the specified number is interpreted as the number of seconds. +// +// For cpu-time based tests, this is the lower bound // on the total cpu time used by all threads that make up the test. For // real-time based tests, this is the lower bound on the elapsed time of the // benchmark execution, regardless of number of threads. -BM_DEFINE_double(benchmark_min_time, 0.5); +BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); + +// Minimum number of seconds a benchmark should be run before results should be +// taken into account. This e.g can be necessary for benchmarks of code which +// needs to fill some form of cache before performance is of interest. +// Note: results gathered within this period are discarded and not used for +// reported result. +BM_DEFINE_double(benchmark_min_warmup_time, 0.0); // The number of runs of each benchmark. If greater than 1, the mean and // standard deviation of the runs will be reported. @@ -121,6 +137,10 @@ // pairs. Kept internal as it's only used for parsing from env/command line. BM_DEFINE_kvpairs(benchmark_context, {}); +// Set the default time unit to use for reports +// Valid values are 'ns', 'us', 'ms' or 's' +BM_DEFINE_string(benchmark_time_unit, ""); + // The level of verbose logging to output BM_DEFINE_int32(v, 0); @@ -128,23 +148,28 @@ std::map* global_context = nullptr; +BENCHMARK_EXPORT std::map*& GetGlobalContext() { + return global_context; +} + // FIXME: wouldn't LTO mess this up? void UseCharPointer(char const volatile*) {} } // namespace internal -State::State(IterationCount max_iters, const std::vector& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager, +State::State(std::string name, IterationCount max_iters, + const std::vector& ranges, int thread_i, int n_threads, + internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement) : total_iterations_(0), batch_leftover_(0), max_iterations(max_iters), started_(false), finished_(false), - error_occurred_(false), + skipped_(internal::NotSkipped), range_(ranges), complexity_n_(0), + name_(std::move(name)), thread_index_(thread_i), threads_(n_threads), timer_(timer), @@ -166,50 +191,78 @@ #elif defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Winvalid-offsetof" +#endif +#if defined(__NVCC__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress 1427 +#endif +#if defined(__NVCOMPILER) +#pragma diagnostic push +#pragma diag_suppress offset_in_non_POD_nonstandard #endif // Offset tests to ensure commonly accessed data is on the first cache line. 
const int cache_line_size = 64; - static_assert(offsetof(State, error_occurred_) <= - (cache_line_size - sizeof(error_occurred_)), - ""); + static_assert( + offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), ""); #if defined(__INTEL_COMPILER) #pragma warning pop #elif defined(__GNUC__) #pragma GCC diagnostic pop #endif +#if defined(__NVCC__) +#pragma nv_diagnostic pop +#endif +#if defined(__NVCOMPILER) +#pragma diagnostic pop +#endif } void State::PauseTiming() { // Add in time accumulated so far - BM_CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !skipped()); timer_->StopTimer(); if (perf_counters_measurement_) { - auto measurements = perf_counters_measurement_->StopAndGetMeasurements(); + std::vector> measurements; + if (!perf_counters_measurement_->Stop(measurements)) { + BM_CHECK(false) << "Perf counters read the value failed."; + } for (const auto& name_and_measurement : measurements) { auto name = name_and_measurement.first; auto measurement = name_and_measurement.second; - BM_CHECK_EQ(counters[name], 0.0); + BM_CHECK_EQ(std::fpclassify(double{counters[name]}), FP_ZERO); counters[name] = Counter(measurement, Counter::kAvgIterations); } } } void State::ResumeTiming() { - BM_CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !skipped()); timer_->StartTimer(); if (perf_counters_measurement_) { perf_counters_measurement_->Start(); } } -void State::SkipWithError(const char* msg) { - BM_CHECK(msg); - error_occurred_ = true; +void State::SkipWithMessage(const std::string& msg) { + skipped_ = internal::SkippedWithMessage; + { + MutexLock l(manager_->GetBenchmarkMutex()); + if (internal::NotSkipped == manager_->results.skipped_) { + manager_->results.skip_message_ = msg; + manager_->results.skipped_ = skipped_; + } + } + total_iterations_ = 0; + if (timer_->running()) timer_->StopTimer(); +} + +void State::SkipWithError(const std::string& msg) { + skipped_ = internal::SkippedWithError; { MutexLock l(manager_->GetBenchmarkMutex()); - if (manager_->results.has_error_ == false) { - manager_->results.error_message_ = msg; - manager_->results.has_error_ = true; + if (internal::NotSkipped == manager_->results.skipped_) { + manager_->results.skip_message_ = msg; + manager_->results.skipped_ = skipped_; } } total_iterations_ = 0; @@ -220,7 +273,7 @@ timer_->SetIterationTime(seconds); } -void State::SetLabel(const char* label) { +void State::SetLabel(const std::string& label) { MutexLock l(manager_->GetBenchmarkMutex()); manager_->results.report_label_ = label; } @@ -228,14 +281,14 @@ void State::StartKeepRunning() { BM_CHECK(!started_ && !finished_); started_ = true; - total_iterations_ = error_occurred_ ? 0 : max_iterations; + total_iterations_ = skipped() ? 0 : max_iterations; manager_->StartStopBarrier(); - if (!error_occurred_) ResumeTiming(); + if (!skipped()) ResumeTiming(); } void State::FinishKeepRunning() { - BM_CHECK(started_ && (!finished_ || error_occurred_)); - if (!error_occurred_) { + BM_CHECK(started_ && (!finished_ || skipped())); + if (!skipped()) { PauseTiming(); } // Total iterations has now wrapped around past 0. Fix this. @@ -313,14 +366,26 @@ size_t num_repetitions_total = 0; + // This perfcounters object needs to be created before the runners vector + // below so it outlasts their lifetime. 
+ PerfCountersMeasurement perfcounters( + StrSplit(FLAGS_benchmark_perf_counters, ',')); + + // Vector of benchmarks to run std::vector runners; runners.reserve(benchmarks.size()); + + // Count the number of benchmarks with threads to warn the user in case + // performance counters are used. + int benchmarks_with_threads = 0; + + // Loop through all benchmarks for (const BenchmarkInstance& benchmark : benchmarks) { BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr; if (benchmark.complexity() != oNone) reports_for_family = &per_family_reports[benchmark.family_index()]; - - runners.emplace_back(benchmark, reports_for_family); + benchmarks_with_threads += (benchmark.threads() > 0); + runners.emplace_back(benchmark, &perfcounters, reports_for_family); int num_repeats_of_this_instance = runners.back().GetNumRepeats(); num_repetitions_total += num_repeats_of_this_instance; if (reports_for_family) @@ -328,6 +393,17 @@ } assert(runners.size() == benchmarks.size() && "Unexpected runner count."); + // The use of performance counters with threads would be unintuitive for + // the average user so we need to warn them about this case + if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) { + GetErrorLogInstance() + << "***WARNING*** There are " << benchmarks_with_threads + << " benchmarks with threads and " << perfcounters.num_counters() + << " performance counters were requested. Beware counters will " + "reflect the combined usage across all " + "threads.\n"; + } + std::vector repetition_indices; repetition_indices.reserve(num_repetitions_total); for (size_t runner_index = 0, num_runners = runners.size(); @@ -351,6 +427,12 @@ if (runner.HasRepeatsRemaining()) continue; // FIXME: report each repetition separately, not all of them in bulk. 
+ display_reporter->ReportRunsConfig( + runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters()); + if (file_reporter) + file_reporter->ReportRunsConfig( + runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters()); + RunResults run_results = runner.GetResults(); // Maybe calculate complexity report @@ -384,14 +466,15 @@ typedef std::unique_ptr PtrType; if (name == "console") { return PtrType(new ConsoleReporter(output_opts)); - } else if (name == "json") { - return PtrType(new JSONReporter); - } else if (name == "csv") { - return PtrType(new CSVReporter); - } else { - std::cerr << "Unexpected format: '" << name << "'\n"; - std::exit(1); } + if (name == "json") { + return PtrType(new JSONReporter()); + } + if (name == "csv") { + return PtrType(new CSVReporter()); + } + std::cerr << "Unexpected format: '" << name << "'\n"; + std::exit(1); } BENCHMARK_RESTORE_DEPRECATED_WARNING @@ -428,6 +511,14 @@ } // end namespace internal +BenchmarkReporter* CreateDefaultDisplayReporter() { + static auto default_display_reporter = + internal::CreateReporter(FLAGS_benchmark_format, + internal::GetOutputOptions()) + .release(); + return default_display_reporter; +} + size_t RunSpecifiedBenchmarks() { return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter); } @@ -463,8 +554,7 @@ std::unique_ptr default_display_reporter; std::unique_ptr default_file_reporter; if (!display_reporter) { - default_display_reporter = internal::CreateReporter( - FLAGS_benchmark_format, internal::GetOutputOptions()); + default_display_reporter.reset(CreateDefaultDisplayReporter()); display_reporter = default_display_reporter.get(); } auto& Out = display_reporter->GetOutputStream(); @@ -510,8 +600,23 @@ return benchmarks.size(); } +namespace { +// stores the time unit benchmarks use by default +TimeUnit default_time_unit = kNanosecond; +} // namespace + +TimeUnit GetDefaultTimeUnit() { return default_time_unit; } + +void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; } + std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; } +void SetBenchmarkFilter(std::string value) { + FLAGS_benchmark_filter = std::move(value); +} + +int32_t GetBenchmarkVerbosity() { return FLAGS_v; } + void RegisterMemoryManager(MemoryManager* manager) { internal::memory_manager = manager; } @@ -528,27 +633,31 @@ namespace internal { +void (*HelperPrintf)(); + void PrintUsageAndExit() { - fprintf(stdout, - "benchmark" - " [--benchmark_list_tests={true|false}]\n" - " [--benchmark_filter=]\n" - " [--benchmark_min_time=]\n" - " [--benchmark_repetitions=]\n" - " [--benchmark_enable_random_interleaving={true|false}]\n" - " [--benchmark_report_aggregates_only={true|false}]\n" - " [--benchmark_display_aggregates_only={true|false}]\n" - " [--benchmark_format=]\n" - " [--benchmark_out=]\n" - " [--benchmark_out_format=]\n" - " [--benchmark_color={auto|true|false}]\n" - " [--benchmark_counters_tabular={true|false}]\n" - " [--benchmark_perf_counters=,...]\n" - " [--benchmark_context==,...]\n" - " [--v=]\n"); + HelperPrintf(); exit(0); } +void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) { + if (time_unit_flag == "s") { + return SetDefaultTimeUnit(kSecond); + } + if (time_unit_flag == "ms") { + return SetDefaultTimeUnit(kMillisecond); + } + if (time_unit_flag == "us") { + return SetDefaultTimeUnit(kMicrosecond); + } + if (time_unit_flag == "ns") { + return SetDefaultTimeUnit(kNanosecond); + } + if (!time_unit_flag.empty()) { + PrintUsageAndExit(); + } +} + void ParseCommandLineFlags(int* argc, 
char** argv) { using namespace benchmark; BenchmarkReporter::Context::executable_name = @@ -557,8 +666,10 @@ if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - ParseDoubleFlag(argv[i], "benchmark_min_time", + ParseStringFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || + ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", + &FLAGS_benchmark_min_warmup_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", @@ -578,6 +689,8 @@ &FLAGS_benchmark_perf_counters) || ParseKeyValueFlag(argv[i], "benchmark_context", &FLAGS_benchmark_context) || + ParseStringFlag(argv[i], "benchmark_time_unit", + &FLAGS_benchmark_time_unit) || ParseInt32Flag(argv[i], "v", &FLAGS_v)) { for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; @@ -593,6 +706,7 @@ PrintUsageAndExit(); } } + SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit); if (FLAGS_benchmark_color.empty()) { PrintUsageAndExit(); } @@ -608,7 +722,32 @@ } // end namespace internal -void Initialize(int* argc, char** argv) { +void PrintDefaultHelp() { + fprintf(stdout, + "benchmark" + " [--benchmark_list_tests={true|false}]\n" + " [--benchmark_filter=]\n" + " [--benchmark_min_time=`x` OR `s` ]\n" + " [--benchmark_min_warmup_time=]\n" + " [--benchmark_repetitions=]\n" + " [--benchmark_enable_random_interleaving={true|false}]\n" + " [--benchmark_report_aggregates_only={true|false}]\n" + " [--benchmark_display_aggregates_only={true|false}]\n" + " [--benchmark_format=]\n" + " [--benchmark_out=]\n" + " [--benchmark_out_format=]\n" + " [--benchmark_color={auto|true|false}]\n" + " [--benchmark_counters_tabular={true|false}]\n" +#if defined HAVE_LIBPFM + " [--benchmark_perf_counters=,...]\n" +#endif + " [--benchmark_context==,...]\n" + " [--benchmark_time_unit={ns|us|ms|s}]\n" + " [--v=]\n"); +} + +void Initialize(int* argc, char** argv, void (*HelperPrintf)()) { + internal::HelperPrintf = HelperPrintf; internal::ParseCommandLineFlags(argc, argv); internal::LogLevel() = FLAGS_v; } diff --git a/third-party/benchmark/src/benchmark_api_internal.h b/third-party/benchmark/src/benchmark_api_internal.h --- a/third-party/benchmark/src/benchmark_api_internal.h +++ b/third-party/benchmark/src/benchmark_api_internal.h @@ -36,6 +36,7 @@ const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } double min_time() const { return min_time_; } + double min_warmup_time() const { return min_warmup_time_; } IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } void Setup() const; @@ -62,6 +63,7 @@ const std::vector& statistics_; int repetitions_; double min_time_; + double min_warmup_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to us @@ -76,6 +78,7 @@ bool IsZero(double n); +BENCHMARK_EXPORT ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); } // end namespace internal diff --git a/third-party/benchmark/src/benchmark_api_internal.cc b/third-party/benchmark/src/benchmark_api_internal.cc --- a/third-party/benchmark/src/benchmark_api_internal.cc +++ b/third-party/benchmark/src/benchmark_api_internal.cc @@ -16,7 +16,7 @@ per_family_instance_index_(per_family_instance_idx), aggregation_report_mode_(benchmark_.aggregation_report_mode_), args_(args), - time_unit_(benchmark_.time_unit_), + 
time_unit_(benchmark_.GetTimeUnit()), measure_process_cpu_time_(benchmark_.measure_process_cpu_time_), use_real_time_(benchmark_.use_real_time_), use_manual_time_(benchmark_.use_manual_time_), @@ -25,6 +25,7 @@ statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), min_time_(benchmark_.min_time_), + min_warmup_time_(benchmark_.min_warmup_time_), iterations_(benchmark_.iterations_), threads_(thread_count) { name_.function_name = benchmark_.name_; @@ -50,6 +51,11 @@ name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); } + if (!IsZero(benchmark->min_warmup_time_)) { + name_.min_warmup_time = + StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_); + } + if (benchmark_.iterations_ != 0) { name_.iterations = StrFormat( "iterations:%lu", static_cast(benchmark_.iterations_)); @@ -87,24 +93,24 @@ IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement) const { - State st(iters, args_, thread_id, threads_, timer, manager, - perf_counters_measurement); + State st(name_.function_name, iters, args_, thread_id, threads_, timer, + manager, perf_counters_measurement); benchmark_.Run(st); return st; } void BenchmarkInstance::Setup() const { if (setup_) { - State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, - nullptr); + State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_, + nullptr, nullptr, nullptr); setup_(st); } } void BenchmarkInstance::Teardown() const { if (teardown_) { - State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, - nullptr); + State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_, + nullptr, nullptr, nullptr); teardown_(st); } } diff --git a/third-party/benchmark/src/benchmark_main.cc b/third-party/benchmark/src/benchmark_main.cc --- a/third-party/benchmark/src/benchmark_main.cc +++ b/third-party/benchmark/src/benchmark_main.cc @@ -14,4 +14,5 @@ #include "benchmark/benchmark.h" +BENCHMARK_EXPORT int main(int, char**); BENCHMARK_MAIN(); diff --git a/third-party/benchmark/src/benchmark_name.cc b/third-party/benchmark/src/benchmark_name.cc --- a/third-party/benchmark/src/benchmark_name.cc +++ b/third-party/benchmark/src/benchmark_name.cc @@ -51,8 +51,9 @@ } } // namespace +BENCHMARK_EXPORT std::string BenchmarkName::str() const { - return join('/', function_name, args, min_time, iterations, repetitions, - time_type, threads); + return join('/', function_name, args, min_time, min_warmup_time, iterations, + repetitions, time_type, threads); } } // namespace benchmark diff --git a/third-party/benchmark/src/benchmark_register.h b/third-party/benchmark/src/benchmark_register.h --- a/third-party/benchmark/src/benchmark_register.h +++ b/third-party/benchmark/src/benchmark_register.h @@ -1,6 +1,7 @@ #ifndef BENCHMARK_REGISTER_H #define BENCHMARK_REGISTER_H +#include #include #include @@ -23,7 +24,7 @@ static const T kmax = std::numeric_limits::max(); // Space out the values in multiples of "mult" - for (T i = static_cast(1); i <= hi; i *= mult) { + for (T i = static_cast(1); i <= hi; i *= static_cast(mult)) { if (i >= lo) { dst->push_back(i); } @@ -32,7 +33,7 @@ if (i > kmax / mult) break; } - return dst->begin() + start_offset; + return dst->begin() + static_cast(start_offset); } template diff --git a/third-party/benchmark/src/benchmark_register.cc b/third-party/benchmark/src/benchmark_register.cc --- a/third-party/benchmark/src/benchmark_register.cc +++ 
b/third-party/benchmark/src/benchmark_register.cc @@ -15,7 +15,7 @@ #include "benchmark_register.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -53,10 +53,13 @@ namespace { // For non-dense Range, intermediate values are powers of kRangeMultiplier. -static const int kRangeMultiplier = 8; +static constexpr int kRangeMultiplier = 8; + // The size of a benchmark family determines is the number of inputs to repeat // the benchmark on. If this is "large" then warn the user during configuration. -static const size_t kMaxFamilySize = 100; +static constexpr size_t kMaxFamilySize = 100; + +static constexpr char kDisabledPrefix[] = "DISABLED_"; } // end namespace namespace internal { @@ -116,10 +119,10 @@ // Make regular expression out of command-line flag std::string error_msg; Regex re; - bool isNegativeFilter = false; + bool is_negative_filter = false; if (spec[0] == '-') { spec.replace(0, 1, ""); - isNegativeFilter = true; + is_negative_filter = true; } if (!re.Init(spec, &error_msg)) { Err << "Could not compile benchmark re: " << error_msg << std::endl; @@ -154,7 +157,8 @@ << " will be repeated at least " << family_size << " times.\n"; } // reserve in the special case the regex ".", since we know the final - // family size. + // family size. this doesn't take into account any disabled benchmarks + // so worst case we reserve more than we need. if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size); for (auto const& args : family->args_) { @@ -164,8 +168,9 @@ num_threads); const auto full_name = instance.name().str(); - if ((re.Match(full_name) && !isNegativeFilter) || - (!re.Match(full_name) && isNegativeFilter)) { + if (full_name.rfind(kDisabledPrefix, 0) != 0 && + ((re.Match(full_name) && !is_negative_filter) || + (!re.Match(full_name) && is_negative_filter))) { benchmarks->push_back(std::move(instance)); ++per_family_instance_index; @@ -199,12 +204,14 @@ // Benchmark //=============================================================================// -Benchmark::Benchmark(const char* name) +Benchmark::Benchmark(const std::string& name) : name_(name), aggregation_report_mode_(ARM_Unspecified), - time_unit_(kNanosecond), + time_unit_(GetDefaultTimeUnit()), + use_default_time_unit_(true), range_multiplier_(kRangeMultiplier), min_time_(0), + min_warmup_time_(0), iterations_(0), repetitions_(0), measure_process_cpu_time_(false), @@ -223,7 +230,7 @@ Benchmark::~Benchmark() {} Benchmark* Benchmark::Name(const std::string& name) { - SetName(name.c_str()); + SetName(name); return this; } @@ -235,6 +242,7 @@ Benchmark* Benchmark::Unit(TimeUnit unit) { time_unit_ = unit; + use_default_time_unit_ = false; return this; } @@ -348,9 +356,17 @@ return this; } +Benchmark* Benchmark::MinWarmUpTime(double t) { + BM_CHECK(t >= 0.0); + BM_CHECK(iterations_ == 0); + min_warmup_time_ = t; + return this; +} + Benchmark* Benchmark::Iterations(IterationCount n) { BM_CHECK(n > 0); BM_CHECK(IsZero(min_time_)); + BM_CHECK(IsZero(min_warmup_time_)); iterations_ = n; return this; } @@ -452,7 +468,9 @@ return this; } -void Benchmark::SetName(const char* name) { name_ = name; } +void Benchmark::SetName(const std::string& name) { name_ = name; } + +const char* Benchmark::GetName() const { return name_.c_str(); } int Benchmark::ArgsCnt() const { if (args_.empty()) { @@ -462,6 +480,16 @@ return static_cast(args_.front().size()); } +const char* Benchmark::GetArgName(int arg) const { + BM_CHECK_GE(arg, 0); + 
BM_CHECK_LT(arg, static_cast(arg_names_.size())); + return arg_names_[arg].c_str(); +} + +TimeUnit Benchmark::GetTimeUnit() const { + return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_; +} + //=============================================================================// // FunctionBenchmark //=============================================================================// diff --git a/third-party/benchmark/src/benchmark_runner.h b/third-party/benchmark/src/benchmark_runner.h --- a/third-party/benchmark/src/benchmark_runner.h +++ b/third-party/benchmark/src/benchmark_runner.h @@ -25,7 +25,8 @@ namespace benchmark { -BM_DECLARE_double(benchmark_min_time); +BM_DECLARE_string(benchmark_min_time); +BM_DECLARE_double(benchmark_min_warmup_time); BM_DECLARE_int32(benchmark_repetitions); BM_DECLARE_bool(benchmark_report_aggregates_only); BM_DECLARE_bool(benchmark_display_aggregates_only); @@ -43,9 +44,21 @@ bool file_report_aggregates_only = false; }; +struct BENCHMARK_EXPORT BenchTimeType { + enum { ITERS, TIME } tag; + union { + IterationCount iters; + double time; + }; +}; + +BENCHMARK_EXPORT +BenchTimeType ParseBenchMinTime(const std::string& value); + class BenchmarkRunner { public: BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, + benchmark::internal::PerfCountersMeasurement* pmc_, BenchmarkReporter::PerFamilyRunReports* reports_for_family); int GetNumRepeats() const { return repeats; } @@ -62,13 +75,22 @@ return reports_for_family; } + double GetMinTime() const { return min_time; } + + bool HasExplicitIters() const { return has_explicit_iteration_count; } + + IterationCount GetIters() const { return iters; } + private: RunResults run_results; const benchmark::internal::BenchmarkInstance& b; BenchmarkReporter::PerFamilyRunReports* reports_for_family; + BenchTimeType parsed_benchtime_flag; const double min_time; + const double min_warmup_time; + bool warmup_done; const int repeats; const bool has_explicit_iteration_count; @@ -82,8 +104,7 @@ // So only the first repetition has to find/calculate it, // the other repetitions will just use that precomputed iteration count. 
- PerfCountersMeasurement perf_counters_measurement; - PerfCountersMeasurement* const perf_counters_measurement_ptr; + PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr; struct IterationResults { internal::ThreadManager::Result results; @@ -95,6 +116,12 @@ IterationCount PredictNumItersNeeded(const IterationResults& i) const; bool ShouldReportIterationResults(const IterationResults& i) const; + + double GetMinTimeToApply() const; + + void FinishWarmUp(const IterationCount& i); + + void RunWarmUp(); }; } // namespace internal diff --git a/third-party/benchmark/src/benchmark_runner.cc b/third-party/benchmark/src/benchmark_runner.cc --- a/third-party/benchmark/src/benchmark_runner.cc +++ b/third-party/benchmark/src/benchmark_runner.cc @@ -19,7 +19,7 @@ #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -28,11 +28,14 @@ #include #include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -62,6 +65,8 @@ namespace { static constexpr IterationCount kMaxIterations = 1000000000; +const double kDefaultMinTime = + std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr); BenchmarkReporter::Run CreateRunReport( const benchmark::internal::BenchmarkInstance& b, @@ -75,8 +80,8 @@ report.run_name = b.name(); report.family_index = b.family_index(); report.per_family_instance_index = b.per_family_instance_index(); - report.error_occurred = results.has_error_; - report.error_message = results.error_message_; + report.skipped = results.skipped_; + report.skip_message = results.skip_message_; report.report_label = results.report_label_; // This is the total iterations across all threads. report.iterations = results.iterations; @@ -85,7 +90,7 @@ report.repetition_index = repetition_index; report.repetitions = repeats; - if (!report.error_occurred) { + if (!report.skipped) { if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; } else { @@ -122,9 +127,10 @@ b->measure_process_cpu_time() ? internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); + State st = b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); - BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) + BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { MutexLock l(manager->GetBenchmarkMutex()); @@ -139,24 +145,100 @@ manager->NotifyThreadComplete(); } +double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b, + const BenchTimeType& iters_or_time) { + if (!IsZero(b.min_time())) return b.min_time(); + // If the flag was used to specify number of iters, then return the default + // min_time. + if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime; + + return iters_or_time.time; +} + +IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b, + const BenchTimeType& iters_or_time) { + if (b.iterations() != 0) return b.iterations(); + + // We've already concluded that this flag is currently used to pass + // iters but do a check here again anyway. 
+ BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS); + return iters_or_time.iters; +} + } // end namespace +BenchTimeType ParseBenchMinTime(const std::string& value) { + BenchTimeType ret; + + if (value.empty()) { + ret.tag = BenchTimeType::TIME; + ret.time = 0.0; + return ret; + } + + if (value.back() == 'x') { + char* p_end; + // Reset errno before it's changed by strtol. + errno = 0; + IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10); + + // After a valid parse, p_end should have been set to + // point to the 'x' suffix. + BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x') + << "Malformed iters value passed to --benchmark_min_time: `" << value + << "`. Expected --benchmark_min_time=x."; + + ret.tag = BenchTimeType::ITERS; + ret.iters = num_iters; + return ret; + } + + bool has_suffix = value.back() == 's'; + if (!has_suffix) { + BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. " + "Eg., `30s` for 30-seconds."; + } + + char* p_end; + // Reset errno before it's changed by strtod. + errno = 0; + double min_time = std::strtod(value.c_str(), &p_end); + + // After a successful parse, p_end should point to the suffix 's', + // or the end of the string if the suffix was omitted. + BM_CHECK(errno == 0 && p_end != nullptr && + ((has_suffix && *p_end == 's') || *p_end == '\0')) + << "Malformed seconds value passed to --benchmark_min_time: `" << value + << "`. Expected --benchmark_min_time=x."; + + ret.tag = BenchTimeType::TIME; + ret.time = min_time; + + return ret; +} + BenchmarkRunner::BenchmarkRunner( const benchmark::internal::BenchmarkInstance& b_, + PerfCountersMeasurement* pcm_, BenchmarkReporter::PerFamilyRunReports* reports_for_family_) : b(b_), reports_for_family(reports_for_family_), - min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), + parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), + min_time(ComputeMinTime(b_, parsed_benchtime_flag)), + min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) + ? b.min_warmup_time() + : FLAGS_benchmark_min_warmup_time), + warmup_done(!(min_warmup_time > 0.0)), repeats(b.repetitions() != 0 ? b.repetitions() : FLAGS_benchmark_repetitions), - has_explicit_iteration_count(b.iterations() != 0), + has_explicit_iteration_count(b.iterations() != 0 || + parsed_benchtime_flag.tag == + BenchTimeType::ITERS), pool(b.threads() - 1), - iters(has_explicit_iteration_count ? b.iterations() : 1), - perf_counters_measurement( - PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))), - perf_counters_measurement_ptr(perf_counters_measurement.IsValid() - ? &perf_counters_measurement - : nullptr) { + iters(has_explicit_iteration_count + ? ComputeIters(b_, parsed_benchtime_flag) + : 1), + perf_counters_measurement_ptr(pcm_) { run_results.display_report_aggregates_only = (FLAGS_benchmark_report_aggregates_only || FLAGS_benchmark_display_aggregates_only); @@ -169,7 +251,7 @@ run_results.file_report_aggregates_only = (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); BM_CHECK(FLAGS_benchmark_perf_counters.empty() || - perf_counters_measurement.IsValid()) + (perf_counters_measurement_ptr->num_counters() == 0)) << "Perf counters were requested but could not be set up."; } } @@ -232,20 +314,20 @@ const IterationResults& i) const { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). 
- double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); + double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. - bool is_significant = (i.seconds / min_time) > 0.1; + const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1; multiplier = is_significant ? multiplier : 10.0; // So what seems to be the sufficiently-large iteration count? Round up. const IterationCount max_next_iters = static_cast( std::lround(std::max(multiplier * static_cast(i.iters), static_cast(i.iters) + 1.0))); - // But we do have *some* sanity limits though.. + // But we do have *some* limits though.. const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; @@ -257,21 +339,80 @@ // Determine if this run should be reported; // Either it has run for a sufficient amount of time // or because an error was reported. - return i.results.has_error_ || + return i.results.skipped_ || i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= min_time || // The elapsed time is large enough. + i.seconds >= + GetMinTimeToApply() || // The elapsed time is large enough. // CPU time is specified but the elapsed real time greatly exceeds // the minimum time. - // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); + // Note that user provided timers are except from this test. + ((i.results.real_time_used >= 5 * GetMinTimeToApply()) && + !b.use_manual_time()); +} + +double BenchmarkRunner::GetMinTimeToApply() const { + // In order to re-use functionality to run and measure benchmarks for running + // a warmup phase of the benchmark, we need a way of telling whether to apply + // min_time or min_warmup_time. This function will figure out if we are in the + // warmup phase and therefore need to apply min_warmup_time or if we already + // in the benchmarking phase and min_time needs to be applied. + return warmup_done ? min_time : min_warmup_time; +} + +void BenchmarkRunner::FinishWarmUp(const IterationCount& i) { + warmup_done = true; + iters = i; +} + +void BenchmarkRunner::RunWarmUp() { + // Use the same mechanisms for warming up the benchmark as used for actually + // running and measuring the benchmark. + IterationResults i_warmup; + // Dont use the iterations determined in the warmup phase for the actual + // measured benchmark phase. While this may be a good starting point for the + // benchmark and it would therefore get rid of the need to figure out how many + // iterations are needed if min_time is set again, this may also be a complete + // wrong guess since the warmup loops might be considerably slower (e.g + // because of caching effects). 
+ const IterationCount i_backup = iters; + + for (;;) { + b.Setup(); + i_warmup = DoNIterations(); + b.Teardown(); + + const bool finish = ShouldReportIterationResults(i_warmup); + + if (finish) { + FinishWarmUp(i_backup); + break; + } + + // Although we are running "only" a warmup phase where running enough + // iterations at once without measuring time isn't as important as it is for + // the benchmarking phase, we still do it the same way as otherwise it is + // very confusing for the user to know how to choose a proper value for + // min_warmup_time if a different approach on running it is used. + iters = PredictNumItersNeeded(i_warmup); + assert(iters > i_warmup.iters && + "if we did more iterations than we want to do the next time, " + "then we should have accepted the current iteration run."); + } } void BenchmarkRunner::DoOneRepetition() { assert(HasRepeatsRemaining() && "Already done all repetitions?"); const bool is_the_first_repetition = num_repetitions_done == 0; - IterationResults i; + // In case a warmup phase is requested by the benchmark, run it now. + // After running the warmup phase the BenchmarkRunner should be in a state as + // this warmup never happened except the fact that warmup_done is set. Every + // other manipulation of the BenchmarkRunner instance would be a bug! Please + // fix it. + if (!warmup_done) RunWarmUp(); + + IterationResults i; // We *may* be gradually increasing the length (iteration count) // of the benchmark until we decide the results are significant. // And once we do, we report those last results and exit. @@ -324,10 +465,7 @@ manager->WaitForAllThreads(); manager.reset(); b.Teardown(); - - BENCHMARK_DISABLE_DEPRECATED_WARNING - memory_manager->Stop(memory_result); - BENCHMARK_RESTORE_DEPRECATED_WARNING + memory_manager->Stop(*memory_result); } // Ok, now actually report. 
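[Editor's note, not part of the patch: the benchmark_runner.cc hunks above add a warm-up phase and let --benchmark_min_time accept either an iteration count ("<N>x") or a duration ("<T>s"). A minimal usage sketch of those knobs follows; BM_MemCopy and the ./bench binary name are hypothetical and used only for illustration.]

    // Illustrative sketch (assumes the benchmark headers from this tree).
    #include <cstring>
    #include <vector>
    #include "benchmark/benchmark.h"

    static void BM_MemCopy(benchmark::State& state) {
      std::vector<char> src(1 << 20, 'x'), dst(1 << 20);
      for (auto _ : state) {
        std::memcpy(dst.data(), src.data(), src.size());
        benchmark::ClobberMemory();  // keep the copy from being optimized away
      }
    }
    // Warm caches for ~0.25s before measuring; then measure for at least 1s.
    BENCHMARK(BM_MemCopy)->MinWarmUpTime(0.25)->MinTime(1.0);
    BENCHMARK_MAIN();

    // Equivalent control from the command line (flags parsed by the code above):
    //   ./bench --benchmark_min_warmup_time=0.25 --benchmark_min_time=1s
    //   ./bench --benchmark_min_time=500x   // run exactly 500 iterations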
@@ -337,7 +475,7 @@ if (reports_for_family) { ++reports_for_family->num_runs_done; - if (!report.error_occurred) reports_for_family->Runs.push_back(report); + if (!report.skipped) reports_for_family->Runs.push_back(report); } run_results.non_aggregates.push_back(report); diff --git a/third-party/benchmark/src/check.h b/third-party/benchmark/src/check.h --- a/third-party/benchmark/src/check.h +++ b/third-party/benchmark/src/check.h @@ -5,18 +5,34 @@ #include #include +#include "benchmark/export.h" #include "internal_macros.h" #include "log.h" +#if defined(__GNUC__) || defined(__clang__) +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#elif defined(_MSC_VER) && !defined(__clang__) +#if _MSC_VER >= 1900 +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#else +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif +#define __func__ __FUNCTION__ +#else +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif + namespace benchmark { namespace internal { typedef void(AbortHandlerT)(); -inline AbortHandlerT*& GetAbortHandler() { - static AbortHandlerT* handler = &std::abort; - return handler; -} +BENCHMARK_EXPORT +AbortHandlerT*& GetAbortHandler(); BENCHMARK_NORETURN inline void CallAbortHandler() { GetAbortHandler()(); @@ -36,10 +52,17 @@ LogType& GetLog() { return log_; } +#if defined(COMPILER_MSVC) +#pragma warning(push) +#pragma warning(disable : 4722) +#endif BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { log_ << std::endl; CallAbortHandler(); } +#if defined(COMPILER_MSVC) +#pragma warning(pop) +#endif CheckHandler& operator=(const CheckHandler&) = delete; CheckHandler(const CheckHandler&) = delete; diff --git a/third-party/benchmark/src/check.cc b/third-party/benchmark/src/check.cc new file mode 100644 --- /dev/null +++ b/third-party/benchmark/src/check.cc @@ -0,0 +1,11 @@ +#include "check.h" + +namespace benchmark { +namespace internal { + +static AbortHandlerT* handler = &std::abort; + +BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; } + +} // namespace internal +} // namespace benchmark diff --git a/third-party/benchmark/src/colorprint.cc b/third-party/benchmark/src/colorprint.cc --- a/third-party/benchmark/src/colorprint.cc +++ b/third-party/benchmark/src/colorprint.cc @@ -96,18 +96,18 @@ // currently there is no error handling for failure, so this is hack. BM_CHECK(ret >= 0); - if (ret == 0) // handle empty expansion + if (ret == 0) { // handle empty expansion return {}; - else if (static_cast(ret) < size) + } + if (static_cast(ret) < size) { return local_buff; - else { - // we did not provide a long enough buffer on our first attempt. - size = static_cast(ret) + 1; // + 1 for the null byte - std::unique_ptr buff(new char[size]); - ret = vsnprintf(buff.get(), size, msg, args); - BM_CHECK(ret > 0 && (static_cast(ret)) < size); - return buff.get(); } + // we did not provide a long enough buffer on our first attempt. + size = static_cast(ret) + 1; // + 1 for the null byte + std::unique_ptr buff(new char[size]); + ret = vsnprintf(buff.get(), size, msg, args); + BM_CHECK(ret > 0 && (static_cast(ret)) < size); + return buff.get(); } std::string FormatString(const char* msg, ...) { @@ -163,12 +163,24 @@ #else // On non-Windows platforms, we rely on the TERM variable. This list of // supported TERM values is copied from Google Test: - // . + // . 
const char* const SUPPORTED_TERM_VALUES[] = { - "xterm", "xterm-color", "xterm-256color", - "screen", "screen-256color", "tmux", - "tmux-256color", "rxvt-unicode", "rxvt-unicode-256color", - "linux", "cygwin", + "xterm", + "xterm-color", + "xterm-256color", + "screen", + "screen-256color", + "tmux", + "tmux-256color", + "rxvt-unicode", + "rxvt-unicode-256color", + "linux", + "cygwin", + "xterm-kitty", + "alacritty", + "foot", + "foot-extra", + "wezterm", }; const char* const term = getenv("TERM"); diff --git a/third-party/benchmark/src/commandlineflags.h b/third-party/benchmark/src/commandlineflags.h --- a/third-party/benchmark/src/commandlineflags.h +++ b/third-party/benchmark/src/commandlineflags.h @@ -5,28 +5,33 @@ #include #include +#include "benchmark/export.h" + // Macro for referencing flags. #define FLAG(name) FLAGS_##name // Macros for declaring flags. -#define BM_DECLARE_bool(name) extern bool FLAG(name) -#define BM_DECLARE_int32(name) extern int32_t FLAG(name) -#define BM_DECLARE_double(name) extern double FLAG(name) -#define BM_DECLARE_string(name) extern std::string FLAG(name) +#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name) +#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name) +#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name) +#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name) #define BM_DECLARE_kvpairs(name) \ - extern std::map FLAG(name) + BENCHMARK_EXPORT extern std::map FLAG(name) // Macros for defining flags. #define BM_DEFINE_bool(name, default_val) \ - bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) + BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) #define BM_DEFINE_int32(name, default_val) \ - int32_t FLAG(name) = benchmark::Int32FromEnv(#name, default_val) + BENCHMARK_EXPORT int32_t FLAG(name) = \ + benchmark::Int32FromEnv(#name, default_val) #define BM_DEFINE_double(name, default_val) \ - double FLAG(name) = benchmark::DoubleFromEnv(#name, default_val) + BENCHMARK_EXPORT double FLAG(name) = \ + benchmark::DoubleFromEnv(#name, default_val) #define BM_DEFINE_string(name, default_val) \ - std::string FLAG(name) = benchmark::StringFromEnv(#name, default_val) -#define BM_DEFINE_kvpairs(name, default_val) \ - std::map FLAG(name) = \ + BENCHMARK_EXPORT std::string FLAG(name) = \ + benchmark::StringFromEnv(#name, default_val) +#define BM_DEFINE_kvpairs(name, default_val) \ + BENCHMARK_EXPORT std::map FLAG(name) = \ benchmark::KvPairsFromEnv(#name, default_val) namespace benchmark { @@ -35,6 +40,7 @@ // // If the variable exists, returns IsTruthyFlagValue() value; if not, // returns the given default value. +BENCHMARK_EXPORT bool BoolFromEnv(const char* flag, bool default_val); // Parses an Int32 from the environment variable corresponding to the given @@ -42,6 +48,7 @@ // // If the variable exists, returns ParseInt32() value; if not, returns // the given default value. +BENCHMARK_EXPORT int32_t Int32FromEnv(const char* flag, int32_t default_val); // Parses an Double from the environment variable corresponding to the given @@ -49,6 +56,7 @@ // // If the variable exists, returns ParseDouble(); if not, returns // the given default value. +BENCHMARK_EXPORT double DoubleFromEnv(const char* flag, double default_val); // Parses a string from the environment variable corresponding to the given @@ -56,6 +64,7 @@ // // If variable exists, returns its value; if not, returns // the given default value. 
+BENCHMARK_EXPORT const char* StringFromEnv(const char* flag, const char* default_val); // Parses a set of kvpairs from the environment variable corresponding to the @@ -63,6 +72,7 @@ // // If variable exists, returns its value; if not, returns // the given default value. +BENCHMARK_EXPORT std::map KvPairsFromEnv( const char* flag, std::map default_val); @@ -75,40 +85,47 @@ // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseBoolFlag(const char* str, const char* flag, bool* value); // Parses a string for an Int32 flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); // Parses a string for a Double flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseDoubleFlag(const char* str, const char* flag, double* value); // Parses a string for a string flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseStringFlag(const char* str, const char* flag, std::string* value); // Parses a string for a kvpairs flag in the form "--flag=key=value,key=value" // // On success, stores the value of the flag in *value and returns true. On // failure returns false, though *value may have been mutated. +BENCHMARK_EXPORT bool ParseKeyValueFlag(const char* str, const char* flag, std::map* value); // Returns true if the string matches the flag. +BENCHMARK_EXPORT bool IsFlag(const char* str, const char* flag); // Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or // some non-alphanumeric character. Also returns false if the value matches // one of 'no', 'false', 'off' (case-insensitive). As a special case, also // returns true if value is the empty string. +BENCHMARK_EXPORT bool IsTruthyFlagValue(const std::string& value); } // end namespace benchmark diff --git a/third-party/benchmark/src/commandlineflags.cc b/third-party/benchmark/src/commandlineflags.cc --- a/third-party/benchmark/src/commandlineflags.cc +++ b/third-party/benchmark/src/commandlineflags.cc @@ -121,12 +121,14 @@ } // namespace +BENCHMARK_EXPORT bool BoolFromEnv(const char* flag, bool default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str); } +BENCHMARK_EXPORT int32_t Int32FromEnv(const char* flag, int32_t default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); @@ -139,6 +141,7 @@ return value; } +BENCHMARK_EXPORT double DoubleFromEnv(const char* flag, double default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); @@ -151,12 +154,14 @@ return value; } +BENCHMARK_EXPORT const char* StringFromEnv(const char* flag, const char* default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value = getenv(env_var.c_str()); return value == nullptr ? 
default_val : value; } +BENCHMARK_EXPORT std::map KvPairsFromEnv( const char* flag, std::map default_val) { const std::string env_var = FlagToEnvVar(flag); @@ -201,6 +206,7 @@ return flag_end + 1; } +BENCHMARK_EXPORT bool ParseBoolFlag(const char* str, const char* flag, bool* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, true); @@ -213,6 +219,7 @@ return true; } +BENCHMARK_EXPORT bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); @@ -225,6 +232,7 @@ value); } +BENCHMARK_EXPORT bool ParseDoubleFlag(const char* str, const char* flag, double* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); @@ -237,6 +245,7 @@ value); } +BENCHMARK_EXPORT bool ParseStringFlag(const char* str, const char* flag, std::string* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); @@ -248,6 +257,7 @@ return true; } +BENCHMARK_EXPORT bool ParseKeyValueFlag(const char* str, const char* flag, std::map* value) { const char* const value_str = ParseFlagValue(str, flag, false); @@ -263,23 +273,26 @@ return true; } +BENCHMARK_EXPORT bool IsFlag(const char* str, const char* flag) { return (ParseFlagValue(str, flag, true) != nullptr); } +BENCHMARK_EXPORT bool IsTruthyFlagValue(const std::string& value) { if (value.size() == 1) { char v = value[0]; return isalnum(v) && !(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N'); - } else if (!value.empty()) { + } + if (!value.empty()) { std::string value_lower(value); std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(), [](char c) { return static_cast(::tolower(c)); }); return !(value_lower == "false" || value_lower == "no" || value_lower == "off"); - } else - return true; + } + return true; } } // end namespace benchmark diff --git a/third-party/benchmark/src/complexity.h b/third-party/benchmark/src/complexity.h --- a/third-party/benchmark/src/complexity.h +++ b/third-party/benchmark/src/complexity.h @@ -31,7 +31,7 @@ const std::vector& reports); // This data structure will contain the result returned by MinimalLeastSq -// - coef : Estimated coeficient for the high-order term as +// - coef : Estimated coefficient for the high-order term as // interpolated from data. // - rms : Normalized Root Mean Squared Error. // - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability diff --git a/third-party/benchmark/src/console_reporter.cc b/third-party/benchmark/src/console_reporter.cc --- a/third-party/benchmark/src/console_reporter.cc +++ b/third-party/benchmark/src/console_reporter.cc @@ -33,6 +33,7 @@ namespace benchmark { +BENCHMARK_EXPORT bool ConsoleReporter::ReportContext(const Context& context) { name_field_width_ = context.name_field_width; printed_header_ = false; @@ -52,6 +53,7 @@ return true; } +BENCHMARK_EXPORT void ConsoleReporter::PrintHeader(const Run& run) { std::string str = FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), @@ -69,6 +71,7 @@ GetOutputStream() << line << "\n" << str << "\n" << line << "\n"; } +BENCHMARK_EXPORT void ConsoleReporter::ReportRuns(const std::vector& reports) { for (const auto& run : reports) { // print the header: @@ -99,6 +102,9 @@ } static std::string FormatTime(double time) { + // For the time columns of the console printer 13 digits are reserved. 
One of + // them is a space and max two of them are the time unit (e.g ns). That puts + // us at 10 digits usable for the number. // Align decimal places... if (time < 1.0) { return FormatString("%10.3f", time); @@ -109,9 +115,15 @@ if (time < 100.0) { return FormatString("%10.1f", time); } + // Assuming the time is at max 9.9999e+99 and we have 10 digits for the + // number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print. + if (time > 9999999999 /*max 10 digit number*/) { + return FormatString("%1.4e", time); + } return FormatString("%10.0f", time); } +BENCHMARK_EXPORT void ConsoleReporter::PrintRunData(const Run& result) { typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); auto& Out = GetOutputStream(); @@ -123,9 +135,13 @@ printer(Out, name_color, "%-*s ", name_field_width_, result.benchmark_name().c_str()); - if (result.error_occurred) { + if (internal::SkippedWithError == result.skipped) { printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", - result.error_message.c_str()); + result.skip_message.c_str()); + printer(Out, COLOR_DEFAULT, "\n"); + return; + } else if (internal::SkippedWithMessage == result.skipped) { + printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str()); printer(Out, COLOR_DEFAULT, "\n"); return; } diff --git a/third-party/benchmark/src/csv_reporter.cc b/third-party/benchmark/src/csv_reporter.cc --- a/third-party/benchmark/src/csv_reporter.cc +++ b/third-party/benchmark/src/csv_reporter.cc @@ -52,11 +52,13 @@ return '"' + tmp + '"'; } +BENCHMARK_EXPORT bool CSVReporter::ReportContext(const Context& context) { PrintBasicContext(&GetErrorStream(), context); return true; } +BENCHMARK_EXPORT void CSVReporter::ReportRuns(const std::vector& reports) { std::ostream& Out = GetOutputStream(); @@ -103,13 +105,14 @@ } } +BENCHMARK_EXPORT void CSVReporter::PrintRunData(const Run& run) { std::ostream& Out = GetOutputStream(); Out << CsvEscape(run.benchmark_name()) << ","; - if (run.error_occurred) { + if (run.skipped) { Out << std::string(elements.size() - 3, ','); - Out << "true,"; - Out << CsvEscape(run.error_message) << "\n"; + Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ","; + Out << CsvEscape(run.skip_message) << "\n"; return; } diff --git a/third-party/benchmark/src/cycleclock.h b/third-party/benchmark/src/cycleclock.h --- a/third-party/benchmark/src/cycleclock.h +++ b/third-party/benchmark/src/cycleclock.h @@ -36,7 +36,8 @@ // declarations of some other intrinsics, breaking compilation. // Therefore, we simply declare __rdtsc ourselves. See also // http://connect.microsoft.com/VisualStudio/feedback/details/262047 -#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) +#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \ + !defined(_M_ARM64EC) extern "C" uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) #endif @@ -114,7 +115,7 @@ // when I know it will work. Otherwise, I'll use __rdtsc and hope // the code is being compiled with a non-ancient compiler. _asm rdtsc -#elif defined(COMPILER_MSVC) && defined(_M_ARM64) +#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC)) // See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics // and https://reviews.llvm.org/D53115 int64_t virtual_timer_value; @@ -132,7 +133,7 @@ // Native Client does not provide any API to access cycle counter. // Use clock_gettime(CLOCK_MONOTONIC, ...) 
instead of gettimeofday - // because is provides nanosecond resolution (which is noticable at + // because is provides nanosecond resolution (which is noticeable at // least for PNaCl modules running on x86 Mac & Linux). // Initialize to always return 0 if clock_gettime fails. struct timespec ts = {0, 0}; @@ -173,7 +174,7 @@ struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#elif defined(__loongarch__) +#elif defined(__loongarch__) || defined(__csky__) struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; @@ -212,6 +213,10 @@ struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__hexagon__) + uint64_t pcycle; + asm volatile("%0 = C15:14" : "=r"(pcycle)); + return static_cast(pcycle); #else // The soft failover to a generic implementation is automatic only for ARM. // For other platforms the developer is expected to make an attempt to create diff --git a/third-party/benchmark/src/internal_macros.h b/third-party/benchmark/src/internal_macros.h --- a/third-party/benchmark/src/internal_macros.h +++ b/third-party/benchmark/src/internal_macros.h @@ -1,8 +1,6 @@ #ifndef BENCHMARK_INTERNAL_MACROS_H_ #define BENCHMARK_INTERNAL_MACROS_H_ -#include "benchmark/benchmark.h" - /* Needed to detect STL */ #include @@ -44,6 +42,19 @@ #define BENCHMARK_OS_CYGWIN 1 #elif defined(_WIN32) #define BENCHMARK_OS_WINDOWS 1 + // WINAPI_FAMILY_PARTITION is defined in winapifamily.h. + // We include windows.h which implicitly includes winapifamily.h for compatibility. + #ifndef NOMINMAX + #define NOMINMAX + #endif + #include + #if defined(WINAPI_FAMILY_PARTITION) + #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + #define BENCHMARK_OS_WINDOWS_WIN32 1 + #elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define BENCHMARK_OS_WINDOWS_RT 1 + #endif + #endif #if defined(__MINGW32__) #define BENCHMARK_OS_MINGW 1 #endif @@ -80,6 +91,8 @@ #define BENCHMARK_OS_QNX 1 #elif defined(__MVS__) #define BENCHMARK_OS_ZOS 1 +#elif defined(__hexagon__) +#define BENCHMARK_OS_QURT 1 #endif #if defined(__ANDROID__) && defined(__GLIBCXX__) diff --git a/third-party/benchmark/src/json_reporter.cc b/third-party/benchmark/src/json_reporter.cc --- a/third-party/benchmark/src/json_reporter.cc +++ b/third-party/benchmark/src/json_reporter.cc @@ -28,10 +28,6 @@ #include "timers.h" namespace benchmark { -namespace internal { -extern std::map* global_context; -} - namespace { std::string StrEscape(const std::string& s) { @@ -89,12 +85,6 @@ return ss.str(); } -std::string FormatKV(std::string const& key, IterationCount value) { - std::stringstream ss; - ss << '"' << StrEscape(key) << "\": " << value; - return ss.str(); -} - std::string FormatKV(std::string const& key, double value) { std::stringstream ss; ss << '"' << StrEscape(key) << "\": "; @@ -184,8 +174,11 @@ #endif out << indent << FormatKV("library_build_type", build_type); - if (internal::global_context != nullptr) { - for (const auto& kv : *internal::global_context) { + std::map* global_context = + internal::GetGlobalContext(); + + if (global_context != nullptr) { + for (const auto& kv : *global_context) { out << ",\n"; out << indent << FormatKV(kv.first, kv.second); } @@ -261,9 +254,12 @@ BENCHMARK_UNREACHABLE(); }()) << ",\n"; } - if (run.error_occurred) { - out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; - out << indent << FormatKV("error_message", run.error_message) << ",\n"; + if 
(internal::SkippedWithError == run.skipped) { + out << indent << FormatKV("error_occurred", true) << ",\n"; + out << indent << FormatKV("error_message", run.skip_message) << ",\n"; + } else if (internal::SkippedWithMessage == run.skipped) { + out << indent << FormatKV("skipped", true) << ",\n"; + out << indent << FormatKV("skip_message", run.skip_message) << ",\n"; } if (!run.report_big_o && !run.report_rms) { out << indent << FormatKV("iterations", run.iterations) << ",\n"; @@ -301,7 +297,8 @@ out << ",\n" << indent << FormatKV("max_bytes_used", memory_result.max_bytes_used); - auto report_if_present = [&out, &indent](const char* label, int64_t val) { + auto report_if_present = [&out, &indent](const std::string& label, + int64_t val) { if (val != MemoryManager::TombstoneValue) out << ",\n" << indent << FormatKV(label, val); }; diff --git a/third-party/benchmark/src/log.h b/third-party/benchmark/src/log.h --- a/third-party/benchmark/src/log.h +++ b/third-party/benchmark/src/log.h @@ -4,7 +4,12 @@ #include #include -#include "benchmark/benchmark.h" +// NOTE: this is also defined in benchmark.h but we're trying to avoid a +// dependency. +// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer. +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#define BENCHMARK_HAS_CXX11 +#endif namespace benchmark { namespace internal { @@ -23,7 +28,16 @@ private: LogType(std::ostream* out) : out_(out) {} std::ostream* out_; - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); + + // NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have + // a dependency on benchmark.h from here. +#ifndef BENCHMARK_HAS_CXX11 + LogType(const LogType&); + LogType& operator=(const LogType&); +#else + LogType(const LogType&) = delete; + LogType& operator=(const LogType&) = delete; +#endif }; template @@ -47,13 +61,13 @@ } inline LogType& GetNullLogInstance() { - static LogType log(nullptr); - return log; + static LogType null_log(static_cast(nullptr)); + return null_log; } inline LogType& GetErrorLogInstance() { - static LogType log(&std::clog); - return log; + static LogType error_log(&std::clog); + return error_log; } inline LogType& GetLogInstanceForLevel(int level) { diff --git a/third-party/benchmark/src/perf_counters.h b/third-party/benchmark/src/perf_counters.h --- a/third-party/benchmark/src/perf_counters.h +++ b/third-party/benchmark/src/perf_counters.h @@ -17,16 +17,25 @@ #include #include +#include +#include #include #include "benchmark/benchmark.h" #include "check.h" #include "log.h" +#include "mutex.h" #ifndef BENCHMARK_OS_WINDOWS #include #endif +#if defined(_MSC_VER) +#pragma warning(push) +// C4251: needs to have dll-interface to be used by clients of class +#pragma warning(disable : 4251) +#endif + namespace benchmark { namespace internal { @@ -36,18 +45,21 @@ // The implementation ensures the storage is inlined, and allows 0-based // indexing into the counter values. // The object is used in conjunction with a PerfCounters object, by passing it -// to Snapshot(). The values are populated such that -// perfCounters->names()[i]'s value is obtained at position i (as given by -// operator[]) of this object. -class PerfCounterValues { +// to Snapshot(). The Read() method relocates individual reads, discarding +// the initial padding from each group leader in the values buffer such that +// all user accesses through the [] operator are correct. 
+class BENCHMARK_EXPORT PerfCounterValues {
  public:
   explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
     BM_CHECK_LE(nr_counters_, kMaxCounters);
   }
 
-  uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
+  // We are reading correctly now so the values don't need to skip padding
+  uint64_t operator[](size_t pos) const { return values_[pos]; }
 
-  static constexpr size_t kMaxCounters = 3;
+  // Increased the maximum to 32 only since the buffer
+  // is std::array<> backed
+  static constexpr size_t kMaxCounters = 32;
 
  private:
   friend class PerfCounters;
@@ -58,7 +70,14 @@
             sizeof(uint64_t) * (kPadding + nr_counters_)};
   }
 
-  static constexpr size_t kPadding = 1;
+  // This reading is complex and as the goal of this class is to
+  // abstract away the intricacies of the reading process, this is
+  // a better place for it
+  size_t Read(const std::vector<int>& leaders);
+
+  // Move the padding to 2 due to the reading algorithm (1st padding plus a
+  // current read padding)
+  static constexpr size_t kPadding = 2;
 
   std::array<uint64_t, kPadding + kMaxCounters> values_;
   const size_t nr_counters_;
 };
@@ -66,27 +85,34 @@
 // Collect PMU counters. The object, once constructed, is ready to be used by
 // calling read(). PMU counter collection is enabled from the time create() is
 // called, to obtain the object, until the object's destructor is called.
-class PerfCounters final {
+class BENCHMARK_EXPORT PerfCounters final {
  public:
   // True iff this platform supports performance counters.
   static const bool kSupported;
 
-  bool IsValid() const { return is_valid_; }
+  // Returns an empty object
   static PerfCounters NoCounters() { return PerfCounters(); }
 
-  ~PerfCounters();
+  ~PerfCounters() { CloseCounters(); }
+  PerfCounters() = default;
   PerfCounters(PerfCounters&&) = default;
   PerfCounters(const PerfCounters&) = delete;
+  PerfCounters& operator=(PerfCounters&&) noexcept;
+  PerfCounters& operator=(const PerfCounters&) = delete;
 
   // Platform-specific implementations may choose to do some library
   // initialization here.
   static bool Initialize();
 
+  // Check if the given counter is supported, if the app wants to
+  // check before passing
+  static bool IsCounterSupported(const std::string& name);
+
   // Return a PerfCounters object ready to read the counters with the names
   // specified. The values are user-mode only. The counter name format is
   // implementation and OS specific.
-  // TODO: once we move to C++-17, this should be a std::optional, and then the
-  // IsValid() boolean can be dropped.
+  // In case of failure, this method will in the worst case return an
+  // empty object whose state will still be valid.
static PerfCounters Create(const std::vector& counter_names); // Take a snapshot of the current value of the counters into the provided @@ -95,10 +121,7 @@ BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { #ifndef BENCHMARK_OS_WINDOWS assert(values != nullptr); - assert(IsValid()); - auto buffer = values->get_data_buffer(); - auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second); - return static_cast(read_bytes) == buffer.second; + return values->Read(leader_ids_) == counter_ids_.size(); #else (void)values; return false; @@ -110,56 +133,59 @@ private: PerfCounters(const std::vector& counter_names, - std::vector&& counter_ids) + std::vector&& counter_ids, std::vector&& leader_ids) : counter_ids_(std::move(counter_ids)), - counter_names_(counter_names), - is_valid_(true) {} - PerfCounters() : is_valid_(false) {} + leader_ids_(std::move(leader_ids)), + counter_names_(counter_names) {} + + void CloseCounters() const; std::vector counter_ids_; - const std::vector counter_names_; - const bool is_valid_; + std::vector leader_ids_; + std::vector counter_names_; }; // Typical usage of the above primitives. -class PerfCountersMeasurement final { +class BENCHMARK_EXPORT PerfCountersMeasurement final { public: - PerfCountersMeasurement(PerfCounters&& c) - : counters_(std::move(c)), - start_values_(counters_.IsValid() ? counters_.names().size() : 0), - end_values_(counters_.IsValid() ? counters_.names().size() : 0) {} + PerfCountersMeasurement(const std::vector& counter_names); - bool IsValid() const { return counters_.IsValid(); } + size_t num_counters() const { return counters_.num_counters(); } - BENCHMARK_ALWAYS_INLINE void Start() { - assert(IsValid()); + std::vector names() const { return counters_.names(); } + + BENCHMARK_ALWAYS_INLINE bool Start() { + if (num_counters() == 0) return true; // Tell the compiler to not move instructions above/below where we take // the snapshot. ClobberMemory(); - counters_.Snapshot(&start_values_); + valid_read_ &= counters_.Snapshot(&start_values_); ClobberMemory(); + + return valid_read_; } - BENCHMARK_ALWAYS_INLINE std::vector> - StopAndGetMeasurements() { - assert(IsValid()); + BENCHMARK_ALWAYS_INLINE bool Stop( + std::vector>& measurements) { + if (num_counters() == 0) return true; // Tell the compiler to not move instructions above/below where we take // the snapshot. 
ClobberMemory(); - counters_.Snapshot(&end_values_); + valid_read_ &= counters_.Snapshot(&end_values_); ClobberMemory(); - std::vector> ret; for (size_t i = 0; i < counters_.names().size(); ++i) { double measurement = static_cast(end_values_[i]) - static_cast(start_values_[i]); - ret.push_back({counters_.names()[i], measurement}); + measurements.push_back({counters_.names()[i], measurement}); } - return ret; + + return valid_read_; } private: PerfCounters counters_; + bool valid_read_ = true; PerfCounterValues start_values_; PerfCounterValues end_values_; }; @@ -169,4 +195,8 @@ } // namespace internal } // namespace benchmark +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + #endif // BENCHMARK_PERF_COUNTERS_H diff --git a/third-party/benchmark/src/perf_counters.cc b/third-party/benchmark/src/perf_counters.cc --- a/third-party/benchmark/src/perf_counters.cc +++ b/third-party/benchmark/src/perf_counters.cc @@ -15,6 +15,7 @@ #include "perf_counters.h" #include +#include #include #if defined HAVE_LIBPFM @@ -28,96 +29,215 @@ constexpr size_t PerfCounterValues::kMaxCounters; #if defined HAVE_LIBPFM + +size_t PerfCounterValues::Read(const std::vector& leaders) { + // Create a pointer for multiple reads + const size_t bufsize = values_.size() * sizeof(values_[0]); + char* ptr = reinterpret_cast(values_.data()); + size_t size = bufsize; + for (int lead : leaders) { + auto read_bytes = ::read(lead, ptr, size); + if (read_bytes >= ssize_t(sizeof(uint64_t))) { + // Actual data bytes are all bytes minus initial padding + std::size_t data_bytes = read_bytes - sizeof(uint64_t); + // This should be very cheap since it's in hot cache + std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes); + // Increment our counters + ptr += data_bytes; + size -= data_bytes; + } else { + int err = errno; + GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err + << " " << ::strerror(err) << "\n"; + return 0; + } + } + return (bufsize - size) / sizeof(uint64_t); +} + const bool PerfCounters::kSupported = true; bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; } +bool PerfCounters::IsCounterSupported(const std::string& name) { + perf_event_attr_t attr; + std::memset(&attr, 0, sizeof(attr)); + pfm_perf_encode_arg_t arg; + std::memset(&arg, 0, sizeof(arg)); + arg.attr = &attr; + const int mode = PFM_PLM3; // user mode only + int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT, + &arg); + return (ret == PFM_SUCCESS); +} + PerfCounters PerfCounters::Create( const std::vector& counter_names) { - if (counter_names.empty()) { - return NoCounters(); - } - if (counter_names.size() > PerfCounterValues::kMaxCounters) { - GetErrorLogInstance() - << counter_names.size() - << " counters were requested. The minimum is 1, the maximum is " - << PerfCounterValues::kMaxCounters << "\n"; - return NoCounters(); - } - std::vector counter_ids(counter_names.size()); + // Valid counters will populate these arrays but we start empty + std::vector valid_names; + std::vector counter_ids; + std::vector leader_ids; - const int mode = PFM_PLM3; // user mode only + // Resize to the maximum possible + valid_names.reserve(counter_names.size()); + counter_ids.reserve(counter_names.size()); + + const int kCounterMode = PFM_PLM3; // user mode only + + // Group leads will be assigned on demand. 
The idea is that once we cannot + // create a counter descriptor, the reason is that this group has maxed out + // so we set the group_id again to -1 and retry - giving the algorithm a + // chance to create a new group leader to hold the next set of counters. + int group_id = -1; + + // Loop through all performance counters for (size_t i = 0; i < counter_names.size(); ++i) { - const bool is_first = i == 0; - struct perf_event_attr attr {}; - attr.size = sizeof(attr); - const int group_id = !is_first ? counter_ids[0] : -1; + // we are about to push into the valid names vector + // check if we did not reach the maximum + if (valid_names.size() == PerfCounterValues::kMaxCounters) { + // Log a message if we maxed out and stop adding + GetErrorLogInstance() + << counter_names.size() << " counters were requested. The maximum is " + << PerfCounterValues::kMaxCounters << " and " << valid_names.size() + << " were already added. All remaining counters will be ignored\n"; + // stop the loop and return what we have already + break; + } + + // Check if this name is empty const auto& name = counter_names[i]; if (name.empty()) { - GetErrorLogInstance() << "A counter name was the empty string\n"; - return NoCounters(); + GetErrorLogInstance() + << "A performance counter name was the empty string\n"; + continue; } + + // Here first means first in group, ie the group leader + const bool is_first = (group_id < 0); + + // This struct will be populated by libpfm from the counter string + // and then fed into the syscall perf_event_open + struct perf_event_attr attr {}; + attr.size = sizeof(attr); + + // This is the input struct to libpfm. pfm_perf_encode_arg_t arg{}; arg.attr = &attr; - - const int pfm_get = - pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg); + const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode, + PFM_OS_PERF_EVENT, &arg); if (pfm_get != PFM_SUCCESS) { - GetErrorLogInstance() << "Unknown counter name: " << name << "\n"; - return NoCounters(); + GetErrorLogInstance() + << "Unknown performance counter name: " << name << "\n"; + continue; } - attr.disabled = is_first; - // Note: the man page for perf_event_create suggests inerit = true and + + // We then proceed to populate the remaining fields in our attribute struct + // Note: the man page for perf_event_create suggests inherit = true and // read_format = PERF_FORMAT_GROUP don't work together, but that's not the // case. + attr.disabled = is_first; attr.inherit = true; attr.pinned = is_first; attr.exclude_kernel = true; attr.exclude_user = false; attr.exclude_hv = true; - // Read all counters in one read. + + // Read all counters in a group in one read. attr.read_format = PERF_FORMAT_GROUP; int id = -1; - static constexpr size_t kNrOfSyscallRetries = 5; - // Retry syscall as it was interrupted often (b/64774091). - for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; - ++num_retries) { - id = perf_event_open(&attr, 0, -1, group_id, 0); - if (id >= 0 || errno != EINTR) { - break; + while (id < 0) { + static constexpr size_t kNrOfSyscallRetries = 5; + // Retry syscall as it was interrupted often (b/64774091). + for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; + ++num_retries) { + id = perf_event_open(&attr, 0, -1, group_id, 0); + if (id >= 0 || errno != EINTR) { + break; + } + } + if (id < 0) { + // If the file descriptor is negative we might have reached a limit + // in the current group. 
Set the group_id to -1 and retry
+        if (group_id >= 0) {
+          // Create a new group
+          group_id = -1;
+        } else {
+          // At this point we have already retried to set a new group id and
+          // failed. We then give up.
+          break;
+        }
+      }
+    }
+
+    // We failed to get a new file descriptor. We might have reached a hard
+    // hardware limit that cannot be resolved even with group multiplexing
     if (id < 0) {
-      GetErrorLogInstance()
-          << "Failed to get a file descriptor for " << name << "\n";
-      return NoCounters();
-    }
+      GetErrorLogInstance() << "***WARNING*** Failed to get a file descriptor "
+                               "for performance counter "
+                            << name << ". Ignoring\n";
-    counter_ids[i] = id;
+      // We give up on this counter but try to keep going
+      // as the others would be fine
+      continue;
+    }
+    if (group_id < 0) {
+      // This is a leader, store and assign it to the current file descriptor
+      leader_ids.push_back(id);
+      group_id = id;
+    }
+    // This is a valid counter, add it to our descriptor's list
+    counter_ids.push_back(id);
+    valid_names.push_back(name);
   }
-  if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
-    GetErrorLogInstance() << "Failed to start counters\n";
-    return NoCounters();
+
+  // Loop through all group leaders activating them
+  // There is another option of starting ALL counters in a process but
+  // that would be too far-reaching an intrusion. If the user is using PMCs
+  // by themselves then this would have a side effect on them. It is
+  // friendlier to loop through all groups individually.
+  for (int lead : leader_ids) {
+    if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
+      // This should never happen but if it does, we give up on the
+      // entire batch as recovery would be a mess.
+      GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
+                               "Clearing out all counters.\n";
+
+      // Close all performance counters
+      for (int id : counter_ids) {
+        ::close(id);
+      }
+
+      // Return an empty object so our internal state is still good and
+      // the process can continue normally without impact
+      return NoCounters();
+    }
   }
-  return PerfCounters(counter_names, std::move(counter_ids));
+  return PerfCounters(std::move(valid_names), std::move(counter_ids),
+                      std::move(leader_ids));
 }
 
-PerfCounters::~PerfCounters() {
+void PerfCounters::CloseCounters() const {
   if (counter_ids_.empty()) {
     return;
   }
-  ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE);
+  for (int lead : leader_ids_) {
+    ioctl(lead, PERF_EVENT_IOC_DISABLE);
+  }
   for (int fd : counter_ids_) {
     close(fd);
   }
 }
 
 #else  // defined HAVE_LIBPFM
+size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
+
 const bool PerfCounters::kSupported = false;
 
 bool PerfCounters::Initialize() { return false; }
 
+bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
+
 PerfCounters PerfCounters::Create(
     const std::vector<std::string>& counter_names) {
   if (!counter_names.empty()) {
@@ -126,7 +246,24 @@
   return NoCounters();
 }
 
-PerfCounters::~PerfCounters() = default;
+void PerfCounters::CloseCounters() const {}
 #endif  // defined HAVE_LIBPFM
+
+PerfCountersMeasurement::PerfCountersMeasurement(
+    const std::vector<std::string>& counter_names)
+    : start_values_(counter_names.size()), end_values_(counter_names.size()) {
+  counters_ = PerfCounters::Create(counter_names);
+}
+
+PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
+  if (this != &other) {
+    CloseCounters();
+
+    counter_ids_ = std::move(other.counter_ids_);
+    leader_ids_ = std::move(other.leader_ids_);
+    counter_names_ = std::move(other.counter_names_);
+  }
+  return *this;
+}
 }  // namespace internal
 }  // namespace
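Reviewer note on the grouped read that the new PerfCounterValues::Read() handles: with read_format = PERF_FORMAT_GROUP and no other format flags, each read() on a group leader returns one u64 holding the number of events in the group, followed by one u64 value per event. That leading word is the padding Read() strips per group before packing values contiguously. A stand-alone sketch of that unpacking, not taken from the library (UnpackGroupRead and its buffer are illustrative only):

#include <cstddef>
#include <cstdint>
#include <vector>

// Unpack one PERF_FORMAT_GROUP read: raw[0] is the event count, raw[1..nr]
// are the per-event values (no PERF_FORMAT_ID or timing words assumed).
std::vector<std::uint64_t> UnpackGroupRead(const std::uint64_t* raw,
                                           std::size_t raw_words) {
  std::vector<std::uint64_t> values;
  if (raw_words == 0) return values;
  const std::uint64_t nr = raw[0];
  for (std::uint64_t i = 0; i < nr && i + 1 < raw_words; ++i) {
    values.push_back(raw[i + 1]);  // skip the leading count word
  }
  return values;
}

If PERF_FORMAT_ID or the TOTAL_TIME_* flags were ever added to read_format, extra words would appear per read and the kPadding arithmetic above would need to grow with them.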
benchmark diff --git a/third-party/benchmark/src/re.h b/third-party/benchmark/src/re.h --- a/third-party/benchmark/src/re.h +++ b/third-party/benchmark/src/re.h @@ -33,7 +33,7 @@ // Prefer C regex libraries when compiling w/o exceptions so that we can // correctly report errors. #if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \ - defined(BENCHMARK_HAVE_STD_REGEX) && \ + defined(HAVE_STD_REGEX) && \ (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX)) #undef HAVE_STD_REGEX #endif diff --git a/third-party/benchmark/src/reporter.cc b/third-party/benchmark/src/reporter.cc --- a/third-party/benchmark/src/reporter.cc +++ b/third-party/benchmark/src/reporter.cc @@ -25,9 +25,6 @@ #include "timers.h" namespace benchmark { -namespace internal { -extern std::map *global_context; -} BenchmarkReporter::BenchmarkReporter() : output_stream_(&std::cout), error_stream_(&std::cerr) {} @@ -39,7 +36,11 @@ BM_CHECK(out) << "cannot be null"; auto &Out = *out; +#ifndef BENCHMARK_OS_QURT + // Date/time information is not available on QuRT. + // Attempting to get it via this call cause the binary to crash. Out << LocalDateTimeString() << "\n"; +#endif if (context.executable_name) Out << "Running " << context.executable_name << "\n"; @@ -67,8 +68,11 @@ Out << "\n"; } - if (internal::global_context != nullptr) { - for (const auto &kv : *internal::global_context) { + std::map *global_context = + internal::GetGlobalContext(); + + if (global_context != nullptr) { + for (const auto &kv : *global_context) { Out << kv.first << ": " << kv.second << "\n"; } } diff --git a/third-party/benchmark/src/sleep.h b/third-party/benchmark/src/sleep.h deleted file mode 100644 --- a/third-party/benchmark/src/sleep.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef BENCHMARK_SLEEP_H_ -#define BENCHMARK_SLEEP_H_ - -namespace benchmark { -const int kNumMillisPerSecond = 1000; -const int kNumMicrosPerMilli = 1000; -const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000; -const int kNumNanosPerMicro = 1000; -const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond; - -void SleepForMilliseconds(int milliseconds); -void SleepForSeconds(double seconds); -} // end namespace benchmark - -#endif // BENCHMARK_SLEEP_H_ diff --git a/third-party/benchmark/src/sleep.cc b/third-party/benchmark/src/sleep.cc deleted file mode 100644 --- a/third-party/benchmark/src/sleep.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "sleep.h" - -#include -#include -#include - -#include "internal_macros.h" - -#ifdef BENCHMARK_OS_WINDOWS -#include -#endif - -#ifdef BENCHMARK_OS_ZOS -#include -#endif - -namespace benchmark { -#ifdef BENCHMARK_OS_WINDOWS -// Window's Sleep takes milliseconds argument. 
-void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } -void SleepForSeconds(double seconds) { - SleepForMilliseconds(static_cast(kNumMillisPerSecond * seconds)); -} -#else // BENCHMARK_OS_WINDOWS -void SleepForMicroseconds(int microseconds) { -#ifdef BENCHMARK_OS_ZOS - // z/OS does not support nanosleep. Instead call sleep() and then usleep() to - // sleep for the remaining microseconds because usleep() will fail if its - // argument is greater than 1000000. - div_t sleepTime = div(microseconds, kNumMicrosPerSecond); - int seconds = sleepTime.quot; - while (seconds != 0) seconds = sleep(seconds); - while (usleep(sleepTime.rem) == -1 && errno == EINTR) - ; -#else - struct timespec sleep_time; - sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; - sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; - while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) - ; // Ignore signals and wait for the full interval to elapse. -#endif -} - -void SleepForMilliseconds(int milliseconds) { - SleepForMicroseconds(milliseconds * kNumMicrosPerMilli); -} - -void SleepForSeconds(double seconds) { - SleepForMicroseconds(static_cast(seconds * kNumMicrosPerSecond)); -} -#endif // BENCHMARK_OS_WINDOWS -} // end namespace benchmark diff --git a/third-party/benchmark/src/statistics.h b/third-party/benchmark/src/statistics.h --- a/third-party/benchmark/src/statistics.h +++ b/third-party/benchmark/src/statistics.h @@ -22,15 +22,21 @@ namespace benchmark { -// Return a vector containing the mean, median and standard devation information -// (and any user-specified info) for the specified list of reports. If 'reports' -// contains less than two non-errored runs an empty vector is returned +// Return a vector containing the mean, median and standard deviation +// information (and any user-specified info) for the specified list of reports. +// If 'reports' contains less than two non-errored runs an empty vector is +// returned +BENCHMARK_EXPORT std::vector ComputeStats( const std::vector& reports); +BENCHMARK_EXPORT double StatisticsMean(const std::vector& v); +BENCHMARK_EXPORT double StatisticsMedian(const std::vector& v); +BENCHMARK_EXPORT double StatisticsStdDev(const std::vector& v); +BENCHMARK_EXPORT double StatisticsCV(const std::vector& v); } // end namespace benchmark diff --git a/third-party/benchmark/src/statistics.cc b/third-party/benchmark/src/statistics.cc --- a/third-party/benchmark/src/statistics.cc +++ b/third-party/benchmark/src/statistics.cc @@ -89,9 +89,8 @@ typedef BenchmarkReporter::Run Run; std::vector results; - auto error_count = - std::count_if(reports.begin(), reports.end(), - [](Run const& run) { return run.error_occurred; }); + auto error_count = std::count_if(reports.begin(), reports.end(), + [](Run const& run) { return run.skipped; }); if (reports.size() - error_count < 2) { // We don't report aggregated data if there was a single run. 
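Since several hunks here and in json_reporter.cc move from run.error_occurred to the broader run.skipped state, a minimal sketch of how a skipped-with-error run is produced may help when reviewing the statistics change above. The condition and benchmark name are illustrative; SkipWithError() is the existing public API, and runs skipped this way are exactly the ones excluded from the aggregates:

#include <benchmark/benchmark.h>

static void BM_MaybeSkipped(benchmark::State& state) {
  bool resource_available = false;  // stand-in condition for illustration
  if (!resource_available) {
    // Marks the run as skipped-with-error; the JSON reporter emits
    // "error_occurred": true plus "error_message", and ComputeStats()
    // leaves the run out of mean/median/stddev/cv.
    state.SkipWithError("resource not available");
  }
  for (auto _ : state) {
    // Not entered once the run has been skipped.
    benchmark::DoNotOptimize(resource_available);
  }
}
BENCHMARK(BM_MaybeSkipped);
BENCHMARK_MAIN();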
@@ -118,11 +117,13 @@ for (auto const& cnt : r.counters) { auto it = counter_stats.find(cnt.first); if (it == counter_stats.end()) { - counter_stats.insert({cnt.first, {cnt.second, std::vector{}}}); - it = counter_stats.find(cnt.first); + it = counter_stats + .emplace(cnt.first, + CounterStat{cnt.second, std::vector{}}) + .first; it->second.s.reserve(reports.size()); } else { - BM_CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); + BM_CHECK_EQ(it->second.c.flags, cnt.second.flags); } } } @@ -131,7 +132,7 @@ for (Run const& run : reports) { BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); BM_CHECK_EQ(run_iterations, run.iterations); - if (run.error_occurred) continue; + if (run.skipped) continue; real_accumulated_time_stat.emplace_back(run.real_accumulated_time); cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time); // user counters diff --git a/third-party/benchmark/src/string_util.h b/third-party/benchmark/src/string_util.h --- a/third-party/benchmark/src/string_util.h +++ b/third-party/benchmark/src/string_util.h @@ -4,15 +4,19 @@ #include #include #include +#include +#include "benchmark/benchmark.h" +#include "benchmark/export.h" +#include "check.h" #include "internal_macros.h" namespace benchmark { -void AppendHumanReadable(int n, std::string* str); - -std::string HumanReadableNumber(double n, double one_k = 1024.0); +BENCHMARK_EXPORT +std::string HumanReadableNumber(double n, Counter::OneK one_k); +BENCHMARK_EXPORT #if defined(__MINGW32__) __attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2))) #elif defined(__GNUC__) @@ -38,6 +42,7 @@ return ss.str(); } +BENCHMARK_EXPORT std::vector StrSplit(const std::string& str, char delim); // Disable lint checking for this block since it re-implements C functions. diff --git a/third-party/benchmark/src/string_util.cc b/third-party/benchmark/src/string_util.cc --- a/third-party/benchmark/src/string_util.cc +++ b/third-party/benchmark/src/string_util.cc @@ -11,16 +11,17 @@ #include #include "arraysize.h" +#include "benchmark/benchmark.h" namespace benchmark { namespace { - // kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. -const char kBigSIUnits[] = "kMGTPEZY"; +const char* const kBigSIUnits[] = {"k", "M", "G", "T", "P", "E", "Z", "Y"}; // Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. -const char kBigIECUnits[] = "KMGTPEZY"; +const char* const kBigIECUnits[] = {"Ki", "Mi", "Gi", "Ti", + "Pi", "Ei", "Zi", "Yi"}; // milli, micro, nano, pico, femto, atto, zepto, yocto. -const char kSmallSIUnits[] = "munpfazy"; +const char* const kSmallSIUnits[] = {"m", "u", "n", "p", "f", "a", "z", "y"}; // We require that all three arrays have the same size. static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), @@ -30,9 +31,8 @@ static const int64_t kUnitsSize = arraysize(kBigSIUnits); -void ToExponentAndMantissa(double val, double thresh, int precision, - double one_k, std::string* mantissa, - int64_t* exponent) { +void ToExponentAndMantissa(double val, int precision, double one_k, + std::string* mantissa, int64_t* exponent) { std::stringstream mantissa_stream; if (val < 0) { @@ -43,8 +43,8 @@ // Adjust threshold so that it never excludes things which can't be rendered // in 'precision' digits. 
const double adjusted_threshold = - std::max(thresh, 1.0 / std::pow(10.0, precision)); - const double big_threshold = adjusted_threshold * one_k; + std::max(1.0, 1.0 / std::pow(10.0, precision)); + const double big_threshold = (adjusted_threshold * one_k) - 1; const double small_threshold = adjusted_threshold; // Values in ]simple_threshold,small_threshold[ will be printed as-is const double simple_threshold = 0.01; @@ -92,37 +92,20 @@ const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1); if (index >= kUnitsSize) return ""; - const char* array = + const char* const* array = (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); - if (iec) - return array[index] + std::string("i"); - else - return std::string(1, array[index]); + + return std::string(array[index]); } -std::string ToBinaryStringFullySpecified(double value, double threshold, - int precision, double one_k = 1024.0) { +std::string ToBinaryStringFullySpecified(double value, int precision, + Counter::OneK one_k) { std::string mantissa; int64_t exponent; - ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa, + ToExponentAndMantissa(value, precision, + one_k == Counter::kIs1024 ? 1024.0 : 1000.0, &mantissa, &exponent); - return mantissa + ExponentToPrefix(exponent, false); -} - -} // end namespace - -void AppendHumanReadable(int n, std::string* str) { - std::stringstream ss; - // Round down to the nearest SI prefix. - ss << ToBinaryStringFullySpecified(n, 1.0, 0); - *str += ss.str(); -} - -std::string HumanReadableNumber(double n, double one_k) { - // 1.1 means that figures up to 1.1k should be shown with the next unit down; - // this softens edge effects. - // 1 means that we should show one decimal place of precision. - return ToBinaryStringFullySpecified(n, 1.1, 1, one_k); + return mantissa + ExponentToPrefix(exponent, one_k == Counter::kIs1024); } std::string StrFormatImp(const char* msg, va_list args) { @@ -133,21 +116,21 @@ // TODO(ericwf): use std::array for first attempt to avoid one memory // allocation guess what the size might be std::array local_buff; - std::size_t size = local_buff.size(); + // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk - auto ret = vsnprintf(local_buff.data(), size, msg, args_cp); + auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp); va_end(args_cp); // handle empty expansion if (ret == 0) return std::string{}; - if (static_cast(ret) < size) + if (static_cast(ret) < local_buff.size()) return std::string(local_buff.data()); // we did not provide a long enough buffer on our first attempt. // add 1 to size to account for null-byte in size cast to prevent overflow - size = static_cast(ret) + 1; + std::size_t size = static_cast(ret) + 1; auto buff_ptr = std::unique_ptr(new char[size]); // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk @@ -155,6 +138,12 @@ return std::string(buff_ptr.get()); } +} // end namespace + +std::string HumanReadableNumber(double n, Counter::OneK one_k) { + return ToBinaryStringFullySpecified(n, 1, one_k); +} + std::string StrFormat(const char* format, ...) 
{ va_list args; va_start(args, format); diff --git a/third-party/benchmark/src/sysinfo.cc b/third-party/benchmark/src/sysinfo.cc --- a/third-party/benchmark/src/sysinfo.cc +++ b/third-party/benchmark/src/sysinfo.cc @@ -30,7 +30,7 @@ #include #else #include -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -45,10 +45,17 @@ #endif #if defined(BENCHMARK_OS_SOLARIS) #include +#include #endif #if defined(BENCHMARK_OS_QNX) #include #endif +#if defined(BENCHMARK_OS_QURT) +#include +#endif +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) +#include +#endif #include #include @@ -65,15 +72,17 @@ #include #include #include +#include #include #include +#include "benchmark/benchmark.h" #include "check.h" #include "cycleclock.h" #include "internal_macros.h" #include "log.h" -#include "sleep.h" #include "string_util.h" +#include "timers.h" namespace benchmark { namespace { @@ -98,67 +107,59 @@ /// `sysctl` with the result type it's to be interpreted as. struct ValueUnion { union DataT { - uint32_t uint32_value; - uint64_t uint64_value; + int32_t int32_value; + int64_t int64_value; // For correct aliasing of union members from bytes. char bytes[8]; }; using DataPtr = std::unique_ptr; // The size of the data union member + its trailing array size. - size_t Size; - DataPtr Buff; + std::size_t size; + DataPtr buff; public: - ValueUnion() : Size(0), Buff(nullptr, &std::free) {} + ValueUnion() : size(0), buff(nullptr, &std::free) {} - explicit ValueUnion(size_t BuffSize) - : Size(sizeof(DataT) + BuffSize), - Buff(::new (std::malloc(Size)) DataT(), &std::free) {} + explicit ValueUnion(std::size_t buff_size) + : size(sizeof(DataT) + buff_size), + buff(::new (std::malloc(size)) DataT(), &std::free) {} ValueUnion(ValueUnion&& other) = default; - explicit operator bool() const { return bool(Buff); } + explicit operator bool() const { return bool(buff); } - char* data() const { return Buff->bytes; } + char* data() const { return buff->bytes; } std::string GetAsString() const { return std::string(data()); } int64_t GetAsInteger() const { - if (Size == sizeof(Buff->uint32_value)) - return static_cast(Buff->uint32_value); - else if (Size == sizeof(Buff->uint64_value)) - return static_cast(Buff->uint64_value); - BENCHMARK_UNREACHABLE(); - } - - uint64_t GetAsUnsigned() const { - if (Size == sizeof(Buff->uint32_value)) - return Buff->uint32_value; - else if (Size == sizeof(Buff->uint64_value)) - return Buff->uint64_value; + if (size == sizeof(buff->int32_value)) + return buff->int32_value; + else if (size == sizeof(buff->int64_value)) + return buff->int64_value; BENCHMARK_UNREACHABLE(); } template std::array GetAsArray() { - const int ArrSize = sizeof(T) * N; - BM_CHECK_LE(ArrSize, Size); - std::array Arr; - std::memcpy(Arr.data(), data(), ArrSize); - return Arr; + const int arr_size = sizeof(T) * N; + BM_CHECK_LE(arr_size, size); + std::array arr; + std::memcpy(arr.data(), data(), arr_size); + return arr; } }; -ValueUnion GetSysctlImp(std::string const& Name) { +ValueUnion GetSysctlImp(std::string const& name) { #if defined BENCHMARK_OS_OPENBSD int mib[2]; mib[0] = CTL_HW; - if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")) { + if ((name == "hw.ncpu") || (name == "hw.cpuspeed")) { ValueUnion buff(sizeof(int)); - if (Name == "hw.ncpu") { + if (name == "hw.ncpu") { mib[1] = HW_NCPU; } else { mib[1] = HW_CPUSPEED; @@ -171,41 +172,41 @@ } return ValueUnion(); #else - size_t CurBuffSize = 0; - if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, 
nullptr, 0) == -1) + std::size_t cur_buff_size = 0; + if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1) return ValueUnion(); - ValueUnion buff(CurBuffSize); - if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) + ValueUnion buff(cur_buff_size); + if (sysctlbyname(name.c_str(), buff.data(), &buff.size, nullptr, 0) == 0) return buff; return ValueUnion(); #endif } BENCHMARK_MAYBE_UNUSED -bool GetSysctl(std::string const& Name, std::string* Out) { - Out->clear(); - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - Out->assign(Buff.data()); +bool GetSysctl(std::string const& name, std::string* out) { + out->clear(); + auto buff = GetSysctlImp(name); + if (!buff) return false; + out->assign(buff.data()); return true; } template ::value>::type> -bool GetSysctl(std::string const& Name, Tp* Out) { - *Out = 0; - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - *Out = static_cast(Buff.GetAsUnsigned()); +bool GetSysctl(std::string const& name, Tp* out) { + *out = 0; + auto buff = GetSysctlImp(name); + if (!buff) return false; + *out = static_cast(buff.GetAsInteger()); return true; } template -bool GetSysctl(std::string const& Name, std::array* Out) { - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - *Out = Buff.GetAsArray(); +bool GetSysctl(std::string const& name, std::array* out) { + auto buff = GetSysctlImp(name); + if (!buff) return false; + *out = buff.GetAsArray(); return true; } #endif @@ -241,21 +242,21 @@ #endif } -int CountSetBitsInCPUMap(std::string Val) { - auto CountBits = [](std::string Part) { +int CountSetBitsInCPUMap(std::string val) { + auto CountBits = [](std::string part) { using CPUMask = std::bitset; - Part = "0x" + Part; - CPUMask Mask(benchmark::stoul(Part, nullptr, 16)); - return static_cast(Mask.count()); + part = "0x" + part; + CPUMask mask(benchmark::stoul(part, nullptr, 16)); + return static_cast(mask.count()); }; - size_t Pos; + std::size_t pos; int total = 0; - while ((Pos = Val.find(',')) != std::string::npos) { - total += CountBits(Val.substr(0, Pos)); - Val = Val.substr(Pos + 1); + while ((pos = val.find(',')) != std::string::npos) { + total += CountBits(val.substr(0, pos)); + val = val.substr(pos + 1); } - if (!Val.empty()) { - total += CountBits(Val); + if (!val.empty()) { + total += CountBits(val); } return total; } @@ -264,16 +265,16 @@ std::vector GetCacheSizesFromKVFS() { std::vector res; std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; - int Idx = 0; + int idx = 0; while (true) { CPUInfo::CacheInfo info; - std::string FPath = StrCat(dir, "index", Idx++, "/"); - std::ifstream f(StrCat(FPath, "size").c_str()); + std::string fpath = StrCat(dir, "index", idx++, "/"); + std::ifstream f(StrCat(fpath, "size").c_str()); if (!f.is_open()) break; std::string suffix; f >> info.size; if (f.fail()) - PrintErrorAndDie("Failed while reading file '", FPath, "size'"); + PrintErrorAndDie("Failed while reading file '", fpath, "size'"); if (f.good()) { f >> suffix; if (f.bad()) @@ -284,13 +285,13 @@ else if (suffix == "K") info.size *= 1024; } - if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) - PrintErrorAndDie("Failed to read from file ", FPath, "type"); - if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) - PrintErrorAndDie("Failed to read from file ", FPath, "level"); + if (!ReadFromFile(StrCat(fpath, "type"), &info.type)) + PrintErrorAndDie("Failed to read from file ", fpath, "type"); + if (!ReadFromFile(StrCat(fpath, "level"), &info.level)) + PrintErrorAndDie("Failed to read 
from file ", fpath, "level"); std::string map_str; - if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str)) - PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map"); + if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str)) + PrintErrorAndDie("Failed to read from file ", fpath, "shared_cpu_map"); info.num_sharing = CountSetBitsInCPUMap(map_str); res.push_back(info); } @@ -301,26 +302,26 @@ #ifdef BENCHMARK_OS_MACOSX std::vector GetCacheSizesMacOSX() { std::vector res; - std::array CacheCounts{{0, 0, 0, 0}}; - GetSysctl("hw.cacheconfig", &CacheCounts); + std::array cache_counts{{0, 0, 0, 0}}; + GetSysctl("hw.cacheconfig", &cache_counts); struct { std::string name; std::string type; int level; - uint64_t num_sharing; - } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]}, - {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]}, - {"hw.l2cachesize", "Unified", 2, CacheCounts[2]}, - {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}}; - for (auto& C : Cases) { + int num_sharing; + } cases[] = {{"hw.l1dcachesize", "Data", 1, cache_counts[1]}, + {"hw.l1icachesize", "Instruction", 1, cache_counts[1]}, + {"hw.l2cachesize", "Unified", 2, cache_counts[2]}, + {"hw.l3cachesize", "Unified", 3, cache_counts[3]}}; + for (auto& c : cases) { int val; - if (!GetSysctl(C.name, &val)) continue; + if (!GetSysctl(c.name, &val)) continue; CPUInfo::CacheInfo info; - info.type = C.type; - info.level = C.level; + info.type = c.type; + info.level = c.level; info.size = val; - info.num_sharing = static_cast(C.num_sharing); + info.num_sharing = c.num_sharing; res.push_back(std::move(info)); } return res; @@ -334,7 +335,7 @@ using UPtr = std::unique_ptr; GetLogicalProcessorInformation(nullptr, &buffer_size); - UPtr buff((PInfo*)malloc(buffer_size), &std::free); + UPtr buff(static_cast(std::malloc(buffer_size)), &std::free); if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ", GetLastError()); @@ -345,16 +346,16 @@ for (; it != end; ++it) { if (it->Relationship != RelationCache) continue; using BitSet = std::bitset; - BitSet B(it->ProcessorMask); + BitSet b(it->ProcessorMask); // To prevent duplicates, only consider caches where CPU 0 is specified - if (!B.test(0)) continue; - CInfo* Cache = &it->Cache; + if (!b.test(0)) continue; + const CInfo& cache = it->Cache; CPUInfo::CacheInfo C; - C.num_sharing = static_cast(B.count()); - C.level = Cache->Level; - C.size = Cache->Size; + C.num_sharing = static_cast(b.count()); + C.level = cache.Level; + C.size = cache.Size; C.type = "Unknown"; - switch (Cache->Type) { + switch (cache.Type) { case CacheUnified: C.type = "Unified"; break; @@ -417,6 +418,8 @@ return GetCacheSizesWindows(); #elif defined(BENCHMARK_OS_QNX) return GetCacheSizesQNX(); +#elif defined(BENCHMARK_OS_QURT) + return std::vector(); #else return GetCacheSizesFromKVFS(); #endif @@ -425,23 +428,32 @@ std::string GetSystemName() { #if defined(BENCHMARK_OS_WINDOWS) std::string str; - const unsigned COUNT = MAX_COMPUTERNAME_LENGTH + 1; + static constexpr int COUNT = MAX_COMPUTERNAME_LENGTH + 1; TCHAR hostname[COUNT] = {'\0'}; DWORD DWCOUNT = COUNT; if (!GetComputerName(hostname, &DWCOUNT)) return std::string(""); #ifndef UNICODE str = std::string(hostname, DWCOUNT); #else - // Using wstring_convert, Is deprecated in C++17 - using convert_type = std::codecvt_utf8; - std::wstring_convert converter; - std::wstring wStr(hostname, DWCOUNT); - str = converter.to_bytes(wStr); + // `WideCharToMultiByte` 
returns `0` when conversion fails. + int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, + DWCOUNT, NULL, 0, NULL, NULL); + str.resize(len); + WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0], + str.size(), NULL, NULL); #endif return str; -#else // defined(BENCHMARK_OS_WINDOWS) +#elif defined(BENCHMARK_OS_QURT) + std::string str = "Hexagon DSP"; + qurt_arch_version_t arch_version_struct; + if (qurt_sysenv_get_arch_version(&arch_version_struct) == QURT_EOK) { + str += " v"; + str += std::to_string(arch_version_struct.arch_version); + } + return str; +#else #ifndef HOST_NAME_MAX -#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined +#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined #define HOST_NAME_MAX 64 #elif defined(BENCHMARK_OS_NACL) #define HOST_NAME_MAX 64 @@ -449,6 +461,8 @@ #define HOST_NAME_MAX 154 #elif defined(BENCHMARK_OS_RTEMS) #define HOST_NAME_MAX 256 +#elif defined(BENCHMARK_OS_SOLARIS) +#define HOST_NAME_MAX MAXHOSTNAMELEN #else #pragma message("HOST_NAME_MAX not defined. using 64") #define HOST_NAME_MAX 64 @@ -463,8 +477,8 @@ int GetNumCPUs() { #ifdef BENCHMARK_HAS_SYSCTL - int NumCPU = -1; - if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU; + int num_cpu = -1; + if (GetSysctl("hw.ncpu", &num_cpu)) return num_cpu; fprintf(stderr, "Err: %s\n", strerror(errno)); std::exit(EXIT_FAILURE); #elif defined(BENCHMARK_OS_WINDOWS) @@ -478,17 +492,23 @@ // group #elif defined(BENCHMARK_OS_SOLARIS) // Returns -1 in case of a failure. - int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); - if (NumCPU < 0) { + long num_cpu = sysconf(_SC_NPROCESSORS_ONLN); + if (num_cpu < 0) { fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", strerror(errno)); } - return NumCPU; + return (int)num_cpu; #elif defined(BENCHMARK_OS_QNX) return static_cast(_syspage_ptr->num_cpu); +#elif defined(BENCHMARK_OS_QURT) + qurt_sysenv_max_hthreads_t hardware_threads; + if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) { + hardware_threads.max_hthreads = 1; + } + return hardware_threads.max_hthreads; #else - int NumCPUs = 0; - int MaxID = -1; + int num_cpus = 0; + int max_id = -1; std::ifstream f("/proc/cpuinfo"); if (!f.is_open()) { std::cerr << "failed to open /proc/cpuinfo\n"; @@ -498,21 +518,21 @@ std::string ln; while (std::getline(f, ln)) { if (ln.empty()) continue; - size_t SplitIdx = ln.find(':'); + std::size_t split_idx = ln.find(':'); std::string value; #if defined(__s390__) // s390 has another format in /proc/cpuinfo // it needs to be parsed differently - if (SplitIdx != std::string::npos) - value = ln.substr(Key.size() + 1, SplitIdx - Key.size() - 1); + if (split_idx != std::string::npos) + value = ln.substr(Key.size() + 1, split_idx - Key.size() - 1); #else - if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + if (split_idx != std::string::npos) value = ln.substr(split_idx + 1); #endif if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { - NumCPUs++; + num_cpus++; if (!value.empty()) { - int CurID = benchmark::stoi(value); - MaxID = std::max(CurID, MaxID); + const int cur_id = benchmark::stoi(value); + max_id = std::max(cur_id, max_id); } } } @@ -526,16 +546,90 @@ } f.close(); - if ((MaxID + 1) != NumCPUs) { + if ((max_id + 1) != num_cpus) { fprintf(stderr, "CPU ID assignments in /proc/cpuinfo seem messed up." 
" This is usually caused by a bad BIOS.\n"); } - return NumCPUs; + return num_cpus; #endif BENCHMARK_UNREACHABLE(); } +class ThreadAffinityGuard final { + public: + ThreadAffinityGuard() : reset_affinity(SetAffinity()) { + if (!reset_affinity) + std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU " + "frequency may be incorrect." + << std::endl; + } + + ~ThreadAffinityGuard() { + if (!reset_affinity) return; + +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + int ret = pthread_setaffinity_np(self, sizeof(previous_affinity), + &previous_affinity); + if (ret == 0) return; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity); + if (ret != 0) return; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + PrintErrorAndDie("Failed to reset thread affinity"); + } + + ThreadAffinityGuard(ThreadAffinityGuard&&) = delete; + ThreadAffinityGuard(const ThreadAffinityGuard&) = delete; + ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete; + ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete; + + private: + bool SetAffinity() { +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + int ret; + self = pthread_self(); + ret = pthread_getaffinity_np(self, sizeof(previous_affinity), + &previous_affinity); + if (ret != 0) return false; + + cpu_set_t affinity; + memcpy(&affinity, &previous_affinity, sizeof(affinity)); + + bool is_first_cpu = true; + + for (int i = 0; i < CPU_SETSIZE; ++i) + if (CPU_ISSET(i, &affinity)) { + if (is_first_cpu) + is_first_cpu = false; + else + CPU_CLR(i, &affinity); + } + + if (is_first_cpu) return false; + + ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity); + return ret == 0; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + self = GetCurrentThread(); + DWORD_PTR mask = static_cast(1) << GetCurrentProcessorNumber(); + previous_affinity = SetThreadAffinityMask(self, mask); + return previous_affinity != 0; +#else + return false; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + } + +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + pthread_t self; + cpu_set_t previous_affinity; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + HANDLE self; + DWORD_PTR previous_affinity; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + bool reset_affinity; +}; + double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { // Currently, scaling is only used on linux path here, // suppress diagnostics about it being unused on other paths. @@ -576,7 +670,7 @@ return error_value; } - auto startsWithKey = [](std::string const& Value, std::string const& Key) { + auto StartsWithKey = [](std::string const& Value, std::string const& Key) { if (Key.size() > Value.size()) return false; auto Cmp = [&](char X, char Y) { return std::tolower(X) == std::tolower(Y); @@ -587,18 +681,18 @@ std::string ln; while (std::getline(f, ln)) { if (ln.empty()) continue; - size_t SplitIdx = ln.find(':'); + std::size_t split_idx = ln.find(':'); std::string value; - if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + if (split_idx != std::string::npos) value = ln.substr(split_idx + 1); // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only // accept positive values. Some environments (virtual machines) report zero, // which would cause infinite looping in WallTime_Init. 
- if (startsWithKey(ln, "cpu MHz")) { + if (StartsWithKey(ln, "cpu MHz")) { if (!value.empty()) { double cycles_per_second = benchmark::stod(value) * 1000000.0; if (cycles_per_second > 0) return cycles_per_second; } - } else if (startsWithKey(ln, "bogomips")) { + } else if (StartsWithKey(ln, "bogomips")) { if (!value.empty()) { bogo_clock = benchmark::stod(value) * 1000000.0; if (bogo_clock < 0.0) bogo_clock = error_value; @@ -620,7 +714,7 @@ if (bogo_clock >= 0.0) return bogo_clock; #elif defined BENCHMARK_HAS_SYSCTL - constexpr auto* FreqStr = + constexpr auto* freqStr = #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) "machdep.tsc_freq"; #elif defined BENCHMARK_OS_OPENBSD @@ -632,14 +726,17 @@ #endif unsigned long long hz = 0; #if defined BENCHMARK_OS_OPENBSD - if (GetSysctl(FreqStr, &hz)) return hz * 1000000; + if (GetSysctl(freqStr, &hz)) return hz * 1000000; #else - if (GetSysctl(FreqStr, &hz)) return hz; + if (GetSysctl(freqStr, &hz)) return hz; #endif fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", - FreqStr, strerror(errno)); + freqStr, strerror(errno)); + fprintf(stderr, + "This does not affect benchmark measurements, only the " + "metadata output.\n"); -#elif defined BENCHMARK_OS_WINDOWS +#elif defined BENCHMARK_OS_WINDOWS_WIN32 // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. DWORD data, data_size = sizeof(data); @@ -648,15 +745,16 @@ SHGetValueA(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "~MHz", nullptr, &data, &data_size))) - return static_cast((int64_t)data * - (int64_t)(1000 * 1000)); // was mhz + return static_cast(static_cast(data) * + static_cast(1000 * 1000)); // was mhz #elif defined(BENCHMARK_OS_SOLARIS) kstat_ctl_t* kc = kstat_open(); if (!kc) { std::cerr << "failed to open /dev/kstat\n"; return -1; } - kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); + kstat_t* ksp = kstat_lookup(kc, const_cast("cpu_info"), -1, + const_cast("cpu_info0")); if (!ksp) { std::cerr << "failed to lookup in /dev/kstat\n"; return -1; @@ -665,8 +763,8 @@ std::cerr << "failed to read from /dev/kstat\n"; return -1; } - kstat_named_t* knp = - (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); + kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup( + ksp, const_cast("current_clock_Hz")); if (!knp) { std::cerr << "failed to lookup data in /dev/kstat\n"; return -1; @@ -682,12 +780,44 @@ #elif defined(BENCHMARK_OS_QNX) return static_cast((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * (int64_t)(1000 * 1000)); +#elif defined(BENCHMARK_OS_QURT) + // QuRT doesn't provide any API to query Hexagon frequency. + return 1000000000; #endif // If we've fallen through, attempt to roughly estimate the CPU clock rate. - const int estimate_time_ms = 1000; + + // Make sure to use the same cycle counter when starting and stopping the + // cycle timer. We just pin the current thread to a cpu in the previous + // affinity set. + ThreadAffinityGuard affinity_guard; + + static constexpr double estimate_time_s = 1.0; + const double start_time = ChronoClockNow(); const auto start_ticks = cycleclock::Now(); - SleepForMilliseconds(estimate_time_ms); - return static_cast(cycleclock::Now() - start_ticks); + + // Impose load instead of calling sleep() to make sure the cycle counter + // works. 
+ using PRNG = std::minstd_rand; + using Result = PRNG::result_type; + PRNG rng(static_cast(start_ticks)); + + Result state = 0; + + do { + static constexpr size_t batch_size = 10000; + rng.discard(batch_size); + state += rng(); + + } while (ChronoClockNow() - start_time < estimate_time_s); + + DoNotOptimize(state); + + const auto end_ticks = cycleclock::Now(); + const double end_time = ChronoClockNow(); + + return static_cast(end_ticks - start_ticks) / (end_time - start_time); + // Reset the affinity of current thread when the lifetime of affinity_guard + // ends. } std::vector GetLoadAvg() { @@ -695,7 +825,7 @@ defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ !defined(__ANDROID__) - constexpr int kMaxSamples = 3; + static constexpr int kMaxSamples = 3; std::vector res(kMaxSamples, 0.0); const int nelem = getloadavg(res.data(), kMaxSamples); if (nelem < 1) { diff --git a/third-party/benchmark/src/thread_manager.h b/third-party/benchmark/src/thread_manager.h --- a/third-party/benchmark/src/thread_manager.h +++ b/third-party/benchmark/src/thread_manager.h @@ -43,8 +43,8 @@ double manual_time_used = 0; int64_t complexity_n = 0; std::string report_label_; - std::string error_message_; - bool has_error_ = false; + std::string skip_message_; + internal::Skipped skipped_ = internal::NotSkipped; UserCounters counters; }; GUARDED_BY(GetBenchmarkMutex()) Result results; diff --git a/third-party/benchmark/src/timers.cc b/third-party/benchmark/src/timers.cc --- a/third-party/benchmark/src/timers.cc +++ b/third-party/benchmark/src/timers.cc @@ -23,7 +23,7 @@ #include #else #include -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -38,6 +38,9 @@ #include #include #endif +#if defined(BENCHMARK_OS_QURT) +#include +#endif #endif #ifdef BENCHMARK_OS_EMSCRIPTEN @@ -56,7 +59,6 @@ #include "check.h" #include "log.h" -#include "sleep.h" #include "string_util.h" namespace benchmark { @@ -65,6 +67,9 @@ #if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wunused-function" #endif +#if defined(__NVCOMPILER) +#pragma diag_suppress declared_but_not_referenced +#endif namespace { #if defined(BENCHMARK_OS_WINDOWS) @@ -79,7 +84,7 @@ static_cast(user.QuadPart)) * 1e-7; } -#elif !defined(BENCHMARK_OS_FUCHSIA) +#elif !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) double MakeTime(struct rusage const& ru) { return (static_cast(ru.ru_utime.tv_sec) + static_cast(ru.ru_utime.tv_usec) * 1e-6 + @@ -119,11 +124,15 @@ &user_time)) return MakeTime(kernel_time, user_time); DiagnoseAndExit("GetProccessTimes() failed"); +#elif defined(BENCHMARK_OS_QURT) + return static_cast( + qurt_timer_timetick_to_us(qurt_timer_get_ticks())) * + 1.0e-6; #elif defined(BENCHMARK_OS_EMSCRIPTEN) // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten. // Use Emscripten-specific API. Reported CPU time would be exactly the // same as total time, but this is ok because there aren't long-latency - // syncronous system calls in Emscripten. + // synchronous system calls in Emscripten. return emscripten_get_now() * 1e-3; #elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. 
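For context on the sysinfo.cc fallback above that drops SleepForMilliseconds(): the estimate simply spins on cheap PRNG work for a fixed wall-clock interval and divides the elapsed cycle count by the elapsed time. A rough, self-contained illustration of the idea follows; it assumes an x86 target with GCC or Clang and substitutes __rdtsc() and std::chrono for the library's cycleclock::Now() and ChronoClockNow(), and it omits the thread-affinity pinning that ThreadAffinityGuard performs in the real code:

#include <chrono>
#include <cstdint>
#include <random>
#include <x86intrin.h>  // __rdtsc(); x86 + GCC/Clang only in this sketch

double EstimateCyclesPerSecond() {
  using Clock = std::chrono::steady_clock;
  const auto start_time = Clock::now();
  const std::uint64_t start_ticks = __rdtsc();

  // Impose real work instead of sleeping so the cycle counter keeps running.
  std::minstd_rand rng(
      static_cast<std::minstd_rand::result_type>(start_ticks));
  std::minstd_rand::result_type state = 0;
  do {
    rng.discard(10000);
    state += rng();
  } while (std::chrono::duration<double>(Clock::now() - start_time).count() <
           1.0);

  const std::uint64_t end_ticks = __rdtsc();
  const double elapsed_s =
      std::chrono::duration<double>(Clock::now() - start_time).count();
  (void)state;  // weak stand-in for benchmark::DoNotOptimize(state)
  return static_cast<double>(end_ticks - start_ticks) / elapsed_s;
}

Pinning the thread first matters because the start and end tick reads must come from the same core; that is what the new ThreadAffinityGuard guarantees before this loop runs.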
@@ -149,6 +158,10 @@ GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, &user_time); return MakeTime(kernel_time, user_time); +#elif defined(BENCHMARK_OS_QURT) + return static_cast( + qurt_timer_timetick_to_us(qurt_timer_get_ticks())) * + 1.0e-6; #elif defined(BENCHMARK_OS_MACOSX) // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. // See https://github.com/google/benchmark/pull/292 diff --git a/third-party/benchmark/test/AssemblyTests.cmake b/third-party/benchmark/test/AssemblyTests.cmake --- a/third-party/benchmark/test/AssemblyTests.cmake +++ b/third-party/benchmark/test/AssemblyTests.cmake @@ -1,3 +1,23 @@ +set(CLANG_SUPPORTED_VERSION "5.0.0") +set(GCC_SUPPORTED_VERSION "5.5.0") + +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${CLANG_SUPPORTED_VERSION}) + message (WARNING + "Unsupported Clang version " ${CMAKE_CXX_COMPILER_VERSION} + ". Expected is " ${CLANG_SUPPORTED_VERSION} + ". Assembly tests may be broken.") + endif() +elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${GCC_SUPPORTED_VERSION}) + message (WARNING + "Unsupported GCC version " ${CMAKE_CXX_COMPILER_VERSION} + ". Expected is " ${GCC_SUPPORTED_VERSION} + ". Assembly tests may be broken.") + endif() +else() + message (WARNING "Unsupported compiler. Assembly tests may be broken.") +endif() include(split_list) @@ -23,6 +43,7 @@ macro(add_filecheck_test name) cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV}) add_library(${name} OBJECT ${name}.cc) + target_link_libraries(${name} PRIVATE benchmark::benchmark) set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}") set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s") add_custom_target(copy_${name} ALL diff --git a/third-party/benchmark/test/CMakeLists.txt b/third-party/benchmark/test/CMakeLists.txt --- a/third-party/benchmark/test/CMakeLists.txt +++ b/third-party/benchmark/test/CMakeLists.txt @@ -1,5 +1,7 @@ # Enable the tests +set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) include(CheckCXXCompilerFlag) @@ -22,6 +24,10 @@ endforeach() endif() +if (NOT BUILD_SHARED_LIBS) + add_definitions(-DBENCHMARK_STATIC_DEFINE) +endif() + check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) set(BENCHMARK_O3_FLAG "") if (BENCHMARK_HAS_O3_FLAG) @@ -35,10 +41,14 @@ endif() add_library(output_test_helper STATIC output_test_helper.cc output_test.h) +target_link_libraries(output_test_helper PRIVATE benchmark::benchmark) macro(compile_benchmark_test name) add_executable(${name} "${name}.cc") target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT}) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC") + target_compile_options( ${name} PRIVATE --diag_suppress partial_override ) + endif() endmacro(compile_benchmark_test) macro(compile_benchmark_test_with_main name) @@ -48,26 +58,35 @@ macro(compile_output_test name) add_executable(${name} "${name}.cc" output_test.h) - target_link_libraries(${name} output_test_helper benchmark::benchmark + target_link_libraries(${name} output_test_helper benchmark::benchmark_main ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_output_test) # Demonstration executable compile_benchmark_test(benchmark_test) -add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01) +add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s) compile_benchmark_test(spec_arg_test) add_test(NAME spec_arg COMMAND spec_arg_test 
--benchmark_filter=BM_NotChosen) +compile_benchmark_test(spec_arg_verbosity_test) +add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42) + compile_benchmark_test(benchmark_setup_teardown_test) add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test) compile_benchmark_test(filter_test) macro(add_filter_test name filter expect) - add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) + add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01s --benchmark_filter=${filter} ${expect}) add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) endmacro(add_filter_test) +compile_benchmark_test(benchmark_min_time_flag_time_test) +add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_test) + +compile_benchmark_test(benchmark_min_time_flag_iters_test) +add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test) + add_filter_test(filter_simple "Foo" 3) add_filter_test(filter_simple_negative "-Foo" 2) add_filter_test(filter_suffix "BM_.*" 4) @@ -88,78 +107,83 @@ add_filter_test(filter_regex_end_negative "-.*Ba$" 4) compile_benchmark_test(options_test) -add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01) +add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s) compile_benchmark_test(basic_test) -add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01) +add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01s) compile_output_test(repetitions_test) -add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01 --benchmark_repetitions=3) +add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01s --benchmark_repetitions=3) compile_benchmark_test(diagnostics_test) -add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01) +add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01s) compile_benchmark_test(skip_with_error_test) -add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01) +add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01s) compile_benchmark_test(donotoptimize_test) +# Enable errors for deprecated deprecations (DoNotOptimize(Tp const& value)). 
+check_cxx_compiler_flag(-Werror=deprecated-declarations BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG) +if (BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG) + target_compile_options (donotoptimize_test PRIVATE "-Werror=deprecated-declarations") +endif() # Some of the issues with DoNotOptimize only occur when optimization is enabled check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) if (BENCHMARK_HAS_O3_FLAG) set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3") endif() -add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01) +add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01s) compile_benchmark_test(fixture_test) -add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01) +add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01s) compile_benchmark_test(register_benchmark_test) -add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01) +add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01s) compile_benchmark_test(map_test) -add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01) +add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01s) compile_benchmark_test(multiple_ranges_test) -add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01) +add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01s) compile_benchmark_test(args_product_test) -add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01) +add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01s) compile_benchmark_test_with_main(link_main_test) -add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01) +add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01s) compile_output_test(reporter_output_test) -add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01) +add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01s) compile_output_test(templated_fixture_test) -add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01) +add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s) compile_output_test(user_counters_test) -add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01) +add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01s) compile_output_test(perf_counters_test) -add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES) +add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01s --benchmark_perf_counters=CYCLES,BRANCHES) compile_output_test(internal_threading_test) -add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01) +add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s) compile_output_test(report_aggregates_only_test) -add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01) +add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s) compile_output_test(display_aggregates_only_test) -add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test 
--benchmark_min_time=0.01) +add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01s) compile_output_test(user_counters_tabular_test) -add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01) +add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01s) compile_output_test(user_counters_thousands_test) -add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01) +add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01s) compile_output_test(memory_manager_test) -add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01) +add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s) -check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG) -if (BENCHMARK_HAS_CXX03_FLAG) +# MSVC does not allow to set the language standard to C++98/03. +if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") compile_benchmark_test(cxx03_test) set_target_properties(cxx03_test PROPERTIES @@ -170,19 +194,25 @@ # causing the test to fail to compile. To prevent this we explicitly disable # the warning. check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR) - if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR) - set_target_properties(cxx03_test - PROPERTIES - LINK_FLAGS "-Wno-odr") + check_cxx_compiler_flag(-Wno-lto-type-mismatch BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH) + # Cannot set_target_properties multiple times here because the warnings will + # be overwritten on each call + set (DISABLE_LTO_WARNINGS "") + if (BENCHMARK_HAS_WNO_ODR) + set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-odr") + endif() + if (BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH) + set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-lto-type-mismatch") endif() - add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01) + set_target_properties(cxx03_test PROPERTIES LINK_FLAGS "${DISABLE_LTO_WARNINGS}") + add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01s) endif() # Attempt to work around flaky test failures when running on Appveyor servers. if (DEFINED ENV{APPVEYOR}) - set(COMPLEXITY_MIN_TIME "0.5") + set(COMPLEXITY_MIN_TIME "0.5s") else() - set(COMPLEXITY_MIN_TIME "0.01") + set(COMPLEXITY_MIN_TIME "0.01s") endif() compile_output_test(complexity_test) add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME}) @@ -210,6 +240,8 @@ add_gtest(statistics_gtest) add_gtest(string_util_gtest) add_gtest(perf_counters_gtest) + add_gtest(time_unit_gtest) + add_gtest(min_time_parse_gtest) endif(BENCHMARK_ENABLE_GTEST_TESTS) ############################################################################### diff --git a/third-party/benchmark/test/args_product_test.cc b/third-party/benchmark/test/args_product_test.cc --- a/third-party/benchmark/test/args_product_test.cc +++ b/third-party/benchmark/test/args_product_test.cc @@ -23,7 +23,7 @@ {2, 15, 10, 9}, {4, 5, 6, 11}}) {} - void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + void SetUp(const ::benchmark::State& state) override { std::vector ranges = {state.range(0), state.range(1), state.range(2), state.range(3)}; @@ -34,7 +34,7 @@ // NOTE: This is not TearDown as we want to check after _all_ runs are // complete. 
- virtual ~ArgsProductFixture() { + ~ArgsProductFixture() override { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; for (const auto& v : expectedValues) { diff --git a/third-party/benchmark/test/basic_test.cc b/third-party/benchmark/test/basic_test.cc --- a/third-party/benchmark/test/basic_test.cc +++ b/third-party/benchmark/test/basic_test.cc @@ -5,7 +5,8 @@ void BM_empty(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); @@ -147,7 +148,7 @@ auto arg = state.range(0); T sum = 0; for (auto _ : state) { - sum += arg; + sum += static_cast(arg); } } BENCHMARK(BM_OneTemplateFunc)->Arg(1); @@ -159,8 +160,8 @@ A sum = 0; B prod = 1; for (auto _ : state) { - sum += arg; - prod *= arg; + sum += static_cast(arg); + prod *= static_cast(arg); } } BENCHMARK(BM_TwoTemplateFunc)->Arg(1); diff --git a/third-party/benchmark/test/benchmark_gtest.cc b/third-party/benchmark/test/benchmark_gtest.cc --- a/third-party/benchmark/test/benchmark_gtest.cc +++ b/third-party/benchmark/test/benchmark_gtest.cc @@ -3,12 +3,12 @@ #include #include "../src/benchmark_register.h" +#include "benchmark/benchmark.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace benchmark { namespace internal { -extern std::map* global_context; namespace { @@ -38,8 +38,9 @@ TEST(AddRangeTest, FullRange8) { std::vector dst; - AddRange(&dst, int8_t{1}, std::numeric_limits::max(), 8); - EXPECT_THAT(dst, testing::ElementsAre(1, 8, 64, 127)); + AddRange(&dst, int8_t{1}, std::numeric_limits::max(), int8_t{8}); + EXPECT_THAT( + dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127})); } TEST(AddRangeTest, FullRange64) { @@ -129,11 +130,13 @@ TEST(AddRangeTest, Simple8) { std::vector dst; - AddRange(&dst, 1, 8, 2); - EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8)); + AddRange(&dst, int8_t{1}, int8_t{8}, int8_t{2}); + EXPECT_THAT(dst, + testing::ElementsAre(int8_t{1}, int8_t{2}, int8_t{4}, int8_t{8})); } TEST(AddCustomContext, Simple) { + std::map *&global_context = GetGlobalContext(); EXPECT_THAT(global_context, nullptr); AddCustomContext("foo", "bar"); @@ -148,6 +151,7 @@ } TEST(AddCustomContext, DuplicateKey) { + std::map *&global_context = GetGlobalContext(); EXPECT_THAT(global_context, nullptr); AddCustomContext("foo", "bar"); diff --git a/third-party/benchmark/test/benchmark_min_time_flag_iters_test.cc b/third-party/benchmark/test/benchmark_min_time_flag_iters_test.cc new file mode 100644 --- /dev/null +++ b/third-party/benchmark/test/benchmark_min_time_flag_iters_test.cc @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Tests that we can specify the number of iterations with +// --benchmark_min_time=x. 
+namespace {
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+    return ConsoleReporter::ReportContext(context);
+  };
+
+  virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+    assert(report.size() == 1);
+    iter_nums_.push_back(report[0].iterations);
+    ConsoleReporter::ReportRuns(report);
+  };
+
+  TestReporter() {}
+
+  virtual ~TestReporter() {}
+
+  const std::vector<int64_t>& GetIters() const {
+    return iter_nums_;
+  }
+
+ private:
+  std::vector<int64_t> iter_nums_;
+};
+
+}  // end namespace
+
+static void BM_MyBench(benchmark::State& state) {
+  for (auto s : state) {
+  }
+}
+BENCHMARK(BM_MyBench);
+
+int main(int argc, char** argv) {
+  // Make a fake argv and append the new --benchmark_min_time= to it.
+  int fake_argc = argc + 1;
+  const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)];
+  for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i];
+  fake_argv[argc] = "--benchmark_min_time=4x";
+
+  benchmark::Initialize(&fake_argc, const_cast<char**>(fake_argv));
+
+  TestReporter test_reporter;
+  const size_t returned_count =
+      benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench");
+  assert(returned_count == 1);
+
+  // Check the executed iters.
+  const std::vector<int64_t> iters = test_reporter.GetIters();
+  assert(!iters.empty() && iters[0] == 4);
+
+  delete[] fake_argv;
+  return 0;
+}
diff --git a/third-party/benchmark/test/benchmark_min_time_flag_time_test.cc b/third-party/benchmark/test/benchmark_min_time_flag_time_test.cc
new file mode 100644
--- /dev/null
+++ b/third-party/benchmark/test/benchmark_min_time_flag_time_test.cc
@@ -0,0 +1,90 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "benchmark/benchmark.h"
+
+// Tests that we can specify the min time with
+// --benchmark_min_time= (no suffix needed) OR
+// --benchmark_min_time=s
+namespace {
+
+// This is from benchmark.h
+typedef int64_t IterationCount;
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+    return ConsoleReporter::ReportContext(context);
+  };
+
+  virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+    assert(report.size() == 1);
+    ConsoleReporter::ReportRuns(report);
+  };
+
+  virtual void ReportRunsConfig(double min_time, bool /* has_explicit_iters */,
+                                IterationCount /* iters */) BENCHMARK_OVERRIDE {
+    min_times_.push_back(min_time);
+  }
+
+  TestReporter() {}
+
+  virtual ~TestReporter() {}
+
+  const std::vector<double>& GetMinTimes() const { return min_times_; }
+
+ private:
+  std::vector<double> min_times_;
+};
+
+bool AlmostEqual(double a, double b) {
+  return std::fabs(a - b) < std::numeric_limits<double>::epsilon();
+}
+
+void DoTestHelper(int* argc, const char** argv, double expected) {
+  benchmark::Initialize(argc, const_cast<char**>(argv));
+
+  TestReporter test_reporter;
+  const size_t returned_count =
+      benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench");
+  assert(returned_count == 1);
+
+  // Check the min_time
+  const std::vector<double>& min_times = test_reporter.GetMinTimes();
+  assert(!min_times.empty() && AlmostEqual(min_times[0], expected));
+}
+
+}  // end namespace
+
+static void BM_MyBench(benchmark::State& state) {
+  for (auto s : state) {
+  }
+}
+BENCHMARK(BM_MyBench);
+
+int main(int argc, char** argv) {
+  // Make a fake argv and append the new --benchmark_min_time= to it.
+ int fake_argc = argc + 1; + const char** fake_argv = new const char*[static_cast(fake_argc)]; + + for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i]; + + const char* no_suffix = "--benchmark_min_time=4"; + const char* with_suffix = "--benchmark_min_time=4.0s"; + double expected = 4.0; + + fake_argv[argc] = no_suffix; + DoTestHelper(&fake_argc, fake_argv, expected); + + fake_argv[argc] = with_suffix; + DoTestHelper(&fake_argc, fake_argv, expected); + + delete[] fake_argv; + return 0; +} diff --git a/third-party/benchmark/test/benchmark_name_gtest.cc b/third-party/benchmark/test/benchmark_name_gtest.cc --- a/third-party/benchmark/test/benchmark_name_gtest.cc +++ b/third-party/benchmark/test/benchmark_name_gtest.cc @@ -32,6 +32,14 @@ EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s"); } +TEST(BenchmarkNameTest, MinWarmUpTime) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.args = "some_args:3/4"; + name.min_warmup_time = "min_warmup_time:3.5s"; + EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_warmup_time:3.5s"); +} + TEST(BenchmarkNameTest, Iterations) { auto name = BenchmarkName(); name.function_name = "function_name"; diff --git a/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc b/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc --- a/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc +++ b/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc @@ -51,10 +51,9 @@ void Execute(const std::string& pattern) { queue->Clear(); - BenchmarkReporter* reporter = new NullReporter; + std::unique_ptr reporter(new NullReporter()); FLAGS_benchmark_filter = pattern; - RunSpecifiedBenchmarks(reporter); - delete reporter; + RunSpecifiedBenchmarks(reporter.get()); queue->Put("DONE"); // End marker } diff --git a/third-party/benchmark/test/benchmark_setup_teardown_test.cc b/third-party/benchmark/test/benchmark_setup_teardown_test.cc --- a/third-party/benchmark/test/benchmark_setup_teardown_test.cc +++ b/third-party/benchmark/test/benchmark_setup_teardown_test.cc @@ -10,19 +10,19 @@ // Test that Setup() and Teardown() are called exactly once // for each benchmark run (single-threaded). -namespace single { +namespace singlethreaded { static int setup_call = 0; static int teardown_call = 0; -} // namespace single +} // namespace singlethreaded static void DoSetup1(const benchmark::State& state) { - ++single::setup_call; + ++singlethreaded::setup_call; // Setup/Teardown should never be called with any thread_idx != 0. assert(state.thread_index() == 0); } static void DoTeardown1(const benchmark::State& state) { - ++single::teardown_call; + ++singlethreaded::teardown_call; assert(state.thread_index() == 0); } @@ -80,11 +80,11 @@ class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { public: - void SetUp(const ::benchmark::State&) BENCHMARK_OVERRIDE { + void SetUp(const ::benchmark::State&) override { fixture_interaction::fixture_setup++; } - ~FIXTURE_BECHMARK_NAME() {} + ~FIXTURE_BECHMARK_NAME() override {} }; BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) { @@ -134,8 +134,8 @@ assert(ret > 0); // Setup/Teardown is called once for each arg group (1,3,5,7). - assert(single::setup_call == 4); - assert(single::teardown_call == 4); + assert(singlethreaded::setup_call == 4); + assert(singlethreaded::teardown_call == 4); // 3 group of threads calling this function (3,5,10). 
assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3); @@ -145,7 +145,7 @@ // Setup is called 4 times, once for each arg group (1,3,5,7) assert(fixture_interaction::setup == 4); - // Fixture::Setup is called everytime the bm routine is run. + // Fixture::Setup is called every time the bm routine is run. // The exact number is indeterministic, so we just assert that // it's more than setup. assert(fixture_interaction::fixture_setup > fixture_interaction::setup); diff --git a/third-party/benchmark/test/benchmark_test.cc b/third-party/benchmark/test/benchmark_test.cc --- a/third-party/benchmark/test/benchmark_test.cc +++ b/third-party/benchmark/test/benchmark_test.cc @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -26,7 +27,7 @@ namespace { -int BENCHMARK_NOINLINE Factorial(uint32_t n) { +int BENCHMARK_NOINLINE Factorial(int n) { return (n == 1) ? 1 : n * Factorial(n - 1); } @@ -74,7 +75,8 @@ static void BM_CalculatePi(benchmark::State& state) { static const int depth = 1024; for (auto _ : state) { - benchmark::DoNotOptimize(CalculatePi(static_cast(depth))); + double pi = CalculatePi(static_cast(depth)); + benchmark::DoNotOptimize(pi); } } BENCHMARK(BM_CalculatePi)->Threads(8); @@ -90,7 +92,8 @@ for (int j = 0; j < state.range(1); ++j) data.insert(rand()); } state.SetItemsProcessed(state.iterations() * state.range(1)); - state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); + state.SetBytesProcessed(state.iterations() * state.range(1) * + static_cast(sizeof(int))); } // Test many inserts at once to reduce the total iterations needed. Otherwise, @@ -108,7 +111,7 @@ } const int64_t items_processed = state.iterations() * state.range(0); state.SetItemsProcessed(items_processed); - state.SetBytesProcessed(items_processed * sizeof(v)); + state.SetBytesProcessed(items_processed * static_cast(sizeof(v))); } BENCHMARK_TEMPLATE2(BM_Sequential, std::vector, int) ->Range(1 << 0, 1 << 10); @@ -122,7 +125,10 @@ size_t len = static_cast(state.range(0)); std::string s1(len, '-'); std::string s2(len, '-'); - for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2)); + for (auto _ : state) { + auto comp = s1.compare(s2); + benchmark::DoNotOptimize(comp); + } } BENCHMARK(BM_StringCompare)->Range(1, 1 << 20); @@ -169,7 +175,7 @@ for (int i = from; i < to; i++) { // No need to lock test_vector_mu as ranges // do not overlap between threads. - benchmark::DoNotOptimize(test_vector->at(i) = 1); + benchmark::DoNotOptimize(test_vector->at(static_cast(i)) = 1); } } @@ -244,4 +250,25 @@ BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2); BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3); +static void BM_BenchmarkName(benchmark::State& state) { + for (auto _ : state) { + } + + // Check that the benchmark name is passed correctly to `state`. 
+ assert("BM_BenchmarkName" == state.name()); +} +BENCHMARK(BM_BenchmarkName); + +// regression test for #1446 +template +static void BM_templated_test(benchmark::State& state) { + for (auto _ : state) { + type created_string; + benchmark::DoNotOptimize(created_string); + } +} + +static auto BM_templated_test_double = BM_templated_test>; +BENCHMARK(BM_templated_test_double); + BENCHMARK_MAIN(); diff --git a/third-party/benchmark/test/clobber_memory_assembly_test.cc b/third-party/benchmark/test/clobber_memory_assembly_test.cc --- a/third-party/benchmark/test/clobber_memory_assembly_test.cc +++ b/third-party/benchmark/test/clobber_memory_assembly_test.cc @@ -3,6 +3,7 @@ #ifdef __clang__ #pragma clang diagnostic ignored "-Wreturn-type" #endif +BENCHMARK_DISABLE_DEPRECATED_WARNING extern "C" { diff --git a/third-party/benchmark/test/complexity_test.cc b/third-party/benchmark/test/complexity_test.cc --- a/third-party/benchmark/test/complexity_test.cc +++ b/third-party/benchmark/test/complexity_test.cc @@ -70,7 +70,7 @@ void BM_Complexity_O1(benchmark::State &state) { for (auto _ : state) { for (int i = 0; i < 1024; ++i) { - benchmark::DoNotOptimize(&i); + benchmark::DoNotOptimize(i); } } state.SetComplexityN(state.range(0)); @@ -109,7 +109,7 @@ std::vector ConstructRandomVector(int64_t size) { std::vector v; - v.reserve(static_cast(size)); + v.reserve(static_cast(size)); for (int i = 0; i < size; ++i) { v.push_back(static_cast(std::rand() % size)); } @@ -121,7 +121,8 @@ // Test worst case scenario (item not in vector) const int64_t item_not_in_vector = state.range(0) * 2; for (auto _ : state) { - benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector)); + auto it = std::find(v.begin(), v.end(), item_not_in_vector); + benchmark::DoNotOptimize(it); } state.SetComplexityN(state.range(0)); } @@ -174,7 +175,7 @@ ->RangeMultiplier(2) ->Range(1 << 10, 1 << 16) ->Complexity([](benchmark::IterationCount n) { - return kLog2E * n * log(static_cast(n)); + return kLog2E * static_cast(n) * log(static_cast(n)); }); BENCHMARK(BM_Complexity_O_N_log_N) ->RangeMultiplier(2) @@ -204,7 +205,8 @@ void BM_ComplexityCaptureArgs(benchmark::State &state, int n) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.SetComplexityN(n); } diff --git a/third-party/benchmark/test/diagnostics_test.cc b/third-party/benchmark/test/diagnostics_test.cc --- a/third-party/benchmark/test/diagnostics_test.cc +++ b/third-party/benchmark/test/diagnostics_test.cc @@ -49,7 +49,8 @@ if (called_once == false) try_invalid_pause_resume(state); for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } if (called_once == false) try_invalid_pause_resume(state); @@ -64,7 +65,8 @@ if (called_once == false) try_invalid_pause_resume(state); while (state.KeepRunning()) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } if (called_once == false) try_invalid_pause_resume(state); @@ -74,7 +76,16 @@ BENCHMARK(BM_diagnostic_test_keep_running); int main(int argc, char* argv[]) { +#ifdef NDEBUG + // This test is exercising functionality for debug builds, which are not + // available in release builds. Skip the test if we are in that environment + // to avoid a test failure. 
+ std::cout << "Diagnostic test disabled in release build" << std::endl; + (void)argc; + (void)argv; +#else benchmark::internal::GetAbortHandler() = &TestHandler; benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); +#endif } diff --git a/third-party/benchmark/test/donotoptimize_assembly_test.cc b/third-party/benchmark/test/donotoptimize_assembly_test.cc --- a/third-party/benchmark/test/donotoptimize_assembly_test.cc +++ b/third-party/benchmark/test/donotoptimize_assembly_test.cc @@ -3,12 +3,16 @@ #ifdef __clang__ #pragma clang diagnostic ignored "-Wreturn-type" #endif +BENCHMARK_DISABLE_DEPRECATED_WARNING extern "C" { extern int ExternInt; extern int ExternInt2; extern int ExternInt3; +extern int BigArray[2049]; + +const int ConstBigArray[2049]{}; inline int Add42(int x) { return x + 42; } @@ -23,7 +27,15 @@ int value; int data[2]; }; + +struct ExtraLarge { + int arr[2049]; +}; } + +extern ExtraLarge ExtraLargeObj; +const ExtraLarge ConstExtraLargeObj{}; + // CHECK-LABEL: test_with_rvalue: extern "C" void test_with_rvalue() { benchmark::DoNotOptimize(Add42(0)); @@ -68,6 +80,22 @@ // CHECK: ret } +// CHECK-LABEL: test_with_extra_large_lvalue_with_op: +extern "C" void test_with_extra_large_lvalue_with_op() { + ExtraLargeObj.arr[16] = 42; + benchmark::DoNotOptimize(ExtraLargeObj); + // CHECK: movl $42, ExtraLargeObj+64(%rip) + // CHECK: ret +} + +// CHECK-LABEL: test_with_big_array_with_op +extern "C" void test_with_big_array_with_op() { + BigArray[16] = 42; + benchmark::DoNotOptimize(BigArray); + // CHECK: movl $42, BigArray+64(%rip) + // CHECK: ret +} + // CHECK-LABEL: test_with_non_trivial_lvalue: extern "C" void test_with_non_trivial_lvalue() { NotTriviallyCopyable NTC(ExternInt); @@ -96,6 +124,18 @@ // CHECK: ret } +// CHECK-LABEL: test_with_const_extra_large_obj: +extern "C" void test_with_const_extra_large_obj() { + benchmark::DoNotOptimize(ConstExtraLargeObj); + // CHECK: ret +} + +// CHECK-LABEL: test_with_const_big_array +extern "C" void test_with_const_big_array() { + benchmark::DoNotOptimize(ConstBigArray); + // CHECK: ret +} + // CHECK-LABEL: test_with_non_trivial_const_lvalue: extern "C" void test_with_non_trivial_const_lvalue() { const NotTriviallyCopyable Obj(ExternInt); diff --git a/third-party/benchmark/test/donotoptimize_test.cc b/third-party/benchmark/test/donotoptimize_test.cc --- a/third-party/benchmark/test/donotoptimize_test.cc +++ b/third-party/benchmark/test/donotoptimize_test.cc @@ -4,9 +4,9 @@ namespace { #if defined(__GNUC__) -std::uint64_t double_up(const std::uint64_t x) __attribute__((const)); +std::int64_t double_up(const std::int64_t x) __attribute__((const)); #endif -std::uint64_t double_up(const std::uint64_t x) { return x * 2; } +std::int64_t double_up(const std::int64_t x) { return x * 2; } } // namespace // Using DoNotOptimize on types like BitRef seem to cause a lot of problems @@ -29,6 +29,15 @@ int main(int, char*[]) { // this test verifies compilation of DoNotOptimize() for some types + char buffer1[1] = ""; + benchmark::DoNotOptimize(buffer1); + + char buffer2[2] = ""; + benchmark::DoNotOptimize(buffer2); + + char buffer3[3] = ""; + benchmark::DoNotOptimize(buffer3); + char buffer8[8] = ""; benchmark::DoNotOptimize(buffer8); @@ -37,17 +46,24 @@ char buffer1024[1024] = ""; benchmark::DoNotOptimize(buffer1024); - benchmark::DoNotOptimize(&buffer1024[0]); + char* bptr = &buffer1024[0]; + benchmark::DoNotOptimize(bptr); int x = 123; benchmark::DoNotOptimize(x); - benchmark::DoNotOptimize(&x); + int* xp = &x; + 
benchmark::DoNotOptimize(xp); benchmark::DoNotOptimize(x += 42); - benchmark::DoNotOptimize(double_up(x)); + std::int64_t y = double_up(x); + benchmark::DoNotOptimize(y); // These tests are to e - benchmark::DoNotOptimize(BitRef::Make()); BitRef lval = BitRef::Make(); benchmark::DoNotOptimize(lval); + +#ifdef BENCHMARK_HAS_CXX11 + // Check that accept rvalue. + benchmark::DoNotOptimize(BitRef::Make()); +#endif } diff --git a/third-party/benchmark/test/filter_test.cc b/third-party/benchmark/test/filter_test.cc --- a/third-party/benchmark/test/filter_test.cc +++ b/third-party/benchmark/test/filter_test.cc @@ -14,28 +14,27 @@ class TestReporter : public benchmark::ConsoleReporter { public: - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + bool ReportContext(const Context& context) override { return ConsoleReporter::ReportContext(context); }; - virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + void ReportRuns(const std::vector& report) override { ++count_; - max_family_index_ = - std::max(max_family_index_, report[0].family_index); + max_family_index_ = std::max(max_family_index_, report[0].family_index); ConsoleReporter::ReportRuns(report); }; TestReporter() : count_(0), max_family_index_(0) {} - virtual ~TestReporter() {} + ~TestReporter() override {} - size_t GetCount() const { return count_; } + int GetCount() const { return count_; } - size_t GetMaxFamilyIndex() const { return max_family_index_; } + int64_t GetMaxFamilyIndex() const { return max_family_index_; } private: - mutable size_t count_; - mutable size_t max_family_index_; + mutable int count_; + mutable int64_t max_family_index_; }; } // end namespace @@ -79,13 +78,13 @@ benchmark::Initialize(&argc, argv); TestReporter test_reporter; - const size_t returned_count = - benchmark::RunSpecifiedBenchmarks(&test_reporter); + const int64_t returned_count = + static_cast(benchmark::RunSpecifiedBenchmarks(&test_reporter)); if (argc == 2) { // Make sure we ran all of the tests std::stringstream ss(argv[1]); - size_t expected_return; + int64_t expected_return; ss >> expected_return; if (returned_count != expected_return) { @@ -95,8 +94,8 @@ return -1; } - const size_t expected_reports = list_only ? 0 : expected_return; - const size_t reports_count = test_reporter.GetCount(); + const int64_t expected_reports = list_only ? 0 : expected_return; + const int64_t reports_count = test_reporter.GetCount(); if (reports_count != expected_reports) { std::cerr << "ERROR: Expected " << expected_reports << " tests to be run but reported_count = " << reports_count @@ -104,8 +103,8 @@ return -1; } - const size_t max_family_index = test_reporter.GetMaxFamilyIndex(); - const size_t num_families = reports_count == 0 ? 0 : 1 + max_family_index; + const int64_t max_family_index = test_reporter.GetMaxFamilyIndex(); + const int64_t num_families = reports_count == 0 ? 
0 : 1 + max_family_index; if (num_families != expected_reports) { std::cerr << "ERROR: Expected " << expected_reports << " test families to be run but num_families = " diff --git a/third-party/benchmark/test/fixture_test.cc b/third-party/benchmark/test/fixture_test.cc --- a/third-party/benchmark/test/fixture_test.cc +++ b/third-party/benchmark/test/fixture_test.cc @@ -8,21 +8,21 @@ class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { public: - void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + void SetUp(const ::benchmark::State& state) override { if (state.thread_index() == 0) { assert(data.get() == nullptr); data.reset(new int(42)); } } - void TearDown(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + void TearDown(const ::benchmark::State& state) override { if (state.thread_index() == 0) { assert(data.get() != nullptr); data.reset(); } } - ~FIXTURE_BECHMARK_NAME() { assert(data == nullptr); } + ~FIXTURE_BECHMARK_NAME() override { assert(data == nullptr); } std::unique_ptr data; }; diff --git a/third-party/benchmark/test/link_main_test.cc b/third-party/benchmark/test/link_main_test.cc --- a/third-party/benchmark/test/link_main_test.cc +++ b/third-party/benchmark/test/link_main_test.cc @@ -2,7 +2,8 @@ void BM_empty(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); diff --git a/third-party/benchmark/test/map_test.cc b/third-party/benchmark/test/map_test.cc --- a/third-party/benchmark/test/map_test.cc +++ b/third-party/benchmark/test/map_test.cc @@ -24,7 +24,8 @@ m = ConstructRandomMap(size); state.ResumeTiming(); for (int i = 0; i < size; ++i) { - benchmark::DoNotOptimize(m.find(std::rand() % size)); + auto it = m.find(std::rand() % size); + benchmark::DoNotOptimize(it); } } state.SetItemsProcessed(state.iterations() * size); @@ -34,11 +35,11 @@ // Using fixtures. 
class MapFixture : public ::benchmark::Fixture { public: - void SetUp(const ::benchmark::State& st) BENCHMARK_OVERRIDE { + void SetUp(const ::benchmark::State& st) override { m = ConstructRandomMap(static_cast(st.range(0))); } - void TearDown(const ::benchmark::State&) BENCHMARK_OVERRIDE { m.clear(); } + void TearDown(const ::benchmark::State&) override { m.clear(); } std::map m; }; @@ -47,7 +48,8 @@ const int size = static_cast(state.range(0)); for (auto _ : state) { for (int i = 0; i < size; ++i) { - benchmark::DoNotOptimize(m.find(std::rand() % size)); + auto it = m.find(std::rand() % size); + benchmark::DoNotOptimize(it); } } state.SetItemsProcessed(state.iterations() * size); diff --git a/third-party/benchmark/test/memory_manager_test.cc b/third-party/benchmark/test/memory_manager_test.cc --- a/third-party/benchmark/test/memory_manager_test.cc +++ b/third-party/benchmark/test/memory_manager_test.cc @@ -5,16 +5,17 @@ #include "output_test.h" class TestMemoryManager : public benchmark::MemoryManager { - void Start() BENCHMARK_OVERRIDE {} - void Stop(Result* result) BENCHMARK_OVERRIDE { - result->num_allocs = 42; - result->max_bytes_used = 42000; + void Start() override {} + void Stop(Result& result) override { + result.num_allocs = 42; + result.max_bytes_used = 42000; } }; void BM_empty(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); diff --git a/third-party/benchmark/test/min_time_parse_gtest.cc b/third-party/benchmark/test/min_time_parse_gtest.cc new file mode 100644 --- /dev/null +++ b/third-party/benchmark/test/min_time_parse_gtest.cc @@ -0,0 +1,30 @@ +#include "../src/benchmark_runner.h" +#include "gtest/gtest.h" + +namespace { + +TEST(ParseMinTimeTest, InvalidInput) { +#if GTEST_HAS_DEATH_TEST + // Tests only runnable in debug mode (when BM_CHECK is enabled). +#ifndef NDEBUG +#ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS + ASSERT_DEATH_IF_SUPPORTED( + { benchmark::internal::ParseBenchMinTime("abc"); }, + "Malformed seconds value passed to --benchmark_min_time: `abc`"); + + ASSERT_DEATH_IF_SUPPORTED( + { benchmark::internal::ParseBenchMinTime("123ms"); }, + "Malformed seconds value passed to --benchmark_min_time: `123ms`"); + + ASSERT_DEATH_IF_SUPPORTED( + { benchmark::internal::ParseBenchMinTime("1z"); }, + "Malformed seconds value passed to --benchmark_min_time: `1z`"); + + ASSERT_DEATH_IF_SUPPORTED( + { benchmark::internal::ParseBenchMinTime("1hs"); }, + "Malformed seconds value passed to --benchmark_min_time: `1hs`"); +#endif +#endif +#endif +} +} // namespace diff --git a/third-party/benchmark/test/multiple_ranges_test.cc b/third-party/benchmark/test/multiple_ranges_test.cc --- a/third-party/benchmark/test/multiple_ranges_test.cc +++ b/third-party/benchmark/test/multiple_ranges_test.cc @@ -28,7 +28,7 @@ {2, 7, 15}, {7, 6, 3}}) {} - void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + void SetUp(const ::benchmark::State& state) override { std::vector ranges = {state.range(0), state.range(1), state.range(2)}; @@ -39,7 +39,7 @@ // NOTE: This is not TearDown as we want to check after _all_ runs are // complete. 
- virtual ~MultipleRangesFixture() { + ~MultipleRangesFixture() override { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; for (const auto& v : expectedValues) { diff --git a/third-party/benchmark/test/options_test.cc b/third-party/benchmark/test/options_test.cc --- a/third-party/benchmark/test/options_test.cc +++ b/third-party/benchmark/test/options_test.cc @@ -33,6 +33,8 @@ BENCHMARK(BM_basic)->Args({42, 42}); BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}}); BENCHMARK(BM_basic)->MinTime(0.7); +BENCHMARK(BM_basic)->MinWarmUpTime(0.8); +BENCHMARK(BM_basic)->MinTime(0.1)->MinWarmUpTime(0.2); BENCHMARK(BM_basic)->UseRealTime(); BENCHMARK(BM_basic)->ThreadRange(2, 4); BENCHMARK(BM_basic)->ThreadPerCpu(); @@ -65,8 +67,8 @@ // Test that the requested iteration count is respected. assert(state.max_iterations == 42); - size_t actual_iterations = 0; - for (auto _ : state) ++actual_iterations; + for (auto _ : state) { + } assert(state.iterations() == state.max_iterations); assert(state.iterations() == 42); } diff --git a/third-party/benchmark/test/output_test.h b/third-party/benchmark/test/output_test.h --- a/third-party/benchmark/test/output_test.h +++ b/third-party/benchmark/test/output_test.h @@ -85,7 +85,7 @@ struct Results; typedef std::function ResultsCheckFn; -size_t AddChecker(const char* bm_name_pattern, const ResultsCheckFn& fn); +size_t AddChecker(const std::string& bm_name_pattern, const ResultsCheckFn& fn); // Class holding the results of a benchmark. // It is passed in calls to checker functions. @@ -117,7 +117,7 @@ // get the string for a result by name, or nullptr if the name // is not found - const std::string* Get(const char* entry_name) const { + const std::string* Get(const std::string& entry_name) const { auto it = values.find(entry_name); if (it == values.end()) return nullptr; return &it->second; @@ -126,12 +126,12 @@ // get a result by name, parsed as a specific type. // NOTE: for counters, use GetCounterAs instead. template - T GetAs(const char* entry_name) const; + T GetAs(const std::string& entry_name) const; // counters are written as doubles, so they have to be read first // as a double, and only then converted to the asked type. 
template - T GetCounterAs(const char* entry_name) const { + T GetCounterAs(const std::string& entry_name) const { double dval = GetAs(entry_name); T tval = static_cast(dval); return tval; @@ -139,7 +139,7 @@ }; template -T Results::GetAs(const char* entry_name) const { +T Results::GetAs(const std::string& entry_name) const { auto* sv = Get(entry_name); BM_CHECK(sv != nullptr && !sv->empty()); std::stringstream ss; diff --git a/third-party/benchmark/test/output_test_helper.cc b/third-party/benchmark/test/output_test_helper.cc --- a/third-party/benchmark/test/output_test_helper.cc +++ b/third-party/benchmark/test/output_test_helper.cc @@ -45,7 +45,7 @@ static SubMap map = { {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, // human-readable float - {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"}, + {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kKMGTPEZYmunpfazy]?i?"}, {"%percentage", percentage_re}, {"%int", "[ ]*[0-9]+"}, {" %s ", "[ ]+"}, @@ -143,7 +143,7 @@ TestReporter(std::vector reps) : reporters_(std::move(reps)) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + bool ReportContext(const Context& context) override { bool last_ret = false; bool first = true; for (auto rep : reporters_) { @@ -157,10 +157,10 @@ return last_ret; } - void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + void ReportRuns(const std::vector& report) override { for (auto rep : reporters_) rep->ReportRuns(report); } - void Finalize() BENCHMARK_OVERRIDE { + void Finalize() override { for (auto rep : reporters_) rep->Finalize(); } @@ -248,9 +248,8 @@ if (!p.regex->Match(r.name)) { BM_VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n"; continue; - } else { - BM_VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; } + BM_VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; BM_VLOG(1) << "Checking results of " << r.name << ": ... 
\n"; p.fn(r); BM_VLOG(1) << "Checking results of " << r.name << ": OK.\n"; @@ -300,7 +299,7 @@ } // end namespace internal -size_t AddChecker(const char* bm_name, const ResultsCheckFn& fn) { +size_t AddChecker(const std::string& bm_name, const ResultsCheckFn& fn) { auto& rc = internal::GetResultsChecker(); rc.Add(bm_name, fn); return rc.results.size(); @@ -328,16 +327,18 @@ BM_CHECK(unit); if (*unit == "ns") { return val * 1.e-9; - } else if (*unit == "us") { + } + if (*unit == "us") { return val * 1.e-6; - } else if (*unit == "ms") { + } + if (*unit == "ms") { return val * 1.e-3; - } else if (*unit == "s") { + } + if (*unit == "s") { return val; - } else { - BM_CHECK(1 == 0) << "unknown time unit: " << *unit; - return 0; } + BM_CHECK(1 == 0) << "unknown time unit: " << *unit; + return 0; } // ========================================================================= // @@ -393,14 +394,14 @@ benchmark::JSONReporter JR; benchmark::CSVReporter CSVR; struct ReporterTest { - const char* name; + std::string name; std::vector& output_cases; std::vector& error_cases; benchmark::BenchmarkReporter& reporter; std::stringstream out_stream; std::stringstream err_stream; - ReporterTest(const char* n, std::vector& out_tc, + ReporterTest(const std::string& n, std::vector& out_tc, std::vector& err_tc, benchmark::BenchmarkReporter& br) : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) { @@ -408,12 +409,12 @@ reporter.SetErrorStream(&err_stream); } } TestCases[] = { - {"ConsoleReporter", GetTestCaseList(TC_ConsoleOut), + {std::string("ConsoleReporter"), GetTestCaseList(TC_ConsoleOut), GetTestCaseList(TC_ConsoleErr), CR}, - {"JSONReporter", GetTestCaseList(TC_JSONOut), GetTestCaseList(TC_JSONErr), - JR}, - {"CSVReporter", GetTestCaseList(TC_CSVOut), GetTestCaseList(TC_CSVErr), - CSVR}, + {std::string("JSONReporter"), GetTestCaseList(TC_JSONOut), + GetTestCaseList(TC_JSONErr), JR}, + {std::string("CSVReporter"), GetTestCaseList(TC_CSVOut), + GetTestCaseList(TC_CSVErr), CSVR}, }; // Create the test reporter and run the benchmarks. @@ -422,7 +423,8 @@ benchmark::RunSpecifiedBenchmarks(&test_rep); for (auto& rep_test : TestCases) { - std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n"; + std::string msg = + std::string("\nTesting ") + rep_test.name + std::string(" Output\n"); std::string banner(msg.size() - 1, '-'); std::cout << banner << msg << banner << "\n"; @@ -439,7 +441,7 @@ // the checks to subscribees. 
auto& csv = TestCases[2]; // would use == but gcc spits a warning - BM_CHECK(std::strcmp(csv.name, "CSVReporter") == 0); + BM_CHECK(csv.name == std::string("CSVReporter")); internal::GetResultsChecker().CheckResults(csv.out_stream); } diff --git a/third-party/benchmark/test/perf_counters_gtest.cc b/third-party/benchmark/test/perf_counters_gtest.cc --- a/third-party/benchmark/test/perf_counters_gtest.cc +++ b/third-party/benchmark/test/perf_counters_gtest.cc @@ -1,6 +1,8 @@ +#include #include #include "../src/perf_counters.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" #ifndef GTEST_SKIP @@ -11,7 +13,11 @@ #endif using benchmark::internal::PerfCounters; +using benchmark::internal::PerfCountersMeasurement; using benchmark::internal::PerfCounterValues; +using ::testing::AllOf; +using ::testing::Gt; +using ::testing::Lt; namespace { const char kGenericPerfEvent1[] = "CYCLES"; @@ -27,7 +33,7 @@ GTEST_SKIP() << "Performance counters not supported.\n"; } EXPECT_TRUE(PerfCounters::Initialize()); - EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid()); + EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1); } TEST(PerfCountersTest, NegativeTest) { @@ -36,29 +42,46 @@ return; } EXPECT_TRUE(PerfCounters::Initialize()); - EXPECT_FALSE(PerfCounters::Create({}).IsValid()); - EXPECT_FALSE(PerfCounters::Create({""}).IsValid()); - EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid()); + // Sanity checks + // Create() will always create a valid object, even if passed no or + // wrong arguments as the new behavior is to warn and drop unsupported + // counters + EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0); + EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0); + EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0); { - EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, - kGenericPerfEvent3}) - .IsValid()); - } - EXPECT_FALSE( - PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1}) - .IsValid()); - EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name", - kGenericPerfEvent1}) - .IsValid()); + // Try sneaking in a bad egg to see if it is filtered out. 
The + // number of counters has to be two, not zero + auto counter = + PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1}); + EXPECT_EQ(counter.num_counters(), 2); + EXPECT_EQ(counter.names(), std::vector( + {kGenericPerfEvent2, kGenericPerfEvent1})); + } + { + // Try sneaking in an outrageous counter, like a fat finger mistake + auto counter = PerfCounters::Create( + {kGenericPerfEvent3, "not a counter name", kGenericPerfEvent1}); + EXPECT_EQ(counter.num_counters(), 2); + EXPECT_EQ(counter.names(), std::vector( + {kGenericPerfEvent3, kGenericPerfEvent1})); + } { - EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, - kGenericPerfEvent3}) - .IsValid()); - } - EXPECT_FALSE( - PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, - kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"}) - .IsValid()); + // Finally try a golden input - it should like all them + EXPECT_EQ(PerfCounters::Create( + {kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3}) + .num_counters(), + 3); + } + { + // Add a bad apple in the end of the chain to check the edges + auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, + kGenericPerfEvent3, "bad event name"}); + EXPECT_EQ(counter.num_counters(), 3); + EXPECT_EQ(counter.names(), + std::vector({kGenericPerfEvent1, kGenericPerfEvent2, + kGenericPerfEvent3})); + } } TEST(PerfCountersTest, Read1Counter) { @@ -67,7 +90,7 @@ } EXPECT_TRUE(PerfCounters::Initialize()); auto counters = PerfCounters::Create({kGenericPerfEvent1}); - EXPECT_TRUE(counters.IsValid()); + EXPECT_EQ(counters.num_counters(), 1); PerfCounterValues values1(1); EXPECT_TRUE(counters.Snapshot(&values1)); EXPECT_GT(values1[0], 0); @@ -84,7 +107,7 @@ EXPECT_TRUE(PerfCounters::Initialize()); auto counters = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}); - EXPECT_TRUE(counters.IsValid()); + EXPECT_EQ(counters.num_counters(), 2); PerfCounterValues values1(2); EXPECT_TRUE(counters.Snapshot(&values1)); EXPECT_GT(values1[0], 0); @@ -95,30 +118,122 @@ EXPECT_GT(values2[1], 0); } -size_t do_work() { - size_t res = 0; - for (size_t i = 0; i < 100000000; ++i) res += i * i; - return res; +TEST(PerfCountersTest, ReopenExistingCounters) { + // This test works in recent and old Intel hardware + // However we cannot make assumptions beyond 3 HW counters + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + std::vector kMetrics({kGenericPerfEvent1}); + std::vector counters(3); + for (auto& counter : counters) { + counter = PerfCounters::Create(kMetrics); + } + PerfCounterValues values(1); + EXPECT_TRUE(counters[0].Snapshot(&values)); + EXPECT_TRUE(counters[1].Snapshot(&values)); + EXPECT_TRUE(counters[2].Snapshot(&values)); +} + +TEST(PerfCountersTest, CreateExistingMeasurements) { + // The test works (i.e. causes read to fail) for the assumptions + // about hardware capabilities (i.e. small number (3) hardware + // counters) at this date, + // the same as previous test ReopenExistingCounters. + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + + // This means we will try 10 counters but we can only guarantee + // for sure at this time that only 3 will work. Perhaps in the future + // we could use libpfm to query for the hardware limits on this + // particular platform. 
+  const int kMaxCounters = 10;
+  const int kMinValidCounters = 3;
+
+  // Let's use a ubiquitous counter that is guaranteed to work
+  // on all platforms
+  const std::vector<std::string> kMetrics{"cycles"};
+
+  // Cannot create a vector of actual objects because the
+  // copy constructor of PerfCounters is deleted - and so is
+  // implicitly deleted on PerfCountersMeasurement too
+  std::vector<std::unique_ptr<PerfCountersMeasurement>>
+      perf_counter_measurements;
+
+  perf_counter_measurements.reserve(kMaxCounters);
+  for (int j = 0; j < kMaxCounters; ++j) {
+    perf_counter_measurements.emplace_back(
+        new PerfCountersMeasurement(kMetrics));
+  }
+
+  std::vector<std::pair<std::string, double>> measurements;
+
+  // Start all counters together to see if they hold
+  size_t max_counters = kMaxCounters;
+  for (size_t i = 0; i < kMaxCounters; ++i) {
+    auto& counter(*perf_counter_measurements[i]);
+    EXPECT_EQ(counter.num_counters(), 1);
+    if (!counter.Start()) {
+      max_counters = i;
+      break;
+    };
+  }
+
+  ASSERT_GE(max_counters, kMinValidCounters);
+
+  // Start all together
+  for (size_t i = 0; i < max_counters; ++i) {
+    auto& counter(*perf_counter_measurements[i]);
+    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+  }
+
+  // Start/stop individually
+  for (size_t i = 0; i < max_counters; ++i) {
+    auto& counter(*perf_counter_measurements[i]);
+    measurements.clear();
+    counter.Start();
+    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+  }
+}
+
+// We try to do some meaningful work here but the compiler
+// insists in optimizing away our loop so we had to add a
+// no-optimize macro. In case it fails, we added some entropy
+// to this pool as well.
+
+BENCHMARK_DONT_OPTIMIZE size_t do_work() {
+  static std::mt19937 rd{std::random_device{}()};
+  static std::uniform_int_distribution<size_t> mrand(0, 10);
+  const size_t kNumLoops = 1000000;
+  size_t sum = 0;
+  for (size_t j = 0; j < kNumLoops; ++j) {
+    sum += mrand(rd);
+  }
+  benchmark::DoNotOptimize(sum);
+  return sum;
 }
 
-void measure(size_t threadcount, PerfCounterValues* values1,
-             PerfCounterValues* values2) {
-  BM_CHECK_NE(values1, nullptr);
-  BM_CHECK_NE(values2, nullptr);
+void measure(size_t threadcount, PerfCounterValues* before,
+             PerfCounterValues* after) {
+  BM_CHECK_NE(before, nullptr);
+  BM_CHECK_NE(after, nullptr);
   std::vector<std::thread> threads(threadcount);
   auto work = [&]() { BM_CHECK(do_work() > 1000); };
 
   // We need to first set up the counters, then start the threads, so the
-  // threads would inherit the counters. But later, we need to first destroy the
-  // thread pool (so all the work finishes), then measure the counters. So the
-  // scopes overlap, and we need to explicitly control the scope of the
+  // threads would inherit the counters. But later, we need to first destroy
+  // the thread pool (so all the work finishes), then measure the counters. So
+  // the scopes overlap, and we need to explicitly control the scope of the
   // threadpool.
auto counters = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3}); for (auto& t : threads) t = std::thread(work); - counters.Snapshot(values1); + counters.Snapshot(before); for (auto& t : threads) t.join(); - counters.Snapshot(values2); + counters.Snapshot(after); } TEST(PerfCountersTest, MultiThreaded) { @@ -126,20 +241,73 @@ GTEST_SKIP() << "Test skipped because libpfm is not supported."; } EXPECT_TRUE(PerfCounters::Initialize()); - PerfCounterValues values1(2); - PerfCounterValues values2(2); + PerfCounterValues before(2); + PerfCounterValues after(2); - measure(2, &values1, &values2); - std::vector D1{static_cast(values2[0] - values1[0]), - static_cast(values2[1] - values1[1])}; + // Notice that this test will work even if we taskset it to a single CPU + // In this case the threads will run sequentially + // Start two threads and measure the number of combined cycles and + // instructions + measure(2, &before, &after); + std::vector Elapsed2Threads{ + static_cast(after[0] - before[0]), + static_cast(after[1] - before[1])}; - measure(4, &values1, &values2); - std::vector D2{static_cast(values2[0] - values1[0]), - static_cast(values2[1] - values1[1])}; + // Start four threads and measure the number of combined cycles and + // instructions + measure(4, &before, &after); + std::vector Elapsed4Threads{ + static_cast(after[0] - before[0]), + static_cast(after[1] - before[1])}; - // Some extra work will happen on the main thread - like joining the threads - // - so the ratio won't be quite 2.0, but very close. - EXPECT_GE(D2[0], 1.9 * D1[0]); - EXPECT_GE(D2[1], 1.9 * D1[1]); + // The following expectations fail (at least on a beefy workstation with lots + // of cpus) - it seems that in some circumstances the runtime of 4 threads + // can even be better than with 2. + // So instead of expecting 4 threads to be slower, let's just make sure they + // do not differ too much in general (one is not more than 10x than the + // other). + EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10))); + EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10))); } + +TEST(PerfCountersTest, HardwareLimits) { + // The test works (i.e. causes read to fail) for the assumptions + // about hardware capabilities (i.e. small number (3-4) hardware + // counters) at this date, + // the same as previous test ReopenExistingCounters. + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + + // Taken from `perf list`, but focusses only on those HW events that actually + // were reported when running `sudo perf stat -a sleep 10`. All HW events + // listed in the first command not reported in the second seem to not work. + // This is sad as we don't really get to test the grouping here (groups can + // contain up to 6 members)... 
+ std::vector counter_names{ + "cycles", // leader + "instructions", // + "branches", // + "branch-misses", // + "cache-misses", // + }; + + // In the off-chance that some of these values are not supported, + // we filter them out so the test will complete without failure + // albeit it might not actually test the grouping on that platform + std::vector valid_names; + for (const std::string& name : counter_names) { + if (PerfCounters::IsCounterSupported(name)) { + valid_names.push_back(name); + } + } + PerfCountersMeasurement counter(valid_names); + + std::vector> measurements; + + counter.Start(); + EXPECT_TRUE(counter.Stop(measurements)); +} + } // namespace diff --git a/third-party/benchmark/test/perf_counters_test.cc b/third-party/benchmark/test/perf_counters_test.cc --- a/third-party/benchmark/test/perf_counters_test.cc +++ b/third-party/benchmark/test/perf_counters_test.cc @@ -2,12 +2,20 @@ #include "../src/perf_counters.h" +#include "../src/commandlineflags.h" #include "benchmark/benchmark.h" #include "output_test.h" +namespace benchmark { + +BM_DECLARE_string(benchmark_perf_counters); + +} // namespace benchmark + static void BM_Simple(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_Simple); @@ -23,5 +31,7 @@ if (!benchmark::internal::PerfCounters::kSupported) { return 0; } + benchmark::FLAGS_benchmark_perf_counters = "CYCLES,BRANCHES"; + benchmark::internal::PerfCounters::Initialize(); RunOutputTests(argc, argv); } diff --git a/third-party/benchmark/test/register_benchmark_test.cc b/third-party/benchmark/test/register_benchmark_test.cc --- a/third-party/benchmark/test/register_benchmark_test.cc +++ b/third-party/benchmark/test/register_benchmark_test.cc @@ -10,7 +10,7 @@ class TestReporter : public benchmark::ConsoleReporter { public: - virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + void ReportRuns(const std::vector& report) override { all_runs_.insert(all_runs_.end(), begin(report), end(report)); ConsoleReporter::ReportRuns(report); } @@ -19,11 +19,11 @@ }; struct TestCase { - std::string name; - const char* label; + const std::string name; + const std::string label; // Note: not explicit as we rely on it being converted through ADD_CASES. - TestCase(const char* xname) : TestCase(xname, nullptr) {} - TestCase(const char* xname, const char* xlabel) + TestCase(const std::string& xname) : TestCase(xname, "") {} + TestCase(const std::string& xname, const std::string& xlabel) : name(xname), label(xlabel) {} typedef benchmark::BenchmarkReporter::Run Run; @@ -32,7 +32,7 @@ // clang-format off BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); - if (label) { + if (!label.empty()) { BM_CHECK(run.report_label == label) << "expected " << label << " got " << run.report_label; } else { @@ -95,6 +95,18 @@ #endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK +//----------------------------------------------------------------------------// +// Test RegisterBenchmark with DISABLED_ benchmark +//----------------------------------------------------------------------------// +void DISABLED_BM_function(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(DISABLED_BM_function); +ReturnVal dummy3 = benchmark::RegisterBenchmark("DISABLED_BM_function_manual", + DISABLED_BM_function); +// No need to add cases because we don't expect them to run. 
+ //----------------------------------------------------------------------------// // Test RegisterBenchmark with different callable types //----------------------------------------------------------------------------// @@ -111,7 +123,7 @@ { CustomFixture fx; benchmark::RegisterBenchmark("custom_fixture", fx); - AddCases({"custom_fixture"}); + AddCases({std::string("custom_fixture")}); } #endif #ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK diff --git a/third-party/benchmark/test/reporter_output_test.cc b/third-party/benchmark/test/reporter_output_test.cc --- a/third-party/benchmark/test/reporter_output_test.cc +++ b/third-party/benchmark/test/reporter_output_test.cc @@ -17,7 +17,7 @@ AddCases(TC_ConsoleErr, { {"^%int-%int-%intT%int:%int:%int[-+]%int:%int$", MR_Default}, - {"Running .*/reporter_output_test(\\.exe)?$", MR_Next}, + {"Running .*(/|\\\\)reporter_output_test(\\.exe)?$", MR_Next}, {"Run on \\(%int X %float MHz CPU s?\\)", MR_Next}, }); AddCases(TC_JSONOut, @@ -93,7 +93,8 @@ void BM_bytes_per_second(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.SetBytesProcessed(1); } @@ -124,7 +125,8 @@ void BM_items_per_second(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.SetItemsProcessed(1); } @@ -318,7 +320,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}}); // ========================================================================= // -// ------------------------ Testing Arg Name Output ----------------------- // +// ------------------------ Testing Arg Name Output ------------------------ // // ========================================================================= // void BM_arg_name(benchmark::State& state) { @@ -404,7 +406,8 @@ void BM_Complexity_O1(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.SetComplexityN(state.range(0)); } diff --git a/third-party/benchmark/test/skip_with_error_test.cc b/third-party/benchmark/test/skip_with_error_test.cc --- a/third-party/benchmark/test/skip_with_error_test.cc +++ b/third-party/benchmark/test/skip_with_error_test.cc @@ -10,17 +10,17 @@ class TestReporter : public benchmark::ConsoleReporter { public: - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + bool ReportContext(const Context& context) override { return ConsoleReporter::ReportContext(context); }; - virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + void ReportRuns(const std::vector& report) override { all_runs_.insert(all_runs_.end(), begin(report), end(report)); ConsoleReporter::ReportRuns(report); } TestReporter() {} - virtual ~TestReporter() {} + ~TestReporter() override {} mutable std::vector all_runs_; }; @@ -35,8 +35,9 @@ void CheckRun(Run const& run) const { BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); - BM_CHECK(error_occurred == run.error_occurred); - BM_CHECK(error_message == run.error_message); + BM_CHECK_EQ(error_occurred, + 
benchmark::internal::SkippedWithError == run.skipped); + BM_CHECK(error_message == run.skip_message); if (error_occurred) { // BM_CHECK(run.iterations == 0); } else { @@ -47,7 +48,8 @@ std::vector ExpectedResults; -int AddCases(const char* base_name, std::initializer_list const& v) { +int AddCases(const std::string& base_name, + std::initializer_list const& v) { for (auto TC : v) { TC.name = base_name + TC.name; ExpectedResults.push_back(std::move(TC)); @@ -141,7 +143,8 @@ void BM_error_after_running(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } if (state.thread_index() <= (state.threads() / 2)) state.SkipWithError("error message"); diff --git a/third-party/benchmark/test/spec_arg_test.cc b/third-party/benchmark/test/spec_arg_test.cc --- a/third-party/benchmark/test/spec_arg_test.cc +++ b/third-party/benchmark/test/spec_arg_test.cc @@ -17,11 +17,11 @@ class TestReporter : public benchmark::ConsoleReporter { public: - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + bool ReportContext(const Context& context) override { return ConsoleReporter::ReportContext(context); }; - virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + void ReportRuns(const std::vector& report) override { assert(report.size() == 1); matched_functions.push_back(report[0].run_name.function_name); ConsoleReporter::ReportRuns(report); @@ -29,7 +29,7 @@ TestReporter() {} - virtual ~TestReporter() {} + ~TestReporter() override {} const std::vector& GetMatchedFunctions() const { return matched_functions; @@ -91,5 +91,15 @@ << matched_functions.front() << "]\n"; return 2; } + + // Test that SetBenchmarkFilter works. + const std::string golden_value = "golden_value"; + benchmark::SetBenchmarkFilter(golden_value); + std::string current_value = benchmark::GetBenchmarkFilter(); + if (golden_value != current_value) { + std::cerr << "Expected [" << golden_value + << "] for --benchmark_filter but got [" << current_value << "]\n"; + return 3; + } return 0; } diff --git a/third-party/benchmark/test/spec_arg_verbosity_test.cc b/third-party/benchmark/test/spec_arg_verbosity_test.cc new file mode 100644 --- /dev/null +++ b/third-party/benchmark/test/spec_arg_verbosity_test.cc @@ -0,0 +1,43 @@ +#include + +#include + +#include "benchmark/benchmark.h" + +// Tests that the user specified verbosity level can be get. +static void BM_Verbosity(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_Verbosity); + +int main(int argc, char** argv) { + const int32_t flagv = 42; + + // Verify that argv specify --v=42. + bool found = false; + for (int i = 0; i < argc; ++i) { + if (strcmp("--v=42", argv[i]) == 0) { + found = true; + break; + } + } + if (!found) { + std::cerr << "This test requires '--v=42' to be passed as a command-line " + << "argument.\n"; + return 1; + } + + benchmark::Initialize(&argc, argv); + + // Check that the current flag value is reported accurately via the + // GetBenchmarkVerbosity() function. + if (flagv != benchmark::GetBenchmarkVerbosity()) { + std::cerr + << "Seeing different value for flags. 
GetBenchmarkVerbosity() returns [" + << benchmark::GetBenchmarkVerbosity() << "] expected flag=[" << flagv + << "]\n"; + return 1; + } + return 0; +} diff --git a/third-party/benchmark/test/string_util_gtest.cc b/third-party/benchmark/test/string_util_gtest.cc --- a/third-party/benchmark/test/string_util_gtest.cc +++ b/third-party/benchmark/test/string_util_gtest.cc @@ -1,9 +1,12 @@ //===---------------------------------------------------------------------===// -// statistics_test - Unit tests for src/statistics.cc +// string_util_test - Unit tests for src/string_util.cc //===---------------------------------------------------------------------===// +#include + #include "../src/internal_macros.h" #include "../src/string_util.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" namespace { @@ -63,7 +66,10 @@ EXPECT_EQ(4ul, pos); } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); } + { + ASSERT_THROW(std::ignore = benchmark::stoul("this is a test"), + std::invalid_argument); + } #endif } @@ -107,7 +113,10 @@ EXPECT_EQ(4ul, pos); } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS -{ ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); } +{ + ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"), + std::invalid_argument); +} #endif } @@ -137,7 +146,10 @@ EXPECT_EQ(8ul, pos); } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS -{ ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); } +{ + ASSERT_THROW(std::ignore = benchmark::stod("this is a test"), + std::invalid_argument); +} #endif } @@ -149,4 +161,39 @@ std::vector({"hello", "there", "is", "more"})); } +using HumanReadableFixture = ::testing::TestWithParam< + std::tuple>; + +INSTANTIATE_TEST_SUITE_P( + HumanReadableTests, HumanReadableFixture, + ::testing::Values( + std::make_tuple(0.0, benchmark::Counter::kIs1024, "0"), + std::make_tuple(999.0, benchmark::Counter::kIs1024, "999"), + std::make_tuple(1000.0, benchmark::Counter::kIs1024, "1000"), + std::make_tuple(1024.0, benchmark::Counter::kIs1024, "1Ki"), + std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1024, + "976\\.56.Ki"), + std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1024, "1Mi"), + std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1024, + "953\\.674Mi"), + std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1024, + "1Gi"), + std::make_tuple(0.0, benchmark::Counter::kIs1000, "0"), + std::make_tuple(999.0, benchmark::Counter::kIs1000, "999"), + std::make_tuple(1000.0, benchmark::Counter::kIs1000, "1k"), + std::make_tuple(1024.0, benchmark::Counter::kIs1000, "1.024k"), + std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1000, "1M"), + std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1000, + "1\\.04858M"), + std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1000, + "1G"), + std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1000, + "1\\.07374G"))); + +TEST_P(HumanReadableFixture, HumanReadableNumber) { + std::string str = benchmark::HumanReadableNumber(std::get<0>(GetParam()), + std::get<1>(GetParam())); + ASSERT_THAT(str, ::testing::MatchesRegex(std::get<2>(GetParam()))); +} + } // end namespace diff --git a/third-party/benchmark/test/time_unit_gtest.cc b/third-party/benchmark/test/time_unit_gtest.cc new file mode 100644 --- /dev/null +++ b/third-party/benchmark/test/time_unit_gtest.cc @@ -0,0 +1,37 @@ +#include "../include/benchmark/benchmark.h" +#include "gtest/gtest.h" + +namespace benchmark { +namespace internal { + +namespace { + 
+class DummyBenchmark : public Benchmark { + public: + DummyBenchmark() : Benchmark("dummy") {} + void Run(State&) override {} +}; + +TEST(DefaultTimeUnitTest, TimeUnitIsNotSet) { + DummyBenchmark benchmark; + EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond); +} + +TEST(DefaultTimeUnitTest, DefaultIsSet) { + DummyBenchmark benchmark; + EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond); + SetDefaultTimeUnit(kMillisecond); + EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond); +} + +TEST(DefaultTimeUnitTest, DefaultAndExplicitUnitIsSet) { + DummyBenchmark benchmark; + benchmark.Unit(kMillisecond); + SetDefaultTimeUnit(kMicrosecond); + + EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond); +} + +} // namespace +} // namespace internal +} // namespace benchmark diff --git a/third-party/benchmark/test/user_counters_tabular_test.cc b/third-party/benchmark/test/user_counters_tabular_test.cc --- a/third-party/benchmark/test/user_counters_tabular_test.cc +++ b/third-party/benchmark/test/user_counters_tabular_test.cc @@ -372,7 +372,8 @@ void BM_CounterRates_Tabular(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters.insert({ diff --git a/third-party/benchmark/test/user_counters_test.cc b/third-party/benchmark/test/user_counters_test.cc --- a/third-party/benchmark/test/user_counters_test.cc +++ b/third-party/benchmark/test/user_counters_test.cc @@ -67,7 +67,8 @@ void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.counters["foo"] = 1; state.counters["bar"] = ++num_calls1; @@ -118,7 +119,8 @@ void BM_Counters_Rate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate}; @@ -161,7 +163,8 @@ void BM_Invert(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{0.0001, bm::Counter::kInvert}; @@ -195,14 +198,14 @@ CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert); // ========================================================================= // -// ------------------------- InvertedRate Counters Output -// -------------------------- // +// --------------------- InvertedRate Counters Output ---------------------- // // ========================================================================= // void BM_Counters_InvertedRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = @@ -330,7 +333,8 @@ void BM_Counters_AvgThreadsRate(benchmark::State& state) { for (auto _ : state) { // This test 
requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate}; @@ -417,7 +421,8 @@ void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = @@ -460,7 +465,7 @@ &CheckIsIterationInvariantRate); // ========================================================================= // -// ------------------- AvgIterations Counters Output ------------------ // +// --------------------- AvgIterations Counters Output --------------------- // // ========================================================================= // void BM_Counters_AvgIterations(benchmark::State& state) { @@ -502,13 +507,14 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations); // ========================================================================= // -// ----------------- AvgIterationsRate Counters Output ---------------- // +// ------------------- AvgIterationsRate Counters Output ------------------- // // ========================================================================= // void BM_Counters_kAvgIterationsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterationsRate}; diff --git a/third-party/benchmark/test/user_counters_thousands_test.cc b/third-party/benchmark/test/user_counters_thousands_test.cc --- a/third-party/benchmark/test/user_counters_thousands_test.cc +++ b/third-party/benchmark/test/user_counters_thousands_test.cc @@ -16,13 +16,13 @@ {"t0_1000000DefaultBase", bm::Counter(1000 * 1000, bm::Counter::kDefaults)}, {"t1_1000000Base1000", bm::Counter(1000 * 1000, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1000)}, + bm::Counter::OneK::kIs1000)}, {"t2_1000000Base1024", bm::Counter(1000 * 1000, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1024)}, + bm::Counter::OneK::kIs1024)}, {"t3_1048576Base1000", bm::Counter(1024 * 1024, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1000)}, + bm::Counter::OneK::kIs1000)}, {"t4_1048576Base1024", bm::Counter(1024 * 1024, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1024)}, + bm::Counter::OneK::kIs1024)}, }); } BENCHMARK(BM_Counters_Thousands)->Repetitions(2); @@ -30,21 +30,21 @@ TC_ConsoleOut, { {"^BM_Counters_Thousands/repeats:2 %console_report " - "t0_1000000DefaultBase=1000k " - "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k " - "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"}, + "t0_1000000DefaultBase=1M " + "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki " + "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2 %console_report " - "t0_1000000DefaultBase=1000k " - "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k " - "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"}, + "t0_1000000DefaultBase=1M " + "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki " + 
"t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_mean %console_report " - "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k " - "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k " - "t4_1048576Base1024=1024k$"}, + "t0_1000000DefaultBase=1M t1_1000000Base1000=1M " + "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M " + "t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_median %console_report " - "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k " - "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k " - "t4_1048576Base1024=1024k$"}, + "t0_1000000DefaultBase=1M t1_1000000Base1000=1M " + "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M " + "t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_stddev %console_time_only_report [ " "]*2 t0_1000000DefaultBase=0 t1_1000000Base1000=0 " "t2_1000000Base1024=0 t3_1048576Base1000=0 t4_1048576Base1024=0$"}, diff --git a/third-party/benchmark/tools/compare.py b/third-party/benchmark/tools/compare.py --- a/third-party/benchmark/tools/compare.py +++ b/third-party/benchmark/tools/compare.py @@ -1,7 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import unittest - """ compare.py - versatile benchmark output compare tool """ @@ -10,205 +9,174 @@ from argparse import ArgumentParser import json import sys +import os import gbench from gbench import util, report -from gbench.util import * def check_inputs(in1, in2, flags): """ Perform checking on the user provided inputs and diagnose any abnormalities """ - in1_kind, in1_err = classify_input_file(in1) - in2_kind, in2_err = classify_input_file(in2) - output_file = find_benchmark_flag("--benchmark_out=", flags) - output_type = find_benchmark_flag("--benchmark_out_format=", flags) - if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file: - print( - ( - "WARNING: '--benchmark_out=%s' will be passed to both " - "benchmarks causing it to be overwritten" - ) - % output_file - ) - if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0: - print( - "WARNING: passing optional flags has no effect since both " - "inputs are JSON" - ) - if output_type is not None and output_type != "json": - print( - ( - "ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" - " is not supported." 
- ) - % output_type - ) + in1_kind, in1_err = util.classify_input_file(in1) + in2_kind, in2_err = util.classify_input_file(in2) + output_file = util.find_benchmark_flag('--benchmark_out=', flags) + output_type = util.find_benchmark_flag('--benchmark_out_format=', flags) + if in1_kind == util.IT_Executable and in2_kind == util.IT_Executable and output_file: + print(("WARNING: '--benchmark_out=%s' will be passed to both " + "benchmarks causing it to be overwritten") % output_file) + if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON: + # When both sides are JSON the only supported flag is + # --benchmark_filter= + for flag in util.remove_benchmark_flags('--benchmark_filter=', flags): + print("WARNING: passing %s has no effect since both " + "inputs are JSON" % flag) + if output_type is not None and output_type != 'json': + print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" + " is not supported.") % output_type) sys.exit(1) def create_parser(): - parser = ArgumentParser(description="versatile benchmark output compare tool") + parser = ArgumentParser( + description='versatile benchmark output compare tool') parser.add_argument( - "-a", - "--display_aggregates_only", - dest="display_aggregates_only", + '-a', + '--display_aggregates_only', + dest='display_aggregates_only', action="store_true", help="If there are repetitions, by default, we display everything - the" - " actual runs, and the aggregates computed. Sometimes, it is " - "desirable to only view the aggregates. E.g. when there are a lot " - "of repetitions. Do note that only the display is affected. " - "Internally, all the actual runs are still used, e.g. for U test.", - ) + " actual runs, and the aggregates computed. Sometimes, it is " + "desirable to only view the aggregates. E.g. when there are a lot " + "of repetitions. Do note that only the display is affected. " + "Internally, all the actual runs are still used, e.g. 
for U test.") parser.add_argument( - "--no-color", - dest="color", + '--no-color', + dest='color', default=True, action="store_false", - help="Do not use colors in the terminal output", + help="Do not use colors in the terminal output" ) parser.add_argument( - "-d", - "--dump_to_json", - dest="dump_to_json", - help="Additionally, dump benchmark comparison output to this file in JSON format.", - ) + '-d', + '--dump_to_json', + dest='dump_to_json', + help="Additionally, dump benchmark comparison output to this file in JSON format.") utest = parser.add_argument_group() utest.add_argument( - "--no-utest", - dest="utest", + '--no-utest', + dest='utest', default=True, action="store_false", - help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format( - report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS - ), - ) + help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS)) alpha_default = 0.05 utest.add_argument( "--alpha", - dest="utest_alpha", + dest='utest_alpha', default=alpha_default, type=float, - help=( - "significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)" - ) - % alpha_default, - ) + help=("significance level alpha. 
if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") % + alpha_default) subparsers = parser.add_subparsers( - help="This tool has multiple modes of operation:", dest="mode" - ) + help='This tool has multiple modes of operation:', + dest='mode') parser_a = subparsers.add_parser( - "benchmarks", - help="The most simple use-case, compare all the output of these two benchmarks", - ) - baseline = parser_a.add_argument_group("baseline", "The benchmark baseline") + 'benchmarks', + help='The most simple use-case, compare all the output of these two benchmarks') + baseline = parser_a.add_argument_group( + 'baseline', 'The benchmark baseline') baseline.add_argument( - "test_baseline", - metavar="test_baseline", - type=argparse.FileType("r"), + 'test_baseline', + metavar='test_baseline', + type=argparse.FileType('r'), nargs=1, - help="A benchmark executable or JSON output file", - ) + help='A benchmark executable or JSON output file') contender = parser_a.add_argument_group( - "contender", "The benchmark that will be compared against the baseline" - ) + 'contender', 'The benchmark that will be compared against the baseline') contender.add_argument( - "test_contender", - metavar="test_contender", - type=argparse.FileType("r"), + 'test_contender', + metavar='test_contender', + type=argparse.FileType('r'), nargs=1, - help="A benchmark executable or JSON output file", - ) + help='A benchmark executable or JSON output file') parser_a.add_argument( - "benchmark_options", - metavar="benchmark_options", + 'benchmark_options', + metavar='benchmark_options', nargs=argparse.REMAINDER, - help="Arguments to pass when running benchmark executables", - ) + help='Arguments to pass when running benchmark executables') parser_b = subparsers.add_parser( - "filters", help="Compare filter one with the filter two of benchmark" - ) - baseline = parser_b.add_argument_group("baseline", "The benchmark baseline") + 'filters', help='Compare filter one with the filter two of benchmark') + baseline = parser_b.add_argument_group( + 'baseline', 'The benchmark baseline') baseline.add_argument( - "test", - metavar="test", - type=argparse.FileType("r"), + 'test', + metavar='test', + type=argparse.FileType('r'), nargs=1, - help="A benchmark executable or JSON output file", - ) + help='A benchmark executable or JSON output file') baseline.add_argument( - "filter_baseline", - metavar="filter_baseline", + 'filter_baseline', + metavar='filter_baseline', type=str, nargs=1, - help="The first filter, that will be used as baseline", - ) + help='The first filter, that will be used as baseline') contender = parser_b.add_argument_group( - "contender", "The benchmark that will be compared against the baseline" - ) + 'contender', 'The benchmark that will be compared against the baseline') contender.add_argument( - "filter_contender", - metavar="filter_contender", + 'filter_contender', + metavar='filter_contender', type=str, nargs=1, - help="The second filter, that will be compared against the baseline", - ) + help='The second filter, that will be compared against the baseline') parser_b.add_argument( - "benchmark_options", - metavar="benchmark_options", + 'benchmark_options', + metavar='benchmark_options', nargs=argparse.REMAINDER, - help="Arguments to pass when running benchmark executables", - ) + help='Arguments to pass when running benchmark executables') parser_c = subparsers.add_parser( - "benchmarksfiltered", - help="Compare filter one 
of first benchmark with filter two of the second benchmark", - ) - baseline = parser_c.add_argument_group("baseline", "The benchmark baseline") + 'benchmarksfiltered', + help='Compare filter one of first benchmark with filter two of the second benchmark') + baseline = parser_c.add_argument_group( + 'baseline', 'The benchmark baseline') baseline.add_argument( - "test_baseline", - metavar="test_baseline", - type=argparse.FileType("r"), + 'test_baseline', + metavar='test_baseline', + type=argparse.FileType('r'), nargs=1, - help="A benchmark executable or JSON output file", - ) + help='A benchmark executable or JSON output file') baseline.add_argument( - "filter_baseline", - metavar="filter_baseline", + 'filter_baseline', + metavar='filter_baseline', type=str, nargs=1, - help="The first filter, that will be used as baseline", - ) + help='The first filter, that will be used as baseline') contender = parser_c.add_argument_group( - "contender", "The benchmark that will be compared against the baseline" - ) + 'contender', 'The benchmark that will be compared against the baseline') contender.add_argument( - "test_contender", - metavar="test_contender", - type=argparse.FileType("r"), + 'test_contender', + metavar='test_contender', + type=argparse.FileType('r'), nargs=1, - help="The second benchmark executable or JSON output file, that will be compared against the baseline", - ) + help='The second benchmark executable or JSON output file, that will be compared against the baseline') contender.add_argument( - "filter_contender", - metavar="filter_contender", + 'filter_contender', + metavar='filter_contender', type=str, nargs=1, - help="The second filter, that will be compared against the baseline", - ) + help='The second filter, that will be compared against the baseline') parser_c.add_argument( - "benchmark_options", - metavar="benchmark_options", + 'benchmark_options', + metavar='benchmark_options', nargs=argparse.REMAINDER, - help="Arguments to pass when running benchmark executables", - ) + help='Arguments to pass when running benchmark executables') return parser @@ -223,16 +191,16 @@ assert not unknown_args benchmark_options = args.benchmark_options - if args.mode == "benchmarks": + if args.mode == 'benchmarks': test_baseline = args.test_baseline[0].name test_contender = args.test_contender[0].name - filter_baseline = "" - filter_contender = "" + filter_baseline = '' + filter_contender = '' # NOTE: if test_baseline == test_contender, you are analyzing the stdev - description = "Comparing %s to %s" % (test_baseline, test_contender) - elif args.mode == "filters": + description = 'Comparing %s to %s' % (test_baseline, test_contender) + elif args.mode == 'filters': test_baseline = args.test[0].name test_contender = args.test[0].name filter_baseline = args.filter_baseline[0] @@ -241,12 +209,9 @@ # NOTE: if filter_baseline == filter_contender, you are analyzing the # stdev - description = "Comparing %s to %s (from %s)" % ( - filter_baseline, - filter_contender, - args.test[0].name, - ) - elif args.mode == "benchmarksfiltered": + description = 'Comparing %s to %s (from %s)' % ( + filter_baseline, filter_contender, args.test[0].name) + elif args.mode == 'benchmarksfiltered': test_baseline = args.test_baseline[0].name test_contender = args.test_contender[0].name filter_baseline = args.filter_baseline[0] @@ -255,12 +220,8 @@ # NOTE: if test_baseline == test_contender and # filter_baseline == filter_contender, you are analyzing the stdev - description = "Comparing %s (from %s) to %s (from %s)" % ( - 
filter_baseline, - test_baseline, - filter_contender, - test_contender, - ) + description = 'Comparing %s (from %s) to %s (from %s)' % ( + filter_baseline, test_baseline, filter_contender, test_contender) else: # should never happen print("Unrecognized mode of operation: '%s'" % args.mode) @@ -270,229 +231,199 @@ check_inputs(test_baseline, test_contender, benchmark_options) if args.display_aggregates_only: - benchmark_options += ["--benchmark_display_aggregates_only=true"] + benchmark_options += ['--benchmark_display_aggregates_only=true'] options_baseline = [] options_contender = [] if filter_baseline and filter_contender: - options_baseline = ["--benchmark_filter=%s" % filter_baseline] - options_contender = ["--benchmark_filter=%s" % filter_contender] + options_baseline = ['--benchmark_filter=%s' % filter_baseline] + options_contender = ['--benchmark_filter=%s' % filter_contender] # Run the benchmarks and report the results - json1 = json1_orig = gbench.util.sort_benchmark_results( - gbench.util.run_or_load_benchmark( - test_baseline, benchmark_options + options_baseline - ) - ) - json2 = json2_orig = gbench.util.sort_benchmark_results( - gbench.util.run_or_load_benchmark( - test_contender, benchmark_options + options_contender - ) - ) + json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( + test_baseline, benchmark_options + options_baseline)) + json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( + test_contender, benchmark_options + options_contender)) # Now, filter the benchmarks so that the difference report can work if filter_baseline and filter_contender: - replacement = "[%s vs. %s]" % (filter_baseline, filter_contender) - json1 = gbench.report.filter_benchmark(json1_orig, filter_baseline, replacement) + replacement = '[%s vs. 
%s]' % (filter_baseline, filter_contender) + json1 = gbench.report.filter_benchmark( + json1_orig, filter_baseline, replacement) json2 = gbench.report.filter_benchmark( - json2_orig, filter_contender, replacement - ) + json2_orig, filter_contender, replacement) - diff_report = gbench.report.get_difference_report(json1, json2, args.utest) + diff_report = gbench.report.get_difference_report( + json1, json2, args.utest) output_lines = gbench.report.print_difference_report( diff_report, args.display_aggregates_only, - args.utest, - args.utest_alpha, - args.color, - ) + args.utest, args.utest_alpha, args.color) print(description) for ln in output_lines: print(ln) # Optionally, diff and output to JSON if args.dump_to_json is not None: - with open(args.dump_to_json, "w") as f_json: + with open(args.dump_to_json, 'w') as f_json: json.dump(diff_report, f_json) - class TestParser(unittest.TestCase): def setUp(self): self.parser = create_parser() testInputs = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "gbench", "Inputs" - ) - self.testInput0 = os.path.join(testInputs, "test1_run1.json") - self.testInput1 = os.path.join(testInputs, "test1_run2.json") + os.path.dirname( + os.path.realpath(__file__)), + 'gbench', + 'Inputs') + self.testInput0 = os.path.join(testInputs, 'test1_run1.json') + self.testInput1 = os.path.join(testInputs, 'test1_run2.json') def test_benchmarks_basic(self): parsed = self.parser.parse_args( - ["benchmarks", self.testInput0, self.testInput1] - ) + ['benchmarks', self.testInput0, self.testInput1]) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "benchmarks") + self.assertEqual(parsed.mode, 'benchmarks') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_without_utest(self): parsed = self.parser.parse_args( - ["--no-utest", "benchmarks", self.testInput0, self.testInput1] - ) + ['--no-utest', 'benchmarks', self.testInput0, self.testInput1]) self.assertFalse(parsed.display_aggregates_only) self.assertFalse(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.05) - self.assertEqual(parsed.mode, "benchmarks") + self.assertEqual(parsed.mode, 'benchmarks') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_display_aggregates_only(self): parsed = self.parser.parse_args( - ["-a", "benchmarks", self.testInput0, self.testInput1] - ) + ['-a', 'benchmarks', self.testInput0, self.testInput1]) self.assertTrue(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "benchmarks") + self.assertEqual(parsed.mode, 'benchmarks') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_with_utest_alpha(self): parsed = self.parser.parse_args( - ["--alpha=0.314", "benchmarks", self.testInput0, self.testInput1] - ) + ['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.314) - self.assertEqual(parsed.mode, "benchmarks") + self.assertEqual(parsed.mode, 'benchmarks') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) 
self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_without_utest_with_utest_alpha(self): parsed = self.parser.parse_args( - [ - "--no-utest", - "--alpha=0.314", - "benchmarks", - self.testInput0, - self.testInput1, - ] - ) + ['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) self.assertFalse(parsed.display_aggregates_only) self.assertFalse(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.314) - self.assertEqual(parsed.mode, "benchmarks") + self.assertEqual(parsed.mode, 'benchmarks') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_with_remainder(self): parsed = self.parser.parse_args( - ["benchmarks", self.testInput0, self.testInput1, "d"] - ) + ['benchmarks', self.testInput0, self.testInput1, 'd']) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "benchmarks") + self.assertEqual(parsed.mode, 'benchmarks') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.benchmark_options, ["d"]) + self.assertEqual(parsed.benchmark_options, ['d']) def test_benchmarks_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - ["benchmarks", self.testInput0, self.testInput1, "--", "e"] - ) + ['benchmarks', self.testInput0, self.testInput1, '--', 'e']) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "benchmarks") + self.assertEqual(parsed.mode, 'benchmarks') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.benchmark_options, ["e"]) + self.assertEqual(parsed.benchmark_options, ['e']) def test_filters_basic(self): - parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d"]) + parsed = self.parser.parse_args( + ['filters', self.testInput0, 'c', 'd']) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "filters") + self.assertEqual(parsed.mode, 'filters') self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], "c") - self.assertEqual(parsed.filter_contender[0], "d") + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') self.assertFalse(parsed.benchmark_options) def test_filters_with_remainder(self): - parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d", "e"]) + parsed = self.parser.parse_args( + ['filters', self.testInput0, 'c', 'd', 'e']) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "filters") + self.assertEqual(parsed.mode, 'filters') self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], "c") - self.assertEqual(parsed.filter_contender[0], "d") - self.assertEqual(parsed.benchmark_options, ["e"]) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') + self.assertEqual(parsed.benchmark_options, ['e']) def test_filters_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - ["filters", self.testInput0, "c", "d", "--", "f"] - ) + ['filters', 
self.testInput0, 'c', 'd', '--', 'f']) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "filters") + self.assertEqual(parsed.mode, 'filters') self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], "c") - self.assertEqual(parsed.filter_contender[0], "d") - self.assertEqual(parsed.benchmark_options, ["f"]) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') + self.assertEqual(parsed.benchmark_options, ['f']) def test_benchmarksfiltered_basic(self): parsed = self.parser.parse_args( - ["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e"] - ) + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e']) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "benchmarksfiltered") + self.assertEqual(parsed.mode, 'benchmarksfiltered') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_baseline[0], 'c') self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], "e") + self.assertEqual(parsed.filter_contender[0], 'e') self.assertFalse(parsed.benchmark_options) def test_benchmarksfiltered_with_remainder(self): parsed = self.parser.parse_args( - ["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e", "f"] - ) + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f']) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "benchmarksfiltered") + self.assertEqual(parsed.mode, 'benchmarksfiltered') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_baseline[0], 'c') self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], "e") - self.assertEqual(parsed.benchmark_options[0], "f") + self.assertEqual(parsed.filter_contender[0], 'e') + self.assertEqual(parsed.benchmark_options[0], 'f') def test_benchmarksfiltered_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - [ - "benchmarksfiltered", - self.testInput0, - "c", - self.testInput1, - "e", - "--", - "g", - ] - ) + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g']) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, "benchmarksfiltered") + self.assertEqual(parsed.mode, 'benchmarksfiltered') self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_baseline[0], 'c') self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], "e") - self.assertEqual(parsed.benchmark_options[0], "g") + self.assertEqual(parsed.filter_contender[0], 'e') + self.assertEqual(parsed.benchmark_options[0], 'g') -if __name__ == "__main__": +if __name__ == '__main__': # unittest.main() main() diff --git a/third-party/benchmark/tools/gbench/Inputs/test1_run1.json b/third-party/benchmark/tools/gbench/Inputs/test1_run1.json --- a/third-party/benchmark/tools/gbench/Inputs/test1_run1.json +++ b/third-party/benchmark/tools/gbench/Inputs/test1_run1.json @@ -114,6 +114,14 @@ "real_time": 1, "cpu_time": 1, "time_unit": "s" + }, + { + 
"name": "BM_hasLabel", + "label": "a label", + "iterations": 1, + "real_time": 1, + "cpu_time": 1, + "time_unit": "s" } ] } diff --git a/third-party/benchmark/tools/gbench/Inputs/test1_run2.json b/third-party/benchmark/tools/gbench/Inputs/test1_run2.json --- a/third-party/benchmark/tools/gbench/Inputs/test1_run2.json +++ b/third-party/benchmark/tools/gbench/Inputs/test1_run2.json @@ -114,6 +114,14 @@ "real_time": 1, "cpu_time": 1, "time_unit": "ns" + }, + { + "name": "BM_hasLabel", + "label": "a label", + "iterations": 1, + "real_time": 1, + "cpu_time": 1, + "time_unit": "s" } ] } diff --git a/third-party/benchmark/tools/gbench/__init__.py b/third-party/benchmark/tools/gbench/__init__.py --- a/third-party/benchmark/tools/gbench/__init__.py +++ b/third-party/benchmark/tools/gbench/__init__.py @@ -1,8 +1,8 @@ """Google Benchmark tooling""" -__author__ = "Eric Fiselier" -__email__ = "eric@efcs.ca" +__author__ = 'Eric Fiselier' +__email__ = 'eric@efcs.ca' __versioninfo__ = (0, 5, 0) -__version__ = ".".join(str(v) for v in __versioninfo__) + "dev" +__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' __all__ = [] diff --git a/third-party/benchmark/tools/gbench/report.py b/third-party/benchmark/tools/gbench/report.py --- a/third-party/benchmark/tools/gbench/report.py +++ b/third-party/benchmark/tools/gbench/report.py @@ -9,7 +9,6 @@ from scipy.stats import mannwhitneyu, gmean from numpy import array -from pandas import Timedelta class BenchmarkColor(object): @@ -18,30 +17,38 @@ self.code = code def __repr__(self): - return "%s%r" % (self.__class__.__name__, (self.name, self.code)) + return '%s%r' % (self.__class__.__name__, + (self.name, self.code)) def __format__(self, format): return self.code # Benchmark Colors Enumeration -BC_NONE = BenchmarkColor("NONE", "") -BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m") -BC_CYAN = BenchmarkColor("CYAN", "\033[96m") -BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m") -BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m") -BC_HEADER = BenchmarkColor("HEADER", "\033[92m") -BC_WARNING = BenchmarkColor("WARNING", "\033[93m") -BC_WHITE = BenchmarkColor("WHITE", "\033[97m") -BC_FAIL = BenchmarkColor("FAIL", "\033[91m") -BC_ENDC = BenchmarkColor("ENDC", "\033[0m") -BC_BOLD = BenchmarkColor("BOLD", "\033[1m") -BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m") +BC_NONE = BenchmarkColor('NONE', '') +BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') +BC_CYAN = BenchmarkColor('CYAN', '\033[96m') +BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') +BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') +BC_HEADER = BenchmarkColor('HEADER', '\033[92m') +BC_WARNING = BenchmarkColor('WARNING', '\033[93m') +BC_WHITE = BenchmarkColor('WHITE', '\033[97m') +BC_FAIL = BenchmarkColor('FAIL', '\033[91m') +BC_ENDC = BenchmarkColor('ENDC', '\033[0m') +BC_BOLD = BenchmarkColor('BOLD', '\033[1m') +BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') UTEST_MIN_REPETITIONS = 2 UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. 
UTEST_COL_NAME = "_pvalue" +_TIME_UNIT_TO_SECONDS_MULTIPLIER = { + "s": 1.0, + "ms": 1e-3, + "us": 1e-6, + "ns": 1e-9, +} + def color_format(use_color, fmt_str, *args, **kwargs): """ @@ -52,11 +59,10 @@ """ assert use_color is True or use_color is False if not use_color: - args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE for arg in args] - kwargs = { - key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE - for key, arg in kwargs.items() - } + args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for arg in args] + kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for key, arg in kwargs.items()} return fmt_str.format(*args, **kwargs) @@ -67,8 +73,8 @@ """ longest_name = 1 for bc in benchmark_list: - if len(bc["name"]) > longest_name: - longest_name = len(bc["name"]) + if len(bc['name']) > longest_name: + longest_name = len(bc['name']) return longest_name @@ -89,13 +95,13 @@ """ regex = re.compile(family) filtered = {} - filtered["benchmarks"] = [] - for be in json_orig["benchmarks"]: - if not regex.search(be["name"]): + filtered['benchmarks'] = [] + for be in json_orig['benchmarks']: + if not regex.search(be['name']): continue filteredbench = copy.deepcopy(be) # Do NOT modify the old name! - filteredbench["name"] = regex.sub(replacement, filteredbench["name"]) - filtered["benchmarks"].append(filteredbench) + filteredbench['name'] = regex.sub(replacement, filteredbench['name']) + filtered['benchmarks'].append(filteredbench) return filtered @@ -104,11 +110,9 @@ While *keeping* the order, give all the unique 'names' used for benchmarks. """ seen = set() - uniqued = [ - x["name"] - for x in json["benchmarks"] - if x["name"] not in seen and (seen.add(x["name"]) or True) - ] + uniqued = [x['name'] for x in json['benchmarks'] + if x['name'] not in seen and + (seen.add(x['name']) or True)] return uniqued @@ -121,7 +125,7 @@ def is_potentially_comparable_benchmark(x): - return "time_unit" in x and "real_time" in x and "cpu_time" in x + return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) def partition_benchmarks(json1, json2): @@ -138,24 +142,18 @@ time_unit = None # Pick the time unit from the first entry of the lhs benchmark. # We should be careful not to crash with unexpected input. - for x in json1["benchmarks"]: - if x["name"] == name and is_potentially_comparable_benchmark(x): - time_unit = x["time_unit"] + for x in json1['benchmarks']: + if (x['name'] == name and is_potentially_comparable_benchmark(x)): + time_unit = x['time_unit'] break if time_unit is None: continue # Filter by name and time unit. # All the repetitions are assumed to be comparable. - lhs = [ - x - for x in json1["benchmarks"] - if x["name"] == name and x["time_unit"] == time_unit - ] - rhs = [ - x - for x in json2["benchmarks"] - if x["name"] == name and x["time_unit"] == time_unit - ] + lhs = [x for x in json1['benchmarks'] if x['name'] == name and + x['time_unit'] == time_unit] + rhs = [x for x in json2['benchmarks'] if x['name'] == name and + x['time_unit'] == time_unit] partitions.append([lhs, rhs]) return partitions @@ -165,9 +163,9 @@ Get value of field_name field of benchmark, which is time with time unit time_unit, as time in seconds. 
""" - time_unit = benchmark["time_unit"] if "time_unit" in benchmark else "s" - dt = Timedelta(benchmark[field_name], time_unit) - return dt / Timedelta(1, "s") + timedelta = benchmark[field_name] + time_unit = benchmark.get('time_unit', 's') + return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit) def calculate_geomean(json): @@ -176,15 +174,11 @@ and calculate their geomean. """ times = [] - for benchmark in json["benchmarks"]: - if "run_type" in benchmark and benchmark["run_type"] == "aggregate": + for benchmark in json['benchmarks']: + if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate': continue - times.append( - [ - get_timedelta_field_as_seconds(benchmark, "real_time"), - get_timedelta_field_as_seconds(benchmark, "cpu_time"), - ] - ) + times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'), + get_timedelta_field_as_seconds(benchmark, 'cpu_time')]) return gmean(times) if times else array([]) @@ -196,23 +190,19 @@ def calc_utest(timings_cpu, timings_time): - min_rep_cnt = min( - len(timings_time[0]), - len(timings_time[1]), - len(timings_cpu[0]), - len(timings_cpu[1]), - ) + min_rep_cnt = min(len(timings_time[0]), + len(timings_time[1]), + len(timings_cpu[0]), + len(timings_cpu[1])) # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? if min_rep_cnt < UTEST_MIN_REPETITIONS: return False, None, None time_pvalue = mannwhitneyu( - timings_time[0], timings_time[1], alternative="two-sided" - ).pvalue + timings_time[0], timings_time[1], alternative='two-sided').pvalue cpu_pvalue = mannwhitneyu( - timings_cpu[0], timings_cpu[1], alternative="two-sided" - ).pvalue + timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue @@ -222,46 +212,38 @@ return BC_FAIL if pval >= utest_alpha else BC_OKGREEN # Check if we failed miserably with minimum required repetitions for utest - if ( - not utest["have_optimal_repetitions"] - and utest["cpu_pvalue"] is None - and utest["time_pvalue"] is None - ): + if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None: return [] dsc = "U Test, Repetitions: {} vs {}".format( - utest["nr_of_repetitions"], utest["nr_of_repetitions_other"] - ) + utest['nr_of_repetitions'], utest['nr_of_repetitions_other']) dsc_color = BC_OKGREEN # We still got some results to show but issue a warning about it. - if not utest["have_optimal_repetitions"]: + if not utest['have_optimal_repetitions']: dsc_color = BC_WARNING dsc += ". WARNING: Results unreliable! 
{}+ repetitions recommended.".format( - UTEST_OPTIMAL_REPETITIONS - ) + UTEST_OPTIMAL_REPETITIONS) special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" - return [ - color_format( - use_color, - special_str, - BC_HEADER, - "{}{}".format(bc_name, UTEST_COL_NAME), - first_col_width, - get_utest_color(utest["time_pvalue"]), - utest["time_pvalue"], - get_utest_color(utest["cpu_pvalue"]), - utest["cpu_pvalue"], - dsc_color, - dsc, - endc=BC_ENDC, - ) - ] - - -def get_difference_report(json1, json2, utest=False): + return [color_format(use_color, + special_str, + BC_HEADER, + "{}{}".format(bc_name, UTEST_COL_NAME), + first_col_width, + get_utest_color( + utest['time_pvalue']), utest['time_pvalue'], + get_utest_color( + utest['cpu_pvalue']), utest['cpu_pvalue'], + dsc_color, dsc, + endc=BC_ENDC)] + + +def get_difference_report( + json1, + json2, + utest=False): """ Calculate and report the difference between each test of two benchmarks runs specified as 'json1' and 'json2'. Output is another json containing @@ -272,39 +254,37 @@ diff_report = [] partitions = partition_benchmarks(json1, json2) for partition in partitions: - benchmark_name = partition[0][0]["name"] - time_unit = partition[0][0]["time_unit"] + benchmark_name = partition[0][0]['name'] + label = partition[0][0]['label'] if 'label' in partition[0][0] else '' + time_unit = partition[0][0]['time_unit'] measurements = [] utest_results = {} # Careful, we may have different repetition count. for i in range(min(len(partition[0]), len(partition[1]))): bn = partition[0][i] other_bench = partition[1][i] - measurements.append( - { - "real_time": bn["real_time"], - "cpu_time": bn["cpu_time"], - "real_time_other": other_bench["real_time"], - "cpu_time_other": other_bench["cpu_time"], - "time": calculate_change(bn["real_time"], other_bench["real_time"]), - "cpu": calculate_change(bn["cpu_time"], other_bench["cpu_time"]), - } - ) + measurements.append({ + 'real_time': bn['real_time'], + 'cpu_time': bn['cpu_time'], + 'real_time_other': other_bench['real_time'], + 'cpu_time_other': other_bench['cpu_time'], + 'time': calculate_change(bn['real_time'], other_bench['real_time']), + 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time']) + }) # After processing the whole partition, if requested, do the U test. if utest: - timings_cpu = extract_field(partition, "cpu_time") - timings_time = extract_field(partition, "real_time") + timings_cpu = extract_field(partition, 'cpu_time') + timings_time = extract_field(partition, 'real_time') have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest( - timings_cpu, timings_time - ) + timings_cpu, timings_time) if cpu_pvalue and time_pvalue: utest_results = { - "have_optimal_repetitions": have_optimal_repetitions, - "cpu_pvalue": cpu_pvalue, - "time_pvalue": time_pvalue, - "nr_of_repetitions": len(timings_cpu[0]), - "nr_of_repetitions_other": len(timings_cpu[1]), + 'have_optimal_repetitions': have_optimal_repetitions, + 'cpu_pvalue': cpu_pvalue, + 'time_pvalue': time_pvalue, + 'nr_of_repetitions': len(timings_cpu[0]), + 'nr_of_repetitions_other': len(timings_cpu[1]) } # Store only if we had any measurements for given benchmark. @@ -312,58 +292,47 @@ # time units which are not compatible with other time units in the # benchmark suite. 
if measurements: - run_type = ( - partition[0][0]["run_type"] if "run_type" in partition[0][0] else "" - ) - aggregate_name = ( - partition[0][0]["aggregate_name"] - if run_type == "aggregate" and "aggregate_name" in partition[0][0] - else "" - ) - diff_report.append( - { - "name": benchmark_name, - "measurements": measurements, - "time_unit": time_unit, - "run_type": run_type, - "aggregate_name": aggregate_name, - "utest": utest_results, - } - ) + run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else '' + aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' + diff_report.append({ + 'name': benchmark_name, + 'label': label, + 'measurements': measurements, + 'time_unit': time_unit, + 'run_type': run_type, + 'aggregate_name': aggregate_name, + 'utest': utest_results + }) lhs_gmean = calculate_geomean(json1) rhs_gmean = calculate_geomean(json2) if lhs_gmean.any() and rhs_gmean.any(): - diff_report.append( - { - "name": "OVERALL_GEOMEAN", - "measurements": [ - { - "real_time": lhs_gmean[0], - "cpu_time": lhs_gmean[1], - "real_time_other": rhs_gmean[0], - "cpu_time_other": rhs_gmean[1], - "time": calculate_change(lhs_gmean[0], rhs_gmean[0]), - "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]), - } - ], - "time_unit": "s", - "run_type": "aggregate", - "aggregate_name": "geomean", - "utest": {}, - } - ) + diff_report.append({ + 'name': 'OVERALL_GEOMEAN', + 'label': '', + 'measurements': [{ + 'real_time': lhs_gmean[0], + 'cpu_time': lhs_gmean[1], + 'real_time_other': rhs_gmean[0], + 'cpu_time_other': rhs_gmean[1], + 'time': calculate_change(lhs_gmean[0], rhs_gmean[0]), + 'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1]) + }], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + }) return diff_report def print_difference_report( - json_diff_report, - include_aggregates_only=False, - utest=False, - utest_alpha=0.05, - use_color=True, -): + json_diff_report, + include_aggregates_only=False, + utest=False, + utest_alpha=0.05, + use_color=True): """ Calculate and report the difference between each test of two benchmarks runs specified as 'json1' and 'json2'. @@ -379,53 +348,44 @@ return BC_CYAN first_col_width = find_longest_name(json_diff_report) - first_col_width = max(first_col_width, len("Benchmark")) + first_col_width = max( + first_col_width, + len('Benchmark')) first_col_width += len(UTEST_COL_NAME) first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format( - "Benchmark", 12 + first_col_width - ) - output_strs = [first_line, "-" * len(first_line)] + 'Benchmark', 12 + first_col_width) + output_strs = [first_line, '-' * len(first_line)] fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" for benchmark in json_diff_report: # *If* we were asked to only include aggregates, # and if it is non-aggregate, then don't print it. 
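The 'time' and 'cpu' fields that get_difference_report attaches to each measurement above come from calculate_change, which is defined elsewhere in report.py and is not shown in this diff. The expectations in the unit tests further down (e.g. BM_2xFaster: 50 -> 25 reported as -0.5000, BM_100xSlower: 100 -> 10000 reported as +99.0000) pin down the convention: a signed change expressed as a fraction of the old value. A hedged sketch of just that convention (the helper name here is made up, and the upstream function presumably also guards old_val == 0, which this sketch omits):

    def relative_change(old_val, new_val):
        # Signed fractional change of new_val relative to old_val, matching the
        # expectations below: 50 -> 25 gives -0.5, 100 -> 10000 gives +99.0.
        return float(new_val - old_val) / abs(old_val)

    assert relative_change(50, 25) == -0.5
    assert relative_change(100, 10000) == 99.0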
- if ( - not include_aggregates_only - or not "run_type" in benchmark - or benchmark["run_type"] == "aggregate" - ): - for measurement in benchmark["measurements"]: - output_strs += [ - color_format( - use_color, - fmt_str, - BC_HEADER, - benchmark["name"], - first_col_width, - get_color(measurement["time"]), - measurement["time"], - get_color(measurement["cpu"]), - measurement["cpu"], - measurement["real_time"], - measurement["real_time_other"], - measurement["cpu_time"], - measurement["cpu_time_other"], - endc=BC_ENDC, - ) - ] + if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': + for measurement in benchmark['measurements']: + output_strs += [color_format(use_color, + fmt_str, + BC_HEADER, + benchmark['name'], + first_col_width, + get_color(measurement['time']), + measurement['time'], + get_color(measurement['cpu']), + measurement['cpu'], + measurement['real_time'], + measurement['real_time_other'], + measurement['cpu_time'], + measurement['cpu_time_other'], + endc=BC_ENDC)] # After processing the measurements, if requested and # if applicable (e.g. u-test exists for given benchmark), # print the U test. - if utest and benchmark["utest"]: - output_strs += print_utest( - benchmark["name"], - benchmark["utest"], - utest_alpha=utest_alpha, - first_col_width=first_col_width, - use_color=use_color, - ) + if utest and benchmark['utest']: + output_strs += print_utest(benchmark['name'], + benchmark['utest'], + utest_alpha=utest_alpha, + first_col_width=first_col_width, + use_color=use_color) return output_strs @@ -437,19 +397,21 @@ class TestGetUniqueBenchmarkNames(unittest.TestCase): def load_results(self): import json - - testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), "Inputs") - testOutput = os.path.join(testInputs, "test3_run0.json") - with open(testOutput, "r") as f: + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test3_run0.json') + with open(testOutput, 'r') as f: json = json.load(f) return json def test_basic(self): expect_lines = [ - "BM_One", - "BM_Two", - "short", # These two are not sorted - "medium", # These two are not sorted + 'BM_One', + 'BM_Two', + 'short', # These two are not sorted + 'medium', # These two are not sorted ] json = self.load_results() output_lines = get_unique_benchmark_names(json) @@ -465,15 +427,15 @@ def setUpClass(cls): def load_results(): import json - testInputs = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "Inputs" - ) - testOutput1 = os.path.join(testInputs, "test1_run1.json") - testOutput2 = os.path.join(testInputs, "test1_run2.json") - with open(testOutput1, "r") as f: + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test1_run1.json') + testOutput2 = os.path.join(testInputs, 'test1_run2.json') + with open(testOutput1, 'r') as f: json1 = json.load(f) - with open(testOutput2, "r") as f: + with open(testOutput2, 'r') as f: json2 = json.load(f) return json1, json2 @@ -482,236 +444,171 @@ def test_json_diff_report_pretty_printing(self): expect_lines = [ - ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"], - ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"], - ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"], - ["BM_1PercentFaster", "-0.0100", "-0.0100", "100", "99", "100", "99"], - ["BM_1PercentSlower", "+0.0100", "+0.0100", "100", "101", "100", "101"], - ["BM_10PercentFaster", "-0.1000", 
"-0.1000", "100", "90", "100", "90"], - ["BM_10PercentSlower", "+0.1000", "+0.1000", "100", "110", "100", "110"], - ["BM_100xSlower", "+99.0000", "+99.0000", "100", "10000", "100", "10000"], - ["BM_100xFaster", "-0.9900", "-0.9900", "10000", "100", "10000", "100"], - ["BM_10PercentCPUToTime", "+0.1000", "-0.1000", "100", "110", "100", "90"], - ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"], - ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"], - ["OVERALL_GEOMEAN", "-0.8344", "-0.8026", "0", "0", "0", "0"], + ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], + ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], + ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'], + ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'], + ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'], + ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'], + ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'], + ['BM_100xSlower', '+99.0000', '+99.0000', + '100', '10000', '100', '10000'], + ['BM_100xFaster', '-0.9900', '-0.9900', + '10000', '100', '10000', '100'], + ['BM_10PercentCPUToTime', '+0.1000', + '-0.1000', '100', '110', '100', '90'], + ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], + ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], + ['BM_hasLabel', '+0.0000', '+0.0000', '1', '1', '1', '1'], + ['OVERALL_GEOMEAN', '-0.8113', '-0.7779', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( - self.json_diff_report, use_color=False - ) + self.json_diff_report, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(" ") if x] + parts = [x for x in output_lines[i].split(' ') if x] self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) def test_json_diff_report_output(self): expected_output = [ { - "name": "BM_SameTimes", - "measurements": [ - { - "time": 0.0000, - "cpu": 0.0000, - "real_time": 10, - "real_time_other": 10, - "cpu_time": 10, - "cpu_time_other": 10, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_SameTimes', + 'label': '', + 'measurements': [{'time': 0.0000, 'cpu': 0.0000, + 'real_time': 10, 'real_time_other': 10, + 'cpu_time': 10, 'cpu_time_other': 10}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_2xFaster", - "measurements": [ - { - "time": -0.5000, - "cpu": -0.5000, - "real_time": 50, - "real_time_other": 25, - "cpu_time": 50, - "cpu_time_other": 25, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_2xFaster', + 'label': '', + 'measurements': [{'time': -0.5000, 'cpu': -0.5000, + 'real_time': 50, 'real_time_other': 25, + 'cpu_time': 50, 'cpu_time_other': 25}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_2xSlower", - "measurements": [ - { - "time": 1.0000, - "cpu": 1.0000, - "real_time": 50, - "real_time_other": 100, - "cpu_time": 50, - "cpu_time_other": 100, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_2xSlower', + 'label': '', + 'measurements': [{'time': 1.0000, 'cpu': 1.0000, + 'real_time': 50, 'real_time_other': 100, + 'cpu_time': 50, 'cpu_time_other': 100}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_1PercentFaster", - "measurements": [ - { - "time": -0.0100, - "cpu": -0.0100, - "real_time": 100, - "real_time_other": 
98.9999999, - "cpu_time": 100, - "cpu_time_other": 98.9999999, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_1PercentFaster', + 'label': '', + 'measurements': [{'time': -0.0100, 'cpu': -0.0100, + 'real_time': 100, 'real_time_other': 98.9999999, + 'cpu_time': 100, 'cpu_time_other': 98.9999999}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_1PercentSlower", - "measurements": [ - { - "time": 0.0100, - "cpu": 0.0100, - "real_time": 100, - "real_time_other": 101, - "cpu_time": 100, - "cpu_time_other": 101, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_1PercentSlower', + 'label': '', + 'measurements': [{'time': 0.0100, 'cpu': 0.0100, + 'real_time': 100, 'real_time_other': 101, + 'cpu_time': 100, 'cpu_time_other': 101}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_10PercentFaster", - "measurements": [ - { - "time": -0.1000, - "cpu": -0.1000, - "real_time": 100, - "real_time_other": 90, - "cpu_time": 100, - "cpu_time_other": 90, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_10PercentFaster', + 'label': '', + 'measurements': [{'time': -0.1000, 'cpu': -0.1000, + 'real_time': 100, 'real_time_other': 90, + 'cpu_time': 100, 'cpu_time_other': 90}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_10PercentSlower", - "measurements": [ - { - "time": 0.1000, - "cpu": 0.1000, - "real_time": 100, - "real_time_other": 110, - "cpu_time": 100, - "cpu_time_other": 110, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_10PercentSlower', + 'label': '', + 'measurements': [{'time': 0.1000, 'cpu': 0.1000, + 'real_time': 100, 'real_time_other': 110, + 'cpu_time': 100, 'cpu_time_other': 110}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_100xSlower", - "measurements": [ - { - "time": 99.0000, - "cpu": 99.0000, - "real_time": 100, - "real_time_other": 10000, - "cpu_time": 100, - "cpu_time_other": 10000, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_100xSlower', + 'label': '', + 'measurements': [{'time': 99.0000, 'cpu': 99.0000, + 'real_time': 100, 'real_time_other': 10000, + 'cpu_time': 100, 'cpu_time_other': 10000}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_100xFaster", - "measurements": [ - { - "time": -0.9900, - "cpu": -0.9900, - "real_time": 10000, - "real_time_other": 100, - "cpu_time": 10000, - "cpu_time_other": 100, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_100xFaster', + 'label': '', + 'measurements': [{'time': -0.9900, 'cpu': -0.9900, + 'real_time': 10000, 'real_time_other': 100, + 'cpu_time': 10000, 'cpu_time_other': 100}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_10PercentCPUToTime", - "measurements": [ - { - "time": 0.1000, - "cpu": -0.1000, - "real_time": 100, - "real_time_other": 110, - "cpu_time": 100, - "cpu_time_other": 90, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_10PercentCPUToTime', + 'label': '', + 'measurements': [{'time': 0.1000, 'cpu': -0.1000, + 'real_time': 100, 'real_time_other': 110, + 'cpu_time': 100, 'cpu_time_other': 90}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_ThirdFaster", - "measurements": [ - { - "time": -0.3333, - "cpu": -0.3334, - "real_time": 100, - "real_time_other": 67, - "cpu_time": 100, - "cpu_time_other": 67, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': 'BM_ThirdFaster', + 'label': '', + 'measurements': [{'time': -0.3333, 'cpu': -0.3334, + 'real_time': 100, 'real_time_other': 67, + 'cpu_time': 100, 'cpu_time_other': 67}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_NotBadTimeUnit", - 
"measurements": [ - { - "time": -0.9000, - "cpu": 0.2000, - "real_time": 0.4, - "real_time_other": 0.04, - "cpu_time": 0.5, - "cpu_time_other": 0.6, - } - ], - "time_unit": "s", - "utest": {}, + 'name': 'BM_NotBadTimeUnit', + 'label': '', + 'measurements': [{'time': -0.9000, 'cpu': 0.2000, + 'real_time': 0.4, 'real_time_other': 0.04, + 'cpu_time': 0.5, 'cpu_time_other': 0.6}], + 'time_unit': 's', + 'utest': {} }, { - "name": "OVERALL_GEOMEAN", - "measurements": [ - { - "real_time": 1.193776641714438e-06, - "cpu_time": 1.2144445585302297e-06, - "real_time_other": 1.9768988699420897e-07, - "cpu_time_other": 2.397447755209533e-07, - "time": -0.834399601997324, - "cpu": -0.8025889499549471, - } - ], - "time_unit": "s", - "run_type": "aggregate", - "aggregate_name": "geomean", - "utest": {}, + 'name': 'BM_hasLabel', + 'label': 'a label', + 'measurements': [{'time': 0.0000, 'cpu': 0.0000, + 'real_time': 1, 'real_time_other': 1, + 'cpu_time': 1, 'cpu_time_other': 1}], + 'time_unit': 's', + 'utest': {} + }, + { + 'name': 'OVERALL_GEOMEAN', + 'label': '', + 'measurements': [{'real_time': 3.1622776601683826e-06, 'cpu_time': 3.2130844755623912e-06, + 'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07, + 'time': -0.8112976497120911, 'cpu': -0.7778551721181174}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', 'utest': {} }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip(self.json_diff_report, expected_output): - self.assertEqual(out["name"], expected["name"]) - self.assertEqual(out["time_unit"], expected["time_unit"]) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['label'], expected['label']) + self.assertEqual(out['time_unit'], expected['time_unit']) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -721,12 +618,12 @@ def setUpClass(cls): def load_result(): import json - testInputs = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "Inputs" - ) - testOutput = os.path.join(testInputs, "test2_run.json") - with open(testOutput, "r") as f: + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test2_run.json') + with open(testOutput, 'r') as f: json = json.load(f) return json @@ -737,108 +634,65 @@ def test_json_diff_report_pretty_printing(self): expect_lines = [ - [".", "-0.5000", "-0.5000", "10", "5", "10", "5"], - ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"], - ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"], - ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"], - ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"], + ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], + ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], + ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], + ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], + ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( - self.json_diff_report, use_color=False - ) + self.json_diff_report, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(" ") if x] + parts = [x for x in output_lines[i].split(' ') if x] 
self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - "name": ".", - "measurements": [ - { - "time": -0.5, - "cpu": -0.5, - "real_time": 10, - "real_time_other": 5, - "cpu_time": 10, - "cpu_time_other": 5, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': u'.', + 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "./4", - "measurements": [ - { - "time": -0.5, - "cpu": -0.5, - "real_time": 40, - "real_time_other": 20, - "cpu_time": 40, - "cpu_time_other": 20, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': u'./4', + 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}], + 'time_unit': 'ns', + 'utest': {}, }, { - "name": "Prefix/.", - "measurements": [ - { - "time": -0.5, - "cpu": -0.5, - "real_time": 20, - "real_time_other": 10, - "cpu_time": 20, - "cpu_time_other": 10, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': u'Prefix/.', + 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "Prefix/./3", - "measurements": [ - { - "time": -0.5, - "cpu": -0.5, - "real_time": 30, - "real_time_other": 15, - "cpu_time": 30, - "cpu_time_other": 15, - } - ], - "time_unit": "ns", - "utest": {}, + 'name': u'Prefix/./3', + 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], + 'time_unit': 'ns', + 'utest': {} }, { - "name": "OVERALL_GEOMEAN", - "measurements": [ - { - "real_time": 2.213363839400641e-08, - "cpu_time": 2.213363839400641e-08, - "real_time_other": 1.1066819197003185e-08, - "cpu_time_other": 1.1066819197003185e-08, - "time": -0.5000000000000009, - "cpu": -0.5000000000000009, - } - ], - "time_unit": "s", - "run_type": "aggregate", - "aggregate_name": "geomean", - "utest": {}, - }, + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08, + 'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08, + 'time': -0.5000000000000009, 'cpu': -0.5000000000000009}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip(self.json_diff_report, expected_output): - self.assertEqual(out["name"], expected["name"]) - self.assertEqual(out["time_unit"], expected["time_unit"]) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -848,487 +702,424 @@ def setUpClass(cls): def load_results(): import json - testInputs = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "Inputs" - ) - testOutput1 = os.path.join(testInputs, "test3_run0.json") - testOutput2 = os.path.join(testInputs, "test3_run1.json") - with open(testOutput1, "r") as f: + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test3_run0.json') + testOutput2 = os.path.join(testInputs, 'test3_run1.json') + with open(testOutput1, 'r') as f: json1 = json.load(f) - with open(testOutput2, "r") as f: + with 
open(testOutput2, 'r') as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report(json1, json2, utest=True) + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): expect_lines = [ - ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], - ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"], - ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"], - [ - "BM_Two_pvalue", - "1.0000", - "0.6667", - "U", - "Test,", - "Repetitions:", - "2", - "vs", - "2.", - "WARNING:", - "Results", - "unreliable!", - "9+", - "repetitions", - "recommended.", - ], - ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], - ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], - [ - "short_pvalue", - "0.7671", - "0.2000", - "U", - "Test,", - "Repetitions:", - "2", - "vs", - "3.", - "WARNING:", - "Results", - "unreliable!", - "9+", - "repetitions", - "recommended.", - ], - ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"], - ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], + ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], + ['BM_Two_pvalue', + '1.0000', + '0.6667', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.2000', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( - self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False - ) + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(" ") if x] + parts = [x for x in output_lines[i].split(' ') if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report_pretty_printing_aggregates_only(self): expect_lines = [ - ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], - [ - "BM_Two_pvalue", - "1.0000", - "0.6667", - "U", - "Test,", - "Repetitions:", - "2", - "vs", - "2.", - "WARNING:", - "Results", - "unreliable!", - "9+", - "repetitions", - "recommended.", - ], - ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], - ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], - [ - "short_pvalue", - "0.7671", - "0.2000", - "U", - "Test,", - "Repetitions:", - "2", - "vs", - "3.", - "WARNING:", - "Results", - "unreliable!", - "9+", - "repetitions", - "recommended.", - ], - ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two_pvalue', + '1.0000', + '0.6667', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + 
['short_pvalue', + '0.7671', + '0.2000', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( - self.json_diff_report, - include_aggregates_only=True, - utest=True, - utest_alpha=0.05, - use_color=False, - ) + self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(" ") if x] + parts = [x for x in output_lines[i].split(' ') if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - "name": "BM_One", - "measurements": [ - { - "time": -0.1, - "cpu": 0.1, - "real_time": 10, - "real_time_other": 9, - "cpu_time": 100, - "cpu_time_other": 110, - } + 'name': u'BM_One', + 'measurements': [ + {'time': -0.1, + 'cpu': 0.1, + 'real_time': 10, + 'real_time_other': 9, + 'cpu_time': 100, + 'cpu_time_other': 110} ], - "time_unit": "ns", - "utest": {}, + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_Two", - "measurements": [ - { - "time": 0.1111111111111111, - "cpu": -0.011111111111111112, - "real_time": 9, - "real_time_other": 10, - "cpu_time": 90, - "cpu_time_other": 89, - }, - { - "time": -0.125, - "cpu": -0.16279069767441862, - "real_time": 8, - "real_time_other": 7, - "cpu_time": 86, - "cpu_time_other": 72, - }, + 'name': u'BM_Two', + 'measurements': [ + {'time': 0.1111111111111111, + 'cpu': -0.011111111111111112, + 'real_time': 9, + 'real_time_other': 10, + 'cpu_time': 90, + 'cpu_time_other': 89}, + {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, + 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} ], - "time_unit": "ns", - "utest": { - "have_optimal_repetitions": False, - "cpu_pvalue": 0.6666666666666666, - "time_pvalue": 1.0, - }, + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 + } }, { - "name": "short", - "measurements": [ - { - "time": -0.125, - "cpu": -0.0625, - "real_time": 8, - "real_time_other": 7, - "cpu_time": 80, - "cpu_time_other": 75, - }, - { - "time": -0.4325, - "cpu": -0.13506493506493514, - "real_time": 8, - "real_time_other": 4.54, - "cpu_time": 77, - "cpu_time_other": 66.6, - }, + 'name': u'short', + 'measurements': [ + {'time': -0.125, + 'cpu': -0.0625, + 'real_time': 8, + 'real_time_other': 7, + 'cpu_time': 80, + 'cpu_time_other': 75}, + {'time': -0.4325, + 'cpu': -0.13506493506493514, + 'real_time': 8, + 'real_time_other': 4.54, + 'cpu_time': 77, + 'cpu_time_other': 66.6} ], - "time_unit": "ns", - "utest": { - "have_optimal_repetitions": False, - "cpu_pvalue": 0.2, - "time_pvalue": 0.7670968684102772, - }, + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 + } }, { - "name": "medium", - "measurements": [ - { - "time": -0.375, - "cpu": -0.3375, - "real_time": 8, - "real_time_other": 5, - "cpu_time": 80, - "cpu_time_other": 53, - } + 'name': u'medium', + 'measurements': [ + {'time': -0.375, + 'cpu': -0.3375, + 'real_time': 8, + 'real_time_other': 5, + 'cpu_time': 80, + 'cpu_time_other': 53} ], - "time_unit": "ns", - "utest": {}, + 'time_unit': 'ns', + 'utest': {} }, { - "name": 
"OVERALL_GEOMEAN", - "measurements": [ - { - "real_time": 8.48528137423858e-09, - "cpu_time": 8.441336246629233e-08, - "real_time_other": 2.2405267593145244e-08, - "cpu_time_other": 2.5453661413660466e-08, - "time": 1.6404861082353634, - "cpu": -0.6984640740519662, - } - ], - "time_unit": "s", - "run_type": "aggregate", - "aggregate_name": "geomean", - "utest": {}, - }, + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, + 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, + 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip(self.json_diff_report, expected_output): - self.assertEqual(out["name"], expected["name"]) - self.assertEqual(out["time_unit"], expected["time_unit"]) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) assert_utest(self, out, expected) assert_measurements(self, out, expected) -class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(unittest.TestCase): +class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( + unittest.TestCase): @classmethod def setUpClass(cls): def load_results(): import json - testInputs = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "Inputs" - ) - testOutput1 = os.path.join(testInputs, "test3_run0.json") - testOutput2 = os.path.join(testInputs, "test3_run1.json") - with open(testOutput1, "r") as f: + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test3_run0.json') + testOutput2 = os.path.join(testInputs, 'test3_run1.json') + with open(testOutput1, 'r') as f: json1 = json.load(f) - with open(testOutput2, "r") as f: + with open(testOutput2, 'r') as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report(json1, json2, utest=True) + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): expect_lines = [ - ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], - ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"], - ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"], - [ - "BM_Two_pvalue", - "1.0000", - "0.6667", - "U", - "Test,", - "Repetitions:", - "2", - "vs", - "2.", - "WARNING:", - "Results", - "unreliable!", - "9+", - "repetitions", - "recommended.", - ], - ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], - ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], - [ - "short_pvalue", - "0.7671", - "0.2000", - "U", - "Test,", - "Repetitions:", - "2", - "vs", - "3.", - "WARNING:", - "Results", - "unreliable!", - "9+", - "repetitions", - "recommended.", - ], - ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"], - ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], + ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], + ['BM_Two_pvalue', + '1.0000', + '0.6667', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', 
'75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.2000', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( - self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False - ) + self.json_diff_report, + utest=True, utest_alpha=0.05, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(" ") if x] + parts = [x for x in output_lines[i].split(' ') if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - "name": "BM_One", - "measurements": [ - { - "time": -0.1, - "cpu": 0.1, - "real_time": 10, - "real_time_other": 9, - "cpu_time": 100, - "cpu_time_other": 110, - } + 'name': u'BM_One', + 'measurements': [ + {'time': -0.1, + 'cpu': 0.1, + 'real_time': 10, + 'real_time_other': 9, + 'cpu_time': 100, + 'cpu_time_other': 110} ], - "time_unit": "ns", - "utest": {}, + 'time_unit': 'ns', + 'utest': {} }, { - "name": "BM_Two", - "measurements": [ - { - "time": 0.1111111111111111, - "cpu": -0.011111111111111112, - "real_time": 9, - "real_time_other": 10, - "cpu_time": 90, - "cpu_time_other": 89, - }, - { - "time": -0.125, - "cpu": -0.16279069767441862, - "real_time": 8, - "real_time_other": 7, - "cpu_time": 86, - "cpu_time_other": 72, - }, + 'name': u'BM_Two', + 'measurements': [ + {'time': 0.1111111111111111, + 'cpu': -0.011111111111111112, + 'real_time': 9, + 'real_time_other': 10, + 'cpu_time': 90, + 'cpu_time_other': 89}, + {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, + 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} ], - "time_unit": "ns", - "utest": { - "have_optimal_repetitions": False, - "cpu_pvalue": 0.6666666666666666, - "time_pvalue": 1.0, - }, + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 + } }, { - "name": "short", - "measurements": [ - { - "time": -0.125, - "cpu": -0.0625, - "real_time": 8, - "real_time_other": 7, - "cpu_time": 80, - "cpu_time_other": 75, - }, - { - "time": -0.4325, - "cpu": -0.13506493506493514, - "real_time": 8, - "real_time_other": 4.54, - "cpu_time": 77, - "cpu_time_other": 66.6, - }, + 'name': u'short', + 'measurements': [ + {'time': -0.125, + 'cpu': -0.0625, + 'real_time': 8, + 'real_time_other': 7, + 'cpu_time': 80, + 'cpu_time_other': 75}, + {'time': -0.4325, + 'cpu': -0.13506493506493514, + 'real_time': 8, + 'real_time_other': 4.54, + 'cpu_time': 77, + 'cpu_time_other': 66.6} ], - "time_unit": "ns", - "utest": { - "have_optimal_repetitions": False, - "cpu_pvalue": 0.2, - "time_pvalue": 0.7670968684102772, - }, + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 + } }, { - "name": "medium", - "measurements": [ - { - "real_time_other": 5, - "cpu_time": 80, - "time": -0.375, - "real_time": 8, - "cpu_time_other": 53, - "cpu": -0.3375, - } + 'name': u'medium', + 'measurements': [ + {'real_time_other': 5, + 'cpu_time': 80, + 'time': -0.375, + 'real_time': 8, + 'cpu_time_other': 53, + 'cpu': -0.3375 + } ], - "utest": {}, - 
"time_unit": "ns", - "aggregate_name": "", + 'utest': {}, + 'time_unit': u'ns', + 'aggregate_name': '' }, { - "name": "OVERALL_GEOMEAN", - "measurements": [ - { - "real_time": 8.48528137423858e-09, - "cpu_time": 8.441336246629233e-08, - "real_time_other": 2.2405267593145244e-08, - "cpu_time_other": 2.5453661413660466e-08, - "time": 1.6404861082353634, - "cpu": -0.6984640740519662, - } - ], - "time_unit": "s", - "run_type": "aggregate", - "aggregate_name": "geomean", - "utest": {}, - }, + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, + 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, + 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip(self.json_diff_report, expected_output): - self.assertEqual(out["name"], expected["name"]) - self.assertEqual(out["time_unit"], expected["time_unit"]) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) assert_utest(self, out, expected) assert_measurements(self, out, expected) -class TestReportDifferenceForPercentageAggregates(unittest.TestCase): +class TestReportDifferenceForPercentageAggregates( + unittest.TestCase): @classmethod def setUpClass(cls): def load_results(): import json - testInputs = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "Inputs" - ) - testOutput1 = os.path.join(testInputs, "test4_run0.json") - testOutput2 = os.path.join(testInputs, "test4_run1.json") - with open(testOutput1, "r") as f: + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test4_run0.json') + testOutput2 = os.path.join(testInputs, 'test4_run1.json') + with open(testOutput1, 'r') as f: json1 = json.load(f) - with open(testOutput2, "r") as f: + with open(testOutput2, 'r') as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report(json1, json2, utest=True) + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): - expect_lines = [["whocares", "-0.5000", "+0.5000", "0", "0", "0", "0"]] + expect_lines = [ + ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0'] + ] output_lines_with_header = print_difference_report( - self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False - ) + self.json_diff_report, + utest=True, utest_alpha=0.05, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(" ") if x] + parts = [x for x in output_lines[i].split(' ') if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - "name": "whocares", - "measurements": [ - { - "time": -0.5, - "cpu": 0.5, - "real_time": 0.01, - "real_time_other": 0.005, - "cpu_time": 0.10, - "cpu_time_other": 0.15, - } + 'name': u'whocares', + 'measurements': [ + {'time': -0.5, + 'cpu': 0.5, + 'real_time': 0.01, + 'real_time_other': 0.005, + 'cpu_time': 0.10, + 'cpu_time_other': 0.15} ], - "time_unit": "ns", - "utest": {}, + 'time_unit': 'ns', + 
'utest': {} } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip(self.json_diff_report, expected_output): - self.assertEqual(out["name"], expected["name"]) - self.assertEqual(out["time_unit"], expected["time_unit"]) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -1338,12 +1129,12 @@ def setUpClass(cls): def load_result(): import json - testInputs = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "Inputs" - ) - testOutput = os.path.join(testInputs, "test4_run.json") - with open(testOutput, "r") as f: + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test4_run.json') + with open(testOutput, 'r') as f: json = json.load(f) return json @@ -1364,45 +1155,45 @@ "91 family 1 instance 0 aggregate", "90 family 1 instance 1 repetition 0", "89 family 1 instance 1 repetition 1", - "88 family 1 instance 1 aggregate", + "88 family 1 instance 1 aggregate" ] - for n in range(len(self.json["benchmarks"]) ** 2): - random.shuffle(self.json["benchmarks"]) - sorted_benchmarks = util.sort_benchmark_results(self.json)["benchmarks"] + for n in range(len(self.json['benchmarks']) ** 2): + random.shuffle(self.json['benchmarks']) + sorted_benchmarks = util.sort_benchmark_results(self.json)[ + 'benchmarks'] self.assertEqual(len(expected_names), len(sorted_benchmarks)) for out, expected in zip(sorted_benchmarks, expected_names): - self.assertEqual(out["name"], expected) + self.assertEqual(out['name'], expected) def assert_utest(unittest_instance, lhs, rhs): - if lhs["utest"]: + if lhs['utest']: unittest_instance.assertAlmostEqual( - lhs["utest"]["cpu_pvalue"], rhs["utest"]["cpu_pvalue"] - ) + lhs['utest']['cpu_pvalue'], + rhs['utest']['cpu_pvalue']) unittest_instance.assertAlmostEqual( - lhs["utest"]["time_pvalue"], rhs["utest"]["time_pvalue"] - ) + lhs['utest']['time_pvalue'], + rhs['utest']['time_pvalue']) unittest_instance.assertEqual( - lhs["utest"]["have_optimal_repetitions"], - rhs["utest"]["have_optimal_repetitions"], - ) + lhs['utest']['have_optimal_repetitions'], + rhs['utest']['have_optimal_repetitions']) else: # lhs is empty. assert if rhs is not. - unittest_instance.assertEqual(lhs["utest"], rhs["utest"]) + unittest_instance.assertEqual(lhs['utest'], rhs['utest']) def assert_measurements(unittest_instance, lhs, rhs): - for m1, m2 in zip(lhs["measurements"], rhs["measurements"]): - unittest_instance.assertEqual(m1["real_time"], m2["real_time"]) - unittest_instance.assertEqual(m1["cpu_time"], m2["cpu_time"]) + for m1, m2 in zip(lhs['measurements'], rhs['measurements']): + unittest_instance.assertEqual(m1['real_time'], m2['real_time']) + unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time']) # m1['time'] and m1['cpu'] hold values which are being calculated, # and therefore we must use almost-equal pattern. 
- unittest_instance.assertAlmostEqual(m1["time"], m2["time"], places=4) - unittest_instance.assertAlmostEqual(m1["cpu"], m2["cpu"], places=4) + unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4) + unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4) -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 diff --git a/third-party/benchmark/tools/gbench/util.py b/third-party/benchmark/tools/gbench/util.py --- a/third-party/benchmark/tools/gbench/util.py +++ b/third-party/benchmark/tools/gbench/util.py @@ -2,17 +2,18 @@ """ import json import os -import tempfile +import re import subprocess import sys -import functools +import tempfile + # Input file type enumeration IT_Invalid = 0 IT_JSON = 1 IT_Executable = 2 -_num_magic_bytes = 2 if sys.platform.startswith("win") else 4 +_num_magic_bytes = 2 if sys.platform.startswith('win') else 4 def is_executable_file(filename): @@ -23,21 +24,21 @@ """ if not os.path.isfile(filename): return False - with open(filename, mode="rb") as f: + with open(filename, mode='rb') as f: magic_bytes = f.read(_num_magic_bytes) - if sys.platform == "darwin": + if sys.platform == 'darwin': return magic_bytes in [ - b"\xfe\xed\xfa\xce", # MH_MAGIC - b"\xce\xfa\xed\xfe", # MH_CIGAM - b"\xfe\xed\xfa\xcf", # MH_MAGIC_64 - b"\xcf\xfa\xed\xfe", # MH_CIGAM_64 - b"\xca\xfe\xba\xbe", # FAT_MAGIC - b"\xbe\xba\xfe\xca", # FAT_CIGAM + b'\xfe\xed\xfa\xce', # MH_MAGIC + b'\xce\xfa\xed\xfe', # MH_CIGAM + b'\xfe\xed\xfa\xcf', # MH_MAGIC_64 + b'\xcf\xfa\xed\xfe', # MH_CIGAM_64 + b'\xca\xfe\xba\xbe', # FAT_MAGIC + b'\xbe\xba\xfe\xca' # FAT_CIGAM ] - elif sys.platform.startswith("win"): - return magic_bytes == b"MZ" + elif sys.platform.startswith('win'): + return magic_bytes == b'MZ' else: - return magic_bytes == b"\x7FELF" + return magic_bytes == b'\x7FELF' def is_json_file(filename): @@ -46,7 +47,7 @@ 'False' otherwise. """ try: - with open(filename, "r") as f: + with open(filename, 'r') as f: json.load(f) return True except BaseException: @@ -58,7 +59,7 @@ """ Return a tuple (type, msg) where 'type' specifies the classified type of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable - string represeting the error. + string representing the error. """ ftype = IT_Invalid err_msg = None @@ -71,9 +72,7 @@ elif is_json_file(filename): ftype = IT_JSON else: - err_msg = ( - "'%s' does not name a valid benchmark executable or JSON file" % filename - ) + err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename return ftype, err_msg @@ -96,11 +95,11 @@ if it is found return the arg it specifies. If specified more than once the last value is returned. If the flag is not found None is returned. """ - assert prefix.startswith("--") and prefix.endswith("=") + assert prefix.startswith('--') and prefix.endswith('=') result = None for f in benchmark_flags: if f.startswith(prefix): - result = f[len(prefix) :] + result = f[len(prefix):] return result @@ -109,49 +108,52 @@ Return a new list containing the specified benchmark_flags except those with the specified prefix. """ - assert prefix.startswith("--") and prefix.endswith("=") + assert prefix.startswith('--') and prefix.endswith('=') return [f for f in benchmark_flags if not f.startswith(prefix)] -def load_benchmark_results(fname): +def load_benchmark_results(fname, benchmark_filter): """ Read benchmark output from a file and return the JSON object. 
+ + Apply benchmark_filter, a regular expression, with nearly the same + semantics of the --benchmark_filter argument. May be None. + Note: the Python regular expression engine is used instead of the + one used by the C++ code, which may produce different results + in complex cases. + REQUIRES: 'fname' names a file containing JSON benchmark output. """ - with open(fname, "r") as f: - return json.load(f) + def benchmark_wanted(benchmark): + if benchmark_filter is None: + return True + name = benchmark.get('run_name', None) or benchmark['name'] + if re.search(benchmark_filter, name): + return True + return False + + with open(fname, 'r') as f: + results = json.load(f) + if 'benchmarks' in results: + results['benchmarks'] = list(filter(benchmark_wanted, + results['benchmarks'])) + return results def sort_benchmark_results(result): - benchmarks = result["benchmarks"] + benchmarks = result['benchmarks'] # From inner key to the outer key! benchmarks = sorted( - benchmarks, - key=lambda benchmark: benchmark["repetition_index"] - if "repetition_index" in benchmark - else -1, - ) + benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1) benchmarks = sorted( - benchmarks, - key=lambda benchmark: 1 - if "run_type" in benchmark and benchmark["run_type"] == "aggregate" - else 0, - ) + benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0) benchmarks = sorted( - benchmarks, - key=lambda benchmark: benchmark["per_family_instance_index"] - if "per_family_instance_index" in benchmark - else -1, - ) + benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1) benchmarks = sorted( - benchmarks, - key=lambda benchmark: benchmark["family_index"] - if "family_index" in benchmark - else -1, - ) + benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1) - result["benchmarks"] = benchmarks + result['benchmarks'] = benchmarks return result @@ -162,21 +164,23 @@ real time console output. 
RETURNS: A JSON object representing the benchmark output """ - output_name = find_benchmark_flag("--benchmark_out=", benchmark_flags) + output_name = find_benchmark_flag('--benchmark_out=', + benchmark_flags) is_temp_output = False if output_name is None: is_temp_output = True thandle, output_name = tempfile.mkstemp() os.close(thandle) - benchmark_flags = list(benchmark_flags) + ["--benchmark_out=%s" % output_name] + benchmark_flags = list(benchmark_flags) + \ + ['--benchmark_out=%s' % output_name] cmd = [exe_name] + benchmark_flags - print("RUNNING: %s" % " ".join(cmd)) + print("RUNNING: %s" % ' '.join(cmd)) exitCode = subprocess.call(cmd) if exitCode != 0: - print("TEST FAILED...") + print('TEST FAILED...') sys.exit(exitCode) - json_res = load_benchmark_results(output_name) + json_res = load_benchmark_results(output_name, None) if is_temp_output: os.unlink(output_name) return json_res @@ -191,7 +195,9 @@ """ ftype = check_input_file(filename) if ftype == IT_JSON: - return load_benchmark_results(filename) + benchmark_filter = find_benchmark_flag('--benchmark_filter=', + benchmark_flags) + return load_benchmark_results(filename, benchmark_filter) if ftype == IT_Executable: return run_benchmark(filename, benchmark_flags) - raise ValueError("Unknown file type %s" % ftype) + raise ValueError('Unknown file type %s' % ftype) diff --git a/third-party/benchmark/tools/libpfm.BUILD.bazel b/third-party/benchmark/tools/libpfm.BUILD.bazel new file mode 100644 --- /dev/null +++ b/third-party/benchmark/tools/libpfm.BUILD.bazel @@ -0,0 +1,22 @@ +# Build rule for libpfm, which is required to collect performance counters for +# BENCHMARK_ENABLE_LIBPFM builds. + +load("@rules_foreign_cc//foreign_cc:defs.bzl", "make") + +filegroup( + name = "pfm_srcs", + srcs = glob(["**"]), +) + +make( + name = "libpfm", + lib_source = ":pfm_srcs", + lib_name = "libpfm", + copts = [ + "-Wno-format-truncation", + "-Wno-use-after-free", + ], + visibility = [ + "//visibility:public", + ], +) diff --git a/third-party/benchmark/tools/requirements.txt b/third-party/benchmark/tools/requirements.txt --- a/third-party/benchmark/tools/requirements.txt +++ b/third-party/benchmark/tools/requirements.txt @@ -1 +1,2 @@ -scipy>=1.5.0 \ No newline at end of file +numpy == 1.25 +scipy == 1.10.0 diff --git a/third-party/benchmark/tools/strip_asm.py b/third-party/benchmark/tools/strip_asm.py --- a/third-party/benchmark/tools/strip_asm.py +++ b/third-party/benchmark/tools/strip_asm.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ strip_asm.py - Cleanup ASM output for the specified file @@ -9,14 +9,13 @@ import os import re - def find_used_labels(asm): found = set() label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") for l in asm.splitlines(): m = label_re.match(l) if m: - found.add(".L%s" % m.group(1)) + found.add('.L%s' % m.group(1)) return found @@ -29,24 +28,24 @@ decls.add(m.group(0)) if len(decls) == 0: return asm - needs_dot = next(iter(decls))[0] != "." + needs_dot = next(iter(decls))[0] != '.' if not needs_dot: return asm for ld in decls: - asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", "\\1." + ld, asm) + asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' 
+ ld, asm) return asm def transform_labels(asm): asm = normalize_labels(asm) used_decls = find_used_labels(asm) - new_asm = "" + new_asm = '' label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") for l in asm.splitlines(): m = label_decl.match(l) if not m or m.group(0) in used_decls: new_asm += l - new_asm += "\n" + new_asm += '\n' return new_asm @@ -54,15 +53,14 @@ if len(tk) == 0: return False first = tk[0] - if not first.isalpha() and first != "_": + if not first.isalpha() and first != '_': return False for i in range(1, len(tk)): c = tk[i] - if not c.isalnum() and c != "_": + if not c.isalnum() and c != '_': return False return True - def process_identifiers(l): """ process_identifiers - process all identifiers and modify them to have @@ -70,15 +68,14 @@ For example, MachO inserts an additional understore at the beginning of names. This function removes that. """ - parts = re.split(r"([a-zA-Z0-9_]+)", l) - new_line = "" + parts = re.split(r'([a-zA-Z0-9_]+)', l) + new_line = '' for tk in parts: if is_identifier(tk): - if tk.startswith("__Z"): + if tk.startswith('__Z'): tk = tk[1:] - elif ( - tk.startswith("_") and len(tk) > 1 and tk[1].isalpha() and tk[1] != "Z" - ): + elif tk.startswith('_') and len(tk) > 1 and \ + tk[1].isalpha() and tk[1] != 'Z': tk = tk[1:] new_line += tk return new_line @@ -88,24 +85,24 @@ """ Strip the ASM of unwanted directives and lines """ - new_contents = "" + new_contents = '' asm = transform_labels(asm) # TODO: Add more things we want to remove discard_regexes = [ - re.compile("\s+\..*$"), # directive - re.compile("\s*#(NO_APP|APP)$"), # inline ASM - re.compile("\s*#.*$"), # comment line - re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), # global directive - re.compile( - "\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)" - ), + re.compile("\s+\..*$"), # directive + re.compile("\s*#(NO_APP|APP)$"), #inline ASM + re.compile("\s*#.*$"), # comment line + re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive + re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"), + ] + keep_regexes = [ + ] - keep_regexes = [] fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:") for l in asm.splitlines(): # Remove Mach-O attribute - l = l.replace("@GOTPCREL", "") + l = l.replace('@GOTPCREL', '') add_line = True for reg in discard_regexes: if reg.match(l) is not None: @@ -117,21 +114,21 @@ break if add_line: if fn_label_def.match(l) and len(new_contents) != 0: - new_contents += "\n" + new_contents += '\n' l = process_identifiers(l) new_contents += l - new_contents += "\n" + new_contents += '\n' return new_contents - def main(): - parser = ArgumentParser(description="generate a stripped assembly file") + parser = ArgumentParser( + description='generate a stripped assembly file') parser.add_argument( - "input", metavar="input", type=str, nargs=1, help="An input assembly file" - ) + 'input', metavar='input', type=str, nargs=1, + help='An input assembly file') parser.add_argument( - "out", metavar="output", type=str, nargs=1, help="The output file" - ) + 'out', metavar='output', type=str, nargs=1, + help='The output file') args, unknown_args = parser.parse_known_args() input = args.input[0] output = args.out[0] @@ -139,14 +136,14 @@ print(("ERROR: input file '%s' does not exist") % input) sys.exit(1) contents = None - with open(input, "r") as f: + with open(input, 'r') as f: contents = f.read() new_contents = process_asm(contents) - with open(output, "w") as f: + with open(output, 'w') as f: 
f.write(new_contents) -if __name__ == "__main__": +if __name__ == '__main__': main() # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
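
The util.py hunk above extends load_benchmark_results() with a regex benchmark_filter argument. As an illustration only (not part of the patch), the short Python sketch below reproduces that filtering semantics in a standalone form; the function name filter_benchmarks and the tiny results dict are hypothetical, and real inputs would come from a --benchmark_format=json run.

import re


def filter_benchmarks(results, benchmark_filter):
    '''Keep only benchmarks whose run_name (or name) matches the regex.

    A None filter keeps everything, mirroring the patched helper. This is a
    sketch of the behaviour shown in the diff, not the tool's own code.
    '''
    if benchmark_filter is None:
        return results

    def wanted(benchmark):
        # Prefer 'run_name' when present, otherwise fall back to 'name',
        # as the patched benchmark_wanted() does.
        name = benchmark.get('run_name') or benchmark['name']
        return re.search(benchmark_filter, name) is not None

    filtered = dict(results)
    if 'benchmarks' in filtered:
        filtered['benchmarks'] = [b for b in filtered['benchmarks'] if wanted(b)]
    return filtered


# Hypothetical miniature results object, just to show the call shape.
results = {'benchmarks': [{'name': 'BM_One/8'}, {'name': 'BM_Two/8'}]}
print([b['name'] for b in filter_benchmarks(results, 'BM_One')['benchmarks']])
# -> ['BM_One/8']

As the patched docstring itself warns, Python's re engine is used here rather than the C++ engine behind --benchmark_filter, so complex patterns may match differently.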
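
The expected rows in the pretty-printing tests above (BM_2xFaster going from 50 to 25 prints -0.5000, BM_10PercentSlower going from 100 to 110 prints +0.1000) imply that calculate_change() reports (new - old) / old. The helper below is a hedged sketch of that convention only, not compare.py's actual implementation; in particular, how the real code treats an old value of zero is not visible in this patch, so this sketch simply rejects it.

def relative_change(old, new):
    '''Relative difference as implied by the test expectations: (new - old) / old.

    Negative values mean the second run is faster/cheaper. Illustrative name
    and zero handling are assumptions, not the tool's calculate_change().
    '''
    if old == 0:
        raise ValueError('old value must be non-zero in this sketch')
    return (new - old) / old


# Matches the expected table rows quoted in the tests above.
print('{:+.4f}'.format(relative_change(50, 25)))    # -0.5000  (BM_2xFaster)
print('{:+.4f}'.format(relative_change(100, 110)))  # +0.1000  (BM_10PercentSlower)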