diff --git a/libc/cmake/modules/LLVMLibCRules.cmake b/libc/cmake/modules/LLVMLibCRules.cmake
--- a/libc/cmake/modules/LLVMLibCRules.cmake
+++ b/libc/cmake/modules/LLVMLibCRules.cmake
@@ -317,18 +317,23 @@
   set(library_deps "")
   foreach(dep IN LISTS LIBC_UNITTEST_DEPENDS)
-    get_target_property(dep_type ${dep} "TARGET_TYPE")
-    if (dep_type)
-      string(COMPARE EQUAL ${dep_type} ${ENTRYPOINT_OBJ_TARGET_TYPE} dep_is_entrypoint)
+    get_property(dep_target_type TARGET ${dep} PROPERTY "TARGET_TYPE")
+    if (dep_target_type)
+      string(COMPARE EQUAL ${dep_target_type} ${ENTRYPOINT_OBJ_TARGET_TYPE} dep_is_entrypoint)
       if(dep_is_entrypoint)
         get_target_property(obj_file ${dep} "OBJECT_FILE_RAW")
         list(APPEND library_deps ${obj_file})
         continue()
       endif()
+    else()
+      # If the dep is a normal CMake library target then add it to the list of
+      # library_deps.
+      get_property(dep_type TARGET ${dep} PROPERTY "TYPE")
+      if (${dep_type} MATCHES "_LIBRARY$")
+        list(APPEND library_deps ${dep})
+      endif()
     endif()
-    # TODO: Check if the dep is a normal CMake library target. If yes, then add it
-    # to the list of library_deps.
-  endforeach(dep)
+  endforeach()
 
   add_executable(
     ${target_name}
@@ -357,7 +362,7 @@
     gtest
   )
 
-  target_link_libraries(${target_name} PRIVATE gtest_main gtest)
+  target_link_libraries(${target_name} PRIVATE gtest_main gtest LLVMSupport)
 
   add_custom_command(
     TARGET ${target_name}
diff --git a/libc/utils/CMakeLists.txt b/libc/utils/CMakeLists.txt
--- a/libc/utils/CMakeLists.txt
+++ b/libc/utils/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(HdrGen)
+add_subdirectory(benchmarks)
diff --git a/libc/utils/HdrGen/CMakeLists.txt b/libc/utils/HdrGen/CMakeLists.txt
--- a/libc/utils/HdrGen/CMakeLists.txt
+++ b/libc/utils/HdrGen/CMakeLists.txt
@@ -1,3 +1,5 @@
+set(LLVM_LINK_COMPONENTS Support)
+
 add_tablegen(libc-hdrgen llvm-libc
   Command.h
   Command.cpp
diff --git a/libc/utils/benchmarks/CMakeLists.txt b/libc/utils/benchmarks/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/CMakeLists.txt
@@ -0,0 +1,148 @@
+find_package(Threads)
+
+include(ExternalProject)
+
+set(LLVM_LINK_COMPONENTS Support)
+
+#==============================================================================
+# Build Google Benchmark
+#==============================================================================
+set(GOOGLE_BENCHMARK_TARGET_FLAGS ${BENCHMARK_DIALECT_FLAG})
+if (LIBCXX_BENCHMARK_GCC_TOOLCHAIN)
+  set(GOOGLE_BENCHMARK_TARGET_FLAGS
+      -gcc-toolchain ${LIBCXX_BENCHMARK_GCC_TOOLCHAIN})
+endif()
+string(REPLACE ";" " " GOOGLE_BENCHMARK_TARGET_FLAGS "${GOOGLE_BENCHMARK_TARGET_FLAGS}")
+
+ExternalProject_Add(google-benchmark
+        EXCLUDE_FROM_ALL ON
+        PREFIX google-benchmark
+        SOURCE_DIR ${LIBC_SOURCE_DIR}/../llvm/utils/benchmark
+        INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/google-benchmark
+        CMAKE_CACHE_ARGS
+          -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+          -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER}
+          -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}
+          -DCMAKE_CXX_FLAGS:STRING=${GOOGLE_BENCHMARK_TARGET_FLAGS}
+          -DCMAKE_BUILD_TYPE:STRING=RELEASE
+          -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
+          -DBENCHMARK_ENABLE_TESTING:BOOL=OFF)
+
+set(GOOGLE_BENCHMARK_LIBC_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/google-benchmark)
+set(GOOGLE_BENCHMARK_LINK_FLAGS -L${GOOGLE_BENCHMARK_LIBC_INSTALL}/lib/)
+
+#==============================================================================
+# Build Google Benchmark for libc
+#==============================================================================
+
+function(fix_rtti target)
+  # TODO: Make this portable and inline with rtti mode from llvm/
+  target_compile_options(${target} PUBLIC -fno-rtti)
+endfunction()
+
+# libc-benchmark
+add_library(libc-benchmark
+  STATIC
+  EXCLUDE_FROM_ALL
+  LibcBenchmark.cpp
+  LibcBenchmark.h
+)
+add_dependencies(libc-benchmark google-benchmark)
+target_include_directories(libc-benchmark PUBLIC "${GOOGLE_BENCHMARK_LIBC_INSTALL}/include")
+target_link_options(libc-benchmark PUBLIC "${GOOGLE_BENCHMARK_LINK_FLAGS}")
+target_link_libraries(libc-benchmark PUBLIC LLVMSupport -lbenchmark Threads::Threads)
+fix_rtti(libc-benchmark)
+
+add_libc_unittest(libc-benchmark-test
+  SRCS LibcBenchmarkTest.cpp
+  DEPENDS libc-benchmark
+)
+
+# libc-memory-benchmark
+add_library(libc-memory-benchmark
+  STATIC
+  EXCLUDE_FROM_ALL
+  LibcMemoryBenchmark.cpp
+  LibcMemoryBenchmark.h
+)
+target_link_libraries(libc-memory-benchmark PUBLIC libc-benchmark)
+fix_rtti(libc-memory-benchmark)
+
+add_libc_unittest(libc-memory-benchmark-test
+  SRCS LibcMemoryBenchmarkTest.cpp
+  DEPENDS libc-memory-benchmark
+)
+
+# json
+add_library(json
+  STATIC
+  EXCLUDE_FROM_ALL
+  JSON.cpp
+  JSON.h
+)
+target_link_libraries(json PUBLIC libc-memory-benchmark)
+fix_rtti(json)
+
+add_libc_unittest(json-test
+  SRCS JSONTest.cpp
+  DEPENDS json
+)
+
+add_custom_target(check-libc-benchmark)
+add_dependencies(check-libc-benchmark
+  libc-benchmark-test
+  libc-memory-benchmark-test
+  json-test
+)
+
+#==============================================================================
+# Benchmark tests configuration
+#==============================================================================
+
+function(add_libc_benchmark_analysis conf_target run_target)
+  set(png_file "/tmp/last-${conf_target}.png")
+  set(render_target render-${conf_target})
+  add_custom_target(${render_target}
+    COMMAND python render.py ${json_file} --headless --output=${png_file}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "render ${libc_target} to ${png_file}"
+  )
+  add_dependencies(${render_target} ${run_target})
+
+  set(display_target display-${conf_target})
+  add_custom_target(${display_target}
+    COMMAND python render.py ${json_file}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "display ${libc_target}"
+  )
+  add_dependencies(${display_target} ${run_target})
+endfunction()
+
+function(add_libc_benchmark_configuration target configuration)
+  set(conf_target ${target}-${configuration})
+  set(json_file "/tmp/last-${conf_target}.json")
+  set(run_target run-${conf_target})
+  add_custom_target(${run_target}
+    COMMAND ${libc_target} --conf=configuration_${configuration}.json -o ${json_file}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
+  add_libc_benchmark_analysis(${conf_target} ${run_target})
+endfunction()
+
+function(add_libc_benchmark name file)
+  set(libc_target libc-${name}-benchmark)
+  add_executable(${libc_target}
+    EXCLUDE_FROM_ALL
+    ${file}
+    LibcMemoryBenchmarkMain.h
+    LibcMemoryBenchmarkMain.cpp
+  )
+  target_link_libraries(${libc_target} PUBLIC json)
+  foreach(configuration "small" "big")
+    add_libc_benchmark_configuration(${libc_target} ${configuration})
+  endforeach()
+endfunction()
+
+add_libc_benchmark(memcpy Memcpy.cpp)
+add_libc_benchmark(memcmp Memcmp.cpp)
+add_libc_benchmark(memset Memset.cpp)
diff --git a/libc/utils/benchmarks/JSON.h b/libc/utils/benchmarks/JSON.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/JSON.h
@@ -0,0 +1,28 @@
+//===-------- JSON serialization routines -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_BENCHMARK_JSON_H
+#define LLVM_LIBC_UTILS_BENCHMARK_JSON_H
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// Parses a Study from a json string.
+Expected<Study> ParseJsonStudy(StringRef Content);
+
+// Serialize a Study as json.
+void SerializeToJson(const Study &S, llvm::json::OStream &JOS);
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+#endif // LLVM_LIBC_UTILS_BENCHMARK_JSON_H
diff --git a/libc/utils/benchmarks/JSON.cpp b/libc/utils/benchmarks/JSON.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/JSON.cpp
@@ -0,0 +1,367 @@
+//===-------- JSON serialization routines ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "JSON.h"
+#include "LibcBenchmark.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MathExtras.h"
+#include <chrono>
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+template <typename T>
+static Error intFromJsonTemplate(const json::Value &V, T &Out) {
+  if (const auto &MaybeInt64 = V.getAsInteger()) {
+    int64_t Value = *MaybeInt64;
+    if (Value < std::numeric_limits<T>::min() ||
+        Value > std::numeric_limits<T>::max())
+      return createStringError(errc::io_error, "Out of bound Integer");
+    Out = Value;
+    return Error::success();
+  }
+  return createStringError(errc::io_error, "Can't parse Integer");
+}
+
+static Error fromJson(const json::Value &V, double &Out) {
+  if (auto S = V.getAsNumber()) {
+    Out = *S;
+    return Error::success();
+  }
+  return createStringError(errc::io_error, "Can't parse Double");
+}
+
+static Error fromJson(const json::Value &V, std::string &Out) {
+  if (auto S = V.getAsString()) {
+    Out = *S;
+    return Error::success();
+  }
+  return createStringError(errc::io_error, "Can't parse String");
+}
+
+static Error fromJson(const json::Value &V, uint32_t &Out) {
+  return intFromJsonTemplate(V, Out);
+}
+
+static Error fromJson(const json::Value &V, uint8_t &Out) {
+  return intFromJsonTemplate(V, Out);
+}
+
+static Error fromJson(const json::Value &V, int &Out) {
+  return intFromJsonTemplate(V, Out);
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::Duration &D) {
+  if (V.kind() != json::Value::Kind::Number)
+    return createStringError(errc::io_error, "Can't parse Duration");
+  D = libc_benchmarks::Duration(*V.getAsNumber());
+  return Error::success();
+}
+
+static Error fromJson(const json::Value &V, MaybeAlign &Out) {
+  const auto MaybeInt = V.getAsInteger();
+  if (!MaybeInt)
+    return createStringError(errc::io_error,
+                             "Can't parse Align, not an Integer");
+  const int64_t Value = *MaybeInt;
+  if (!Value) {
+    Out = None;
+    return Error::success();
+  }
+  if (isPowerOf2_64(Value)) {
+    Out = Align(Value);
+    return Error::success();
+  }
+  return createStringError(errc::io_error,
+                           "Can't parse Align, not a power of two");
+}
+
+static Error fromJson(const json::Value &V,
+                      libc_benchmarks::BenchmarkLog &Out) {
+  if (V.kind() != json::Value::Kind::String)
+    return createStringError(errc::io_error,
+                             "Can't parse BenchmarkLog, not a String");
+  const auto String = *V.getAsString();
+  auto Parsed =
+      llvm::StringSwitch<Optional<libc_benchmarks::BenchmarkLog>>(String)
+          .Case("None", libc_benchmarks::BenchmarkLog::None)
+          .Case("Last", libc_benchmarks::BenchmarkLog::Last)
+          .Case("Full", libc_benchmarks::BenchmarkLog::Full)
+          .Default(None);
+  if (!Parsed)
+    return createStringError(errc::io_error,
+                             Twine("Can't parse BenchmarkLog, invalid value '")
+                                 .concat(String)
+                                 .concat("'"));
+  Out = *Parsed;
+  return Error::success();
+}
+
+template <typename C>
+Error vectorFromJsonTemplate(const json::Value &V, C &Out) {
+  auto *A = V.getAsArray();
+  if (!A)
+    return createStringError(errc::io_error, "Can't parse Array");
+  Out.clear();
+  Out.resize(A->size());
+  for (auto InOutPair : llvm::zip(*A, Out))
+    if (auto E = fromJson(std::get<0>(InOutPair), std::get<1>(InOutPair)))
+      return std::move(E);
+  return Error::success();
+}
+
+template <typename T>
+static Error fromJson(const json::Value &V, std::vector<T> &Out) {
+  return vectorFromJsonTemplate(V, Out);
+}
+
+template <typename T>
+static Error fromJson(const json::Value &V, SmallVectorImpl<T> &Out) {
+  return vectorFromJsonTemplate(V, Out);
+}
+
+// Same as llvm::json::ObjectMapper but adds a finer error reporting mechanism.
+class JsonObjectMapper {
+  const json::Object *O;
+  Error E;
+  SmallDenseSet<StringRef> SeenFields;
+
+public:
+  explicit JsonObjectMapper(const json::Value &V)
+      : O(V.getAsObject()),
+        E(O ? Error::success()
+            : createStringError(errc::io_error, "Expected JSON Object")) {}
+
+  Error takeError() {
+    if (E)
+      return std::move(E);
+    for (const auto &Itr : *O) {
+      const StringRef Key = Itr.getFirst();
+      if (!SeenFields.count(Key))
+        E = createStringError(errc::io_error,
+                              Twine("Unknown field: ").concat(Key));
+    }
+    return std::move(E);
+  }
+
+  template <typename T> void map(StringRef Key, T &Out) {
+    if (E)
+      return;
+    if (const json::Value *Value = O->get(Key)) {
+      SeenFields.insert(Key);
+      E = fromJson(*Value, Out);
+    }
+  }
+};
+
+static Error fromJson(const json::Value &V,
+                      libc_benchmarks::BenchmarkOptions &Out) {
+  JsonObjectMapper O(V);
+  O.map("MinDuration", Out.MinDuration);
+  O.map("MaxDuration", Out.MaxDuration);
+  O.map("InitialIterations", Out.InitialIterations);
+  O.map("MaxIterations", Out.MaxIterations);
+  O.map("MinSamples", Out.MinSamples);
+  O.map("MaxSamples", Out.MaxSamples);
+  O.map("Epsilon", Out.Epsilon);
+  O.map("ScalingFactor", Out.ScalingFactor);
+  O.map("Log", Out.Log);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::SizeRange &Out) {
+  JsonObjectMapper O(V);
+  O.map("From", Out.From);
+  O.map("To", Out.To);
+  O.map("Step", Out.Step);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V,
+                      libc_benchmarks::StudyConfiguration &Out) {
+  JsonObjectMapper O(V);
+  O.map("Runs", Out.Runs);
+  O.map("BufferSize", Out.BufferSize);
+  O.map("Size", Out.Size);
+  O.map("AddressAlignment", Out.AddressAlignment);
+  O.map("MemsetValue", Out.MemsetValue);
+  O.map("MemcmpMismatchAt", Out.MemcmpMismatchAt);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::CacheInfo &Out) {
+  JsonObjectMapper O(V);
+  O.map("Type", Out.Type);
+  O.map("Level", Out.Level);
+  O.map("Size", Out.Size);
+  O.map("NumSharing", Out.NumSharing);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::HostState &Out) {
+  JsonObjectMapper O(V);
+  O.map("CpuName", Out.CpuName);
+  O.map("CpuFrequency", Out.CpuFrequency);
+  O.map("Caches", Out.Caches);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V,
+                      libc_benchmarks::FunctionMeasurements &Out) {
+  JsonObjectMapper O(V);
+  O.map("Name", Out.Name);
+  std::vector<uint32_t> Sizes;
+  O.map("Sizes", Sizes);
+  std::vector<libc_benchmarks::Duration> Runtimes;
+  O.map("Runtimes", Runtimes);
+  if (Sizes.size() != Runtimes.size())
+    return createStringError(errc::io_error,
+                             "Measurement Size and Runtime mismatch");
+  Out.Measurements.resize(Sizes.size());
+  for (size_t I = 0; I < Sizes.size(); ++I) {
+    Out.Measurements[I].Size = Sizes[I];
+    Out.Measurements[I].Runtime = Runtimes[I];
+  }
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::Study &Out) {
+  JsonObjectMapper O(V);
+  O.map("Host", Out.Host);
+  O.map("Options", Out.Options);
+  O.map("Configuration", Out.Configuration);
+  O.map("Functions", Out.Functions);
+  return O.takeError();
+}
+
+static double Seconds(const Duration &D) {
+  return std::chrono::duration<double>(D).count();
+}
+
+Expected<Study> ParseJsonStudy(StringRef Content) {
+  Expected<json::Value> EV = json::parse(Content);
+  if (!EV)
+    return EV.takeError();
+  Study S;
+  if (Error E = fromJson(*EV, S))
+    return std::move(E);
+  return S;
+}
+
+static StringRef Serialize(const BenchmarkLog &L) {
+  switch (L) {
+  case BenchmarkLog::None:
+    return "None";
+  case BenchmarkLog::Last:
+    return "Last";
+  case BenchmarkLog::Full:
+    return "Full";
+  }
+  llvm_unreachable("Unhandled BenchmarkLog value");
+}
+
+static void Serialize(const BenchmarkOptions &BO, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("MinDuration", Seconds(BO.MinDuration));
+    JOS.attribute("MaxDuration", Seconds(BO.MaxDuration));
+    JOS.attribute("InitialIterations", BO.InitialIterations);
+    JOS.attribute("MaxIterations", BO.MaxIterations);
+    JOS.attribute("MinSamples", BO.MinSamples);
+    JOS.attribute("MaxSamples", BO.MaxSamples);
+    JOS.attribute("Epsilon", BO.Epsilon);
+    JOS.attribute("ScalingFactor", BO.ScalingFactor);
+    JOS.attribute("Log", Serialize(BO.Log));
+  });
+}
+
+static void Serialize(const CacheInfo &CI, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("Type", CI.Type);
+    JOS.attribute("Level", CI.Level);
+    JOS.attribute("Size", CI.Size);
+    JOS.attribute("NumSharing", CI.NumSharing);
+  });
+}
+
+static void Serialize(const HostState &HS, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("CpuName", HS.CpuName);
+    JOS.attribute("CpuFrequency", HS.CpuFrequency);
+    JOS.attributeArray("Caches", [&]() {
+      for (const auto &CI : HS.Caches)
+        Serialize(CI, JOS);
+    });
+  });
+}
+
+static void Serialize(const StudyConfiguration &SC, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("Runs", SC.Runs);
+    JOS.attribute("BufferSize", SC.BufferSize);
+    JOS.attributeObject("Size", [&]() {
+      JOS.attribute("From", SC.Size.From);
+      JOS.attribute("To", SC.Size.To);
+      JOS.attribute("Step", SC.Size.Step);
+    });
+    if (SC.AddressAlignment)
+      JOS.attribute("AddressAlignment",
+                    static_cast<int64_t>(SC.AddressAlignment->value()));
+    JOS.attribute("MemsetValue", SC.MemsetValue);
+    JOS.attribute("MemcmpMismatchAt", SC.MemcmpMismatchAt);
+  });
+}
+
+static void Serialize(const FunctionMeasurements &FM, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("Name", FM.Name);
+    JOS.attributeArray("Sizes", [&]() {
+      for (const auto &M : FM.Measurements)
+        JOS.value(M.Size);
+    });
JOS.attributeArray("Runtimes", [&]() { + for (const auto &M : FM.Measurements) + JOS.value(Seconds(M.Runtime)); + }); + }); +} + +void SerializeToJson(const Study &S, json::OStream &JOS) { + JOS.object([&]() { + JOS.attributeBegin("Host"); + Serialize(S.Host, JOS); + JOS.attributeEnd(); + + JOS.attributeBegin("Options"); + Serialize(S.Options, JOS); + JOS.attributeEnd(); + + JOS.attributeBegin("Configuration"); + Serialize(S.Configuration, JOS); + JOS.attributeEnd(); + + if (!S.Functions.empty()) { + JOS.attributeArray("Functions", [&]() { + for (const auto &FM : S.Functions) + Serialize(FM, JOS); + }); + } + }); +} + +} // namespace libc_benchmarks +} // namespace llvm diff --git a/libc/utils/benchmarks/JSONTest.cpp b/libc/utils/benchmarks/JSONTest.cpp new file mode 100644 --- /dev/null +++ b/libc/utils/benchmarks/JSONTest.cpp @@ -0,0 +1,190 @@ +#include "JSON.h" +#include "LibcBenchmark.h" +#include "LibcMemoryBenchmark.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::AllOf; +using testing::ExplainMatchResult; +using testing::Field; +using testing::Pointwise; + +namespace llvm { +namespace libc_benchmarks { +namespace { + +Study getStudy() { + return Study{ + HostState{ + "CpuName", 123, {CacheInfo{"A", 1, 2, 3}, CacheInfo{"B", 4, 5, 6}}}, + BenchmarkOptions{std::chrono::seconds(1), std::chrono::seconds(2), 10, + 100, 6, 100, 0.1, 2, BenchmarkLog::Full}, + StudyConfiguration{2, 3, SizeRange{4, 5, 6}, Align(8), 9, 10}, + {FunctionMeasurements{"A", + {Measurement{3, std::chrono::seconds(3)}, + Measurement{3, std::chrono::seconds(4)}}}, + FunctionMeasurements{"B", {}}}}; +} + +static std::string SerializeToString(const Study &S) { + std::string Buffer; + raw_string_ostream RSO(Buffer); + json::OStream JOS(RSO); + SerializeToJson(S, JOS); + return Buffer; +} + +MATCHER(EqualsCacheInfo, "") { + const CacheInfo &a = ::testing::get<0>(arg); + const CacheInfo &b = ::testing::get<1>(arg); + return ExplainMatchResult(AllOf(Field(&CacheInfo::Type, b.Type), + Field(&CacheInfo::Level, b.Level), + Field(&CacheInfo::Size, b.Size), + Field(&CacheInfo::NumSharing, b.NumSharing)), + a, result_listener); +} + +auto Equals(const HostState &H) -> auto { + return AllOf( + Field(&HostState::CpuName, H.CpuName), + Field(&HostState::CpuFrequency, H.CpuFrequency), + Field(&HostState::Caches, Pointwise(EqualsCacheInfo(), H.Caches))); +} + +auto Equals(const BenchmarkOptions &BO) -> auto { + return AllOf( + Field(&BenchmarkOptions::MinDuration, BO.MinDuration), + Field(&BenchmarkOptions::MaxDuration, BO.MaxDuration), + Field(&BenchmarkOptions::InitialIterations, BO.InitialIterations), + Field(&BenchmarkOptions::MaxIterations, BO.MaxIterations), + Field(&BenchmarkOptions::MinSamples, BO.MinSamples), + Field(&BenchmarkOptions::MaxSamples, BO.MaxSamples), + Field(&BenchmarkOptions::Epsilon, BO.Epsilon), + Field(&BenchmarkOptions::ScalingFactor, BO.ScalingFactor), + Field(&BenchmarkOptions::Log, BO.Log)); +} + +auto Equals(const SizeRange &SR) -> auto { + return AllOf(Field(&SizeRange::From, SR.From), Field(&SizeRange::To, SR.To), + Field(&SizeRange::Step, SR.Step)); +} + +auto Equals(const StudyConfiguration &SC) -> auto { + return AllOf( + Field(&StudyConfiguration::Runs, SC.Runs), + Field(&StudyConfiguration::BufferSize, SC.BufferSize), + Field(&StudyConfiguration::Size, Equals(SC.Size)), + Field(&StudyConfiguration::AddressAlignment, SC.AddressAlignment), + Field(&StudyConfiguration::MemsetValue, SC.MemsetValue), + 
+      Field(&StudyConfiguration::MemcmpMismatchAt, SC.MemcmpMismatchAt));
+}
+
+MATCHER(EqualsMeasurement, "") {
+  const Measurement &a = ::testing::get<0>(arg);
+  const Measurement &b = ::testing::get<1>(arg);
+  return ExplainMatchResult(AllOf(Field(&Measurement::Size, b.Size),
+                                  Field(&Measurement::Runtime, b.Runtime)),
+                            a, result_listener);
+}
+
+MATCHER(EqualsFunctions, "") {
+  const FunctionMeasurements &a = ::testing::get<0>(arg);
+  const FunctionMeasurements &b = ::testing::get<1>(arg);
+  return ExplainMatchResult(
+      AllOf(Field(&FunctionMeasurements::Name, b.Name),
+            Field(&FunctionMeasurements::Measurements,
+                  Pointwise(EqualsMeasurement(), b.Measurements))),
+      a, result_listener);
+}
+
+auto Equals(const Study &S) -> auto {
+  return AllOf(
+      Field(&Study::Host, Equals(S.Host)),
+      Field(&Study::Options, Equals(S.Options)),
+      Field(&Study::Configuration, Equals(S.Configuration)),
+      Field(&Study::Functions, Pointwise(EqualsFunctions(), S.Functions)));
+}
+
+TEST(JsonTest, RoundTrip) {
+  const Study S = getStudy();
+  auto StudyOrError = ParseJsonStudy(SerializeToString(S));
+  if (auto Err = StudyOrError.takeError())
+    EXPECT_FALSE(Err) << "Unexpected error";
+  const Study &Parsed = *StudyOrError;
+  EXPECT_THAT(Parsed, Equals(S));
+}
+
+TEST(JsonTest, SupplementaryField) {
+  auto Failure = ParseJsonStudy(R"({
+    "UnknownField": 10
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()), "Unknown field: UnknownField");
+}
+
+TEST(JsonTest, InvalidType) {
+  auto Failure = ParseJsonStudy(R"({
+    "Options": 1
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()), "Expected JSON Object");
+}
+
+TEST(JsonTest, InvalidDuration) {
+  auto Failure = ParseJsonStudy(R"({
+    "Options": {
+      "MinDuration": "Duration should be a Number"
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()), "Can't parse Duration");
+}
+
+TEST(JsonTest, InvalidAlignType) {
+  auto Failure = ParseJsonStudy(R"({
+    "Configuration":{
+      "AddressAlignment": "Align should be an Integer"
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()), "Can't parse Align, not an Integer");
+}
+
+TEST(JsonTest, InvalidAlign) {
+  auto Failure = ParseJsonStudy(R"({
+    "Configuration":{
+      "AddressAlignment":3
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()),
+            "Can't parse Align, not a power of two");
+}
+
+TEST(JsonTest, InvalidBenchmarkLogType) {
+  auto Failure = ParseJsonStudy(R"({
+    "Options":{
+      "Log": 3
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()),
+            "Can't parse BenchmarkLog, not a String");
+}
+
+TEST(JsonTest, InvalidBenchmarkLog) {
+  auto Failure = ParseJsonStudy(R"({
+    "Options":{
+      "Log": "Unknown"
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()),
+            "Can't parse BenchmarkLog, invalid value 'Unknown'");
+}
+
+} // namespace
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/LibcBenchmark.h b/libc/utils/benchmarks/LibcBenchmark.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcBenchmark.h
@@ -0,0 +1,324 @@
+//===-------- `Benchmark` function ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This file mainly defines a `Benchmark` function.
+//
+// The benchmarking process is as follows:
+// - We start by measuring the time it takes to run the function
+//   `InitialIterations` times. This is called a Sample. From this we can
+//   derive the time it took to run a single iteration.
+//
+// - We repeat the previous step with a greater number of iterations to lower
+//   the impact of the measurement itself. This yields a more precise
+//   estimation of the runtime for a single iteration.
+//
+// - Each sample gives a more accurate estimation of the runtime for a single
+//   iteration but also takes more time to run. We stop the process when:
+//   * The measure stabilizes under a certain precision (Epsilon),
+//   * The overall benchmarking time is greater than MaxDuration,
+//   * The overall sample count is greater than MaxSamples,
+//   * The last sample used more than MaxIterations iterations.
+//
+// - We also make sure that the benchmark doesn't run for too short a period
+//   of time by defining MinDuration and MinSamples.
+
+#ifndef LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
+#define LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
+
+#include "benchmark/benchmark.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include <array>
+#include <chrono>
+#include <vector>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// Makes sure the binary was compiled in release mode and that the frequency
+// governor is set to performance.
+void checkRequirements();
+
+using Duration = std::chrono::duration<double>;
+
+enum class BenchmarkLog {
+  None, // Don't keep the internal state of the benchmark.
+  Last, // Keep only the last batch.
+  Full  // Keep all iteration states, useful for testing or debugging.
+};
+
+// An object to configure the benchmark stopping conditions.
+// See documentation at the beginning of the file for the overall algorithm
+// and the meaning of each field.
+struct BenchmarkOptions {
+  // The minimum time for which the benchmark is running.
+  Duration MinDuration = std::chrono::seconds(0);
+  // The maximum time for which the benchmark is running.
+  Duration MaxDuration = std::chrono::seconds(10);
+  // The number of iterations in the first sample.
+  uint32_t InitialIterations = 1;
+  // The maximum number of iterations for any given sample.
+  uint32_t MaxIterations = 10000000;
+  // The minimum number of samples.
+  uint32_t MinSamples = 4;
+  // The maximum number of samples.
+  uint32_t MaxSamples = 1000;
+  // The benchmark will stop if the relative difference between the current
+  // and the last estimation is less than Epsilon. This is 1% by default.
+  double Epsilon = 0.01;
+  // The number of iterations grows exponentially between each sample.
+  // Must be greater than or equal to 1.
+  double ScalingFactor = 1.4;
+  BenchmarkLog Log = BenchmarkLog::None;
+};
+
+// The state of a benchmark.
+enum class BenchmarkStatus {
+  Running,
+  MaxDurationReached,
+  MaxIterationsReached,
+  MaxSamplesReached,
+  PrecisionReached,
+};
+
+// The internal state of the benchmark, useful to debug, test or report
+// statistics.
+struct BenchmarkState {
+  size_t LastSampleIterations;
+  Duration LastBatchElapsed;
+  BenchmarkStatus CurrentStatus;
+  Duration CurrentBestGuess; // The time estimation for a single run of `foo`.
+  double ChangeRatio;        // The change in time estimation between previous
+                             // and current samples.
+};
+
+// A lightweight result for a benchmark.
+struct BenchmarkResult {
+  BenchmarkStatus TerminationStatus = BenchmarkStatus::Running;
+  Duration BestGuess = {};
+  llvm::Optional<std::vector<BenchmarkState>> MaybeBenchmarkLog;
+};
+
+// Stores information about a cache in the host memory system.
+struct CacheInfo {
+  std::string Type; // e.g. "Instruction", "Data", "Unified".
+  int Level;        // 0 is closest to the processing unit.
+  int Size;         // In bytes.
+  int NumSharing;   // The number of processing units (Hyper-Threading
+                    // threads) with which this cache is shared.
+};
+
+// Stores information about the host.
+struct HostState {
+  std::string CpuName; // A string compatible with the -march option.
+  double CpuFrequency; // In Hertz.
+  std::vector<CacheInfo> Caches;
+
+  static HostState get();
+};
+
+namespace internal {
+
+struct Measurement {
+  size_t Iterations = 0;
+  Duration Elapsed = {};
+};
+
+// Updates the estimation of the elapsed time for a single iteration.
+class RefinableRuntimeEstimation {
+  Duration TotalTime = {};
+  size_t TotalIterations = 0;
+
+public:
+  Duration update(const Measurement &M) {
+    assert(M.Iterations > 0);
+    // Duration is encoded as a double (see definition).
+    // `TotalTime` and `M.Elapsed` are of the same magnitude so we don't
+    // expect loss of precision due to radically different scales.
+    TotalTime += M.Elapsed;
+    TotalIterations += M.Iterations;
+    return TotalTime / TotalIterations;
+  }
+};
+
+// This class tracks the progression of the runtime estimation.
+class RuntimeEstimationProgression {
+  RefinableRuntimeEstimation RRE;
+
+public:
+  Duration CurrentEstimation = {};
+
+  // Returns the change ratio between our best guess so far and the one from
+  // the new measurement.
+  double computeImprovement(const Measurement &M) {
+    const Duration NewEstimation = RRE.update(M);
+    const double Ratio = fabs(((CurrentEstimation / NewEstimation) - 1.0));
+    CurrentEstimation = NewEstimation;
+    return Ratio;
+  }
+};
+
+} // namespace internal
+
+// Measures the runtime of `foo` until the conditions defined by `Options` are
+// met.
+//
+// To avoid measurement imprecision, we measure batches of `foo`.
+// The batch size grows by `ScalingFactor` to minimize the effect of
+// measuring.
+//
+// Note: The benchmark is not responsible for serializing the executions of
+// `foo`. It is not suitable for measuring very small, side-effect-free
+// functions, as the processor is free to run several executions in parallel.
+//
+// - Options: A set of parameters controlling the stopping conditions for the
+//   benchmark.
+// - foo: The function under test. It takes one value and returns one value.
+//   The input value is used to randomize the execution of `foo` as part of a
+//   batch to mitigate the effect of the branch predictor. Signature:
+//   `ProductType foo(ParameterProvider::value_type value);`
+//   The output value is a product of the execution of `foo` and prevents the
+//   compiler from optimizing out foo's body.
+// - ParameterProvider: An object responsible for providing a range of
+//   `Iterations` values to use as input for `foo`. The `value_type` of the
+//   returned container has to be compatible with `foo`'s argument.
+//   Must implement one of:
+//   `Container generateBatch(size_t Iterations);`
+//   `const Container& generateBatch(size_t Iterations);`
+// - Clock: An object providing the current time.
+//   Must implement:
+//   `std::chrono::time_point now();`
+template <typename ParameterProvider, typename Function,
+          typename BenchmarkClock = const std::chrono::high_resolution_clock>
+BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                          ParameterProvider &PP, Function foo,
+                          BenchmarkClock &Clock = BenchmarkClock()) {
+  BenchmarkResult Result;
+  internal::RuntimeEstimationProgression REP;
+  Duration TotalBenchmarkDuration = {};
+  size_t Iterations = std::max(Options.InitialIterations, uint32_t(1));
+  size_t Samples = 0;
+  if (Options.ScalingFactor < 1.0)
+    report_fatal_error("ScalingFactor should be >= 1");
+  if (Options.Log != BenchmarkLog::None)
+    Result.MaybeBenchmarkLog.emplace();
+  for (;;) {
+    // Request a new Batch of size `Iterations`.
+    const auto &Batch = PP.generateBatch(Iterations);
+
+    // Measuring this Batch.
+    const auto StartTime = Clock.now();
+    for (const auto Parameter : Batch) {
+      const auto Production = foo(Parameter);
+      benchmark::DoNotOptimize(Production);
+    }
+    const auto EndTime = Clock.now();
+    const Duration Elapsed = EndTime - StartTime;
+
+    // Updating statistics.
+    ++Samples;
+    TotalBenchmarkDuration += Elapsed;
+    const double ChangeRatio = REP.computeImprovement({Iterations, Elapsed});
+    Result.BestGuess = REP.CurrentEstimation;
+
+    // Stopping conditions.
+    if (TotalBenchmarkDuration >= Options.MinDuration &&
+        Samples >= Options.MinSamples && ChangeRatio < Options.Epsilon)
+      Result.TerminationStatus = BenchmarkStatus::PrecisionReached;
+    else if (Samples >= Options.MaxSamples)
+      Result.TerminationStatus = BenchmarkStatus::MaxSamplesReached;
+    else if (TotalBenchmarkDuration >= Options.MaxDuration)
+      Result.TerminationStatus = BenchmarkStatus::MaxDurationReached;
+    else if (Iterations >= Options.MaxIterations)
+      Result.TerminationStatus = BenchmarkStatus::MaxIterationsReached;
+
+    if (Result.MaybeBenchmarkLog) {
+      auto &BenchmarkLog = *Result.MaybeBenchmarkLog;
+      if (Options.Log == BenchmarkLog::Last && !BenchmarkLog.empty())
+        BenchmarkLog.pop_back();
+      BenchmarkState BS;
+      BS.LastSampleIterations = Iterations;
+      BS.LastBatchElapsed = Elapsed;
+      BS.CurrentStatus = Result.TerminationStatus;
+      BS.CurrentBestGuess = Result.BestGuess;
+      BS.ChangeRatio = ChangeRatio;
+      BenchmarkLog.push_back(BS);
+    }
+
+    if (Result.TerminationStatus != BenchmarkStatus::Running)
+      return Result;
+
+    if (Options.ScalingFactor > 1 &&
+        Iterations * Options.ScalingFactor == Iterations)
+      report_fatal_error(
+          "`Iterations *= ScalingFactor` is idempotent, increase "
+          "ScalingFactor or InitialIterations.");
+
+    Iterations *= Options.ScalingFactor;
+  }
+}
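+
+// Example usage (an illustrative sketch only; `ZeroProvider` and the lambda
+// below are hypothetical, modeled after the unit tests):
+//
+//   struct ZeroProvider {
+//     std::vector<char> generateBatch(size_t Iterations) {
+//       return std::vector<char>(Iterations); // zero initialized.
+//     }
+//   };
+//
+//   BenchmarkOptions Options;
+//   ZeroProvider Provider;
+//   const BenchmarkResult Result =
+//       benchmark(Options, Provider, [](char Payload) { return Payload; });
+//   // Result.BestGuess now holds the runtime estimation for a single call.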
+
+// Interprets `Array` as a circular buffer of `Size` elements.
+template <typename T> class CircularArrayRef {
+  llvm::ArrayRef<T> Array;
+  size_t Size;
+
+public:
+  using value_type = T;
+  using reference = T &;
+  using const_reference = const T &;
+  using difference_type = ssize_t;
+  using size_type = size_t;
+
+  class const_iterator
+      : public std::iterator<std::forward_iterator_tag, T> {
+    llvm::ArrayRef<T> Array;
+    size_t Index;
+
+  public:
+    explicit const_iterator(llvm::ArrayRef<T> Array, size_t Index = 0)
+        : Array(Array), Index(Index) {}
+    const_iterator &operator++() {
+      ++Index;
+      return *this;
+    }
+    bool operator==(const_iterator Other) const { return Index == Other.Index; }
+    bool operator!=(const_iterator Other) const { return !(*this == Other); }
+    const T &operator*() const { return Array[Index % Array.size()]; }
+  };
+
+  CircularArrayRef(llvm::ArrayRef<T> Array, size_t Size)
+      : Array(Array), Size(Size) {
+    assert(Array.size() > 0);
+  }
+
+  const_iterator begin() const { return const_iterator(Array); }
+  const_iterator end() const { return const_iterator(Array, Size); }
+};
+
+// A convenient helper to produce a CircularArrayRef from an ArrayRef.
+template <typename T>
+CircularArrayRef<T> cycle(llvm::ArrayRef<T> Array, size_t Size) {
+  return {Array, Size};
+}
+
+// Creates an std::array whose storage size is constrained under `Bytes`.
+template <typename T, size_t Bytes>
+using ByteConstrainedArray = std::array<T, Bytes / sizeof(T)>;
+
+// A convenient helper to produce a CircularArrayRef from a
+// ByteConstrainedArray.
+template <typename T, size_t N>
+CircularArrayRef<T> cycle(const std::array<T, N> &Container, size_t Size) {
+  return {llvm::ArrayRef<T>(Container.cbegin(), Container.cend()), Size};
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+#endif // LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
diff --git a/libc/utils/benchmarks/LibcBenchmark.cpp b/libc/utils/benchmarks/LibcBenchmark.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcBenchmark.cpp
@@ -0,0 +1,40 @@
+//===-------- `Benchmark` function ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcBenchmark.h"
+#include "llvm/Support/Host.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+void checkRequirements() {
+  const auto &cpu_info = benchmark::CPUInfo::Get();
+  if (cpu_info.scaling_enabled)
+    report_fatal_error(
+        "CPU scaling is enabled, the benchmark real time measurements may be "
+        "noisy and will incur extra overhead.");
+}
+
+HostState HostState::get() {
+  const auto &CpuInfo = benchmark::CPUInfo::Get();
+  HostState H;
+  H.CpuFrequency = CpuInfo.cycles_per_second;
+  H.CpuName = llvm::sys::getHostCPUName().str();
+  for (const auto &BenchmarkCacheInfo : CpuInfo.caches) {
+    CacheInfo CI;
+    CI.Type = BenchmarkCacheInfo.type;
+    CI.Level = BenchmarkCacheInfo.level;
+    CI.Size = BenchmarkCacheInfo.size;
+    CI.NumSharing = BenchmarkCacheInfo.num_sharing;
+    H.Caches.push_back(std::move(CI));
+  }
+  return H;
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/LibcBenchmarkTest.cpp b/libc/utils/benchmarks/LibcBenchmarkTest.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcBenchmarkTest.cpp
@@ -0,0 +1,168 @@
+#include "LibcBenchmark.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <chrono>
+#include <limits>
+#include <queue>
+#include <vector>
+
+using std::chrono::nanoseconds;
+using ::testing::ElementsAre;
+using ::testing::Field;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+namespace llvm {
+namespace libc_benchmarks {
+namespace {
+
+// A simple parameter provider returning a zero initialized vector of size
+// `iterations`.
+struct DummyParameterProvider {
+  std::vector<char> generateBatch(size_t iterations) {
+    return std::vector<char>(iterations);
+  }
+};
+
+class LibcBenchmark : public ::testing::Test {
+public:
+  // A Clock interface suitable for testing.
+  // - Either it returns 0,
+  // - Or a timepoint coming from the `setMeasurements` call.
+  Duration now() {
+    if (!MaybeTimepoints)
+      return {};
+    assert(!MaybeTimepoints->empty());
+    const Duration timepoint = MaybeTimepoints->front();
+    MaybeTimepoints->pop();
+    return timepoint;
+  }
+
+protected:
+  void SetUp() override { Options.Log = BenchmarkLog::Full; }
+
+  void TearDown() override {
+    // We make sure all the expected measurements were performed.
+    if (MaybeTimepoints)
+      EXPECT_THAT(*MaybeTimepoints, IsEmpty());
+  }
+
+  BenchmarkResult run() {
+    return benchmark(Options, ParameterProvider, DummyFunction, *this);
+  }
+
+  void setMeasurements(llvm::ArrayRef<Duration> Durations) {
+    MaybeTimepoints.emplace(); // Create the optional value.
+    Duration current_time = nanoseconds(1);
+    for (const auto &Duration : Durations) {
+      MaybeTimepoints->push(current_time);
+      current_time += Duration;
+      MaybeTimepoints->push(current_time);
+      current_time += nanoseconds(1);
+    }
+  }
+
+  BenchmarkOptions Options;
+
+private:
+  DummyParameterProvider ParameterProvider;
+  static char DummyFunction(char payload) { return payload; }
+  llvm::Optional<std::queue<Duration>> MaybeTimepoints;
+};
+
+TEST_F(LibcBenchmark, MaxSamplesReached) {
+  Options.MaxSamples = 1;
+  const auto Result = run();
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 1);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::MaxSamplesReached);
+}
+
+TEST_F(LibcBenchmark, MaxDurationReached) {
+  Options.MaxDuration = nanoseconds(10);
+  setMeasurements({nanoseconds(11)});
+  const auto Result = run();
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 1);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::MaxDurationReached);
+}
+
+TEST_F(LibcBenchmark, MaxIterationsReached) {
+  Options.InitialIterations = 1;
+  Options.MaxIterations = 20;
+  Options.ScalingFactor = 2;
+  Options.Epsilon = 0; // unreachable.
+  const auto Result = run();
+  EXPECT_THAT(*Result.MaybeBenchmarkLog,
+              ElementsAre(Field(&BenchmarkState::LastSampleIterations, 1),
+                          Field(&BenchmarkState::LastSampleIterations, 2),
+                          Field(&BenchmarkState::LastSampleIterations, 4),
+                          Field(&BenchmarkState::LastSampleIterations, 8),
+                          Field(&BenchmarkState::LastSampleIterations, 16),
+                          Field(&BenchmarkState::LastSampleIterations, 32)));
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 6);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::MaxIterationsReached);
+}
+
+TEST_F(LibcBenchmark, MinSamples) {
+  Options.MinSamples = 4;
+  Options.ScalingFactor = 2;
+  Options.Epsilon = std::numeric_limits<double>::max(); // always reachable.
+  setMeasurements(
+      {nanoseconds(1), nanoseconds(2), nanoseconds(4), nanoseconds(8)});
+  const auto Result = run();
+  EXPECT_THAT(*Result.MaybeBenchmarkLog,
+              ElementsAre(Field(&BenchmarkState::LastSampleIterations, 1),
+                          Field(&BenchmarkState::LastSampleIterations, 2),
+                          Field(&BenchmarkState::LastSampleIterations, 4),
+                          Field(&BenchmarkState::LastSampleIterations, 8)));
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 4);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::PrecisionReached);
+}
+
+TEST_F(LibcBenchmark, Epsilon) {
+  Options.MinSamples = 4;
+  Options.ScalingFactor = 2;
+  Options.Epsilon = std::numeric_limits<double>::max(); // always reachable.
+  setMeasurements(
+      {nanoseconds(1), nanoseconds(2), nanoseconds(4), nanoseconds(8)});
+  const auto Result = run();
+  EXPECT_THAT(*Result.MaybeBenchmarkLog,
+              ElementsAre(Field(&BenchmarkState::LastSampleIterations, 1),
+                          Field(&BenchmarkState::LastSampleIterations, 2),
+                          Field(&BenchmarkState::LastSampleIterations, 4),
+                          Field(&BenchmarkState::LastSampleIterations, 8)));
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 4);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::PrecisionReached);
+}
+
+TEST(ArrayRefLoop, Cycle) {
+  std::array<int, 2> array = {1, 2};
+  EXPECT_THAT(cycle(array, 0), ElementsAre());
+  EXPECT_THAT(cycle(array, 1), ElementsAre(1));
+  EXPECT_THAT(cycle(array, 2), ElementsAre(1, 2));
+  EXPECT_THAT(cycle(array, 3), ElementsAre(1, 2, 1));
+  EXPECT_THAT(cycle(array, 4), ElementsAre(1, 2, 1, 2));
+  EXPECT_THAT(cycle(array, 5), ElementsAre(1, 2, 1, 2, 1));
+}
+
+TEST(ByteConstrainedArray, Simple) {
+  EXPECT_THAT((ByteConstrainedArray<char, 17>()), SizeIs(17));
+  EXPECT_THAT((ByteConstrainedArray<uint16_t, 17>()), SizeIs(8));
+  EXPECT_THAT((ByteConstrainedArray<uint32_t, 17>()), SizeIs(4));
+  EXPECT_THAT((ByteConstrainedArray<uint64_t, 17>()), SizeIs(2));
+
+  EXPECT_LE(sizeof(ByteConstrainedArray<char, 17>), 17U);
+  EXPECT_LE(sizeof(ByteConstrainedArray<uint16_t, 17>), 17U);
+  EXPECT_LE(sizeof(ByteConstrainedArray<uint32_t, 17>), 17U);
+  EXPECT_LE(sizeof(ByteConstrainedArray<uint64_t, 17>), 17U);
+}
+
+TEST(ByteConstrainedArray, Cycle) {
+  ByteConstrainedArray<uint64_t, 17> TwoValues{{1UL, 2UL}};
+  EXPECT_THAT(cycle(TwoValues, 5), ElementsAre(1, 2, 1, 2, 1));
+}
+} // namespace
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmark.h b/libc/utils/benchmarks/LibcMemoryBenchmark.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmark.h
@@ -0,0 +1,183 @@
+//===-------- Benchmark memory specific tools -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This file complements the `benchmark` header with memory specific tools and
+// benchmarking facilities.
+
+#ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
+#define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
+
+#include "LibcBenchmark.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Alignment.h"
+#include <cstdlib>
+#include <random>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+//--------------
+// Configuration
+//--------------
+
+// Specifies a range of sizes to explore.
+struct SizeRange {
+  uint32_t From = 0;  // Inclusive
+  uint32_t To = 1024; // Inclusive
+  uint32_t Step = 1;
+};
+
+// An object to define how to test a memory function.
+struct StudyConfiguration {
+  // The number of runs for the study.
+  uint32_t Runs = 1;
+
+  // The size of the buffers (1 buffer for memset but 2 for memcpy or memcmp).
+  // When testing small sizes, it's important to keep the total allocated
+  // size under the size of the L1 cache (usually 16 or 32KiB). The framework
+  // will also use 2KiB of additional L1 memory to store the function
+  // parameters.
+  uint32_t BufferSize = 8192;
+
+  // The range of sizes to exercise.
+  SizeRange Size;
+
+  MaybeAlign AddressAlignment; // Unset: Use the start of the buffer (which is
+                               //        at least cache line aligned),
+                               //     1: Use a random address,
+                               //    >1: Use a random address aligned to the
+                               //        value.
+
+  // The value to use for memset.
+  uint8_t MemsetValue = 0;
+
+  // The mismatch position for memcmp.
+  uint32_t MemcmpMismatchAt = 0; //  0: Buffers compare equal,
+                                 // >0: Buffers compare different at byte N-1.
+};
+
+//--------
+// Results
+//--------
+
+// The time to run one iteration of the function under test for the specified
+// Size.
+struct Measurement {
+  uint32_t Size = 0;
+  Duration Runtime = {};
+};
+
+// The measurements for a specific function.
+struct FunctionMeasurements {
+  std::string Name;
+  std::vector<Measurement> Measurements;
+};
+
+// The root object containing all the data (configuration and measurements).
+struct Study {
+  HostState Host;
+  BenchmarkOptions Options;
+  StudyConfiguration Configuration;
+  SmallVector<FunctionMeasurements, 4> Functions;
+};
+
+// Provides an aligned, dynamically allocated buffer.
+class AlignedBuffer {
+  char *const Buffer = nullptr;
+  size_t Size = 0;
+
+public:
+  static constexpr size_t Alignment = 1024;
+
+  explicit AlignedBuffer(size_t Size)
+      : Buffer(static_cast<char *>(aligned_alloc(1024, Size))), Size(Size) {}
+  ~AlignedBuffer() { free(Buffer); }
+
+  inline char *operator+(size_t Index) { return Buffer + Index; }
+  inline const char *operator+(size_t Index) const { return Buffer + Index; }
+  inline char &operator[](size_t Index) { return Buffer[Index]; }
+  inline const char &operator[](size_t Index) const { return Buffer[Index]; }
+  inline char *begin() { return Buffer; }
+  inline char *end() { return Buffer + Size; }
+};
+
+// Implements the ParameterProvider abstraction needed by the `benchmark`
+// function. This implementation makes sure that all parameters will fit into
+// `StorageSize` bytes. The total memory accessed during the benchmark (the
+// storage for the ParameterProvider plus the memory buffers) should be less
+// than the data L1 cache.
+template <typename Context, size_t StorageSize = 8 * 1024>
+class SmallParameterProvider {
+  using ParameterType = typename Context::ParameterType;
+  ByteConstrainedArray<ParameterType, StorageSize> Parameters;
+  size_t LastIterations;
+  Context &Ctx;
+
+public:
+  explicit SmallParameterProvider(Context &C) : Ctx(C) {}
+  SmallParameterProvider(const SmallParameterProvider &) = delete;
+  SmallParameterProvider &operator=(const SmallParameterProvider &) = delete;
+
+  // Useful to compute the histogram of the size parameter.
+  CircularArrayRef<ParameterType> getLastBatch() const {
+    return cycle(Parameters, LastIterations);
+  }
+
+  // Implements the interface needed by the `benchmark` function.
+  CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
+    LastIterations = Iterations;
+    Ctx.Randomize(Parameters);
+    return getLastBatch();
+  }
+};
+
+// Helper to generate random buffer offsets that satisfy the configuration
+// constraints.
+class OffsetDistribution {
+  std::uniform_int_distribution<uint32_t> Distribution;
+  uint32_t Factor;
+
+public:
+  explicit OffsetDistribution(const StudyConfiguration &Conf);
+
+  template <class Generator> uint32_t operator()(Generator &G) {
+    return Distribution(G) * Factor;
+  }
+};
+
+// Helper to generate random buffer offsets that satisfy the configuration
+// constraints. It is specifically designed to benchmark `memcmp` functions
+// where we may want the Nth byte to differ.
+class MismatchOffsetDistribution {
+  std::uniform_int_distribution<size_t> MismatchIndexSelector;
+  llvm::SmallVector<uint32_t, 16> MismatchIndices;
+  const uint32_t MismatchAt;
+
+public:
+  explicit MismatchOffsetDistribution(const StudyConfiguration &Conf);
+
+  explicit operator bool() const { return !MismatchIndices.empty(); }
+
+  const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const {
+    return MismatchIndices;
+  }
+
+  template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) {
+    const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)];
+    // We need to position the offset so that a mismatch occurs at MismatchAt.
+    if (Size >= MismatchAt)
+      return MismatchIndex - MismatchAt;
+    // Size is too small to trigger the mismatch.
+    return MismatchIndex - Size - 1;
+  }
+};
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+#endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmark.cpp b/libc/utils/benchmarks/LibcMemoryBenchmark.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmark.cpp
@@ -0,0 +1,62 @@
+//===-------- Benchmark memory specific tools -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcMemoryBenchmark.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <random>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// Returns a distribution that samples the buffer so as to satisfy the
+// required alignment.
+// When alignment is set, the distribution is scaled down by `Factor` and
+// scaled up again by the same amount during sampling.
+static std::uniform_int_distribution<uint32_t>
+GetOffsetDistribution(const StudyConfiguration &Conf) {
+  if (Conf.AddressAlignment &&
+      *Conf.AddressAlignment > AlignedBuffer::Alignment)
+    report_fatal_error(
+        "AddressAlignment must be less than or equal to "
+        "AlignedBuffer::Alignment");
+  if (!Conf.AddressAlignment)
+    return std::uniform_int_distribution<uint32_t>(0, 0); // Always 0.
+  // If we test up to Size bytes, the returned offset must stay under
+  // BufferSize - Size.
+  int64_t MaxOffset = Conf.BufferSize;
+  MaxOffset -= Conf.Size.To;
+  MaxOffset -= 1;
+  if (MaxOffset < 0)
+    report_fatal_error(
+        "BufferSize too small to exercise the specified Size configuration");
+  MaxOffset /= Conf.AddressAlignment->value();
+  return std::uniform_int_distribution<uint32_t>(0, MaxOffset);
+}
+
+OffsetDistribution::OffsetDistribution(const StudyConfiguration &Conf)
+    : Distribution(GetOffsetDistribution(Conf)),
+      Factor(Conf.AddressAlignment.valueOrOne().value()) {}
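+
+// Worked example (numbers borrowed from the unit test): with BufferSize = 16,
+// Size.To = 4 and MemcmpMismatchAt = 2, the constructor below precomputes the
+// mismatch indices {5, 9, 13}, i.e. starting at ToSize + 1 and stepping by
+// ToSize. For a comparison of Size >= 2, operator() returns
+// MismatchIndex - 2, so the compared region contains the mismatching byte;
+// for smaller sizes it returns MismatchIndex - Size - 1, so the region
+// [Offset, Offset + Size) stays clear of any mismatch.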
+
+// Precomputes the offsets at which to insert mismatches between the two
+// buffers.
+MismatchOffsetDistribution::MismatchOffsetDistribution(
+    const StudyConfiguration &Conf)
+    : MismatchAt(Conf.MemcmpMismatchAt) {
+  if (MismatchAt <= 1)
+    return;
+  const auto ToSize = Conf.Size.To;
+  for (size_t I = ToSize + 1; I < Conf.BufferSize; I += ToSize)
+    MismatchIndices.push_back(I);
+  if (MismatchIndices.empty())
+    llvm::report_fatal_error("Unable to generate mismatch");
+  MismatchIndexSelector =
+      std::uniform_int_distribution<size_t>(0, MismatchIndices.size() - 1);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmarkMain.h b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.h
@@ -0,0 +1,36 @@
+//===-------- BenchmarkRunner interface -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H
+#define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H
+
+#include "LibcBenchmark.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// Each memory function benchmark implements this interface.
+// It is used by the main function to run all benchmarks in a uniform manner.
+class BenchmarkRunner {
+public:
+  virtual ~BenchmarkRunner() {}
+
+  // Returns a list of all available functions to test.
+  virtual ArrayRef<StringRef> getFunctionNames() const = 0;
+
+  // Performs the benchmarking for a particular FunctionName and Size.
+  virtual BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                                    StringRef FunctionName, size_t Size) = 0;
+};
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+#endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp
@@ -0,0 +1,100 @@
+//===-------- Benchmark --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcMemoryBenchmarkMain.h"
+#include "JSON.h"
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+static cl::opt<std::string>
+    Configuration("conf", cl::desc("Specify configuration filename"),
+                  cl::value_desc("filename"), cl::init(""));
+
+static cl::opt<std::string> Output("o", cl::desc("Specify output filename"),
+                                   cl::value_desc("filename"), cl::init("-"));
+
+extern std::unique_ptr<BenchmarkRunner>
+getRunner(const StudyConfiguration &Conf);
+
+void Main() {
+#ifndef NDEBUG
+  static_assert(
+      false,
+      "For reproducibility benchmarks should not be compiled in DEBUG mode.");
+#endif
+  checkRequirements();
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+      MemoryBuffer::getFileOrSTDIN(Configuration);
+  if (!MB)
+    report_fatal_error(
+        Twine("Could not open configuration file: ").concat(Configuration));
+  auto ErrorOrStudy = ParseJsonStudy((*MB)->getBuffer());
+  if (!ErrorOrStudy)
+    report_fatal_error(ErrorOrStudy.takeError());
+
+  const auto StudyPrototype = *ErrorOrStudy;
+
+  Study S;
+  S.Host = HostState::get();
+  S.Options = StudyPrototype.Options;
+  S.Configuration = StudyPrototype.Configuration;
+
+  const auto Runs = S.Configuration.Runs;
+  const auto &SR = S.Configuration.Size;
+  std::unique_ptr<BenchmarkRunner> Runner = getRunner(S.Configuration);
+  const size_t TotalSteps =
+      Runner->getFunctionNames().size() * Runs * ((SR.To - SR.From) / SR.Step);
+  size_t Steps = 0;
+  for (auto FunctionName : Runner->getFunctionNames()) {
+    FunctionMeasurements FM;
+    FM.Name = FunctionName;
+    for (size_t Run = 0; Run < Runs; ++Run) {
+      for (uint32_t Size = SR.From; Size <= SR.To; Size += SR.Step) {
+        const auto Result = Runner->benchmark(S.Options, FunctionName, Size);
+        Measurement Measurement;
+        Measurement.Runtime = Result.BestGuess;
+        Measurement.Size = Size;
+        FM.Measurements.push_back(Measurement);
+        outs() << format("%3d%% run: %2d / %2d size: %5d ",
+                         (Steps * 100 / TotalSteps), Run, Runs, Size)
+               << FunctionName
+               << " \r";
+        ++Steps;
+      }
+    }
+    S.Functions.push_back(std::move(FM));
+  }
+
+  std::error_code EC;
+  raw_fd_ostream FOS(Output, EC);
+  if (EC)
+    report_fatal_error(Twine("Could not open file: ")
+                           .concat(EC.message())
+                           .concat(", ")
+                           .concat(Output));
+  json::OStream JOS(FOS);
+  SerializeToJson(S, JOS);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+int main(int argc, char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv);
+  llvm::libc_benchmarks::Main();
+  return EXIT_SUCCESS;
+}
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmarkTest.cpp b/libc/utils/benchmarks/LibcMemoryBenchmarkTest.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmarkTest.cpp
@@ -0,0 +1,112 @@
+#include "LibcMemoryBenchmark.h"
+#include "llvm/Support/Alignment.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::AllOf;
+using testing::AnyOf;
+using testing::ElementsAre;
+using testing::Ge;
+using testing::Gt;
+using testing::Le;
+using testing::Lt;
+
+namespace llvm {
+namespace libc_benchmarks {
+namespace {
+
+TEST(AlignedBuffer, IsAligned) {
+  AlignedBuffer AB(0);
+  EXPECT_TRUE(isAddrAligned(Align(AlignedBuffer::Alignment), AB.begin()));
+}
+
+TEST(AlignedBuffer, Empty) {
+  AlignedBuffer AB(0);
+  EXPECT_EQ(std::distance(AB.begin(), AB.end()), 0U);
+}
+
+TEST(OffsetDistribution, AlignToBegin) {
+  StudyConfiguration Conf;
+  Conf.BufferSize = 8192;
+  Conf.AddressAlignment = None;
+
+  OffsetDistribution OD(Conf);
+  std::default_random_engine Gen;
+  for (size_t I = 0; I <= 10; ++I)
+    EXPECT_EQ(OD(Gen), 0U);
+}
+
+TEST(OffsetDistribution, NoAlignment) {
+  StudyConfiguration Conf;
+  Conf.BufferSize = 8192;
+  Conf.AddressAlignment = Align::None();
+  Conf.Size.To = 1;
+
+  OffsetDistribution OD(Conf);
+  std::default_random_engine Gen;
+  for (size_t I = 0; I <= 10; ++I)
+    EXPECT_THAT(OD(Gen), AllOf(Ge(0U), Lt(8192U)));
+}
+
+MATCHER_P(IsDivisibleBy, n, "") {
+  *result_listener << "where the remainder is " << (arg % n);
+  return (arg % n) == 0;
+}
+
+TEST(OffsetDistribution, Aligned) {
+  StudyConfiguration Conf;
+  Conf.BufferSize = 8192;
+  Conf.AddressAlignment = Align(16);
+  Conf.Size.To = 1;
+
+  OffsetDistribution OD(Conf);
+  std::default_random_engine Gen;
+  for (size_t I = 0; I <= 10; ++I)
+    EXPECT_THAT(OD(Gen), AllOf(Ge(0U), Lt(8192U), IsDivisibleBy(16U)));
+}
+
+TEST(MismatchOffsetDistribution, EqualBufferDisablesDistribution) {
+  StudyConfiguration Conf;
+  Conf.MemcmpMismatchAt = 0; // Buffers are equal.
+
+  MismatchOffsetDistribution MOD(Conf);
+  EXPECT_FALSE(MOD);
+}
+
+TEST(MismatchOffsetDistribution, DifferentBufferDisablesDistribution) {
+  StudyConfiguration Conf;
+  Conf.MemcmpMismatchAt = 1; // Buffers are different everywhere.
+
+  MismatchOffsetDistribution MOD(Conf);
+  EXPECT_FALSE(MOD);
+}
+
+TEST(MismatchOffsetDistribution, MismatchAt2) {
+  const uint32_t MismatchAt = 2;
+  const uint32_t ToSize = 4;
+  StudyConfiguration Conf;
+  Conf.BufferSize = 16;
+  Conf.MemcmpMismatchAt = MismatchAt; // Buffers are different at position 2.
+  Conf.Size.To = ToSize;
+
+  MismatchOffsetDistribution MOD(Conf);
+  EXPECT_TRUE(MOD);
+  // We test equality up to ToSize (=4) so we need spans of 4 equal bytes
+  // spaced by one mismatch.
+  EXPECT_THAT(MOD.getMismatchIndices(), ElementsAre(5, 9, 13));
+  std::default_random_engine Gen;
+  for (size_t Iterations = 0; Iterations <= 10; ++Iterations) {
+    for (size_t Size = Conf.Size.From; Size <= ToSize; ++Size) {
+      if (Size >= MismatchAt)
+        EXPECT_THAT(MOD(Gen, Size),
+                    AnyOf(5 - MismatchAt, 9 - MismatchAt, 13 - MismatchAt));
+      else
+        EXPECT_THAT(MOD(Gen, Size),
+                    AnyOf(5 - Size - 1, 9 - Size - 1, 13 - Size - 1));
+    }
+  }
+}
+
+} // namespace
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/Memcmp.cpp b/libc/utils/benchmarks/Memcmp.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/Memcmp.cpp
@@ -0,0 +1,86 @@
+//===-------- Benchmark memcmp implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "LibcMemoryBenchmarkMain.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// The context encapsulates the buffers, parameters and the measurement.
+struct MemcmpContext : public BenchmarkRunner {
+  using FunctionPrototype = int (*)(const void *, const void *, size_t);
+
+  struct ParameterType {
+    uint16_t Offset = 0;
+  };
+
+  explicit MemcmpContext(const StudyConfiguration &Conf)
+      : MOD(Conf), OD(Conf), ABuffer(Conf.BufferSize), BBuffer(Conf.BufferSize),
+        PP(*this) {
+    std::uniform_int_distribution<char> Dis;
+    // Generate random buffer A.
+    for (size_t I = 0; I < Conf.BufferSize; ++I)
+      ABuffer[I] = Dis(Gen);
+    // Copy buffer A to B.
+    memcpy(BBuffer.begin(), ABuffer.begin(), Conf.BufferSize);
+    if (Conf.MemcmpMismatchAt == 0)
+      return; // All same.
+    else if (Conf.MemcmpMismatchAt == 1)
+      for (char &c : BBuffer)
+        ++c; // All different.
+    else
+      for (const auto I : MOD.getMismatchIndices())
+        ++BBuffer[I];
+  }
+
+  // Needed by the ParameterProvider to update the current batch of
+  // parameters.
+  void Randomize(MutableArrayRef<ParameterType> Parameters) {
+    if (MOD)
+      for (auto &P : Parameters)
+        P.Offset = MOD(Gen, CurrentSize);
+    else
+      for (auto &P : Parameters)
+        P.Offset = OD(Gen);
+  }
+
+  ArrayRef<StringRef> getFunctionNames() const override {
+    static std::array<StringRef, 1> kFunctionNames = {"memcmp"};
+    return kFunctionNames;
+  }
+
+  BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                            StringRef FunctionName, size_t Size) override {
+    CurrentSize = Size;
+    FunctionPrototype Function =
+        StringSwitch<FunctionPrototype>(FunctionName).Case("memcmp", &::memcmp);
+    return llvm::libc_benchmarks::benchmark(
+        Options, PP, [this, Function, Size](ParameterType p) {
+          return Function(ABuffer + p.Offset, BBuffer + p.Offset, Size);
+        });
+  }
+
+private:
+  std::default_random_engine Gen;
+  MismatchOffsetDistribution MOD;
+  OffsetDistribution OD;
+  size_t CurrentSize = 0;
+  AlignedBuffer ABuffer;
+  AlignedBuffer BBuffer;
+  SmallParameterProvider<MemcmpContext> PP;
+};
+
+std::unique_ptr<BenchmarkRunner> getRunner(const StudyConfiguration &Conf) {
+  return std::make_unique<MemcmpContext>(Conf);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/Memcpy.cpp b/libc/utils/benchmarks/Memcpy.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/Memcpy.cpp
@@ -0,0 +1,69 @@
+//===-------- Benchmark memcpy implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "LibcMemoryBenchmarkMain.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// The context encapsulates the buffers, the parameters, and the measurement
+// logic.
+struct MemcpyContext : public BenchmarkRunner {
+  using FunctionPrototype = void *(*)(void *, const void *, size_t);
+
+  struct ParameterType {
+    uint16_t SrcOffset = 0;
+    uint16_t DstOffset = 0;
+  };
+
+  explicit MemcpyContext(const StudyConfiguration &Conf)
+      : OD(Conf), SrcBuffer(Conf.BufferSize), DstBuffer(Conf.BufferSize),
+        PP(*this) {}
+
+  // Needed by the ParameterProvider to update the current batch of
+  // parameters.
+  void Randomize(MutableArrayRef<ParameterType> Parameters) {
+    for (auto &P : Parameters) {
+      P.DstOffset = OD(Gen);
+      P.SrcOffset = OD(Gen);
+    }
+  }
+
+  ArrayRef<StringRef> getFunctionNames() const override {
+    static std::array<StringRef, 1> kFunctionNames = {"memcpy"};
+    return kFunctionNames;
+  }
+
+  BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                            StringRef FunctionName, size_t Size) override {
+    FunctionPrototype Function =
+        StringSwitch<FunctionPrototype>(FunctionName).Case("memcpy", &::memcpy);
+    return llvm::libc_benchmarks::benchmark(
+        Options, PP, [this, Function, Size](ParameterType p) {
+          Function(DstBuffer + p.DstOffset, SrcBuffer + p.SrcOffset, Size);
+          return DstBuffer + p.DstOffset;
+        });
+  }
+
+private:
+  std::default_random_engine Gen;
+  OffsetDistribution OD;
+  AlignedBuffer SrcBuffer;
+  AlignedBuffer DstBuffer;
+  SmallParameterProvider<MemcpyContext> PP;
+};
+
+std::unique_ptr<BenchmarkRunner> getRunner(const StudyConfiguration &Conf) {
+  return std::make_unique<MemcpyContext>(Conf);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/Memset.cpp b/libc/utils/benchmarks/Memset.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/Memset.cpp
@@ -0,0 +1,66 @@
+//===-------- Benchmark memset implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "LibcMemoryBenchmarkMain.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// The context encapsulates the buffers, the parameters, and the measurement
+// logic.
+struct MemsetContext : public BenchmarkRunner {
+  using FunctionPrototype = void *(*)(void *, int, size_t);
+
+  struct ParameterType {
+    uint16_t DstOffset = 0;
+  };
+
+  explicit MemsetContext(const StudyConfiguration &Conf)
+      : OD(Conf), DstBuffer(Conf.BufferSize), MemsetValue(Conf.MemsetValue),
+        PP(*this) {}
+
+  // Needed by the ParameterProvider to update the current batch of
+  // parameters.
+  void Randomize(MutableArrayRef<ParameterType> Parameters) {
+    for (auto &P : Parameters) {
+      P.DstOffset = OD(Gen);
+    }
+  }
+
+  ArrayRef<StringRef> getFunctionNames() const override {
+    static std::array<StringRef, 1> kFunctionNames = {"memset"};
+    return kFunctionNames;
+  }
+
+  BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                            StringRef FunctionName, size_t Size) override {
+    FunctionPrototype Function =
+        StringSwitch<FunctionPrototype>(FunctionName).Case("memset", &::memset);
+    return llvm::libc_benchmarks::benchmark(
+        Options, PP, [this, Function, Size](ParameterType p) {
+          Function(DstBuffer + p.DstOffset, MemsetValue, Size);
+          return DstBuffer + p.DstOffset;
+        });
+  }
+
+private:
+  std::default_random_engine Gen;
+  OffsetDistribution OD;
+  AlignedBuffer DstBuffer;
+  const uint8_t MemsetValue;
+  SmallParameterProvider<MemsetContext> PP;
+};
+
+std::unique_ptr<BenchmarkRunner> getRunner(const StudyConfiguration &Conf) {
+  return std::make_unique<MemsetContext>(Conf);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/README.md b/libc/utils/benchmarks/README.md
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/README.md
@@ -0,0 +1,85 @@
+# Libc mem* benchmarks
+
+This framework evaluates and compares the relative performance of memory
+function implementations on a particular host.
+
+It will also be used to track the performance of implementations over time.
+
+## Quick start
+
+### Setup
+
+Make sure `matplotlib`, `scipy` and `numpy` are set up correctly:
+
+```shell
+apt-get install python3-pip
+pip3 install matplotlib scipy numpy
+```
+
+### Run and display `memcpy` benchmark
+
+The following commands run the benchmark and display a 95% confidence
+interval curve of **time per copied byte**. The plot also shows **host
+information** and the **benchmarking configuration**.
+
+```shell
+cd llvm-project
+cmake -B/tmp/build -Sllvm -DLLVM_ENABLE_PROJECTS=libc -DCMAKE_BUILD_TYPE=Release
+make -C /tmp/build -j display-libc-memcpy-benchmark-small
+```
+
+## Benchmarking regimes
+
+Using a profiler to observe size distributions for calls into libc functions,
+it was found that most operations act on a small number of bytes.
+
+Function           | % of calls with size ≤ 128 | % of calls with size ≤ 1024
+------------------ | --------------------------: | ---------------------------:
+memcpy             | 96%                          | 99%
+memset             | 91%                          | 99.9%
+memcmp¹            | 99.5%                        | ~100%
+
+Benchmarking configurations come in two flavors:
+
+ - [small](libc/utils/benchmarks/configuration_small.json)
+   - Exercises sizes up to `1KiB`, representative of normal usage
+   - The data is kept in the `L1` cache to prevent measuring the memory
+     subsystem
+ - [big](libc/utils/benchmarks/configuration_big.json)
+   - Exercises sizes up to `32MiB` to test large operations
+   - Caching effects can show up here, which makes comparisons across
+     different hosts unreliable
+
+_¹ The size refers to the size of the buffers to compare, not the number of
+bytes until the first difference._
+
+## Benchmarking targets
+
+The benchmarking process occurs in two steps:
+
+1. Benchmark the functions and produce a `json` file
+2. Display (or render) the `json` file
+
+Targets are of the form `<action>-libc-<function>-benchmark-<configuration>`
+
+ - `action` is one of:
+   - `run`, runs the benchmark and writes the `json` file
+   - `display`, displays the graph on screen
+   - `render`, renders the graph on disk as a `png` file
+ - `function` is one of: `memcpy`, `memcmp`, `memset`
+ - `configuration` is one of: `small`, `big`
+
+## Superposing curves
+
+It is possible to **merge** several `json` files into a single graph. This is
+useful to **compare** implementations.
+
+In the following example we superpose the curves for `memcpy`, `memset` and
+`memcmp`:
+
+```shell
+> make -C /tmp/build run-libc-memcpy-benchmark-small run-libc-memcmp-benchmark-small run-libc-memset-benchmark-small
+> python libc/utils/benchmarks/render.py /tmp/last-libc-memcpy-benchmark-small.json /tmp/last-libc-memcmp-benchmark-small.json /tmp/last-libc-memset-benchmark-small.json
+```
+
+## Useful `render.py` flags
+
+ - To save the produced graph, use `--output=/tmp/benchmark_curve.png`.
+ - To prevent the graph from appearing on the screen, use `--headless`.
+
+## Under the hood
+
+To learn more about the design decisions behind the benchmarking framework,
+have a look at the [RATIONALE.md](RATIONALE.md) file.
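The measurement files can also be post-processed directly, for example to compare two runs numerically rather than visually. Below is a minimal sketch, assuming a study file produced by one of the `run-*` targets; the field names (`Functions`, `Name`, `Sizes`, `Runtimes`) are the ones `render.py` below consumes, and the input path is illustrative.

```python
import json
from collections import defaultdict

# Path is illustrative; any file produced by a `run-*` target works.
with open("/tmp/last-libc-memcpy-benchmark-small.json") as f:
    study = json.load(f)

for function in study["Functions"]:
    # "Sizes" and "Runtimes" are parallel arrays holding one runtime
    # sample per (run, size) measurement.
    samples = defaultdict(list)
    for size, runtime in zip(function["Sizes"], function["Runtimes"]):
        samples[size].append(runtime)
    # Print the mean runtime per size.
    for size in sorted(samples):
        mean = sum(samples[size]) / len(samples[size])
        print("%s size=%d mean=%.3gs" % (function["Name"], size, mean))
```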
diff --git a/libc/utils/benchmarks/configuration_big.json b/libc/utils/benchmarks/configuration_big.json
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/configuration_big.json
@@ -0,0 +1,24 @@
+{
+    "Options":{
+        "MinDuration":0.001,
+        "MaxDuration":1,
+        "InitialIterations":100,
+        "MaxIterations":10000000,
+        "MinSamples":1,
+        "MaxSamples":1,
+        "Epsilon":0.01,
+        "ScalingFactor":1.4
+    },
+    "Configuration":{
+        "Runs":5,
+        "BufferSize":134217728,
+        "Size":{
+            "From":0,
+            "To":33554432,
+            "Step":1048576
+        },
+        "AddressAlignment":1,
+        "MemsetValue":0,
+        "MemcmpMismatchAt":0
+    }
+}
diff --git a/libc/utils/benchmarks/configuration_small.json b/libc/utils/benchmarks/configuration_small.json
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/configuration_small.json
@@ -0,0 +1,24 @@
+{
+    "Options":{
+        "MinDuration":0.001,
+        "MaxDuration":1,
+        "InitialIterations":100,
+        "MaxIterations":10000000,
+        "MinSamples":4,
+        "MaxSamples":1000,
+        "Epsilon":0.01,
+        "ScalingFactor":1.4
+    },
+    "Configuration":{
+        "Runs":10,
+        "BufferSize":8192,
+        "Size":{
+            "From":0,
+            "To":1024,
+            "Step":1
+        },
+        "AddressAlignment":1,
+        "MemsetValue":0,
+        "MemcmpMismatchAt":0
+    }
+}
diff --git a/libc/utils/benchmarks/render.py b/libc/utils/benchmarks/render.py
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/render.py
@@ -0,0 +1,175 @@
+"""Reads JSON files produced by the benchmarking framework and renders them.
+
+Installation:
+> apt-get install python3-pip
+> pip3 install matplotlib scipy numpy
+
+Run:
+> python3 render.py <files>
+
+Rendering can occur on disk by specifying the --output option or on screen if
+the --headless flag is not set.
+"""
+
+import argparse
+import collections
+import json
+import math
+import pprint
+import sys
+import matplotlib.pyplot as plt
+from matplotlib.ticker import EngFormatter
+import numpy as np
+import scipy.stats
+
+
+def format_freq(number):
+    """Returns a human readable frequency."""
+    magnitude = 0
+    while math.fabs(number) >= 1000:
+        number /= 1000.0
+        magnitude += 1
+    return "%g%sHz" % (number, ["", "k", "M", "G"][magnitude])
+
+
+def format_size(number):
+    """Returns number in human readable form."""
+    magnitude = 0
+    while number >= 1000 and number % 1000 == 0:
+        number /= 1000
+        magnitude += 1
+    return "%g%s" % (number, ["", "K", "M", "G"][magnitude])
+
+
+def mean_confidence_interval(dataset, confidence=0.95):
+    """Returns the mean and half confidence interval for the dataset."""
+    a = 1.0 * np.array(dataset)
+    n = len(a)
+    m, se = np.mean(a), scipy.stats.sem(a)
+    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
+    return m, h
+
+
+def add_plot(function_name, points):
+    """Plots measurements for a function."""
+    n = len(points.keys())
+    x = np.zeros(n)
+    y = np.zeros(n)
+    yerr = np.zeros(n)
+
+    for i, key in enumerate(sorted(points.keys())):
+        values = points[key]
+        m, e = mean_confidence_interval(values)
+        x[i] = key
+        y[i] = m
+        yerr[i] = e
+
+    plt.plot(x, y, linewidth=1, label=function_name)
+    plt.fill_between(x, y - yerr, y + yerr, alpha=0.5)
+
+
+def get_title(host):
+    """Formats the Host object into a title for the plot."""
+    cpu_name = host["CpuName"]
+    cpu_freq = format_freq(host["CpuFrequency"])
+    cache_strings = []
+    for cache in host["Caches"]:
+        prefix = {
+            "Instruction": "i",
+            "Data": "d",
+            "Unified": "u",
+        }.get(cache["Type"])
+        cache_strings.append(r"%sL_%d %s_{/%d}" %
+                             (prefix, cache["Level"], format_size(
+                                 cache["Size"]), cache["NumSharing"]))
+    title = "%s (%s)" % (cpu_name, cpu_freq)
+    subtitle = r"$" + ", ".join(sorted(cache_strings)) + r"$"
".join(sorted(cache_strings)) + r"$" + return title + "\n" + subtitle + + +def get_host(jsons): + """Returns the host of the different json objects iff they are all the same. + """ + host = None + for root in jsons: + if host and host != root["Host"]: + sys.exit("The datasets are not coming from the same Host") + if not host: + host = root["Host"] + return host + + +def get_configuration(jsons): + """Returns the configuration of the different json objects iff they are all + the same. + """ + config = None + for root in jsons: + if config and config != root["Configuration"]: + return None + if not config: + config = root["Configuration"] + return config + + +def setup_graphs(files): + """Setups the graphs to render from the json files.""" + jsons = [] + for file in files: + with open(file) as json_file: + jsons.append(json.load(json_file)) + if not jsons: + sys.exit("Nothing to process") + + for root in jsons: + for function in root["Functions"]: + function_name = function["Name"] + sizes = function["Sizes"] + runtimes = function["Runtimes"] + assert len(sizes) == len(runtimes) + values = collections.defaultdict(lambda: []) + for i in range(len(sizes)): + values[sizes[i]].append(runtimes[i]) + add_plot(function_name, values) + + config = get_configuration(jsons) + if config: + plt.figtext( + 0.95, + 0.15, + pprint.pformat(config), + verticalalignment="bottom", + horizontalalignment="right", + multialignment="left", + fontsize="small", + bbox=dict(boxstyle="round", facecolor="wheat")) + + axes = plt.gca() + axes.set_title(get_title(get_host(jsons))) + axes.set_ylim(bottom=0) + axes.set_xlabel("Size") + axes.set_ylabel("Time") + axes.xaxis.set_major_formatter(EngFormatter(unit="B")) + axes.yaxis.set_major_formatter(EngFormatter(unit="s")) + plt.legend() + plt.grid() + + +def main(): + parser = argparse.ArgumentParser( + description="Process benchmark json files.") + parser.add_argument("files", nargs="+", help="The json files to read from.") + parser.add_argument("--output", help="The output file to write the graph.") + parser.add_argument( + "--headless", + help="If set do not display the graph.", + action="store_true") + args = parser.parse_args() + setup_graphs(args.files) + if args.output: + plt.savefig(args.output) + if not args.headless: + plt.show() + +if __name__ == "__main__": + main()