diff --git a/libc/benchmarks/CMakeLists.txt b/libc/benchmarks/CMakeLists.txt --- a/libc/benchmarks/CMakeLists.txt +++ b/libc/benchmarks/CMakeLists.txt @@ -20,6 +20,7 @@ SOURCE_DIR ${LIBC_SOURCE_DIR}/../llvm/utils/benchmark INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/google-benchmark CMAKE_CACHE_ARGS + -DBUILD_SHARED_LIBS:BOOL=OFF -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER} @@ -114,7 +115,10 @@ MemorySizeDistributions.cpp MemorySizeDistributions.h ) -target_link_libraries(libc-memory-benchmark PUBLIC libc-benchmark) +target_link_libraries(libc-memory-benchmark + PUBLIC + libc-benchmark +) fix_rtti(libc-memory-benchmark) add_libc_benchmark_unittest(libc-memory-benchmark-test @@ -138,54 +142,14 @@ ) #============================================================================== -# Benchmark tests configuration +# Benchmarking tool #============================================================================== -function(add_libc_benchmark_analysis conf_target run_target) - set(png_file "/tmp/last-${conf_target}.png") - set(render_target render-${conf_target}) - add_custom_target(${render_target} - COMMAND python3 render.py3 ${json_file} --headless --output=${png_file} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "render ${libc_target} to ${png_file}" - ) - add_dependencies(${render_target} ${run_target}) - - set(display_target display-${conf_target}) - add_custom_target(${display_target} - COMMAND python3 render.py3 ${json_file} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "display ${libc_target}" - ) - add_dependencies(${display_target} ${run_target}) -endfunction() - -function(add_libc_benchmark_configuration target configuration) - set(conf_target ${target}-${configuration}) - set(json_file "/tmp/last-${conf_target}.json") - set(run_target run-${conf_target}) - add_custom_target(${run_target} - COMMAND ${libc_target} --conf=configuration_${configuration}.json -o ${json_file} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - ) - add_libc_benchmark_analysis(${conf_target} ${run_target}) -endfunction() - -function(add_libc_benchmark name file entrypoint_target) - set(libc_target libc-${name}-benchmark) - add_executable(${libc_target} - EXCLUDE_FROM_ALL - ${file} - LibcMemoryBenchmarkMain.h - LibcMemoryBenchmarkMain.cpp - ) - +add_executable(libc-benchmark-main + EXCLUDE_FROM_ALL + LibcMemoryBenchmarkMain.cpp +) +foreach(entrypoint_target libc.src.string.memcpy libc.src.string.memset) get_target_property(entrypoint_object_file ${entrypoint_target} "OBJECT_FILE_RAW") - target_link_libraries(${libc_target} PUBLIC json ${entrypoint_object_file}) - foreach(configuration "small" "big") - add_libc_benchmark_configuration(${libc_target} ${configuration}) - endforeach() -endfunction() - -add_libc_benchmark(memcpy Memcpy.cpp libc.src.string.memcpy) -add_libc_benchmark(memset Memset.cpp libc.src.string.memset) + target_link_libraries(libc-benchmark-main PUBLIC json ${entrypoint_object_file}) +endforeach() diff --git a/libc/benchmarks/JSON.h b/libc/benchmarks/JSON.h --- a/libc/benchmarks/JSON.h +++ b/libc/benchmarks/JSON.h @@ -17,10 +17,10 @@ namespace libc_benchmarks { // Parses a Study from a json string. -Expected ParseJsonStudy(StringRef Content); +Expected parseJsonStudy(StringRef Content); // Serialize a Study as json. -void SerializeToJson(const Study &S, llvm::json::OStream &JOS); +void serializeToJson(const Study &S, llvm::json::OStream &JOS); } // namespace libc_benchmarks } // namespace llvm diff --git a/libc/benchmarks/JSON.cpp b/libc/benchmarks/JSON.cpp --- a/libc/benchmarks/JSON.cpp +++ b/libc/benchmarks/JSON.cpp @@ -40,6 +40,14 @@ return createStringError(errc::io_error, "Can't parse Integer"); } +static Error fromJson(const json::Value &V, bool &Out) { + if (auto B = V.getAsBoolean()) { + Out = *B; + return Error::success(); + } + return createStringError(errc::io_error, "Can't parse Boolean"); +} + static Error fromJson(const json::Value &V, double &Out) { if (auto S = V.getAsNumber()) { Out = *S; @@ -60,10 +68,6 @@ return intFromJsonTemplate(V, Out); } -static Error fromJson(const json::Value &V, uint8_t &Out) { - return intFromJsonTemplate(V, Out); -} - static Error fromJson(const json::Value &V, int &Out) { return intFromJsonTemplate(V, Out); } @@ -186,22 +190,15 @@ return O.takeError(); } -static Error fromJson(const json::Value &V, libc_benchmarks::SizeRange &Out) { - JsonObjectMapper O(V); - O.map("From", Out.From); - O.map("To", Out.To); - O.map("Step", Out.Step); - return O.takeError(); -} - static Error fromJson(const json::Value &V, libc_benchmarks::StudyConfiguration &Out) { JsonObjectMapper O(V); - O.map("Runs", Out.Runs); - O.map("BufferSize", Out.BufferSize); - O.map("Size", Out.Size); - O.map("AddressAlignment", Out.AddressAlignment); - O.map("MemsetValue", Out.MemsetValue); + O.map("Function", Out.Function); + O.map("NumTrials", Out.NumTrials); + O.map("IsSweepMode", Out.IsSweepMode); + O.map("SweepModeMaxSize", Out.SweepModeMaxSize); + O.map("SizeDistributionName", Out.SizeDistributionName); + O.map("AccessAlignment", Out.AccessAlignment); O.map("MemcmpMismatchAt", Out.MemcmpMismatchAt); return O.takeError(); } @@ -223,39 +220,29 @@ return O.takeError(); } -static Error fromJson(const json::Value &V, - libc_benchmarks::FunctionMeasurements &Out) { +static Error fromJson(const json::Value &V, libc_benchmarks::Runtime &Out) { JsonObjectMapper O(V); - O.map("Name", Out.Name); - std::vector Sizes; - O.map("Sizes", Sizes); - std::vector Runtimes; - O.map("Runtimes", Runtimes); - if (Sizes.size() != Runtimes.size()) - return createStringError(errc::io_error, - "Measurement Size and Runtime mistmatch"); - Out.Measurements.resize(Sizes.size()); - for (size_t I = 0; I < Sizes.size(); ++I) { - Out.Measurements[I].Size = Sizes[I]; - Out.Measurements[I].Runtime = Runtimes[I]; - } + O.map("Host", Out.Host); + O.map("BufferSize", Out.BufferSize); + O.map("BatchParameterCount", Out.BatchParameterCount); + O.map("BenchmarkOptions", Out.BenchmarkOptions); return O.takeError(); } static Error fromJson(const json::Value &V, libc_benchmarks::Study &Out) { JsonObjectMapper O(V); - O.map("Host", Out.Host); - O.map("Options", Out.Options); + O.map("StudyName", Out.StudyName); + O.map("Runtime", Out.Runtime); O.map("Configuration", Out.Configuration); - O.map("Functions", Out.Functions); + O.map("Measurements", Out.Measurements); return O.takeError(); } -static double Seconds(const Duration &D) { +static double seconds(const Duration &D) { return std::chrono::duration(D).count(); } -Expected ParseJsonStudy(StringRef Content) { +Expected parseJsonStudy(StringRef Content) { Expected EV = json::parse(Content); if (!EV) return EV.takeError(); @@ -265,7 +252,7 @@ return S; } -static StringRef Serialize(const BenchmarkLog &L) { +static StringRef serialize(const BenchmarkLog &L) { switch (L) { case BenchmarkLog::None: return "None"; @@ -277,89 +264,63 @@ llvm_unreachable("Unhandled BenchmarkLog value"); } -static void Serialize(const BenchmarkOptions &BO, json::OStream &JOS) { - JOS.object([&]() { - JOS.attribute("MinDuration", Seconds(BO.MinDuration)); - JOS.attribute("MaxDuration", Seconds(BO.MaxDuration)); - JOS.attribute("InitialIterations", BO.InitialIterations); - JOS.attribute("MaxIterations", BO.MaxIterations); - JOS.attribute("MinSamples", BO.MinSamples); - JOS.attribute("MaxSamples", BO.MaxSamples); - JOS.attribute("Epsilon", BO.Epsilon); - JOS.attribute("ScalingFactor", BO.ScalingFactor); - JOS.attribute("Log", Serialize(BO.Log)); - }); +static void serialize(const BenchmarkOptions &BO, json::OStream &JOS) { + JOS.attribute("MinDuration", seconds(BO.MinDuration)); + JOS.attribute("MaxDuration", seconds(BO.MaxDuration)); + JOS.attribute("InitialIterations", BO.InitialIterations); + JOS.attribute("MaxIterations", BO.MaxIterations); + JOS.attribute("MinSamples", BO.MinSamples); + JOS.attribute("MaxSamples", BO.MaxSamples); + JOS.attribute("Epsilon", BO.Epsilon); + JOS.attribute("ScalingFactor", BO.ScalingFactor); + JOS.attribute("Log", serialize(BO.Log)); } -static void Serialize(const CacheInfo &CI, json::OStream &JOS) { - JOS.object([&]() { - JOS.attribute("Type", CI.Type); - JOS.attribute("Level", CI.Level); - JOS.attribute("Size", CI.Size); - JOS.attribute("NumSharing", CI.NumSharing); - }); +static void serialize(const CacheInfo &CI, json::OStream &JOS) { + JOS.attribute("Type", CI.Type); + JOS.attribute("Level", CI.Level); + JOS.attribute("Size", CI.Size); + JOS.attribute("NumSharing", CI.NumSharing); } -static void Serialize(const HostState &HS, json::OStream &JOS) { - JOS.object([&]() { - JOS.attribute("CpuName", HS.CpuName); - JOS.attribute("CpuFrequency", HS.CpuFrequency); - JOS.attributeArray("Caches", [&]() { - for (const auto &CI : HS.Caches) - Serialize(CI, JOS); - }); - }); +static void serialize(const StudyConfiguration &SC, json::OStream &JOS) { + JOS.attribute("Function", SC.Function); + JOS.attribute("NumTrials", SC.NumTrials); + JOS.attribute("IsSweepMode", SC.IsSweepMode); + JOS.attribute("SweepModeMaxSize", SC.SweepModeMaxSize); + JOS.attribute("SizeDistributionName", SC.SizeDistributionName); + JOS.attribute("AccessAlignment", + static_cast(SC.AccessAlignment->value())); + JOS.attribute("MemcmpMismatchAt", SC.MemcmpMismatchAt); } -static void Serialize(const StudyConfiguration &SC, json::OStream &JOS) { - JOS.object([&]() { - JOS.attribute("Runs", SC.Runs); - JOS.attribute("BufferSize", SC.BufferSize); - JOS.attributeObject("Size", [&]() { - JOS.attribute("From", SC.Size.From); - JOS.attribute("To", SC.Size.To); - JOS.attribute("Step", SC.Size.Step); - }); - if (SC.AddressAlignment) - JOS.attribute("AddressAlignment", - static_cast(SC.AddressAlignment->value())); - JOS.attribute("MemsetValue", SC.MemsetValue); - JOS.attribute("MemcmpMismatchAt", SC.MemcmpMismatchAt); +static void serialize(const HostState &HS, json::OStream &JOS) { + JOS.attribute("CpuName", HS.CpuName); + JOS.attribute("CpuFrequency", HS.CpuFrequency); + JOS.attributeArray("Caches", [&]() { + for (const auto &CI : HS.Caches) + JOS.object([&]() { serialize(CI, JOS); }); }); } -static void Serialize(const FunctionMeasurements &FM, json::OStream &JOS) { - JOS.object([&]() { - JOS.attribute("Name", FM.Name); - JOS.attributeArray("Sizes", [&]() { - for (const auto &M : FM.Measurements) - JOS.value(M.Size); - }); - JOS.attributeArray("Runtimes", [&]() { - for (const auto &M : FM.Measurements) - JOS.value(Seconds(M.Runtime)); - }); - }); +static void serialize(const Runtime &RI, json::OStream &JOS) { + JOS.attributeObject("Host", [&]() { serialize(RI.Host, JOS); }); + JOS.attribute("BufferSize", RI.BufferSize); + JOS.attribute("BatchParameterCount", RI.BatchParameterCount); + JOS.attributeObject("BenchmarkOptions", + [&]() { serialize(RI.BenchmarkOptions, JOS); }); } -void SerializeToJson(const Study &S, json::OStream &JOS) { +void serializeToJson(const Study &S, json::OStream &JOS) { JOS.object([&]() { - JOS.attributeBegin("Host"); - Serialize(S.Host, JOS); - JOS.attributeEnd(); - - JOS.attributeBegin("Options"); - Serialize(S.Options, JOS); - JOS.attributeEnd(); - - JOS.attributeBegin("Configuration"); - Serialize(S.Configuration, JOS); - JOS.attributeEnd(); - - if (!S.Functions.empty()) { - JOS.attributeArray("Functions", [&]() { - for (const auto &FM : S.Functions) - Serialize(FM, JOS); + JOS.attribute("StudyName", S.StudyName); + JOS.attributeObject("Runtime", [&]() { serialize(S.Runtime, JOS); }); + JOS.attributeObject("Configuration", + [&]() { serialize(S.Configuration, JOS); }); + if (!S.Measurements.empty()) { + JOS.attributeArray("Measurements", [&]() { + for (const auto &M : S.Measurements) + JOS.value(seconds(M)); }); } }); diff --git a/libc/benchmarks/JSONTest.cpp b/libc/benchmarks/JSONTest.cpp --- a/libc/benchmarks/JSONTest.cpp +++ b/libc/benchmarks/JSONTest.cpp @@ -25,22 +25,23 @@ Study getStudy() { return Study{ - HostState{ - "CpuName", 123, {CacheInfo{"A", 1, 2, 3}, CacheInfo{"B", 4, 5, 6}}}, - BenchmarkOptions{std::chrono::seconds(1), std::chrono::seconds(2), 10, - 100, 6, 100, 0.1, 2, BenchmarkLog::Full}, - StudyConfiguration{2, 3, SizeRange{4, 5, 6}, Align(8), 9, 10}, - {FunctionMeasurements{"A", - {Measurement{3, std::chrono::seconds(3)}, - Measurement{3, std::chrono::seconds(4)}}}, - FunctionMeasurements{"B", {}}}}; -} - -static std::string SerializeToString(const Study &S) { + "StudyName", + Runtime{HostState{"CpuName", + 123, + {CacheInfo{"A", 1, 2, 3}, CacheInfo{"B", 4, 5, 6}}}, + 456, 789, + BenchmarkOptions{std::chrono::seconds(1), std::chrono::seconds(2), + 10, 100, 6, 100, 0.1, 2, BenchmarkLog::Full}}, + StudyConfiguration{std::string("Function"), 30U, false, 32U, + std::string("Distribution"), Align(16), 3U}, + {std::chrono::seconds(3), std::chrono::seconds(4)}}; +} + +static std::string serializeToString(const Study &S) { std::string Buffer; raw_string_ostream RSO(Buffer); json::OStream JOS(RSO); - SerializeToJson(S, JOS); + serializeToJson(S, JOS); return Buffer; } @@ -54,14 +55,25 @@ A, result_listener); } -auto Equals(const HostState &H) -> auto { +auto equals(const HostState &H) -> auto { return AllOf( Field(&HostState::CpuName, H.CpuName), Field(&HostState::CpuFrequency, H.CpuFrequency), Field(&HostState::Caches, Pointwise(EqualsCacheInfo(), H.Caches))); } -auto Equals(const BenchmarkOptions &BO) -> auto { +auto equals(const StudyConfiguration &SC) -> auto { + return AllOf( + Field(&StudyConfiguration::Function, SC.Function), + Field(&StudyConfiguration::NumTrials, SC.NumTrials), + Field(&StudyConfiguration::IsSweepMode, SC.IsSweepMode), + Field(&StudyConfiguration::SweepModeMaxSize, SC.SweepModeMaxSize), + Field(&StudyConfiguration::SizeDistributionName, SC.SizeDistributionName), + Field(&StudyConfiguration::AccessAlignment, SC.AccessAlignment), + Field(&StudyConfiguration::MemcmpMismatchAt, SC.MemcmpMismatchAt)); +} + +auto equals(const BenchmarkOptions &BO) -> auto { return AllOf( Field(&BenchmarkOptions::MinDuration, BO.MinDuration), Field(&BenchmarkOptions::MaxDuration, BO.MaxDuration), @@ -74,58 +86,33 @@ Field(&BenchmarkOptions::Log, BO.Log)); } -auto Equals(const SizeRange &SR) -> auto { - return AllOf(Field(&SizeRange::From, SR.From), Field(&SizeRange::To, SR.To), - Field(&SizeRange::Step, SR.Step)); -} - -auto Equals(const StudyConfiguration &SC) -> auto { - return AllOf( - Field(&StudyConfiguration::Runs, SC.Runs), - Field(&StudyConfiguration::BufferSize, SC.BufferSize), - Field(&StudyConfiguration::Size, Equals(SC.Size)), - Field(&StudyConfiguration::AddressAlignment, SC.AddressAlignment), - Field(&StudyConfiguration::MemsetValue, SC.MemsetValue), - Field(&StudyConfiguration::MemcmpMismatchAt, SC.MemcmpMismatchAt)); -} - -MATCHER(EqualsMeasurement, "") { - const Measurement &A = ::testing::get<0>(arg); - const Measurement &B = ::testing::get<1>(arg); - return ExplainMatchResult(AllOf(Field(&Measurement::Size, B.Size), - Field(&Measurement::Runtime, B.Runtime)), - A, result_listener); +auto equals(const Runtime &RI) -> auto { + return AllOf(Field(&Runtime::Host, equals(RI.Host)), + Field(&Runtime::BufferSize, RI.BufferSize), + Field(&Runtime::BatchParameterCount, RI.BatchParameterCount), + Field(&Runtime::BenchmarkOptions, equals(RI.BenchmarkOptions))); } -MATCHER(EqualsFunctions, "") { - const FunctionMeasurements &A = ::testing::get<0>(arg); - const FunctionMeasurements &B = ::testing::get<1>(arg); - return ExplainMatchResult( - AllOf(Field(&FunctionMeasurements::Name, B.Name), - Field(&FunctionMeasurements::Measurements, - Pointwise(EqualsMeasurement(), B.Measurements))), - A, result_listener); -} - -auto Equals(const Study &S) -> auto { - return AllOf( - Field(&Study::Host, Equals(S.Host)), - Field(&Study::Options, Equals(S.Options)), - Field(&Study::Configuration, Equals(S.Configuration)), - Field(&Study::Functions, Pointwise(EqualsFunctions(), S.Functions))); +auto equals(const Study &S) -> auto { + return AllOf(Field(&Study::StudyName, S.StudyName), + Field(&Study::Runtime, equals(S.Runtime)), + Field(&Study::Configuration, equals(S.Configuration)), + Field(&Study::Measurements, S.Measurements)); } TEST(JsonTest, RoundTrip) { const Study S = getStudy(); - auto StudyOrError = ParseJsonStudy(SerializeToString(S)); + const auto Serialized = serializeToString(S); + auto StudyOrError = parseJsonStudy(Serialized); if (auto Err = StudyOrError.takeError()) - EXPECT_FALSE(Err) << "Unexpected error"; + EXPECT_FALSE(Err) << "Unexpected error : " << Err << "\n" << Serialized; const Study &Parsed = *StudyOrError; - EXPECT_THAT(Parsed, Equals(S)); + EXPECT_THAT(Parsed, equals(S)) << Serialized << "\n" + << serializeToString(Parsed); } TEST(JsonTest, SupplementaryField) { - auto Failure = ParseJsonStudy(R"({ + auto Failure = parseJsonStudy(R"({ "UnknownField": 10 } )"); @@ -133,17 +120,19 @@ } TEST(JsonTest, InvalidType) { - auto Failure = ParseJsonStudy(R"({ - "Options": 1 + auto Failure = parseJsonStudy(R"({ + "Runtime": 1 } )"); EXPECT_EQ(toString(Failure.takeError()), "Expected JSON Object"); } TEST(JsonTest, InvalidDuration) { - auto Failure = ParseJsonStudy(R"({ - "Options": { - "MinDuration": "Duration should be a Number" + auto Failure = parseJsonStudy(R"({ + "Runtime": { + "BenchmarkOptions": { + "MinDuration": "Duration should be a Number" + } } } )"); @@ -151,9 +140,9 @@ } TEST(JsonTest, InvalidAlignType) { - auto Failure = ParseJsonStudy(R"({ - "Configuration":{ - "AddressAlignment": "Align should be an Integer" + auto Failure = parseJsonStudy(R"({ + "Configuration": { + "AccessAlignment": "Align should be an Integer" } } )"); @@ -161,9 +150,9 @@ } TEST(JsonTest, InvalidAlign) { - auto Failure = ParseJsonStudy(R"({ - "Configuration":{ - "AddressAlignment":3 + auto Failure = parseJsonStudy(R"({ + "Configuration": { + "AccessAlignment": 3 } } )"); @@ -172,9 +161,11 @@ } TEST(JsonTest, InvalidBenchmarkLogType) { - auto Failure = ParseJsonStudy(R"({ - "Options":{ - "Log": 3 + auto Failure = parseJsonStudy(R"({ + "Runtime": { + "BenchmarkOptions":{ + "Log": 3 + } } } )"); @@ -183,9 +174,11 @@ } TEST(JsonTest, InvalidBenchmarkLog) { - auto Failure = ParseJsonStudy(R"({ - "Options":{ - "Log": "Unknown" + auto Failure = parseJsonStudy(R"({ + "Runtime": { + "BenchmarkOptions":{ + "Log": "Unknown" + } } } )"); diff --git a/libc/benchmarks/LibcBenchmark.h b/libc/benchmarks/LibcBenchmark.h --- a/libc/benchmarks/LibcBenchmark.h +++ b/libc/benchmarks/LibcBenchmark.h @@ -41,10 +41,6 @@ namespace llvm { namespace libc_benchmarks { -// Makes sure the binary was compiled in release mode and that frequency -// governor is set on performance. -void checkRequirements(); - using Duration = std::chrono::duration; enum class BenchmarkLog { @@ -318,6 +314,10 @@ return {llvm::ArrayRef(Container.cbegin(), Container.cend()), Size}; } +// Makes sure the binary was compiled in release mode and that frequency +// governor is set on performance. +void checkRequirements(); + } // namespace libc_benchmarks } // namespace llvm diff --git a/libc/benchmarks/LibcMemoryBenchmark.h b/libc/benchmarks/LibcMemoryBenchmark.h --- a/libc/benchmarks/LibcMemoryBenchmark.h +++ b/libc/benchmarks/LibcMemoryBenchmark.h @@ -13,7 +13,6 @@ #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H #include "LibcBenchmark.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Alignment.h" #include @@ -26,66 +25,79 @@ // Configuration //-------------- -// Specifies a range of sizes to explore. -struct SizeRange { - uint32_t From = 0; // Inclusive - uint32_t To = 1024; // Inclusive - uint32_t Step = 1; +struct StudyConfiguration { + // One of 'memcpy', 'memset', 'memcmp'. + // The underlying implementation is always the llvm libc one. + // e.g. 'memcpy' will test '__llvm_libc::memcpy' + std::string Function; + + // The number of trials to run for this benchmark. + // If in SweepMode, each individual sizes are measured 'NumTrials' time. + // i.e 'NumTrials' measurements for 0, 'NumTrials' measurements for 1 ... + uint32_t NumTrials = 1; + + // Toggles between Sweep Mode and Distribution Mode (default). + // See 'SweepModeMaxSize' and 'SizeDistributionName' below. + bool IsSweepMode = false; + + // Maximum size to use when measuring a ramp of size values (SweepMode). + // The benchmark measures all sizes from 0 to SweepModeMaxSize. + // Note: in sweep mode the same size is sampled several times in a row this + // will allow the processor to learn it and optimize the branching pattern. + // The resulting measurement is likely to be idealized. + uint32_t SweepModeMaxSize = 0; // inclusive + + // The name of the distribution to be used to randomize the size parameter. + // This is used when SweepMode is false (default). + std::string SizeDistributionName; + + // This parameter allows to control how the buffers are accessed during + // benchmark: + // None : Use a fixed address that is at least cache line aligned, + // 1 : Use random address, + // >1 : Use random address aligned to value. + MaybeAlign AccessAlignment = None; + + // When Function == 'memcmp', this is the buffers mismatch position. + // 0 : Buffers always compare equal, + // >0 : Buffers compare different at byte N-1. + uint32_t MemcmpMismatchAt = 0; }; -// An object to define how to test a memory function. -struct StudyConfiguration { - // The number of run for the study. - uint32_t Runs = 1; - - // The size of the buffers (1 buffer for memset but 2 for memcpy or memcmp). - // When testing small sizes, it's important to keep the total allocated - // size under the size of the L1 cache (usually 16 or 32KiB). The framework - // will also use 2KiB of additional L1 memory to store the function - // parameters. - uint32_t BufferSize = 8192; - - // The range of sizes to exercise. - SizeRange Size; - - MaybeAlign AddressAlignment; // Unset : Use start of buffer which is at - // least cache line aligned) - // 1 : Use random address, - // >1 : Use random address aligned to value. - - // The value to use for memset. - uint8_t MemsetValue = 0; - - // The mismatch position for memcmp. - uint32_t MemcmpMismatchAt = 0; // 0 : Buffer compare equal, - // >0 : Buffer compare different at byte N-1. +struct Runtime { + // Details about the Host (cpu name, cpu frequency, cache hierarchy). + HostState Host; + + // The framework will populate this value so all data accessed during the + // benchmark will stay in L1 data cache. This includes bookkeeping data. + uint32_t BufferSize = 0; + + // This is the number of distinct parameters used in a single batch. + // The framework always tests a batch of randomized parameter to prevent the + // cpu from learning branching patterns. + uint32_t BatchParameterCount = 0; + + // The benchmark options that were used to perform the measurement. + // This is decided by the framework. + BenchmarkOptions BenchmarkOptions; }; //-------- // Results //-------- -// The time to run one iteration of the function under test for the specified -// Size. -struct Measurement { - uint32_t Size = 0; - Duration Runtime = {}; -}; - -// The measurements for a specific function. -struct FunctionMeasurements { - std::string Name; - std::vector Measurements; -}; - // The root object containing all the data (configuration and measurements). struct Study { - HostState Host; - BenchmarkOptions Options; + std::string StudyName; + Runtime Runtime; StudyConfiguration Configuration; - SmallVector Functions; + std::vector Measurements; }; +//------ +// Utils +//------ + // Provides an aligned, dynamically allocated buffer. class AlignedBuffer { char *const Buffer = nullptr; @@ -95,7 +107,8 @@ static constexpr size_t Alignment = 1024; explicit AlignedBuffer(size_t Size) - : Buffer(static_cast(aligned_alloc(1024, Size))), Size(Size) {} + : Buffer(static_cast(aligned_alloc(Alignment, Size))), + Size(Size) {} ~AlignedBuffer() { free(Buffer); } inline char *operator+(size_t Index) { return Buffer + Index; } @@ -106,36 +119,6 @@ inline char *end() { return Buffer + Size; } }; -// Implements the ParameterProvider abstraction needed by the `benchmark` -// function. This implementation makes sure that all parameters will fit into -// `StorageSize` bytes. The total memory accessed during benchmark should be -// less than the data L1 cache, that is the storage for the ParameterProvider -// and the memory buffers. -template -class SmallParameterProvider { - using ParameterType = typename Context::ParameterType; - ByteConstrainedArray Parameters; - size_t LastIterations; - Context &Ctx; - -public: - explicit SmallParameterProvider(Context &C) : Ctx(C) {} - SmallParameterProvider(const SmallParameterProvider &) = delete; - SmallParameterProvider &operator=(const SmallParameterProvider &) = delete; - - // Useful to compute the histogram of the size parameter. - CircularArrayRef getLastBatch() const { - return cycle(Parameters, LastIterations); - } - - // Implements the interface needed by the `benchmark` function. - CircularArrayRef generateBatch(size_t Iterations) { - LastIterations = Iterations; - Ctx.Randomize(Parameters); - return getLastBatch(); - } -}; - // Helper to generate random buffer offsets that satisfy the configuration // constraints. class OffsetDistribution { @@ -143,7 +126,8 @@ uint32_t Factor; public: - explicit OffsetDistribution(const StudyConfiguration &Conf); + explicit OffsetDistribution(size_t BufferSize, size_t MaxSizeValue, + MaybeAlign AccessAlignment); template uint32_t operator()(Generator &G) { return Distribution(G) * Factor; @@ -159,7 +143,8 @@ const uint32_t MismatchAt; public: - explicit MismatchOffsetDistribution(const StudyConfiguration &Conf); + explicit MismatchOffsetDistribution(size_t BufferSize, size_t MaxSizeValue, + size_t MismatchAt); explicit operator bool() const { return !MismatchIndices.empty(); } diff --git a/libc/benchmarks/LibcMemoryBenchmark.cpp b/libc/benchmarks/LibcMemoryBenchmark.cpp --- a/libc/benchmarks/LibcMemoryBenchmark.cpp +++ b/libc/benchmarks/LibcMemoryBenchmark.cpp @@ -20,40 +20,42 @@ // When alignment is set, the distribution is scaled down by `Factor` and scaled // up again by the same amount during sampling. static std::uniform_int_distribution -GetOffsetDistribution(const StudyConfiguration &Conf) { - if (Conf.AddressAlignment && - *Conf.AddressAlignment > AlignedBuffer::Alignment) +getOffsetDistribution(size_t BufferSize, size_t MaxSizeValue, + MaybeAlign AccessAlignment) { + if (AccessAlignment && *AccessAlignment > AlignedBuffer::Alignment) report_fatal_error( - "AddressAlignment must be less or equal to AlignedBuffer::Alignment"); - if (!Conf.AddressAlignment) + "AccessAlignment must be less or equal to AlignedBuffer::Alignment"); + if (!AccessAlignment) return std::uniform_int_distribution(0, 0); // Always 0. // If we test up to Size bytes, the returned offset must stay under // BuffersSize - Size. - int64_t MaxOffset = Conf.BufferSize; - MaxOffset -= Conf.Size.To; + int64_t MaxOffset = BufferSize; + MaxOffset -= MaxSizeValue; MaxOffset -= 1; if (MaxOffset < 0) report_fatal_error( "BufferSize too small to exercise specified Size configuration"); - MaxOffset /= Conf.AddressAlignment->value(); + MaxOffset /= AccessAlignment->value(); return std::uniform_int_distribution(0, MaxOffset); } -OffsetDistribution::OffsetDistribution(const StudyConfiguration &Conf) - : Distribution(GetOffsetDistribution(Conf)), - Factor(Conf.AddressAlignment.valueOrOne().value()) {} +OffsetDistribution::OffsetDistribution(size_t BufferSize, size_t MaxSizeValue, + MaybeAlign AccessAlignment) + : Distribution( + getOffsetDistribution(BufferSize, MaxSizeValue, AccessAlignment)), + Factor(AccessAlignment.valueOrOne().value()) {} // Precomputes offset where to insert mismatches between the two buffers. -MismatchOffsetDistribution::MismatchOffsetDistribution( - const StudyConfiguration &Conf) - : MismatchAt(Conf.MemcmpMismatchAt) { +MismatchOffsetDistribution::MismatchOffsetDistribution(size_t BufferSize, + size_t MaxSizeValue, + size_t MismatchAt) + : MismatchAt(MismatchAt) { if (MismatchAt <= 1) return; - const auto ToSize = Conf.Size.To; - for (size_t I = ToSize + 1; I < Conf.BufferSize; I += ToSize) + for (size_t I = MaxSizeValue + 1; I < BufferSize; I += MaxSizeValue) MismatchIndices.push_back(I); if (MismatchIndices.empty()) - llvm::report_fatal_error("Unable to generate mismatch"); + report_fatal_error("Unable to generate mismatch"); MismatchIndexSelector = std::uniform_int_distribution(0, MismatchIndices.size() - 1); } diff --git a/libc/benchmarks/LibcMemoryBenchmarkMain.h b/libc/benchmarks/LibcMemoryBenchmarkMain.h deleted file mode 100644 --- a/libc/benchmarks/LibcMemoryBenchmarkMain.h +++ /dev/null @@ -1,36 +0,0 @@ -//===-- BenchmarkRunner interface -------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H -#define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H - -#include "LibcBenchmark.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" - -namespace llvm { -namespace libc_benchmarks { - -// Each memory function benchmark implements this interface. -// It is used by the main function to run all benchmarks in a uniform manner. -class BenchmarkRunner { -public: - virtual ~BenchmarkRunner() {} - - // Returns a list of all available functions to test. - virtual ArrayRef getFunctionNames() const = 0; - - // Performs the benchmarking for a particular FunctionName and Size. - virtual BenchmarkResult benchmark(const BenchmarkOptions &Options, - StringRef FunctionName, size_t Size) = 0; -}; - -} // namespace libc_benchmarks -} // namespace llvm - -#endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H diff --git a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp --- a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp +++ b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "LibcMemoryBenchmarkMain.h" #include "JSON.h" #include "LibcBenchmark.h" #include "LibcMemoryBenchmark.h" +#include "MemorySizeDistributions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -17,70 +17,310 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include +namespace __llvm_libc { + +extern void *memcpy(void *__restrict, const void *__restrict, size_t); +extern void *memset(void *, int, size_t); + +} // namespace __llvm_libc namespace llvm { namespace libc_benchmarks { +enum Function { memcpy, memset }; + +static cl::opt + StudyName("study-name", cl::desc("The name for this study"), cl::Required); + +static cl::opt + MemoryFunction("function", cl::desc("Sets the function to benchmark:"), + cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"), + clEnumVal(memset, "__llvm_libc::memset")), + cl::Required); + static cl::opt - Configuration("conf", cl::desc("Specify configuration filename"), - cl::value_desc("filename"), cl::init("")); + SizeDistributionName("size-distribution-name", + cl::desc("The name of the distribution to use")); + +static cl::opt + SweepMode("sweep-mode", + cl::desc("If set, benchmark all sizes from 0 to sweep-max-size")); + +static cl::opt + SweepMaxSize("sweep-max-size", + cl::desc("The maximum size to use in sweep-mode"), + cl::init(256)); -static cl::opt Output("o", cl::desc("Specify output filename"), +static cl::opt + AlignedAccess("aligned-access", + cl::desc("The alignment to use when accessing the buffers\n" + "Default is unaligned\n" + "Use 0 to disable address randomization"), + cl::init(1)); + +static cl::opt Output("output", + cl::desc("Specify output filename"), cl::value_desc("filename"), cl::init("-")); -extern std::unique_ptr -getRunner(const StudyConfiguration &Conf); +static cl::opt + NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"), + cl::init(1)); -void Main() { -#ifndef NDEBUG - static_assert( - false, - "For reproducibility benchmarks should not be compiled in DEBUG mode."); -#endif - checkRequirements(); - ErrorOr> MB = - MemoryBuffer::getFileOrSTDIN(Configuration); - if (!MB) - report_fatal_error( - Twine("Could not open configuration file: ").concat(Configuration)); - auto ErrorOrStudy = ParseJsonStudy((*MB)->getBuffer()); - if (!ErrorOrStudy) - report_fatal_error(ErrorOrStudy.takeError()); - - const auto StudyPrototype = *ErrorOrStudy; - - Study S; - S.Host = HostState::get(); - S.Options = StudyPrototype.Options; - S.Configuration = StudyPrototype.Configuration; - - const auto Runs = S.Configuration.Runs; - const auto &SR = S.Configuration.Size; - std::unique_ptr Runner = getRunner(S.Configuration); - const size_t TotalSteps = - Runner->getFunctionNames().size() * Runs * ((SR.To - SR.From) / SR.Step); - size_t Steps = 0; - for (auto FunctionName : Runner->getFunctionNames()) { - FunctionMeasurements FM; - FM.Name = std::string(FunctionName); - for (size_t Run = 0; Run < Runs; ++Run) { - for (uint32_t Size = SR.From; Size <= SR.To; Size += SR.Step) { - const auto Result = Runner->benchmark(S.Options, FunctionName, Size); - Measurement Measurement; - Measurement.Runtime = Result.BestGuess; - Measurement.Size = Size; - FM.Measurements.push_back(Measurement); - outs() << format("%3d%% run: %2d / %2d size: %5d ", - (Steps * 100 / TotalSteps), Run, Runs, Size) - << FunctionName - << " \r"; - ++Steps; +static constexpr int64_t KiB = 1024; +static constexpr int64_t ParameterStorageBytes = 4 * KiB; +static constexpr int64_t L1LeftAsideBytes = 1 * KiB; + +struct ParameterType { + unsigned OffsetBytes : 16; // max : 16 KiB - 1 + unsigned SizeBytes : 16; // max : 16 KiB - 1 +}; + +struct MemcpyBenchmark { + static constexpr auto GetDistributions = &getMemcpySizeDistributions; + static constexpr size_t BufferCount = 2; + static void amend(Study &S) { S.Configuration.Function = "memcpy"; } + + MemcpyBenchmark(const size_t BufferSize) + : SrcBuffer(BufferSize), DstBuffer(BufferSize) {} + + inline auto functor() { + return [this](ParameterType P) { + __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes, + P.SizeBytes); + return DstBuffer + P.OffsetBytes; + }; + } + + AlignedBuffer SrcBuffer; + AlignedBuffer DstBuffer; +}; + +struct MemsetBenchmark { + static constexpr auto GetDistributions = &getMemsetSizeDistributions; + static constexpr size_t BufferCount = 1; + static void amend(Study &S) { S.Configuration.Function = "memset"; } + + MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {} + + inline auto functor() { + return [this](ParameterType P) { + __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF, + P.SizeBytes); + return DstBuffer + P.OffsetBytes; + }; + } + + AlignedBuffer DstBuffer; +}; + +template struct Harness : Benchmark { + using Benchmark::functor; + + Harness(const size_t BufferSize, size_t BatchParameterCount, + std::function SizeSampler, + std::function OffsetSampler) + : Benchmark(BufferSize), BufferSize(BufferSize), + BatchParameterCount(BatchParameterCount), + Parameters(BatchParameterCount), SizeSampler(SizeSampler), + OffsetSampler(OffsetSampler) {} + + CircularArrayRef generateBatch(size_t Iterations) { + for (auto &P : Parameters) { + P.OffsetBytes = OffsetSampler(); + P.SizeBytes = SizeSampler(); + if (P.OffsetBytes + P.SizeBytes >= BufferSize) + report_fatal_error("Call would result in buffer overflow"); + } + return cycle(makeArrayRef(Parameters), Iterations); + } + +private: + const size_t BufferSize; + const size_t BatchParameterCount; + std::vector Parameters; + std::function SizeSampler; + std::function OffsetSampler; +}; + +struct IBenchmark { + virtual ~IBenchmark() {} + virtual Study run() = 0; +}; + +size_t getL1DataCacheSize() { + const std::vector &CacheInfos = HostState::get().Caches; + const auto IsL1DataCache = [](const CacheInfo &CI) { + return CI.Type == "Data" && CI.Level == 1; + }; + const auto CacheIt = find_if(CacheInfos, IsL1DataCache); + if (CacheIt != CacheInfos.end()) + return CacheIt->Size; + report_fatal_error("Unable to read L1 Cache Data Size"); +} + +template struct MemfunctionBenchmark : IBenchmark { + MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize()) + : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes), + BufferSize(AvailableSize / Benchmark::BufferCount), + BatchParameterCount(BufferSize / sizeof(ParameterType)) { + // Handling command line flags + if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100) + report_fatal_error("Not enough L1 cache"); + + if (!isPowerOfTwoOrZero(AlignedAccess)) + report_fatal_error(AlignedAccess.ArgStr + + Twine(" must be a power of two or zero")); + + const bool HasDistributionName = !SizeDistributionName.empty(); + if (SweepMode && HasDistributionName) + report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) + + "` or `--" + Twine(SizeDistributionName.ArgStr) + "`"); + + if (SweepMode) { + MaxSizeValue = SweepMaxSize; + } else { + std::map Map; + for (MemorySizeDistribution Distribution : Benchmark::GetDistributions()) + Map[Distribution.Name] = Distribution; + if (Map.count(SizeDistributionName) == 0) { + std::string Message; + raw_string_ostream Stream(Message); + Stream << "Unknown --" << SizeDistributionName.ArgStr << "='" + << SizeDistributionName << "', available distributions:\n"; + for (const auto &Pair : Map) + Stream << "'" << Pair.first << "'\n"; + report_fatal_error(Stream.str()); } + SizeDistribution = Map[SizeDistributionName]; + MaxSizeValue = SizeDistribution.Probabilities.size() - 1; } - S.Functions.push_back(std::move(FM)); + + // Setup study. + Study.StudyName = StudyName; + Runtime &RI = Study.Runtime; + RI.Host = HostState::get(); + RI.BufferSize = BufferSize; + RI.BatchParameterCount = BatchParameterCount; + + BenchmarkOptions &BO = RI.BenchmarkOptions; + BO.MinDuration = std::chrono::milliseconds(1); + BO.MaxDuration = std::chrono::seconds(1); + BO.MaxIterations = 10'000'000U; + BO.MinSamples = 4; + BO.MaxSamples = 1000; + BO.Epsilon = 0.01; // 1% + BO.ScalingFactor = 1.4; + + StudyConfiguration &SC = Study.Configuration; + SC.NumTrials = NumTrials; + SC.IsSweepMode = SweepMode; + if (SweepMode) + SC.SweepModeMaxSize = SweepMaxSize; + else + SC.SizeDistributionName = SizeDistributionName; + SC.AccessAlignment = MaybeAlign(AlignedAccess); + + // Delegate specific flags and configuration. + Benchmark::amend(Study); + } + + Study run() override { + if (SweepMode) + runSweepMode(); + else + runDistributionMode(); + return Study; + } + +private: + const int64_t AvailableSize; + const int64_t BufferSize; + const size_t BatchParameterCount; + size_t MaxSizeValue = 0; + MemorySizeDistribution SizeDistribution; + Study Study; + std::mt19937_64 Gen; + + static constexpr bool isPowerOfTwoOrZero(size_t Value) { + return (Value & (Value - 1U)) == 0; + } + + std::function geOffsetSampler() { + return [this]() { + static OffsetDistribution OD(BufferSize, MaxSizeValue, + Study.Configuration.AccessAlignment); + return OD(Gen); + }; + } + + std::function getSizeSampler() { + return [this]() { + static std::discrete_distribution Distribution( + SizeDistribution.Probabilities.begin(), + SizeDistribution.Probabilities.end()); + return Distribution(Gen); + }; + } + + void reportProgress(BenchmarkStatus BS) { + const size_t TotalSteps = Study.Measurements.capacity(); + const size_t Steps = Study.Measurements.size(); + const size_t Percent = 100 * Steps / TotalSteps; + size_t I = 0; + errs() << '['; + for (; I <= Percent; ++I) + errs() << '#'; + for (; I <= 100; ++I) + errs() << '_'; + errs() << "] " << Percent << "%\r"; + } + + void runTrials(const BenchmarkOptions &Options, + std::function SizeSampler, + std::function OffsetSampler) { + Harness B(BufferSize, BatchParameterCount, SizeSampler, + OffsetSampler); + for (size_t i = 0; i < NumTrials; ++i) { + const BenchmarkResult Result = benchmark(Options, B, B.functor()); + Study.Measurements.push_back(Result.BestGuess); + reportProgress(Result.TerminationStatus); + } + } + + void runSweepMode() { + Study.Measurements.reserve(NumTrials * SweepMaxSize); + + BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; + BO.MinDuration = std::chrono::milliseconds(1); + BO.InitialIterations = 100; + + for (size_t Size = 0; Size <= SweepMaxSize; ++Size) { + const auto SizeSampler = [Size]() { return Size; }; + runTrials(BO, SizeSampler, geOffsetSampler()); + } + } + + void runDistributionMode() { + Study.Measurements.reserve(NumTrials); + + BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; + BO.MinDuration = std::chrono::milliseconds(10); + BO.InitialIterations = BatchParameterCount * 10; + + runTrials(BO, getSizeSampler(), geOffsetSampler()); } +}; +std::unique_ptr getMemfunctionBenchmark() { + switch (MemoryFunction) { + case memcpy: + return std::make_unique>(); + case memset: + return std::make_unique>(); + } +} + +void writeStudy(const Study &S) { std::error_code EC; raw_fd_ostream FOS(Output, EC); if (EC) @@ -89,7 +329,13 @@ .concat(", ") .concat(Output)); json::OStream JOS(FOS); - SerializeToJson(S, JOS); + serializeToJson(S, JOS); +} + +void main() { + checkRequirements(); + auto MB = getMemfunctionBenchmark(); + writeStudy(MB->run()); } } // namespace libc_benchmarks @@ -97,6 +343,11 @@ int main(int argc, char **argv) { llvm::cl::ParseCommandLineOptions(argc, argv); - llvm::libc_benchmarks::Main(); +#ifndef NDEBUG + static_assert( + false, + "For reproducibility benchmarks should not be compiled in DEBUG mode."); +#endif + llvm::libc_benchmarks::main(); return EXIT_SUCCESS; } diff --git a/libc/benchmarks/LibcMemoryBenchmarkTest.cpp b/libc/benchmarks/LibcMemoryBenchmarkTest.cpp --- a/libc/benchmarks/LibcMemoryBenchmarkTest.cpp +++ b/libc/benchmarks/LibcMemoryBenchmarkTest.cpp @@ -34,22 +34,16 @@ } TEST(OffsetDistribution, AlignToBegin) { - StudyConfiguration Conf; - Conf.BufferSize = 8192; - Conf.AddressAlignment = None; - - OffsetDistribution OD(Conf); + const size_t BufferSize = 8192; + OffsetDistribution OD(BufferSize, 1024, None); std::default_random_engine Gen; for (size_t I = 0; I <= 10; ++I) EXPECT_EQ(OD(Gen), 0U); } TEST(OffsetDistribution, NoAlignment) { - StudyConfiguration Conf; - Conf.BufferSize = 8192; - Conf.Size.To = 1; - - OffsetDistribution OD(Conf); + const size_t BufferSize = 8192; + OffsetDistribution OD(BufferSize, 1, Align(1)); std::default_random_engine Gen; for (size_t I = 0; I <= 10; ++I) EXPECT_THAT(OD(Gen), AllOf(Ge(0U), Lt(8192U))); @@ -61,49 +55,42 @@ } TEST(OffsetDistribution, Aligned) { - StudyConfiguration Conf; - Conf.BufferSize = 8192; - Conf.AddressAlignment = Align(16); - Conf.Size.To = 1; - - OffsetDistribution OD(Conf); + const size_t BufferSize = 8192; + OffsetDistribution OD(BufferSize, 1, Align(16)); std::default_random_engine Gen; for (size_t I = 0; I <= 10; ++I) EXPECT_THAT(OD(Gen), AllOf(Ge(0U), Lt(8192U), IsDivisibleBy(16U))); } TEST(MismatchOffsetDistribution, EqualBufferDisablesDistribution) { - StudyConfiguration Conf; - Conf.MemcmpMismatchAt = 0; // buffer are equal. + const size_t BufferSize = 8192; + const uint32_t MismatchAt = 0; // buffer are equal. - MismatchOffsetDistribution MOD(Conf); + MismatchOffsetDistribution MOD(BufferSize, 1024, MismatchAt); EXPECT_FALSE(MOD); } TEST(MismatchOffsetDistribution, DifferentBufferDisablesDistribution) { - StudyConfiguration Conf; - Conf.MemcmpMismatchAt = 1; // buffer are different. + const size_t BufferSize = 8192; + const uint32_t MismatchAt = 1; // buffer are different. - MismatchOffsetDistribution MOD(Conf); + MismatchOffsetDistribution MOD(BufferSize, 1024, MismatchAt); EXPECT_FALSE(MOD); } TEST(MismatchOffsetDistribution, MismatchAt2) { - const uint32_t MismatchAt = 2; - const uint32_t ToSize = 4; - StudyConfiguration Conf; - Conf.BufferSize = 16; - Conf.MemcmpMismatchAt = MismatchAt; // buffer are different at position 2. - Conf.Size.To = ToSize; - - MismatchOffsetDistribution MOD(Conf); + const size_t BufferSize = 16; + const uint32_t MismatchAt = 2; // buffer are different at position 2. + const uint32_t MaxSize = 4; + + MismatchOffsetDistribution MOD(BufferSize, MaxSize, MismatchAt); EXPECT_TRUE(MOD); - // We test equality up to ToSize (=4) so we need spans of 4 equal bytes spaced - // by one mismatch. + // We test equality up to MaxSize (=4) so we need spans of 4 equal bytes + // spaced by one mismatch. EXPECT_THAT(MOD.getMismatchIndices(), ElementsAre(5, 9, 13)); std::default_random_engine Gen; for (size_t Iterations = 0; Iterations <= 10; ++Iterations) { - for (size_t Size = Conf.Size.From; Size <= ToSize; ++Size) { + for (size_t Size = 0; Size <= MaxSize; ++Size) { if (Size >= MismatchAt) EXPECT_THAT(MOD(Gen, Size), AnyOf(5 - MismatchAt, 9 - MismatchAt, 13 - MismatchAt)); diff --git a/libc/benchmarks/Memcmp.cpp b/libc/benchmarks/Memcmp.cpp deleted file mode 100644 --- a/libc/benchmarks/Memcmp.cpp +++ /dev/null @@ -1,87 +0,0 @@ -//===-- Benchmark memcmp implementation -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "LibcBenchmark.h" -#include "LibcMemoryBenchmark.h" -#include "LibcMemoryBenchmarkMain.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace libc_benchmarks { - -// The context encapsulates the buffers, parameters and the measure. -struct MemcmpContext : public BenchmarkRunner { - using FunctionPrototype = int (*)(const void *, const void *, size_t); - - struct ParameterType { - uint16_t Offset = 0; - }; - - explicit MemcmpContext(const StudyConfiguration &Conf) - : MOD(Conf), OD(Conf), ABuffer(Conf.BufferSize), BBuffer(Conf.BufferSize), - PP(*this) { - std::uniform_int_distribution Dis; - // Generate random buffer A. - for (size_t I = 0; I < Conf.BufferSize; ++I) - ABuffer[I] = Dis(Gen); - // Copy buffer A to B. - ::memcpy(BBuffer.begin(), ABuffer.begin(), Conf.BufferSize); - if (Conf.MemcmpMismatchAt == 0) - return; // all same. - else if (Conf.MemcmpMismatchAt == 1) - for (char &c : BBuffer) - ++c; // all different. - else - for (const auto I : MOD.getMismatchIndices()) - ++BBuffer[I]; - } - - // Needed by the ParameterProvider to update the current batch of parameter. - void Randomize(MutableArrayRef Parameters) { - if (MOD) - for (auto &P : Parameters) - P.Offset = MOD(Gen, CurrentSize); - else - for (auto &P : Parameters) - P.Offset = OD(Gen); - } - - ArrayRef getFunctionNames() const override { - static std::array kFunctionNames = {"memcmp"}; - return kFunctionNames; - } - - BenchmarkResult benchmark(const BenchmarkOptions &Options, - StringRef FunctionName, size_t Size) override { - CurrentSize = Size; - // FIXME: Add `bcmp` once we're guaranteed that the function is provided. - FunctionPrototype Function = - StringSwitch(FunctionName).Case("memcmp", &::memcmp); - return llvm::libc_benchmarks::benchmark( - Options, PP, [this, Function, Size](ParameterType p) { - return Function(ABuffer + p.Offset, BBuffer + p.Offset, Size); - }); - } - -private: - std::default_random_engine Gen; - MismatchOffsetDistribution MOD; - OffsetDistribution OD; - size_t CurrentSize = 0; - AlignedBuffer ABuffer; - AlignedBuffer BBuffer; - SmallParameterProvider PP; -}; - -std::unique_ptr getRunner(const StudyConfiguration &Conf) { - return std::make_unique(Conf); -} - -} // namespace libc_benchmarks -} // namespace llvm diff --git a/libc/benchmarks/Memcpy.cpp b/libc/benchmarks/Memcpy.cpp deleted file mode 100644 --- a/libc/benchmarks/Memcpy.cpp +++ /dev/null @@ -1,73 +0,0 @@ -//===-- Benchmark memcpy implementation -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "LibcBenchmark.h" -#include "LibcMemoryBenchmark.h" -#include "LibcMemoryBenchmarkMain.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/raw_ostream.h" -#include - -namespace __llvm_libc { -extern void *memcpy(void *__restrict, const void *__restrict, size_t); -} // namespace __llvm_libc - -namespace llvm { -namespace libc_benchmarks { - -// The context encapsulates the buffers, parameters and the measure. -struct MemcpyContext : public BenchmarkRunner { - using FunctionPrototype = void *(*)(void *, const void *, size_t); - - struct ParameterType { - uint16_t SrcOffset = 0; - uint16_t DstOffset = 0; - }; - - explicit MemcpyContext(const StudyConfiguration &Conf) - : OD(Conf), SrcBuffer(Conf.BufferSize), DstBuffer(Conf.BufferSize), - PP(*this) {} - - // Needed by the ParameterProvider to update the current batch of parameter. - void Randomize(MutableArrayRef Parameters) { - for (auto &P : Parameters) { - P.DstOffset = OD(Gen); - P.SrcOffset = OD(Gen); - } - } - - ArrayRef getFunctionNames() const override { - static std::array kFunctionNames = {"memcpy"}; - return kFunctionNames; - } - - BenchmarkResult benchmark(const BenchmarkOptions &Options, - StringRef FunctionName, size_t Size) override { - FunctionPrototype Function = StringSwitch(FunctionName) - .Case("memcpy", &__llvm_libc::memcpy); - return llvm::libc_benchmarks::benchmark( - Options, PP, [this, Function, Size](ParameterType p) { - Function(DstBuffer + p.DstOffset, SrcBuffer + p.SrcOffset, Size); - return DstBuffer + p.DstOffset; - }); - } - -private: - std::default_random_engine Gen; - OffsetDistribution OD; - AlignedBuffer SrcBuffer; - AlignedBuffer DstBuffer; - SmallParameterProvider PP; -}; - -std::unique_ptr getRunner(const StudyConfiguration &Conf) { - return std::make_unique(Conf); -} - -} // namespace libc_benchmarks -} // namespace llvm diff --git a/libc/benchmarks/MemorySizeDistributions.cpp b/libc/benchmarks/MemorySizeDistributions.cpp --- a/libc/benchmarks/MemorySizeDistributions.cpp +++ b/libc/benchmarks/MemorySizeDistributions.cpp @@ -22,6 +22,7 @@ static constexpr double kMemcpyGoogleS[] = {0.0184049, 0.0306748, 0.00613497, 0.0122699, 0.0306748, 0.0122699, 0.0245399, 0.0245399, 0.00613497, 0.0122699, 0.00613497, 0.0122699, 0.0490798, 0.0920245, 0.0122699, 0, 0.0122699, 0.00613497, 0.147239, 0.239264, 0.00613497, 0.0184049, 0.0245399, 0, 0.0122699, 0.00613497, 0.00613497, 0.0122699, 0.00613497, 0, 0, 0.00613497, 0, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00613497, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0.0184049, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0122699, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0.00613497, 0.00613497, 0, 0.0122699, 0.0122699, 0, 0, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00613497, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00613497}; static constexpr double kMemcpyGoogleU[] = {0.0177, 0.2767, 0.0548, 0.011, 0.0156, 0.1325, 0.0614, 0.0195, 0.1576, 0.0099, 0.0066, 0.0083, 0.0066, 0.0087, 0.0054, 0.0051, 0.0243, 0.0059, 0.0124, 0.0062, 0.0045, 0.0055, 0.0061, 0.0044, 0.0121, 0.0028, 0.0038, 0.0031, 0.0033, 0.0028, 0.0028, 0.0026, 0.0109, 0.0019, 0.0013, 0.0013, 0.0008, 0.001, 0.0009, 0.0011, 0.0003, 0.0094, 0.0003, 0.0008, 0.0004, 0.0003, 0.0008, 0.0014, 0.0007, 0.0004, 0.0001, 0.0003, 0.0002, 0.0004, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0001, 0.0002, 0.0097, 0.0002, 0.0001, 0.0001, 0.0003, 0.0001, 0.0001, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0004, 0, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002, 0.0001, 0.0005, 0, 0.0001, 0.0001, 0.0002, 0.0001, 0.0001, 0, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0001, 0.0001, 0.0014, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0, 0.0001, 0.0001, 0, 0.0033, 0, 0, 0.0004, 0.0001, 0.0001, 0, 0, 0.0002, 0.0001, 0, 0, 0.0001, 0.0001, 0, 0, 0.0011, 0, 0.0001, 0, 0, 0.0001, 0, 0, 0.0001, 0, 0, 0.0001, 0, 0, 0, 0, 0.0001, 0, 0, 0.0001, 0, 0, 0, 0.0001, 0, 0, 0.0001, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0.0001, 0, 0, 0.0001, 0.0001, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0.0001, 0.0001, 0, 0, 0.0001, 0.0045, 0, 0, 0.0001, 0, 0, 0.0001, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0.0003, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0048, 0, 0, 0, 0, 0, 0, 0, 0.0002, 0, 0, 0, 0, 0, 0.0001, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0013, 0.0001, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0.0005, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0.0001}; static constexpr double kMemcpyGoogleW[] = {0.0199353, 0, 0.0245151, 0, 0.102101, 0, 0.0301724, 0, 0.0347522, 0, 0.0309806, 0, 0.0242457, 0, 0.0239763, 0, 0.0360991, 0, 0.0358297, 0, 0.03125, 0, 0.0188578, 0, 0.0261315, 0, 0.028556, 0, 0.00942888, 0, 0.0118534, 0, 0.0282866, 0, 0.0172414, 0, 0.0180496, 0, 0.0185884, 0, 0.012931, 0, 0.0145474, 0, 0.00915948, 0, 0.0107759, 0, 0.0153556, 0, 0.0115841, 0, 0.00969828, 0, 0.0123922, 0, 0.0105065, 0, 0.00996767, 0, 0.0121228, 0, 0.0107759, 0, 0.0307112, 0, 0.0102371, 0, 0.00727371, 0, 0.0113147, 0, 0.00889009, 0, 0.00915948, 0, 0.00835129, 0, 0.00915948, 0, 0.0105065, 0, 0.00484914, 0, 0.00835129, 0, 0.00538793, 0, 0.00996767, 0, 0.00538793, 0, 0.00862069, 0, 0.00511853, 0, 0.00592672, 0, 0.00565733, 0, 0.00511853, 0, 0.0080819, 0, 0.00377155, 0, 0.00404095, 0, 0.00511853, 0, 0.00457974, 0, 0.00431034, 0, 0.00296336, 0, 0.00242457, 0, 0.00404095, 0, 0.00565733, 0, 0.00188578, 0, 0.00215517, 0, 0.00242457, 0, 0.0080819, 0, 0.00080819, 0, 0.00080819, 0, 0.000269397, 0, 0.00188578, 0, 0.00161638, 0, 0, 0, 0.000269397, 0, 0.00269397, 0, 0.00107759, 0, 0, 0, 0.000269397, 0, 0.000538793, 0, 0, 0, 0.000269397, 0, 0.00134698, 0, 0.00134698, 0, 0.000538793, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0.000538793, 0, 0.00215517, 0, 0.00080819, 0, 0.000269397, 0, 0.000269397, 0, 0.000538793, 0, 0.000538793, 0, 0.00107759, 0, 0.00161638, 0, 0.00134698, 0, 0.00296336, 0, 0.000538793, 0, 0.000269397, 0, 0.000269397, 0, 0.00134698, 0, 0, 0, 0.00107759, 0, 0.00161638, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0.00080819, 0, 0.000269397, 0, 0.00080819, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0, 0, 0.00161638, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0.000538793, 0, 0, 0, 0, 0, 0, 0, 0.00404095, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0.000269396, 0, 0.000538793, 0, 0.000269397, 0, 0.000538793, 0, 0.000269397, 0, 0, 0, 0.000538793, 0, 0.000269397, 0, 0, 0, 0.000538793, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0.000269396, 0, 0.00080819, 0, 0.000269397, 0, 0.000538793, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0.000538793, 0, 0.000269397, 0, 0.000538793, 0, 0, 0, 0, 0, 0.000269396, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000269396, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000538793, 0, 0, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0.000269397, 0, 0.000269397, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0.000269397, 0, 0, 0, 0, 0, 0.000269397, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00215517}; +static constexpr double kMemcpyUniform384To4096[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; static constexpr double kMemsetGoogleB[] = {0, 0, 0, 0, 0, 0, 0, 0, 0.0681818, 0, 0, 0, 0, 0, 0.5, 0, 0.0227273, 0, 0, 0, 0, 0, 0.0227273, 0, 0.0454545, 0.0227273, 0, 0, 0, 0, 0, 0.0227273, 0.0227273, 0, 0, 0, 0, 0, 0, 0.0227273, 0.0227273, 0, 0, 0, 0, 0, 0, 0, 0.0454545, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0227273, 0, 0, 0, 0, 0, 0, 0, 0.0227273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0681818, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0227273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0227273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0227273}; static constexpr double kMemsetGoogleD[] = {0.0243902, 0, 0, 0, 0, 0, 0, 0, 0.0487805, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0243902, 0, 0, 0, 0, 0, 0, 0.0609756, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0.0121951, 0, 0, 0, 0.0365854, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0, 0, 0, 0, 0.0609756, 0, 0, 0, 0, 0, 0, 0, 0.0243902, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0, 0, 0, 0, 0.0609756, 0, 0, 0, 0, 0, 0, 0, 0.0487805, 0, 0, 0, 0, 0, 0, 0, 0.0243902, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0853659, 0, 0, 0, 0, 0, 0, 0, 0.0487805, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0, 0, 0, 0, 0.0853659, 0, 0, 0, 0, 0, 0, 0, 0.0731707, 0, 0, 0, 0, 0, 0, 0, 0.0365854, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0243902, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0365854, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0121951, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0365854, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0365854}; static constexpr double kMemsetGoogleQ[] = {0, 0, 0.0043, 0.0055, 0.0033, 0.0025, 0.0037, 0.0026, 0.0038, 0.0024, 0.0023, 0.003, 0.0025, 0.0008, 0.001, 0.0012, 0.0035, 0.0013, 0.001, 0.0002, 0.0004, 0.0009, 0.0011, 0.0007, 0.0008, 0.0014, 0.001, 0.0005, 0.0006, 0, 0.0007, 0.0021, 0.2082, 0.0013, 0.0004, 0.0003, 0.0005, 0.0005, 0.0008, 0.0003, 0.0005, 0.0007, 0.0002, 0.0001, 0.0007, 0.0005, 0.0005, 0.001, 0.0007, 0, 0.0005, 0.0001, 0.0002, 0, 0.0003, 0.0004, 0.0543, 0, 0.0005, 0.0003, 0.0003, 0.0004, 0.0001, 0, 0.0602, 0.0001, 0.0002, 0, 0, 0, 0.0003, 0.0004, 0.1578, 0, 0.0003, 0.0002, 0.001, 0, 0, 0.0013, 0.1236, 0, 0.0001, 0.0002, 0.0028, 0.0002, 0, 0.0002, 0.033, 0.0002, 0, 0, 0.0002, 0.0001, 0.0002, 0, 0.0129, 0.0002, 0.0002, 0.0003, 0.0018, 0, 0, 0.0002, 0.0126, 0.0002, 0.0002, 0.0003, 0.0007, 0.0001, 0, 0.0004, 0.0166, 0, 0.0001, 0, 0.0014, 0.0001, 0, 0, 0.0002, 0, 0, 0, 0.002, 0.0066, 0, 0, 0, 0, 0, 0, 0.0005, 0, 0, 0, 0.0002, 0, 0, 0, 0.0005, 0, 0, 0.0005, 0.0002, 0, 0, 0, 0, 0, 0, 0, 0.0003, 0, 0, 0, 0.0013, 0, 0, 0, 0.0023, 0, 0, 0, 0.0017, 0, 0, 0, 0, 0, 0, 0, 0.0017, 0, 0, 0, 0.0064, 0, 0, 0, 0, 0, 0, 0, 0.0009, 0, 0, 0, 0.0005, 0, 0, 0, 0.0702, 0, 0, 0, 0.0015, 0, 0, 0, 0.0017, 0, 0, 0, 0.0018, 0, 0, 0, 0.0002, 0, 0, 0, 0.0028, 0, 0, 0, 0.003, 0, 0, 0, 0, 0, 0, 0, 0.0121, 0, 0, 0, 0, 0, 0, 0, 0.0039, 0, 0, 0, 0.0021, 0, 0, 0, 0.0005, 0, 0, 0, 0.0037, 0, 0, 0, 0.0045, 0, 0, 0, 0.0019, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0014, 0, 0, 0, 0, 0, 0, 0.0163, 0.0011, 0, 0, 0, 0.0037, 0, 0, 0, 0.0006, 0, 0, 0, 0.0005, 0, 0, 0, 0.0015, 0, 0, 0, 0.0014, 0, 0, 0, 0.0044, 0, 0, 0, 0.002, 0, 0, 0, 0.0027, 0, 0, 0, 0, 0, 0, 0, 0.001, 0, 0, 0, 0, 0, 0, 0, 0.0026, 0, 0, 0, 0.0004, 0, 0, 0, 0.0023, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0003, 0, 0, 0, 0.0002, 0, 0, 0, 0.0028, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0009, 0, 0, 0, 0.0006, 0, 0, 0, 0, 0, 0, 0, 0.0002, 0, 0, 0, 0, 0, 0, 0, 0.0012, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0003, 0, 0, 0, 0, 0, 0, 0, 0.0008, 0, 0, 0, 0, 0, 0, 0, 0.0015, 0, 0, 0, 0.002, 0, 0, 0, 0.0007, 0, 0, 0, 0, 0, 0, 0, 0.0016, 0, 0, 0, 0, 0, 0, 0, 0.0009, 0, 0, 0, 0, 0, 0, 0, 0.001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0019, 0, 0, 0, 0, 0, 0, 0, 0.0007, 0, 0, 0, 0, 0, 0, 0, 0.0002, 0, 0, 0, 0.0286, 0, 0, 0, 0.0006, 0, 0, 0, 0, 0, 0, 0, 0.0007, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0.0002, 0, 0, 0, 0, 0, 0, 0, 0.0022, 0, 0, 0, 0, 0, 0, 0, 0.0038, 0, 0, 0, 0.0003, 0, 0, 0, 0.0004, 0, 0, 0, 0, 0, 0, 0, 0.0006, 0, 0, 0, 0, 0, 0, 0, 0.0009, 0, 0, 0, 0, 0, 0, 0, 0.0001, 0, 0, 0, 0, 0, 0, 0, 0.001, 0, 0, 0, 0, 0, 0, 0, 0.0002, 0, 0, 0, 0.0005, 0, 0, 0, 0.001, 0, 0, 0, 0.0018, 0, 0, 0, 0, 0, 0, 0, 0.0005, 0, 0, 0, 0, 0, 0, 0, 0.0002, 0, 0, 0, 0, 0, 0, 0, 0.0003}; @@ -30,11 +31,16 @@ ArrayRef getMemcpySizeDistributions() { static constexpr MemorySizeDistribution kDistributions[] = { - {"memcpy Google A", kMemcpyGoogleA}, {"memcpy Google B", kMemcpyGoogleB}, - {"memcpy Google D", kMemcpyGoogleD}, {"memcpy Google L", kMemcpyGoogleL}, - {"memcpy Google M", kMemcpyGoogleM}, {"memcpy Google Q", kMemcpyGoogleQ}, - {"memcpy Google S", kMemcpyGoogleS}, {"memcpy Google U", kMemcpyGoogleU}, + {"memcpy Google A", kMemcpyGoogleA}, + {"memcpy Google B", kMemcpyGoogleB}, + {"memcpy Google D", kMemcpyGoogleD}, + {"memcpy Google L", kMemcpyGoogleL}, + {"memcpy Google M", kMemcpyGoogleM}, + {"memcpy Google Q", kMemcpyGoogleQ}, + {"memcpy Google S", kMemcpyGoogleS}, + {"memcpy Google U", kMemcpyGoogleU}, {"memcpy Google W", kMemcpyGoogleW}, + {"uniform 384 to 4096", kMemcpyUniform384To4096}, }; return kDistributions; } diff --git a/libc/benchmarks/Memset.cpp b/libc/benchmarks/Memset.cpp deleted file mode 100644 --- a/libc/benchmarks/Memset.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===-- Benchmark memset implementation -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "LibcBenchmark.h" -#include "LibcMemoryBenchmark.h" -#include "LibcMemoryBenchmarkMain.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/raw_ostream.h" - -namespace __llvm_libc { -void *memset(void *, int, size_t); -} // namespace __llvm_libc - -namespace llvm { -namespace libc_benchmarks { - -// The context encapsulates the buffers, parameters and the measure. -struct MemsetContext : public BenchmarkRunner { - using FunctionPrototype = void *(*)(void *, int, size_t); - - struct ParameterType { - uint16_t DstOffset = 0; - }; - - explicit MemsetContext(const StudyConfiguration &Conf) - : OD(Conf), DstBuffer(Conf.BufferSize), MemsetValue(Conf.MemsetValue), - PP(*this) {} - - // Needed by the ParameterProvider to update the current batch of parameter. - void Randomize(MutableArrayRef Parameters) { - for (auto &P : Parameters) { - P.DstOffset = OD(Gen); - } - } - - ArrayRef getFunctionNames() const override { - static std::array kFunctionNames = {"memset"}; - return kFunctionNames; - } - - BenchmarkResult benchmark(const BenchmarkOptions &Options, - StringRef FunctionName, size_t Size) override { - FunctionPrototype Function = StringSwitch(FunctionName) - .Case("memset", &__llvm_libc::memset); - return llvm::libc_benchmarks::benchmark( - Options, PP, [this, Function, Size](ParameterType p) { - Function(DstBuffer + p.DstOffset, MemsetValue, Size); - return DstBuffer + p.DstOffset; - }); - } - -private: - std::default_random_engine Gen; - OffsetDistribution OD; - AlignedBuffer DstBuffer; - const uint8_t MemsetValue; - SmallParameterProvider PP; -}; - -std::unique_ptr getRunner(const StudyConfiguration &Conf) { - return std::make_unique(Conf); -} - -} // namespace libc_benchmarks -} // namespace llvm diff --git a/libc/benchmarks/RATIONALE.md b/libc/benchmarks/RATIONALE.md --- a/libc/benchmarks/RATIONALE.md +++ b/libc/benchmarks/RATIONALE.md @@ -33,7 +33,7 @@ ## Challenges -As seen in the [README.md](README.md#benchmarking-regimes) the microbenchmarking +As seen in the [README.md](README.md#stochastic-mode) the microbenchmarking facility should focus on measuring **low latency code**. If copying a few bytes takes in the order of a few cycles, the benchmark should be able to **measure accurately down to the cycle**. @@ -76,7 +76,7 @@ meaning. Although we want to benchmark `llvm-libc` memory functions for all available [target triples](https://clang.llvm.org/docs/CrossCompilation.html#target-triple), there -are **no guarantees that the counter we're interested in is available.** +are **no guarantees that the counter we're interested in is available.** ### Additional imprecisions diff --git a/libc/benchmarks/README.md b/libc/benchmarks/README.md --- a/libc/benchmarks/README.md +++ b/libc/benchmarks/README.md @@ -1,65 +1,59 @@ # Libc mem* benchmarks -This framework has been designed to evaluate and compare relative performance of -memory function implementations on a particular host. +This framework has been designed to evaluate and compare relative performance of memory function implementations on a particular machine. -It will also be use to track implementations performances over time. +It relies on two tools: + - `libc-benchmark-main` a C++ benchmarking utility producing raw measurements, + - `libc-benchmark-analysis.py3` a tool to process the measurements into reports. -## Quick start +## Benchmarking tool ### Setup -**Python 2** [being deprecated](https://www.python.org/doc/sunset-python-2/) it is -advised to used **Python 3**. - -Then make sure to have `matplotlib`, `scipy` and `numpy` setup correctly: - ```shell -apt-get install python3-pip -pip3 install matplotlib scipy numpy +cd llvm-project +cmake -B/tmp/build -Sllvm -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;libc' -DCMAKE_BUILD_TYPE=Release -G Ninja +ninja -C /tmp/build libc-benchmark-main ``` -You may need `python3-gtk` or similar package for displaying benchmark results. - -To get good reproducibility it is important to make sure that the system runs in -`performance` mode. This is achieved by running: +> Note: The machine should run in `performance` mode. This is achieved by running: ```shell cpupower frequency-set --governor performance ``` -### Run and display `memcpy` benchmark +### Usage -The following commands will run the benchmark and display a 95 percentile -confidence interval curve of **time per copied bytes**. It also features **host -informations** and **benchmarking configuration**. +`libc-benchmark-main` can run in two modes: + - **stochastic mode** returns the average time per call for a particular size distribution, + - **sweep mode** returns the average time per size over a range of sizes. -```shell -cd llvm-project -cmake -B/tmp/build -Sllvm -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;libc' -DCMAKE_BUILD_TYPE=Release -G Ninja -ninja -C /tmp/build display-libc-memcpy-benchmark-small -``` +The tool requires the following flags to be set: + - `--study-name`: a name to identify a run and provide label during analysis, + - `--function`: the name of the function under test. -The display target will attempt to open a window on the machine where you're -running the benchmark. If this may not work for you then you may want `render` -or `run` instead as detailed below. +It also provides optional flags: + - `--num-trials`: repeats the benchmark more times, the analysis tool can take this into account and give confidence intervals. + - `--output`: specifies a file to write the report - or standard output if not set. + - `--aligned-access`: The alignment to use when accessing the buffers, default is unaligned, 0 disables address randomization. -## Benchmarking targets +> Note: `--function` takes a generic function name like `memcpy` or `memset` but the actual function being tested is the llvm-libc implementation (e.g. `__llvm_libc::memcpy`). -The benchmarking process occurs in two steps: +### Stochastic mode -1. Benchmark the functions and produce a `json` file -2. Display (or renders) the `json` file +This is the preferred mode to use. The function parameters are randomized and the branch predictor is less likely to kick in. -Targets are of the form `-libc--benchmark-` +```shell +/tmp/build/bin/libc-benchmark-main \ + --study-name="new memcpy" \ + --function=memcpy \ + --size-distribution-name="memcpy Google A" \ + --num-trials=30 \ + --output=/tmp/benchmark_result.json +``` - - `action` is one of : - - `run`, runs the benchmark and writes the `json` file - - `display`, displays the graph on screen - - `render`, renders the graph on disk as a `png` file - - `function` is one of : `memcpy`, `memcmp`, `memset` - - `configuration` is one of : `small`, `big` +The `--size-distribution-name` flag is mandatory and points to one of the [predefined distribution](libc/benchmarks/MemorySizeDistributions.h). -## Benchmarking regimes +> Note: These distributions are gathered from several important binaries at Google (servers, databases, realtime and batch jobs) and reflect the importance of focusing on small sizes. Using a profiler to observe size distributions for calls into libc functions, it was found most operations act on a small number of bytes. @@ -70,37 +64,48 @@ memset | 91% | 99.9% memcmp1 | 99.5% | ~100% -Benchmarking configurations come in two flavors: - - - [small](libc/utils/benchmarks/configuration_small.json) - - Exercises sizes up to `1KiB`, representative of normal usage - - The data is kept in the `L1` cache to prevent measuring the memory - subsystem - - [big](libc/utils/benchmarks/configuration_big.json) - - Exercises sizes up to `32MiB` to test large operations - - Caching effects can show up here which prevents comparing different hosts - _1 - The size refers to the size of the buffers to compare and not the number of bytes until the first difference._ -## Superposing curves +### Sweep mode + +This mode is used to measure call latency per size for a certain range of sizes. Because it exercises the same size over and over again the branch predictor can kick in. It can still be useful to compare strength and weaknesses of particular implementations. + +```shell +/tmp/build/bin/libc-benchmark-main \ + --study-name="new memcpy" \ + --function=memcpy \ + --sweep-mode \ + --sweep-max-size=128 \ + --output=/tmp/benchmark_result.json +``` + +## Analysis tool -It is possible to **merge** several `json` files into a single graph. This is -useful to **compare** implementations. +### Setup + +Make sure to have `matplotlib`, `pandas` and `seaborn` setup correctly: + +```shell +apt-get install python3-pip +pip3 install matplotlib pandas seaborn +``` +You may need `python3-gtk` or similar package to display the graphs. -In the following example we superpose the curves for `memcpy`, `memset` and -`memcmp`: +### Usage ```shell -> make -C /tmp/build run-libc-memcpy-benchmark-small run-libc-memcmp-benchmark-small run-libc-memset-benchmark-small -> python libc/utils/benchmarks/render.py3 /tmp/last-libc-memcpy-benchmark-small.json /tmp/last-libc-memcmp-benchmark-small.json /tmp/last-libc-memset-benchmark-small.json +python3 libc/benchmarks/libc-benchmark-analysis.py3 /tmp/benchmark_result.json ... ``` -## Useful `render.py3` flags +When used with __multiple trials Sweep Mode data__ the tool displays the 95% confidence interval. - - To save the produced graph `--output=/tmp/benchmark_curve.png`. - - To prevent the graph from appearing on the screen `--headless`. +When providing with multiple reports at the same time, all the graphs from the same machine are displayed side by side to allow for comparison. +The Y-axis unit can be changed via the `--mode` flag: + - `time` displays the measured time (this is the default), + - `cycles` displays the number of cycles computed from the cpu frequency, + - `bytespercycle` displays the number of bytes per cycle (for `Sweep Mode` reports only). ## Under the hood diff --git a/libc/benchmarks/configuration_big.json b/libc/benchmarks/configuration_big.json deleted file mode 100644 --- a/libc/benchmarks/configuration_big.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "Options":{ - "MinDuration":0.001, - "MaxDuration":1, - "InitialIterations":100, - "MaxIterations":10000000, - "MinSamples":1, - "MaxSamples":1, - "Epsilon":0.01, - "ScalingFactor":1.4 - }, - "Configuration":{ - "Runs":5, - "BufferSize":134217728, - "Size":{ - "From":0, - "To":33554432, - "Step":1048576 - }, - "AddressAlignment":1, - "MemsetValue":0, - "MemcmpMismatchAt":0 - } -} diff --git a/libc/benchmarks/configuration_small.json b/libc/benchmarks/configuration_small.json deleted file mode 100644 --- a/libc/benchmarks/configuration_small.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "Options":{ - "MinDuration":0.001, - "MaxDuration":1, - "InitialIterations":100, - "MaxIterations":10000000, - "MinSamples":4, - "MaxSamples":1000, - "Epsilon":0.01, - "ScalingFactor":1.4 - }, - "Configuration":{ - "Runs":10, - "BufferSize":8192, - "Size":{ - "From":0, - "To":1024, - "Step":1 - }, - "AddressAlignment":1, - "MemsetValue":0, - "MemcmpMismatchAt":0 - } -} diff --git a/libc/benchmarks/libc-benchmark-analysis.py3 b/libc/benchmarks/libc-benchmark-analysis.py3 new file mode 100644 --- /dev/null +++ b/libc/benchmarks/libc-benchmark-analysis.py3 @@ -0,0 +1,128 @@ +"""Reads JSON files produced by the benchmarking framework and renders them. + +Installation: +> apt-get install python3-pip +> pip3 install matplotlib pandas seaborn + +Run: +> python3 libc/benchmarks/libc-benchmark-analysis.py3 +""" + +import argparse +import json +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +from matplotlib.ticker import EngFormatter + +def formatUnit(value, unit): + return EngFormatter(unit, sep="").format_data(value) + +def formatCache(cache): + letter = cache["Type"][0].lower() + level = cache["Level"] + size = formatUnit(cache["Size"], "B") + ways = cache["NumSharing"] + return F'{letter}L{level}:{size}/{ways}' + +def getCpuFrequency(study): + return study["Runtime"]["Host"]["CpuFrequency"] + +def getId(study): + CpuName = study["Runtime"]["Host"]["CpuName"] + CpuFrequency = formatUnit(getCpuFrequency(study), "Hz") + Mode = " (Sweep)" if study["Configuration"]["IsSweepMode"] else "" + CpuCaches = ", ".join(formatCache(c) for c in study["Runtime"]["Host"]["Caches"]) + return F'{CpuName} {CpuFrequency}{Mode}\n{CpuCaches}' + +def getFunction(study): + return study["Configuration"]["Function"] + +def getLabel(study): + return F'{getFunction(study)} {study["StudyName"]}' + +def displaySweepData(id, studies, mode): + df = None + for study in studies: + Measurements = study["Measurements"] + SweepModeMaxSize = study["Configuration"]["SweepModeMaxSize"] + NumSizes = SweepModeMaxSize + 1 + NumTrials = study["Configuration"]["NumTrials"] + assert NumTrials * NumSizes == len(Measurements), 'not a multiple of NumSizes' + Index = pd.MultiIndex.from_product([range(NumSizes), range(NumTrials)], names=['size', 'trial']) + if df is None: + df = pd.DataFrame(Measurements, index=Index, columns=[getLabel(study)]) + else: + df[getLabel(study)] = pd.Series(Measurements, index=Index) + df = df.reset_index(level='trial', drop=True) + if mode == "cycles": + df *= getCpuFrequency(study) + if mode == "bytespercycle": + df *= getCpuFrequency(study) + for col in df.columns: + df[col] = pd.Series(data=df.index, index=df.index).divide(df[col]) + FormatterUnit = {"time":"s","cycles":"","bytespercycle":"B/cycle"}[mode] + Label = {"time":"Time","cycles":"Cycles","bytespercycle":"Byte/cycle"}[mode] + graph = sns.lineplot(data=df, palette="muted", ci=95) + graph.set_title(id) + graph.yaxis.set_major_formatter(EngFormatter(unit=FormatterUnit)) + graph.yaxis.set_label_text(Label) + graph.xaxis.set_major_formatter(EngFormatter(unit="B")) + graph.xaxis.set_label_text("Copy Size") + _ = plt.xticks(rotation=90) + plt.show() + +def displayDistributionData(id, studies, mode): + distributions = set() + df = None + for study in studies: + distribution = study["Configuration"]["SizeDistributionName"] + distributions.add(distribution) + local = pd.DataFrame(study["Measurements"], columns=["time"]) + local["distribution"] = distribution + local["label"] = getLabel(study) + local["cycles"] = local["time"] * getCpuFrequency(study) + if df is None: + df = local + else: + df = df.append(local) + if mode == "bytespercycle": + mode = "time" + print("`--mode=bytespercycle` is ignored for distribution mode reports") + FormatterUnit = {"time":"s","cycles":""}[mode] + Label = {"time":"Time","cycles":"Cycles"}[mode] + graph = sns.violinplot(data=df, x="distribution", y=mode, palette="muted", hue="label", order=sorted(distributions)) + graph.set_title(id) + graph.yaxis.set_major_formatter(EngFormatter(unit=FormatterUnit)) + graph.yaxis.set_label_text(Label) + _ = plt.xticks(rotation=90) + plt.show() + + +def main(): + parser = argparse.ArgumentParser(description="Process benchmark json files.") + parser.add_argument("--mode", choices=["time", "cycles", "bytespercycle"], default="time", help="Use to display either 'time', 'cycles' or 'bytes/cycle'.") + parser.add_argument("files", nargs="+", help="The json files to read from.") + + args = parser.parse_args() + study_groups = dict() + for file in args.files: + with open(file) as json_file: + json_obj = json.load(json_file) + Id = getId(json_obj) + if Id in study_groups: + study_groups[Id].append(json_obj) + else: + study_groups[Id] = [json_obj] + + plt.tight_layout() + sns.set_theme(style="ticks") + for id, study_collection in study_groups.items(): + if "(Sweep)" in id: + displaySweepData(id, study_collection, args.mode) + else: + displayDistributionData(id, study_collection, args.mode) + + +if __name__ == "__main__": + main() diff --git a/libc/benchmarks/render.py3 b/libc/benchmarks/render.py3 deleted file mode 100644 --- a/libc/benchmarks/render.py3 +++ /dev/null @@ -1,194 +0,0 @@ -"""Reads JSON files produced by the benchmarking framework and renders them. - -Installation: -> apt-get install python3-pip -> pip3 install matplotlib scipy numpy - -Run: -> python3 render.py3 - -Rendering can occur on disk by specifying the --output option or on screen if -the --headless flag is not set. -""" - -import argparse -import collections -import json -import math -import pprint -import sys -import matplotlib.pyplot as plt -from matplotlib.ticker import EngFormatter -import numpy as np -import scipy.stats - - -def format_freq(number): - """Returns a human readable frequency.""" - magnitude = 0 - while math.fabs(number) >= 1000: - number /= 1000.0 - magnitude += 1 - return "%g%sHz" % (number, ["", "k", "M", "G"][magnitude]) - - -def format_size(number): - """Returns number in human readable form.""" - magnitude = 0 - while number >= 1000 and number % 1000 == 0: - number /= 1000 - magnitude += 1 - return "%g%s" % (number, ["", "K", "M", "G"][magnitude]) - - -def mean_confidence_interval(dataset, confidence=0.95): - """Returns the mean and half confidence interval for the dataset.""" - a = 1.0 * np.array(dataset) - n = len(a) - m, se = np.mean(a), scipy.stats.sem(a) - h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1) - return m, h - - -def add_plot(function_name, points): - """Plots measurements for a function.""" - n = len(points.keys()) - x = np.zeros(n) - y = np.zeros(n) - yerr = np.zeros(n) - - for i, key in enumerate(sorted(points.keys())): - values = points[key] - m, e = mean_confidence_interval(values) - x[i] = key - y[i] = m - yerr[i] = e - - plt.plot(x, y, linewidth=1, label=function_name) - plt.fill_between(x, y - yerr, y + yerr, alpha=0.5) - - -def get_title(host): - """Formats the Host object into a title for the plot.""" - cpu_name = host["CpuName"] - cpu_freq = format_freq(host["CpuFrequency"]) - cache_strings = [] - for cache in host["Caches"]: - prefix = { - "Instruction": "i", - "Data": "d", - "Unified": "u", - }.get(cache["Type"]) - cache_strings.append(r"%sL_%d %s_{/%d}" % - (prefix, cache["Level"], format_size( - cache["Size"]), cache["NumSharing"])) - title = "%s (%s)" % (cpu_name, cpu_freq) - subtitle = r"$" + ", ".join(sorted(cache_strings)) + r"$" - return title + "\n" + subtitle - - -def get_host(jsons): - """Returns the host of the different json objects iff they are all the same. - """ - host = None - for root in jsons: - if host and host != root["Host"]: - sys.exit("The datasets are not coming from the same Host") - if not host: - host = root["Host"] - return host - - -def get_configuration(jsons): - """Returns the configuration of the different json objects iff they are all - the same. - """ - config = None - for root in jsons: - if config and config != root["Configuration"]: - return None - if not config: - config = root["Configuration"] - return config - - -def setup_graphs(files, display): - """Setups the graphs to render from the json files.""" - jsons = [] - for file in files: - with open(file) as json_file: - jsons.append(json.load(json_file)) - if not jsons: - sys.exit("Nothing to process") - - for root in jsons: - frequency = root["Host"]["CpuFrequency"] - for function in root["Functions"]: - function_name = function["Name"] - sizes = function["Sizes"] - runtimes = function["Runtimes"] - assert len(sizes) == len(runtimes) - values = collections.defaultdict(lambda: []) - for i in range(len(sizes)): - value = runtimes[i] - if display == "cycles": - value = value * frequency - if display == "bytespercycle": - value = value * frequency - value = sizes[i] / value - values[sizes[i]].append(value) - add_plot(function_name, values) - - config = get_configuration(jsons) - if config: - plt.figtext( - 0.95, - 0.15, - pprint.pformat(config), - verticalalignment="bottom", - horizontalalignment="right", - multialignment="left", - fontsize="small", - bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.5)) - - axes = plt.gca() - axes.set_title(get_title(get_host(jsons))) - axes.set_ylim(bottom=0) - axes.set_xlabel("Size") - axes.xaxis.set_major_formatter(EngFormatter(unit="B")) - if display == "cycles": - axes.set_ylabel("Cycles") - if display == "time": - axes.set_ylabel("Time") - axes.yaxis.set_major_formatter(EngFormatter(unit="s")) - if display == "bytespercycle": - axes.set_ylabel("bytes/cycle") - - plt.legend() - plt.grid() - - -def main(): - parser = argparse.ArgumentParser( - description="Process benchmark json files.") - parser.add_argument("files", nargs="+", help="The json files to read from.") - parser.add_argument("--output", help="The output file to write the graph.") - parser.add_argument( - "--headless", - help="If set do not display the graph.", - action="store_true") - parser.add_argument( - "--display", - choices= ["time", "cycles", "bytespercycle"], - default="time", - help="Use to display either 'time', 'cycles' or 'bytes/cycle'.") - - args = parser.parse_args() - setup_graphs(args.files, args.display) - if args.output: - plt.savefig(args.output) - if not args.headless: - plt.show() - -if __name__ == "__main__": - main()