diff --git a/libc/cmake/modules/LLVMLibCRules.cmake b/libc/cmake/modules/LLVMLibCRules.cmake
--- a/libc/cmake/modules/LLVMLibCRules.cmake
+++ b/libc/cmake/modules/LLVMLibCRules.cmake
@@ -317,18 +317,23 @@
   set(library_deps "")
   foreach(dep IN LISTS LIBC_UNITTEST_DEPENDS)
-    get_target_property(dep_type ${dep} "TARGET_TYPE")
-    if (dep_type)
-      string(COMPARE EQUAL ${dep_type} ${ENTRYPOINT_OBJ_TARGET_TYPE} dep_is_entrypoint)
+    get_property(dep_target_type TARGET ${dep} PROPERTY "TARGET_TYPE")
+    if (dep_target_type)
+      string(COMPARE EQUAL ${dep_target_type} ${ENTRYPOINT_OBJ_TARGET_TYPE} dep_is_entrypoint)
       if(dep_is_entrypoint)
         get_target_property(obj_file ${dep} "OBJECT_FILE_RAW")
         list(APPEND library_deps ${obj_file})
         continue()
       endif()
+    else()
+      # If the dep is a normal CMake library target then add it to the list of
+      # library_deps.
+      get_property(dep_type TARGET ${dep} PROPERTY "TYPE")
+      if (${dep_type} MATCHES "_LIBRARY$")
+        list(APPEND library_deps ${dep})
+      endif()
     endif()
-    # TODO: Check if the dep is a normal CMake library target. If yes, then add it
-    # to the list of library_deps.
-  endforeach(dep)
+  endforeach()
 
   add_executable(
     ${target_name}
@@ -357,7 +362,7 @@
     gtest
   )
 
-  target_link_libraries(${target_name} PRIVATE gtest_main gtest)
+  target_link_libraries(${target_name} PRIVATE gtest_main gtest LLVMSupport)
 
   add_custom_command(
     TARGET ${target_name}
diff --git a/libc/utils/CMakeLists.txt b/libc/utils/CMakeLists.txt
--- a/libc/utils/CMakeLists.txt
+++ b/libc/utils/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(HdrGen)
+add_subdirectory(benchmarks)
diff --git a/libc/utils/HdrGen/CMakeLists.txt b/libc/utils/HdrGen/CMakeLists.txt
--- a/libc/utils/HdrGen/CMakeLists.txt
+++ b/libc/utils/HdrGen/CMakeLists.txt
@@ -1,3 +1,5 @@
+set(LLVM_LINK_COMPONENTS Support)
+
 add_tablegen(libc-hdrgen llvm-libc
   Command.h
   Command.cpp
diff --git a/libc/utils/benchmarks/CMakeLists.txt b/libc/utils/benchmarks/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/CMakeLists.txt
@@ -0,0 +1,148 @@
+find_package(Threads)
+
+include(ExternalProject)
+
+set(LLVM_LINK_COMPONENTS Support)
+
+#==============================================================================
+# Build Google Benchmark
+#==============================================================================
+set(GOOGLE_BENCHMARK_TARGET_FLAGS ${BENCHMARK_DIALECT_FLAG})
+if (LIBCXX_BENCHMARK_GCC_TOOLCHAIN)
+  set(GOOGLE_BENCHMARK_TARGET_FLAGS
+      -gcc-toolchain ${LIBCXX_BENCHMARK_GCC_TOOLCHAIN})
+endif()
+string(REPLACE ";" " " GOOGLE_BENCHMARK_TARGET_FLAGS "${GOOGLE_BENCHMARK_TARGET_FLAGS}")
+
+ExternalProject_Add(google-benchmark
+        EXCLUDE_FROM_ALL ON
+        PREFIX google-benchmark
+        SOURCE_DIR ${LIBC_SOURCE_DIR}/../llvm/utils/benchmark
+        INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/google-benchmark
+        CMAKE_CACHE_ARGS
+          -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+          -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER}
+          -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}
+          -DCMAKE_CXX_FLAGS:STRING=${GOOGLE_BENCHMARK_TARGET_FLAGS}
+          -DCMAKE_BUILD_TYPE:STRING=RELEASE
+          -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
+          -DBENCHMARK_ENABLE_TESTING:BOOL=OFF)
+
+set(GOOGLE_BENCHMARK_LIBC_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/google-benchmark)
+set(GOOGLE_BENCHMARK_LINK_FLAGS -L${GOOGLE_BENCHMARK_LIBC_INSTALL}/lib/)
+
+#==============================================================================
+# Build Google Benchmark for libc
+#==============================================================================
+
+function(fix_rtti target)
+  # TODO: Make this portable and inline with rtti mode from llvm/
+  target_compile_options(${target} PUBLIC -fno-rtti)
+endfunction()
+
+# libc-benchmark
+add_library(libc-benchmark
+  STATIC
+  EXCLUDE_FROM_ALL
+  LibcBenchmark.cpp
+  LibcBenchmark.h
+)
+add_dependencies(libc-benchmark google-benchmark)
+target_include_directories(libc-benchmark PUBLIC "${GOOGLE_BENCHMARK_LIBC_INSTALL}/include")
+target_link_options(libc-benchmark PUBLIC "${GOOGLE_BENCHMARK_LINK_FLAGS}")
+target_link_libraries(libc-benchmark PUBLIC LLVMSupport -lbenchmark Threads::Threads)
+fix_rtti(libc-benchmark)
+
+add_libc_unittest(libc-benchmark-test
+  SRCS LibcBenchmarkTest.cpp
+  DEPENDS libc-benchmark
+)
+
+# libc-memory-benchmark
+add_library(libc-memory-benchmark
+  STATIC
+  EXCLUDE_FROM_ALL
+  LibcMemoryBenchmark.cpp
+  LibcMemoryBenchmark.h
+)
+target_link_libraries(libc-memory-benchmark PUBLIC libc-benchmark)
+fix_rtti(libc-memory-benchmark)
+
+add_libc_unittest(libc-memory-benchmark-test
+  SRCS LibcMemoryBenchmarkTest.cpp
+  DEPENDS libc-memory-benchmark
+)
+
+# json
+add_library(json
+  STATIC
+  EXCLUDE_FROM_ALL
+  JSON.cpp
+  JSON.h
+)
+target_link_libraries(json PUBLIC libc-memory-benchmark)
+fix_rtti(json)
+
+add_libc_unittest(json-test
+  SRCS JSONTest.cpp
+  DEPENDS json
+)
+
+add_custom_target(check-libc-benchmark)
+add_dependencies(check-libc-benchmark
+  libc-benchmark-test
+  libc-memory-benchmark-test
+  json-test
+)
+
+#==============================================================================
+# Benchmark tests configuration
+#==============================================================================
+
+function(add_libc_benchmark_analysis conf_target run_target)
+  set(png_file "/tmp/last-${conf_target}.png")
+  set(render_target render-${conf_target})
+  add_custom_target(${render_target}
+    COMMAND python render.py ${json_file} --headless --output=${png_file}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "render ${libc_target} to ${png_file}"
+  )
+  add_dependencies(${render_target} ${run_target})
+
+  set(display_target display-${conf_target})
+  add_custom_target(${display_target}
+    COMMAND python render.py ${json_file}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "display ${libc_target}"
+  )
+  add_dependencies(${display_target} ${run_target})
+endfunction()
+
+function(add_libc_benchmark_configuration target configuration)
+  set(conf_target ${target}-${configuration})
+  set(json_file "/tmp/last-${conf_target}.json")
+  set(run_target run-${conf_target})
+  add_custom_target(${run_target}
+    COMMAND ${libc_target} --conf=configuration_${configuration}.json -o ${json_file}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
+  add_libc_benchmark_analysis(${conf_target} ${run_target})
+endfunction()
+
+function(add_libc_benchmark name file)
+  set(libc_target libc-${name}-benchmark)
+  add_executable(${libc_target}
+    EXCLUDE_FROM_ALL
+    ${file}
+    LibcMemoryBenchmarkMain.h
+    LibcMemoryBenchmarkMain.cpp
+  )
+  target_link_libraries(${libc_target} PUBLIC json)
+  foreach(configuration "small" "big")
+    add_libc_benchmark_configuration(${libc_target} ${configuration})
+  endforeach()
+endfunction()
+
+add_libc_benchmark(memcpy Memcpy.cpp)
+add_libc_benchmark(memcmp Memcmp.cpp)
+add_libc_benchmark(memset Memset.cpp)
diff --git a/libc/utils/benchmarks/JSON.h b/libc/utils/benchmarks/JSON.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/JSON.h
@@ -0,0 +1,28 @@
+//===-------- JSON serialization routines -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_BENCHMARK_JSON_H
+#define LLVM_LIBC_UTILS_BENCHMARK_JSON_H
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// Parses a Study from a json string.
+Expected<Study> ParseJsonStudy(StringRef Content);
+
+// Serialize a Study as json.
+void SerializeToJson(const Study &S, llvm::json::OStream &JOS);
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+#endif // LLVM_LIBC_UTILS_BENCHMARK_JSON_H
diff --git a/libc/utils/benchmarks/JSON.cpp b/libc/utils/benchmarks/JSON.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/JSON.cpp
@@ -0,0 +1,367 @@
+//===-------- JSON serialization routines ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "JSON.h"
+#include "LibcBenchmark.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MathExtras.h"
+#include <chrono>
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+template <typename T>
+static Error intFromJsonTemplate(const json::Value &V, T &Out) {
+  if (const auto &MaybeInt64 = V.getAsInteger()) {
+    int64_t Value = *MaybeInt64;
+    if (Value < std::numeric_limits<T>::min() ||
+        Value > std::numeric_limits<T>::max())
+      return createStringError(errc::io_error, "Out of bound Integer");
+    Out = Value;
+    return Error::success();
+  }
+  return createStringError(errc::io_error, "Can't parse Integer");
+}
+
+static Error fromJson(const json::Value &V, double &Out) {
+  if (auto S = V.getAsNumber()) {
+    Out = *S;
+    return Error::success();
+  }
+  return createStringError(errc::io_error, "Can't parse Double");
+}
+
+static Error fromJson(const json::Value &V, std::string &Out) {
+  if (auto S = V.getAsString()) {
+    Out = *S;
+    return Error::success();
+  }
+  return createStringError(errc::io_error, "Can't parse String");
+}
+
+static Error fromJson(const json::Value &V, uint32_t &Out) {
+  return intFromJsonTemplate(V, Out);
+}
+
+static Error fromJson(const json::Value &V, uint8_t &Out) {
+  return intFromJsonTemplate(V, Out);
+}
+
+static Error fromJson(const json::Value &V, int &Out) {
+  return intFromJsonTemplate(V, Out);
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::Duration &D) {
+  if (V.kind() != json::Value::Kind::Number)
+    return createStringError(errc::io_error, "Can't parse Duration");
+  D = libc_benchmarks::Duration(*V.getAsNumber());
+  return Error::success();
+}
+
+static Error fromJson(const json::Value &V, MaybeAlign &Out) {
+  const auto MaybeInt = V.getAsInteger();
+  if (!MaybeInt)
+    return createStringError(errc::io_error,
+                             "Can't parse Align, not an Integer");
+  const int64_t Value = *MaybeInt;
+  if (!Value) {
+    Out = None;
+    return Error::success();
+  }
+  if (isPowerOf2_64(Value)) {
+    Out = Align(Value);
+    return Error::success();
+  }
+  return createStringError(errc::io_error,
+                           "Can't parse Align, not a power of two");
+}
+
+static Error fromJson(const json::Value &V,
+                      libc_benchmarks::BenchmarkLog &Out) {
+  if (V.kind() != json::Value::Kind::String)
+    return createStringError(errc::io_error,
+                             "Can't parse BenchmarkLog, not a String");
+  const auto String = *V.getAsString();
+  auto Parsed =
+      llvm::StringSwitch<Optional<libc_benchmarks::BenchmarkLog>>(String)
+          .Case("None", libc_benchmarks::BenchmarkLog::None)
+          .Case("Last", libc_benchmarks::BenchmarkLog::Last)
+          .Case("Full", libc_benchmarks::BenchmarkLog::Full)
+          .Default(None);
+  if (!Parsed)
+    return createStringError(errc::io_error,
+                             Twine("Can't parse BenchmarkLog, invalid value '")
+                                 .concat(String)
+                                 .concat("'"));
+  Out = *Parsed;
+  return Error::success();
+}
+
+template <typename C>
+Error vectorFromJsonTemplate(const json::Value &V, C &Out) {
+  auto *A = V.getAsArray();
+  if (!A)
+    return createStringError(errc::io_error, "Can't parse Array");
+  Out.clear();
+  Out.resize(A->size());
+  for (auto InOutPair : llvm::zip(*A, Out))
+    if (auto E = fromJson(std::get<0>(InOutPair), std::get<1>(InOutPair)))
+      return std::move(E);
+  return Error::success();
+}
+
+template <typename T>
+static Error fromJson(const json::Value &V, std::vector<T> &Out) {
+  return vectorFromJsonTemplate(V, Out);
+}
+
+template <typename T>
+static Error fromJson(const json::Value &V, SmallVectorImpl<T> &Out) {
+  return vectorFromJsonTemplate(V, Out);
+}
+
+// Same as llvm::json::ObjectMapper but adds a finer error reporting mechanism.
+class JsonObjectMapper {
+  const json::Object *O;
+  Error E;
+  SmallDenseSet<StringRef> SeenFields;
+
+public:
+  explicit JsonObjectMapper(const json::Value &V)
+      : O(V.getAsObject()),
+        E(O ? Error::success()
+            : createStringError(errc::io_error, "Expected JSON Object")) {}
+
+  Error takeError() {
+    if (E)
+      return std::move(E);
+    for (const auto &Itr : *O) {
+      const StringRef Key = Itr.getFirst();
+      if (!SeenFields.count(Key))
+        E = createStringError(errc::io_error,
+                              Twine("Unknown field: ").concat(Key));
+    }
+    return std::move(E);
+  }
+
+  template <typename T> void map(StringRef Key, T &Out) {
+    if (E)
+      return;
+    if (const json::Value *Value = O->get(Key)) {
+      SeenFields.insert(Key);
+      E = fromJson(*Value, Out);
+    }
+  }
+};
+
+static Error fromJson(const json::Value &V,
+                      libc_benchmarks::BenchmarkOptions &Out) {
+  JsonObjectMapper O(V);
+  O.map("MinDuration", Out.MinDuration);
+  O.map("MaxDuration", Out.MaxDuration);
+  O.map("InitialIterations", Out.InitialIterations);
+  O.map("MaxIterations", Out.MaxIterations);
+  O.map("MinSamples", Out.MinSamples);
+  O.map("MaxSamples", Out.MaxSamples);
+  O.map("Epsilon", Out.Epsilon);
+  O.map("ScalingFactor", Out.ScalingFactor);
+  O.map("Log", Out.Log);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::SizeRange &Out) {
+  JsonObjectMapper O(V);
+  O.map("From", Out.From);
+  O.map("To", Out.To);
+  O.map("Step", Out.Step);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V,
+                      libc_benchmarks::StudyConfiguration &Out) {
+  JsonObjectMapper O(V);
+  O.map("Runs", Out.Runs);
+  O.map("BufferSize", Out.BufferSize);
+  O.map("Size", Out.Size);
+  O.map("AddressAlignment", Out.AddressAlignment);
+  O.map("MemsetValue", Out.MemsetValue);
+  O.map("MemcmpMismatchAt", Out.MemcmpMismatchAt);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::CacheInfo &Out) {
+  JsonObjectMapper O(V);
+  O.map("Type", Out.Type);
+  O.map("Level", Out.Level);
+  O.map("Size", Out.Size);
+  O.map("NumSharing", Out.NumSharing);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::HostState &Out) {
+  JsonObjectMapper O(V);
+  O.map("CpuName", Out.CpuName);
+  O.map("CpuFrequency", Out.CpuFrequency);
+  O.map("Caches", Out.Caches);
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V,
+                      libc_benchmarks::FunctionMeasurements &Out) {
+  JsonObjectMapper O(V);
+  O.map("Name", Out.Name);
+  std::vector<uint32_t> Sizes;
+  O.map("Sizes", Sizes);
+  std::vector<libc_benchmarks::Duration> Runtimes;
+  O.map("Runtimes", Runtimes);
+  if (Sizes.size() != Runtimes.size())
+    return createStringError(errc::io_error,
+                             "Measurement Size and Runtime mismatch");
+  Out.Measurements.resize(Sizes.size());
+  for (size_t I = 0; I < Sizes.size(); ++I) {
+    Out.Measurements[I].Size = Sizes[I];
+    Out.Measurements[I].Runtime = Runtimes[I];
+  }
+  return O.takeError();
+}
+
+static Error fromJson(const json::Value &V, libc_benchmarks::Study &Out) {
+  JsonObjectMapper O(V);
+  O.map("Host", Out.Host);
+  O.map("Options", Out.Options);
+  O.map("Configuration", Out.Configuration);
+  O.map("Functions", Out.Functions);
+  return O.takeError();
+}
+
+static double Seconds(const Duration &D) {
+  return std::chrono::duration<double>(D).count();
+}
+
+Expected<Study> ParseJsonStudy(StringRef Content) {
+  Expected<json::Value> EV = json::parse(Content);
+  if (!EV)
+    return EV.takeError();
+  Study S;
+  if (Error E = fromJson(*EV, S))
+    return std::move(E);
+  return S;
+}
+
+static StringRef Serialize(const BenchmarkLog &L) {
+  switch (L) {
+  case BenchmarkLog::None:
+    return "None";
+  case BenchmarkLog::Last:
+    return "Last";
+  case BenchmarkLog::Full:
+    return "Full";
+  }
+  llvm_unreachable("Unhandled BenchmarkLog value");
+}
+
+static void Serialize(const BenchmarkOptions &BO, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("MinDuration", Seconds(BO.MinDuration));
+    JOS.attribute("MaxDuration", Seconds(BO.MaxDuration));
+    JOS.attribute("InitialIterations", BO.InitialIterations);
+    JOS.attribute("MaxIterations", BO.MaxIterations);
+    JOS.attribute("MinSamples", BO.MinSamples);
+    JOS.attribute("MaxSamples", BO.MaxSamples);
+    JOS.attribute("Epsilon", BO.Epsilon);
+    JOS.attribute("ScalingFactor", BO.ScalingFactor);
+    JOS.attribute("Log", Serialize(BO.Log));
+  });
+}
+
+static void Serialize(const CacheInfo &CI, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("Type", CI.Type);
+    JOS.attribute("Level", CI.Level);
+    JOS.attribute("Size", CI.Size);
+    JOS.attribute("NumSharing", CI.NumSharing);
+  });
+}
+
+static void Serialize(const HostState &HS, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("CpuName", HS.CpuName);
+    JOS.attribute("CpuFrequency", HS.CpuFrequency);
+    JOS.attributeArray("Caches", [&]() {
+      for (const auto &CI : HS.Caches)
+        Serialize(CI, JOS);
+    });
+  });
+}
+
+static void Serialize(const StudyConfiguration &SC, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("Runs", SC.Runs);
+    JOS.attribute("BufferSize", SC.BufferSize);
+    JOS.attributeObject("Size", [&]() {
+      JOS.attribute("From", SC.Size.From);
+      JOS.attribute("To", SC.Size.To);
+      JOS.attribute("Step", SC.Size.Step);
+    });
+    if (SC.AddressAlignment)
+      JOS.attribute("AddressAlignment",
+                    static_cast<int64_t>(SC.AddressAlignment->value()));
+    JOS.attribute("MemsetValue", SC.MemsetValue);
+    JOS.attribute("MemcmpMismatchAt", SC.MemcmpMismatchAt);
+  });
+}
+
+static void Serialize(const FunctionMeasurements &FM, json::OStream &JOS) {
+  JOS.object([&]() {
+    JOS.attribute("Name", FM.Name);
+    JOS.attributeArray("Sizes", [&]() {
+      for (const auto &M : FM.Measurements)
+        JOS.value(M.Size);
+    });
JOS.attributeArray("Runtimes", [&]() { + for (const auto &M : FM.Measurements) + JOS.value(Seconds(M.Runtime)); + }); + }); +} + +void SerializeToJson(const Study &S, json::OStream &JOS) { + JOS.object([&]() { + JOS.attributeBegin("Host"); + Serialize(S.Host, JOS); + JOS.attributeEnd(); + + JOS.attributeBegin("Options"); + Serialize(S.Options, JOS); + JOS.attributeEnd(); + + JOS.attributeBegin("Configuration"); + Serialize(S.Configuration, JOS); + JOS.attributeEnd(); + + if (!S.Functions.empty()) { + JOS.attributeArray("Functions", [&]() { + for (const auto &FM : S.Functions) + Serialize(FM, JOS); + }); + } + }); +} + +} // namespace libc_benchmarks +} // namespace llvm diff --git a/libc/utils/benchmarks/JSONTest.cpp b/libc/utils/benchmarks/JSONTest.cpp new file mode 100644 --- /dev/null +++ b/libc/utils/benchmarks/JSONTest.cpp @@ -0,0 +1,190 @@ +#include "JSON.h" +#include "LibcBenchmark.h" +#include "LibcMemoryBenchmark.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::AllOf; +using testing::ExplainMatchResult; +using testing::Field; +using testing::Pointwise; + +namespace llvm { +namespace libc_benchmarks { +namespace { + +Study getStudy() { + return Study{ + HostState{ + "CpuName", 123, {CacheInfo{"A", 1, 2, 3}, CacheInfo{"B", 4, 5, 6}}}, + BenchmarkOptions{std::chrono::seconds(1), std::chrono::seconds(2), 10, + 100, 6, 100, 0.1, 2, BenchmarkLog::Full}, + StudyConfiguration{2, 3, SizeRange{4, 5, 6}, Align(8), 9, 10}, + {FunctionMeasurements{"A", + {Measurement{3, std::chrono::seconds(3)}, + Measurement{3, std::chrono::seconds(4)}}}, + FunctionMeasurements{"B", {}}}}; +} + +static std::string SerializeToString(const Study &S) { + std::string Buffer; + raw_string_ostream RSO(Buffer); + json::OStream JOS(RSO); + SerializeToJson(S, JOS); + return Buffer; +} + +MATCHER(EqualsCacheInfo, "") { + const CacheInfo &a = ::testing::get<0>(arg); + const CacheInfo &b = ::testing::get<1>(arg); + return ExplainMatchResult(AllOf(Field(&CacheInfo::Type, b.Type), + Field(&CacheInfo::Level, b.Level), + Field(&CacheInfo::Size, b.Size), + Field(&CacheInfo::NumSharing, b.NumSharing)), + a, result_listener); +} + +auto Equals(const HostState &H) -> auto { + return AllOf( + Field(&HostState::CpuName, H.CpuName), + Field(&HostState::CpuFrequency, H.CpuFrequency), + Field(&HostState::Caches, Pointwise(EqualsCacheInfo(), H.Caches))); +} + +auto Equals(const BenchmarkOptions &BO) -> auto { + return AllOf( + Field(&BenchmarkOptions::MinDuration, BO.MinDuration), + Field(&BenchmarkOptions::MaxDuration, BO.MaxDuration), + Field(&BenchmarkOptions::InitialIterations, BO.InitialIterations), + Field(&BenchmarkOptions::MaxIterations, BO.MaxIterations), + Field(&BenchmarkOptions::MinSamples, BO.MinSamples), + Field(&BenchmarkOptions::MaxSamples, BO.MaxSamples), + Field(&BenchmarkOptions::Epsilon, BO.Epsilon), + Field(&BenchmarkOptions::ScalingFactor, BO.ScalingFactor), + Field(&BenchmarkOptions::Log, BO.Log)); +} + +auto Equals(const SizeRange &SR) -> auto { + return AllOf(Field(&SizeRange::From, SR.From), Field(&SizeRange::To, SR.To), + Field(&SizeRange::Step, SR.Step)); +} + +auto Equals(const StudyConfiguration &SC) -> auto { + return AllOf( + Field(&StudyConfiguration::Runs, SC.Runs), + Field(&StudyConfiguration::BufferSize, SC.BufferSize), + Field(&StudyConfiguration::Size, Equals(SC.Size)), + Field(&StudyConfiguration::AddressAlignment, SC.AddressAlignment), + Field(&StudyConfiguration::MemsetValue, SC.MemsetValue), + 
+      Field(&StudyConfiguration::MemcmpMismatchAt, SC.MemcmpMismatchAt));
+}
+
+MATCHER(EqualsMeasurement, "") {
+  const Measurement &a = ::testing::get<0>(arg);
+  const Measurement &b = ::testing::get<1>(arg);
+  return ExplainMatchResult(AllOf(Field(&Measurement::Size, b.Size),
+                                  Field(&Measurement::Runtime, b.Runtime)),
+                            a, result_listener);
+}
+
+MATCHER(EqualsFunctions, "") {
+  const FunctionMeasurements &a = ::testing::get<0>(arg);
+  const FunctionMeasurements &b = ::testing::get<1>(arg);
+  return ExplainMatchResult(
+      AllOf(Field(&FunctionMeasurements::Name, b.Name),
+            Field(&FunctionMeasurements::Measurements,
+                  Pointwise(EqualsMeasurement(), b.Measurements))),
+      a, result_listener);
+}
+
+auto Equals(const Study &S) -> auto {
+  return AllOf(
+      Field(&Study::Host, Equals(S.Host)),
+      Field(&Study::Options, Equals(S.Options)),
+      Field(&Study::Configuration, Equals(S.Configuration)),
+      Field(&Study::Functions, Pointwise(EqualsFunctions(), S.Functions)));
+}
+
+TEST(JsonTest, RoundTrip) {
+  const Study S = getStudy();
+  auto StudyOrError = ParseJsonStudy(SerializeToString(S));
+  if (auto Err = StudyOrError.takeError())
+    EXPECT_FALSE(Err) << "Unexpected error";
+  const Study &Parsed = *StudyOrError;
+  EXPECT_THAT(Parsed, Equals(S));
+}
+
+TEST(JsonTest, SupplementaryField) {
+  auto Failure = ParseJsonStudy(R"({
+    "UnknownField": 10
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()), "Unknown field: UnknownField");
+}
+
+TEST(JsonTest, InvalidType) {
+  auto Failure = ParseJsonStudy(R"({
+    "Options": 1
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()), "Expected JSON Object");
+}
+
+TEST(JsonTest, InvalidDuration) {
+  auto Failure = ParseJsonStudy(R"({
+    "Options": {
+      "MinDuration": "Duration should be a Number"
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()), "Can't parse Duration");
+}
+
+TEST(JsonTest, InvalidAlignType) {
+  auto Failure = ParseJsonStudy(R"({
+    "Configuration":{
+      "AddressAlignment": "Align should be an Integer"
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()), "Can't parse Align, not an Integer");
+}
+
+TEST(JsonTest, InvalidAlign) {
+  auto Failure = ParseJsonStudy(R"({
+    "Configuration":{
+      "AddressAlignment":3
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()),
+            "Can't parse Align, not a power of two");
+}
+
+TEST(JsonTest, InvalidBenchmarkLogType) {
+  auto Failure = ParseJsonStudy(R"({
+    "Options":{
+      "Log": 3
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()),
+            "Can't parse BenchmarkLog, not a String");
+}
+
+TEST(JsonTest, InvalidBenchmarkLog) {
+  auto Failure = ParseJsonStudy(R"({
+    "Options":{
+      "Log": "Unknown"
+    }
+  }
+  )");
+  EXPECT_EQ(toString(Failure.takeError()),
+            "Can't parse BenchmarkLog, invalid value 'Unknown'");
+}
+
+} // namespace
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/LibcBenchmark.h b/libc/utils/benchmarks/LibcBenchmark.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcBenchmark.h
@@ -0,0 +1,324 @@
+//===-------- `Benchmark` function ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This file mainly defines a `Benchmark` function.
+//
+// The benchmarking process is as follows:
+// - We start by measuring the time it takes to run the function
+//   `InitialIterations` times. This is called a Sample. From this we can
+//   derive the time it took to run a single iteration.
+//
+// - We repeat the previous step with a greater number of iterations to lower
+//   the impact of the measurement itself. This yields a more precise
+//   estimation of the runtime for a single iteration.
+//
+// - Each sample gives a more accurate estimation of the runtime for a single
+//   iteration but also takes more time to run. We stop the process when:
+//   * The measure stabilizes under a certain precision (Epsilon),
+//   * The overall benchmarking time is greater than MaxDuration,
+//   * The overall sample count is greater than MaxSamples,
+//   * The last sample used more than MaxIterations iterations.
+//
+// - We also make sure that the benchmark doesn't run for too short a period
+//   of time by defining MinDuration and MinSamples.
+
+#ifndef LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
+#define LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
+
+#include "benchmark/benchmark.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include <array>
+#include <chrono>
+#include <vector>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// Makes sure the binary was compiled in release mode and that the frequency
+// governor is set to performance.
+void checkRequirements();
+
+using Duration = std::chrono::duration<double>;
+
+enum class BenchmarkLog {
+  None, // Don't keep the internal state of the benchmark.
+  Last, // Keep only the last batch.
+  Full  // Keep all iteration states, useful for testing or debugging.
+};
+
+// An object to configure the benchmark stopping conditions.
+// See documentation at the beginning of the file for the overall algorithm
+// and the meaning of each field.
+struct BenchmarkOptions {
+  // The minimum time for which the benchmark is running.
+  Duration MinDuration = std::chrono::seconds(0);
+  // The maximum time for which the benchmark is running.
+  Duration MaxDuration = std::chrono::seconds(10);
+  // The number of iterations in the first sample.
+  uint32_t InitialIterations = 1;
+  // The maximum number of iterations for any given sample.
+  uint32_t MaxIterations = 10000000;
+  // The minimum number of samples.
+  uint32_t MinSamples = 4;
+  // The maximum number of samples.
+  uint32_t MaxSamples = 1000;
+  // The benchmark will stop if the relative difference between the current
+  // and the last estimation is less than Epsilon. This is 1% by default.
+  double Epsilon = 0.01;
+  // The number of iterations grows exponentially between each sample.
+  // Must be greater than or equal to 1.
+  double ScalingFactor = 1.4;
+  BenchmarkLog Log = BenchmarkLog::None;
+};
+
+// The state of a benchmark.
+enum class BenchmarkStatus {
+  Running,
+  MaxDurationReached,
+  MaxIterationsReached,
+  MaxSamplesReached,
+  PrecisionReached,
+};
+
+// The internal state of the benchmark, useful to debug, test or report
+// statistics.
+struct BenchmarkState {
+  size_t LastSampleIterations;
+  Duration LastBatchElapsed;
+  BenchmarkStatus CurrentStatus;
+  Duration CurrentBestGuess; // The time estimation for a single run of `foo`.
+  double ChangeRatio;        // The change in time estimation between previous
+                             // and current samples.
+};
+
+// A lightweight result for a benchmark.
+struct BenchmarkResult {
+  BenchmarkStatus TerminationStatus = BenchmarkStatus::Running;
+  Duration BestGuess = {};
+  llvm::Optional<std::vector<BenchmarkState>> MaybeBenchmarkLog;
+};
+
+// Stores information about a cache in the host memory system.
+struct CacheInfo {
+  std::string Type; // e.g. "Instruction", "Data", "Unified".
+  int Level;        // 0 is closest to the processing unit.
+  int Size;         // In bytes.
+  int NumSharing;   // The number of processing units (Hyper-Threading
+                    // threads) with which this cache is shared.
+};
+
+// Stores information about the host.
+struct HostState {
+  std::string CpuName; // A string compatible with the -march option.
+  double CpuFrequency; // In Hertz.
+  std::vector<CacheInfo> Caches;
+
+  static HostState get();
+};
+
+namespace internal {
+
+struct Measurement {
+  size_t Iterations = 0;
+  Duration Elapsed = {};
+};
+
+// Updates the estimation of the elapsed time for a single iteration.
+class RefinableRuntimeEstimation {
+  Duration TotalTime = {};
+  size_t TotalIterations = 0;
+
+public:
+  Duration update(const Measurement &M) {
+    assert(M.Iterations > 0);
+    // Duration is encoded as a double (see definition).
+    // `TotalTime` and `M.Elapsed` are of the same magnitude so we don't
+    // expect loss of precision due to radically different scales.
+    TotalTime += M.Elapsed;
+    TotalIterations += M.Iterations;
+    return TotalTime / TotalIterations;
+  }
+};
+
+// This class tracks the progression of the runtime estimation.
+class RuntimeEstimationProgression {
+  RefinableRuntimeEstimation RRE;
+
+public:
+  Duration CurrentEstimation = {};
+
+  // Returns the change ratio between our best guess so far and the one from
+  // the new measurement.
+  double computeImprovement(const Measurement &M) {
+    const Duration NewEstimation = RRE.update(M);
+    const double Ratio = fabs(((CurrentEstimation / NewEstimation) - 1.0));
+    CurrentEstimation = NewEstimation;
+    return Ratio;
+  }
+};
+
+} // namespace internal
+
+// Measures the runtime of `foo` until the conditions defined by `Options` are
+// met.
+//
+// To avoid measurement imprecision, we measure batches of `foo`.
+// The batch size grows by `ScalingFactor` to minimize the effect of
+// measuring.
+//
+// Note: The benchmark is not responsible for serializing the executions of
+// `foo`. It is not suitable for measuring very small, side-effect-free
+// functions, as the processor is free to run several executions in parallel.
+//
+// - Options: A set of parameters controlling the stopping conditions for the
+//   benchmark.
+// - foo: The function under test. It takes one value and returns one value.
+//   The input value is used to randomize the execution of `foo` as part of a
+//   batch to mitigate the effect of the branch predictor. Signature:
+//   `ProductType foo(ParameterProvider::value_type value);`
+//   The output value is a product of the execution of `foo` and prevents the
+//   compiler from optimizing out foo's body.
+// - ParameterProvider: An object responsible for providing a range of
+//   `Iterations` values to use as input for `foo`. The `value_type` of the
+//   returned container has to be compatible with `foo`'s argument.
+//   Must implement one of:
+//   `Container generateBatch(size_t Iterations);`
+//   `const Container& generateBatch(size_t Iterations);`
+// - Clock: An object providing the current time.
+//   Must implement:
+//   `std::chrono::time_point now();`
+template <typename ParameterProvider, typename Function,
+          typename BenchmarkClock = const std::chrono::high_resolution_clock>
+BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                          ParameterProvider &PP, Function foo,
+                          BenchmarkClock &Clock = BenchmarkClock()) {
+  BenchmarkResult Result;
+  internal::RuntimeEstimationProgression REP;
+  Duration TotalBenchmarkDuration = {};
+  size_t Iterations = std::max(Options.InitialIterations, uint32_t(1));
+  size_t Samples = 0;
+  if (Options.ScalingFactor < 1.0)
+    report_fatal_error("ScalingFactor should be >= 1");
+  if (Options.Log != BenchmarkLog::None)
+    Result.MaybeBenchmarkLog.emplace();
+  for (;;) {
+    // Request a new Batch of size `Iterations`.
+    const auto &Batch = PP.generateBatch(Iterations);
+
+    // Measuring this Batch.
+    const auto StartTime = Clock.now();
+    for (const auto Parameter : Batch) {
+      const auto Production = foo(Parameter);
+      benchmark::DoNotOptimize(Production);
+    }
+    const auto EndTime = Clock.now();
+    const Duration Elapsed = EndTime - StartTime;
+
+    // Updating statistics.
+    ++Samples;
+    TotalBenchmarkDuration += Elapsed;
+    const double ChangeRatio = REP.computeImprovement({Iterations, Elapsed});
+    Result.BestGuess = REP.CurrentEstimation;
+
+    // Stopping conditions.
+    if (TotalBenchmarkDuration >= Options.MinDuration &&
+        Samples >= Options.MinSamples && ChangeRatio < Options.Epsilon)
+      Result.TerminationStatus = BenchmarkStatus::PrecisionReached;
+    else if (Samples >= Options.MaxSamples)
+      Result.TerminationStatus = BenchmarkStatus::MaxSamplesReached;
+    else if (TotalBenchmarkDuration >= Options.MaxDuration)
+      Result.TerminationStatus = BenchmarkStatus::MaxDurationReached;
+    else if (Iterations >= Options.MaxIterations)
+      Result.TerminationStatus = BenchmarkStatus::MaxIterationsReached;
+
+    if (Result.MaybeBenchmarkLog) {
+      auto &BenchmarkLog = *Result.MaybeBenchmarkLog;
+      if (Options.Log == BenchmarkLog::Last && !BenchmarkLog.empty())
+        BenchmarkLog.pop_back();
+      BenchmarkState BS;
+      BS.LastSampleIterations = Iterations;
+      BS.LastBatchElapsed = Elapsed;
+      BS.CurrentStatus = Result.TerminationStatus;
+      BS.CurrentBestGuess = Result.BestGuess;
+      BS.ChangeRatio = ChangeRatio;
+      BenchmarkLog.push_back(BS);
+    }
+
+    if (Result.TerminationStatus != BenchmarkStatus::Running)
+      return Result;
+
+    if (Options.ScalingFactor > 1 &&
+        Iterations * Options.ScalingFactor == Iterations)
+      report_fatal_error(
+          "`Iterations *= ScalingFactor` is idempotent, increase "
+          "ScalingFactor or InitialIterations.");
+
+    Iterations *= Options.ScalingFactor;
+  }
+}
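+
+// Example usage (an illustrative sketch only; `ZeroProvider` and the lambda
+// below are hypothetical, modeled after the unit tests):
+//
+//   struct ZeroProvider {
+//     std::vector<char> generateBatch(size_t Iterations) {
+//       return std::vector<char>(Iterations); // zero initialized.
+//     }
+//   };
+//
+//   BenchmarkOptions Options;
+//   ZeroProvider Provider;
+//   const BenchmarkResult Result =
+//       benchmark(Options, Provider, [](char Payload) { return Payload; });
+//   // Result.BestGuess now holds the runtime estimation for a single call.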
+
+// Interprets `Array` as a circular buffer of `Size` elements.
+template <typename T> class CircularArrayRef {
+  llvm::ArrayRef<T> Array;
+  size_t Size;
+
+public:
+  using value_type = T;
+  using reference = T &;
+  using const_reference = const T &;
+  using difference_type = ssize_t;
+  using size_type = size_t;
+
+  class const_iterator
+      : public std::iterator<std::forward_iterator_tag, T> {
+    llvm::ArrayRef<T> Array;
+    size_t Index;
+
+  public:
+    explicit const_iterator(llvm::ArrayRef<T> Array, size_t Index = 0)
+        : Array(Array), Index(Index) {}
+    const_iterator &operator++() {
+      ++Index;
+      return *this;
+    }
+    bool operator==(const_iterator Other) const { return Index == Other.Index; }
+    bool operator!=(const_iterator Other) const { return !(*this == Other); }
+    const T &operator*() const { return Array[Index % Array.size()]; }
+  };
+
+  CircularArrayRef(llvm::ArrayRef<T> Array, size_t Size)
+      : Array(Array), Size(Size) {
+    assert(Array.size() > 0);
+  }
+
+  const_iterator begin() const { return const_iterator(Array); }
+  const_iterator end() const { return const_iterator(Array, Size); }
+};
+
+// A convenient helper to produce a CircularArrayRef from an ArrayRef.
+template <typename T>
+CircularArrayRef<T> cycle(llvm::ArrayRef<T> Array, size_t Size) {
+  return {Array, Size};
+}
+
+// Creates an std::array whose storage size is constrained under `Bytes`.
+template <typename T, size_t Bytes>
+using ByteConstrainedArray = std::array<T, Bytes / sizeof(T)>;
+
+// A convenient helper to produce a CircularArrayRef from a
+// ByteConstrainedArray.
+template <typename T, size_t N>
+CircularArrayRef<T> cycle(const std::array<T, N> &Container, size_t Size) {
+  return {llvm::ArrayRef<T>(Container.cbegin(), Container.cend()), Size};
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+#endif // LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
diff --git a/libc/utils/benchmarks/LibcBenchmark.cpp b/libc/utils/benchmarks/LibcBenchmark.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcBenchmark.cpp
@@ -0,0 +1,40 @@
+//===-------- `Benchmark` function ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcBenchmark.h"
+#include "llvm/Support/Host.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+void checkRequirements() {
+  const auto &cpu_info = benchmark::CPUInfo::Get();
+  if (cpu_info.scaling_enabled)
+    report_fatal_error(
+        "CPU scaling is enabled, the benchmark real time measurements may be "
+        "noisy and will incur extra overhead.");
+}
+
+HostState HostState::get() {
+  const auto &CpuInfo = benchmark::CPUInfo::Get();
+  HostState H;
+  H.CpuFrequency = CpuInfo.cycles_per_second;
+  H.CpuName = llvm::sys::getHostCPUName().str();
+  for (const auto &BenchmarkCacheInfo : CpuInfo.caches) {
+    CacheInfo CI;
+    CI.Type = BenchmarkCacheInfo.type;
+    CI.Level = BenchmarkCacheInfo.level;
+    CI.Size = BenchmarkCacheInfo.size;
+    CI.NumSharing = BenchmarkCacheInfo.num_sharing;
+    H.Caches.push_back(std::move(CI));
+  }
+  return H;
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/LibcBenchmarkTest.cpp b/libc/utils/benchmarks/LibcBenchmarkTest.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcBenchmarkTest.cpp
@@ -0,0 +1,168 @@
+#include "LibcBenchmark.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <chrono>
+#include <limits>
+#include <queue>
+#include <vector>
+
+using std::chrono::nanoseconds;
+using ::testing::ElementsAre;
+using ::testing::Field;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+namespace llvm {
+namespace libc_benchmarks {
+namespace {
+
+// A simple parameter provider returning a zero initialized vector of size
+// `iterations`.
+struct DummyParameterProvider {
+  std::vector<char> generateBatch(size_t iterations) {
+    return std::vector<char>(iterations);
+  }
+};
+
+class LibcBenchmark : public ::testing::Test {
+public:
+  // A Clock interface suitable for testing.
+  // - Either it returns 0,
+  // - Or a timepoint coming from the `setMeasurements` call.
+  Duration now() {
+    if (!MaybeTimepoints)
+      return {};
+    assert(!MaybeTimepoints->empty());
+    const Duration timepoint = MaybeTimepoints->front();
+    MaybeTimepoints->pop();
+    return timepoint;
+  }
+
+protected:
+  void SetUp() override { Options.Log = BenchmarkLog::Full; }
+
+  void TearDown() override {
+    // We make sure all the expected measurements were performed.
+    if (MaybeTimepoints)
+      EXPECT_THAT(*MaybeTimepoints, IsEmpty());
+  }
+
+  BenchmarkResult run() {
+    return benchmark(Options, ParameterProvider, DummyFunction, *this);
+  }
+
+  void setMeasurements(llvm::ArrayRef<Duration> Durations) {
+    MaybeTimepoints.emplace(); // Create the optional value.
+    Duration current_time = nanoseconds(1);
+    for (const auto &Duration : Durations) {
+      MaybeTimepoints->push(current_time);
+      current_time += Duration;
+      MaybeTimepoints->push(current_time);
+      current_time += nanoseconds(1);
+    }
+  }
+
+  BenchmarkOptions Options;
+
+private:
+  DummyParameterProvider ParameterProvider;
+  static char DummyFunction(char payload) { return payload; }
+  llvm::Optional<std::queue<Duration>> MaybeTimepoints;
+};
+
+TEST_F(LibcBenchmark, MaxSamplesReached) {
+  Options.MaxSamples = 1;
+  const auto Result = run();
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 1);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::MaxSamplesReached);
+}
+
+TEST_F(LibcBenchmark, MaxDurationReached) {
+  Options.MaxDuration = nanoseconds(10);
+  setMeasurements({nanoseconds(11)});
+  const auto Result = run();
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 1);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::MaxDurationReached);
+}
+
+TEST_F(LibcBenchmark, MaxIterationsReached) {
+  Options.InitialIterations = 1;
+  Options.MaxIterations = 20;
+  Options.ScalingFactor = 2;
+  Options.Epsilon = 0; // unreachable.
+  const auto Result = run();
+  EXPECT_THAT(*Result.MaybeBenchmarkLog,
+              ElementsAre(Field(&BenchmarkState::LastSampleIterations, 1),
+                          Field(&BenchmarkState::LastSampleIterations, 2),
+                          Field(&BenchmarkState::LastSampleIterations, 4),
+                          Field(&BenchmarkState::LastSampleIterations, 8),
+                          Field(&BenchmarkState::LastSampleIterations, 16),
+                          Field(&BenchmarkState::LastSampleIterations, 32)));
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 6);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::MaxIterationsReached);
+}
+
+TEST_F(LibcBenchmark, MinSamples) {
+  Options.MinSamples = 4;
+  Options.ScalingFactor = 2;
+  Options.Epsilon = std::numeric_limits<double>::max(); // always reachable.
+  setMeasurements(
+      {nanoseconds(1), nanoseconds(2), nanoseconds(4), nanoseconds(8)});
+  const auto Result = run();
+  EXPECT_THAT(*Result.MaybeBenchmarkLog,
+              ElementsAre(Field(&BenchmarkState::LastSampleIterations, 1),
+                          Field(&BenchmarkState::LastSampleIterations, 2),
+                          Field(&BenchmarkState::LastSampleIterations, 4),
+                          Field(&BenchmarkState::LastSampleIterations, 8)));
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 4);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::PrecisionReached);
+}
+
+TEST_F(LibcBenchmark, Epsilon) {
+  Options.MinSamples = 4;
+  Options.ScalingFactor = 2;
+  Options.Epsilon = std::numeric_limits<double>::max(); // always reachable.
+  setMeasurements(
+      {nanoseconds(1), nanoseconds(2), nanoseconds(4), nanoseconds(8)});
+  const auto Result = run();
+  EXPECT_THAT(*Result.MaybeBenchmarkLog,
+              ElementsAre(Field(&BenchmarkState::LastSampleIterations, 1),
+                          Field(&BenchmarkState::LastSampleIterations, 2),
+                          Field(&BenchmarkState::LastSampleIterations, 4),
+                          Field(&BenchmarkState::LastSampleIterations, 8)));
+  EXPECT_THAT(Result.MaybeBenchmarkLog->size(), 4);
+  EXPECT_THAT(Result.TerminationStatus, BenchmarkStatus::PrecisionReached);
+}
+
+TEST(ArrayRefLoop, Cycle) {
+  std::array<int, 2> array = {1, 2};
+  EXPECT_THAT(cycle(array, 0), ElementsAre());
+  EXPECT_THAT(cycle(array, 1), ElementsAre(1));
+  EXPECT_THAT(cycle(array, 2), ElementsAre(1, 2));
+  EXPECT_THAT(cycle(array, 3), ElementsAre(1, 2, 1));
+  EXPECT_THAT(cycle(array, 4), ElementsAre(1, 2, 1, 2));
+  EXPECT_THAT(cycle(array, 5), ElementsAre(1, 2, 1, 2, 1));
+}
+
+TEST(ByteConstrainedArray, Simple) {
+  EXPECT_THAT((ByteConstrainedArray<char, 17>()), SizeIs(17));
+  EXPECT_THAT((ByteConstrainedArray<uint16_t, 17>()), SizeIs(8));
+  EXPECT_THAT((ByteConstrainedArray<uint32_t, 17>()), SizeIs(4));
+  EXPECT_THAT((ByteConstrainedArray<uint64_t, 17>()), SizeIs(2));
+
+  EXPECT_LE(sizeof(ByteConstrainedArray<char, 17>), 17U);
+  EXPECT_LE(sizeof(ByteConstrainedArray<uint16_t, 17>), 17U);
+  EXPECT_LE(sizeof(ByteConstrainedArray<uint32_t, 17>), 17U);
+  EXPECT_LE(sizeof(ByteConstrainedArray<uint64_t, 17>), 17U);
+}
+
+TEST(ByteConstrainedArray, Cycle) {
+  ByteConstrainedArray<uint64_t, 17> TwoValues{{1UL, 2UL}};
+  EXPECT_THAT(cycle(TwoValues, 5), ElementsAre(1, 2, 1, 2, 1));
+}
+} // namespace
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmark.h b/libc/utils/benchmarks/LibcMemoryBenchmark.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmark.h
@@ -0,0 +1,183 @@
+//===-------- Benchmark memory specific tools -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This file complements the `benchmark` header with memory specific tools and
+// benchmarking facilities.
+
+#ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
+#define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
+
+#include "LibcBenchmark.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Alignment.h"
+#include <cstdlib>
+#include <random>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+//--------------
+// Configuration
+//--------------
+
+// Specifies a range of sizes to explore.
+struct SizeRange {
+  uint32_t From = 0;  // Inclusive
+  uint32_t To = 1024; // Inclusive
+  uint32_t Step = 1;
+};
+
+// An object to define how to test a memory function.
+struct StudyConfiguration {
+  // The number of runs for the study.
+  uint32_t Runs = 1;
+
+  // The size of the buffers (1 buffer for memset but 2 for memcpy or memcmp).
+  // When testing small sizes, it's important to keep the total allocated
+  // size under the size of the L1 cache (usually 16 or 32KiB). The framework
+  // will also use 2KiB of additional L1 memory to store the function
+  // parameters.
+  uint32_t BufferSize = 8192;
+
+  // The range of sizes to exercise.
+  SizeRange Size;
+
+  MaybeAlign AddressAlignment; // Unset: Use the start of the buffer (which is
+                               //        at least cache line aligned),
+                               //     1: Use a random address,
+                               //    >1: Use a random address aligned to the
+                               //        value.
+
+  // The value to use for memset.
+  uint8_t MemsetValue = 0;
+
+  // The mismatch position for memcmp.
+  uint32_t MemcmpMismatchAt = 0; //  0: Buffers compare equal,
+                                 // >0: Buffers compare different at byte N-1.
+};
+
+//--------
+// Results
+//--------
+
+// The time to run one iteration of the function under test for the specified
+// Size.
+struct Measurement {
+  uint32_t Size = 0;
+  Duration Runtime = {};
+};
+
+// The measurements for a specific function.
+struct FunctionMeasurements {
+  std::string Name;
+  std::vector<Measurement> Measurements;
+};
+
+// The root object containing all the data (configuration and measurements).
+struct Study {
+  HostState Host;
+  BenchmarkOptions Options;
+  StudyConfiguration Configuration;
+  SmallVector<FunctionMeasurements, 4> Functions;
+};
+
+// Provides an aligned, dynamically allocated buffer.
+class AlignedBuffer {
+  char *const Buffer = nullptr;
+  size_t Size = 0;
+
+public:
+  static constexpr size_t Alignment = 1024;
+
+  explicit AlignedBuffer(size_t Size)
+      : Buffer(static_cast<char *>(aligned_alloc(1024, Size))), Size(Size) {}
+  ~AlignedBuffer() { free(Buffer); }
+
+  inline char *operator+(size_t Index) { return Buffer + Index; }
+  inline const char *operator+(size_t Index) const { return Buffer + Index; }
+  inline char &operator[](size_t Index) { return Buffer[Index]; }
+  inline const char &operator[](size_t Index) const { return Buffer[Index]; }
+  inline char *begin() { return Buffer; }
+  inline char *end() { return Buffer + Size; }
+};
+
+// Implements the ParameterProvider abstraction needed by the `benchmark`
+// function. This implementation makes sure that all parameters will fit into
+// `StorageSize` bytes. The total memory accessed during the benchmark (the
+// storage for the ParameterProvider plus the memory buffers) should be less
+// than the data L1 cache.
+template <typename Context, size_t StorageSize = 8 * 1024>
+class SmallParameterProvider {
+  using ParameterType = typename Context::ParameterType;
+  ByteConstrainedArray<ParameterType, StorageSize> Parameters;
+  size_t LastIterations;
+  Context &Ctx;
+
+public:
+  explicit SmallParameterProvider(Context &C) : Ctx(C) {}
+  SmallParameterProvider(const SmallParameterProvider &) = delete;
+  SmallParameterProvider &operator=(const SmallParameterProvider &) = delete;
+
+  // Useful to compute the histogram of the size parameter.
+  CircularArrayRef<ParameterType> getLastBatch() const {
+    return cycle(Parameters, LastIterations);
+  }
+
+  // Implements the interface needed by the `benchmark` function.
+  CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
+    LastIterations = Iterations;
+    Ctx.Randomize(Parameters);
+    return getLastBatch();
+  }
+};
+
+// Helper to generate random buffer offsets that satisfy the configuration
+// constraints.
+class OffsetDistribution {
+  std::uniform_int_distribution<uint32_t> Distribution;
+  uint32_t Factor;
+
+public:
+  explicit OffsetDistribution(const StudyConfiguration &Conf);
+
+  template <class Generator> uint32_t operator()(Generator &G) {
+    return Distribution(G) * Factor;
+  }
+};
+
+// Helper to generate random buffer offsets that satisfy the configuration
+// constraints. It is specifically designed to benchmark `memcmp` functions
+// where we may want the Nth byte to differ.
+class MismatchOffsetDistribution {
+  std::uniform_int_distribution<size_t> MismatchIndexSelector;
+  llvm::SmallVector<uint32_t, 16> MismatchIndices;
+  const uint32_t MismatchAt;
+
+public:
+  explicit MismatchOffsetDistribution(const StudyConfiguration &Conf);
+
+  explicit operator bool() const { return !MismatchIndices.empty(); }
+
+  const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const {
+    return MismatchIndices;
+  }
+
+  template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) {
+    const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)];
+    // We need to position the offset so that a mismatch occurs at MismatchAt.
+    if (Size >= MismatchAt)
+      return MismatchIndex - MismatchAt;
+    // Size is too small to trigger the mismatch.
+    return MismatchIndex - Size - 1;
+  }
+};
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+#endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmark.cpp b/libc/utils/benchmarks/LibcMemoryBenchmark.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmark.cpp
@@ -0,0 +1,62 @@
+//===-------- Benchmark memory specific tools -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcMemoryBenchmark.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <random>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// Returns a distribution that samples the buffer so as to satisfy the
+// required alignment.
+// When alignment is set, the distribution is scaled down by `Factor` and
+// scaled up again by the same amount during sampling.
+static std::uniform_int_distribution<uint32_t>
+GetOffsetDistribution(const StudyConfiguration &Conf) {
+  if (Conf.AddressAlignment &&
+      *Conf.AddressAlignment > AlignedBuffer::Alignment)
+    report_fatal_error(
+        "AddressAlignment must be less than or equal to "
+        "AlignedBuffer::Alignment");
+  if (!Conf.AddressAlignment)
+    return std::uniform_int_distribution<uint32_t>(0, 0); // Always 0.
+  // If we test up to Size bytes, the returned offset must stay under
+  // BufferSize - Size.
+  int64_t MaxOffset = Conf.BufferSize;
+  MaxOffset -= Conf.Size.To;
+  MaxOffset -= 1;
+  if (MaxOffset < 0)
+    report_fatal_error(
+        "BufferSize too small to exercise the specified Size configuration");
+  MaxOffset /= Conf.AddressAlignment->value();
+  return std::uniform_int_distribution<uint32_t>(0, MaxOffset);
+}
+
+OffsetDistribution::OffsetDistribution(const StudyConfiguration &Conf)
+    : Distribution(GetOffsetDistribution(Conf)),
+      Factor(Conf.AddressAlignment.valueOrOne().value()) {}
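+
+// Worked example (numbers borrowed from the unit test): with BufferSize = 16,
+// Size.To = 4 and MemcmpMismatchAt = 2, the constructor below precomputes the
+// mismatch indices {5, 9, 13}, i.e. starting at ToSize + 1 and stepping by
+// ToSize. For a comparison of Size >= 2, operator() returns
+// MismatchIndex - 2, so the compared region contains the mismatching byte;
+// for smaller sizes it returns MismatchIndex - Size - 1, so the region
+// [Offset, Offset + Size) stays clear of any mismatch.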
+
+// Precomputes the offsets at which to insert mismatches between the two
+// buffers.
+MismatchOffsetDistribution::MismatchOffsetDistribution(
+    const StudyConfiguration &Conf)
+    : MismatchAt(Conf.MemcmpMismatchAt) {
+  if (MismatchAt <= 1)
+    return;
+  const auto ToSize = Conf.Size.To;
+  for (size_t I = ToSize + 1; I < Conf.BufferSize; I += ToSize)
+    MismatchIndices.push_back(I);
+  if (MismatchIndices.empty())
+    llvm::report_fatal_error("Unable to generate mismatch");
+  MismatchIndexSelector =
+      std::uniform_int_distribution<size_t>(0, MismatchIndices.size() - 1);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmarkMain.h b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.h
@@ -0,0 +1,36 @@
+//===-------- BenchmarkRunner interface -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H
+#define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H
+
+#include "LibcBenchmark.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// Each memory function benchmark implements this interface.
+// It is used by the main function to run all benchmarks in a uniform manner.
+class BenchmarkRunner {
+public:
+  virtual ~BenchmarkRunner() {}
+
+  // Returns a list of all available functions to test.
+  virtual ArrayRef<StringRef> getFunctionNames() const = 0;
+
+  // Performs the benchmarking for a particular FunctionName and Size.
+  virtual BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                                    StringRef FunctionName, size_t Size) = 0;
+};
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+#endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_MAIN_H
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp
@@ -0,0 +1,100 @@
+//===-------- Benchmark --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcMemoryBenchmarkMain.h"
+#include "JSON.h"
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+static cl::opt<std::string>
+    Configuration("conf", cl::desc("Specify configuration filename"),
+                  cl::value_desc("filename"), cl::init(""));
+
+static cl::opt<std::string> Output("o", cl::desc("Specify output filename"),
+                                   cl::value_desc("filename"), cl::init("-"));
+
+extern std::unique_ptr<BenchmarkRunner>
+getRunner(const StudyConfiguration &Conf);
+
+void Main() {
+#ifndef NDEBUG
+  static_assert(
+      false,
+      "For reproducibility benchmarks should not be compiled in DEBUG mode.");
+#endif
+  checkRequirements();
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+      MemoryBuffer::getFileOrSTDIN(Configuration);
+  if (!MB)
+    report_fatal_error(
+        Twine("Could not open configuration file: ").concat(Configuration));
+  auto ErrorOrStudy = ParseJsonStudy((*MB)->getBuffer());
+  if (!ErrorOrStudy)
+    report_fatal_error(ErrorOrStudy.takeError());
+
+  const auto StudyPrototype = *ErrorOrStudy;
+
+  Study S;
+  S.Host = HostState::get();
+  S.Options = StudyPrototype.Options;
+  S.Configuration = StudyPrototype.Configuration;
+
+  const auto Runs = S.Configuration.Runs;
+  const auto &SR = S.Configuration.Size;
+  std::unique_ptr<BenchmarkRunner> Runner = getRunner(S.Configuration);
+  const size_t TotalSteps =
+      Runner->getFunctionNames().size() * Runs * ((SR.To - SR.From) / SR.Step);
+  size_t Steps = 0;
+  for (auto FunctionName : Runner->getFunctionNames()) {
+    FunctionMeasurements FM;
+    FM.Name = FunctionName;
+    for (size_t Run = 0; Run < Runs; ++Run) {
+      for (uint32_t Size = SR.From; Size <= SR.To; Size += SR.Step) {
+        const auto Result = Runner->benchmark(S.Options, FunctionName, Size);
+        Measurement Measurement;
+        Measurement.Runtime = Result.BestGuess;
+        Measurement.Size = Size;
+        FM.Measurements.push_back(Measurement);
+        outs() << format("%3d%% run: %2d / %2d size: %5d ",
+                         (Steps * 100 / TotalSteps), Run, Runs, Size)
+               << FunctionName
+               << " \r";
+        ++Steps;
+      }
+    }
+    S.Functions.push_back(std::move(FM));
+  }
+
+  std::error_code EC;
+  raw_fd_ostream FOS(Output, EC);
+  if (EC)
+    report_fatal_error(Twine("Could not open file: ")
+                           .concat(EC.message())
+                           .concat(", ")
+                           .concat(Output));
+  json::OStream JOS(FOS);
+  SerializeToJson(S, JOS);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
+
+int main(int argc, char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv);
+  llvm::libc_benchmarks::Main();
+  return EXIT_SUCCESS;
+}
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmarkTest.cpp b/libc/utils/benchmarks/LibcMemoryBenchmarkTest.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/LibcMemoryBenchmarkTest.cpp
@@ -0,0 +1,112 @@
+#include "LibcMemoryBenchmark.h"
+#include "llvm/Support/Alignment.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::AllOf;
+using testing::AnyOf;
+using testing::ElementsAre;
+using testing::Ge;
+using testing::Gt;
+using testing::Le;
+using testing::Lt;
+
+namespace llvm {
+namespace libc_benchmarks {
+namespace {
+
+TEST(AlignedBuffer, IsAligned) {
+  AlignedBuffer AB(0);
+  EXPECT_TRUE(isAddrAligned(Align(AlignedBuffer::Alignment), AB.begin()));
+}
+
+TEST(AlignedBuffer, Empty) {
+  AlignedBuffer AB(0);
+  EXPECT_EQ(std::distance(AB.begin(), AB.end()), 0U);
+}
+
+TEST(OffsetDistribution, AlignToBegin) {
+  StudyConfiguration Conf;
+  Conf.BufferSize = 8192;
+  Conf.AddressAlignment = None;
+
+  OffsetDistribution OD(Conf);
+  std::default_random_engine Gen;
+  for (size_t I = 0; I <= 10; ++I)
+    EXPECT_EQ(OD(Gen), 0U);
+}
+
+TEST(OffsetDistribution, NoAlignment) {
+  StudyConfiguration Conf;
+  Conf.BufferSize = 8192;
+  Conf.AddressAlignment = Align::None();
+  Conf.Size.To = 1;
+
+  OffsetDistribution OD(Conf);
+  std::default_random_engine Gen;
+  for (size_t I = 0; I <= 10; ++I)
+    EXPECT_THAT(OD(Gen), AllOf(Ge(0U), Lt(8192U)));
+}
+
+MATCHER_P(IsDivisibleBy, n, "") {
+  *result_listener << "where the remainder is " << (arg % n);
+  return (arg % n) == 0;
+}
+
+TEST(OffsetDistribution, Aligned) {
+  StudyConfiguration Conf;
+  Conf.BufferSize = 8192;
+  Conf.AddressAlignment = Align(16);
+  Conf.Size.To = 1;
+
+  OffsetDistribution OD(Conf);
+  std::default_random_engine Gen;
+  for (size_t I = 0; I <= 10; ++I)
+    EXPECT_THAT(OD(Gen), AllOf(Ge(0U), Lt(8192U), IsDivisibleBy(16U)));
+}
+
+TEST(MismatchOffsetDistribution, EqualBufferDisablesDistribution) {
+  StudyConfiguration Conf;
+  Conf.MemcmpMismatchAt = 0; // Buffers are equal.
+
+  MismatchOffsetDistribution MOD(Conf);
+  EXPECT_FALSE(MOD);
+}
+
+TEST(MismatchOffsetDistribution, DifferentBufferDisablesDistribution) {
+  StudyConfiguration Conf;
+  Conf.MemcmpMismatchAt = 1; // Buffers are different everywhere.
+
+  MismatchOffsetDistribution MOD(Conf);
+  EXPECT_FALSE(MOD);
+}
+
+TEST(MismatchOffsetDistribution, MismatchAt2) {
+  const uint32_t MismatchAt = 2;
+  const uint32_t ToSize = 4;
+  StudyConfiguration Conf;
+  Conf.BufferSize = 16;
+  Conf.MemcmpMismatchAt = MismatchAt; // Buffers are different at position 2.
+  Conf.Size.To = ToSize;
+
+  MismatchOffsetDistribution MOD(Conf);
+  EXPECT_TRUE(MOD);
+  // We test equality up to ToSize (=4) so we need spans of 4 equal bytes
+  // spaced by one mismatch.
+  EXPECT_THAT(MOD.getMismatchIndices(), ElementsAre(5, 9, 13));
+  std::default_random_engine Gen;
+  for (size_t Iterations = 0; Iterations <= 10; ++Iterations) {
+    for (size_t Size = Conf.Size.From; Size <= ToSize; ++Size) {
+      if (Size >= MismatchAt)
+        EXPECT_THAT(MOD(Gen, Size),
+                    AnyOf(5 - MismatchAt, 9 - MismatchAt, 13 - MismatchAt));
+      else
+        EXPECT_THAT(MOD(Gen, Size),
+                    AnyOf(5 - Size - 1, 9 - Size - 1, 13 - Size - 1));
+    }
+  }
+}
+
+} // namespace
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/Memcmp.cpp b/libc/utils/benchmarks/Memcmp.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/Memcmp.cpp
@@ -0,0 +1,86 @@
+//===-------- Benchmark memcmp implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "LibcMemoryBenchmarkMain.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// The context encapsulates the buffers, parameters and the measurement.
+struct MemcmpContext : public BenchmarkRunner {
+  using FunctionPrototype = int (*)(const void *, const void *, size_t);
+
+  struct ParameterType {
+    uint16_t Offset = 0;
+  };
+
+  explicit MemcmpContext(const StudyConfiguration &Conf)
+      : MOD(Conf), OD(Conf), ABuffer(Conf.BufferSize), BBuffer(Conf.BufferSize),
+        PP(*this) {
+    std::uniform_int_distribution<char> Dis;
+    // Generate random buffer A.
+    for (size_t I = 0; I < Conf.BufferSize; ++I)
+      ABuffer[I] = Dis(Gen);
+    // Copy buffer A to B.
+    memcpy(BBuffer.begin(), ABuffer.begin(), Conf.BufferSize);
+    if (Conf.MemcmpMismatchAt == 0)
+      return; // All same.
+    else if (Conf.MemcmpMismatchAt == 1)
+      for (char &c : BBuffer)
+        ++c; // All different.
+    else
+      for (const auto I : MOD.getMismatchIndices())
+        ++BBuffer[I];
+  }
+
+  // Needed by the ParameterProvider to update the current batch of
+  // parameters.
+  void Randomize(MutableArrayRef<ParameterType> Parameters) {
+    if (MOD)
+      for (auto &P : Parameters)
+        P.Offset = MOD(Gen, CurrentSize);
+    else
+      for (auto &P : Parameters)
+        P.Offset = OD(Gen);
+  }
+
+  ArrayRef<StringRef> getFunctionNames() const override {
+    static std::array<StringRef, 1> kFunctionNames = {"memcmp"};
+    return kFunctionNames;
+  }
+
+  BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                            StringRef FunctionName, size_t Size) override {
+    CurrentSize = Size;
+    FunctionPrototype Function =
+        StringSwitch<FunctionPrototype>(FunctionName).Case("memcmp", &::memcmp);
+    return llvm::libc_benchmarks::benchmark(
+        Options, PP, [this, Function, Size](ParameterType p) {
+          return Function(ABuffer + p.Offset, BBuffer + p.Offset, Size);
+        });
+  }
+
+private:
+  std::default_random_engine Gen;
+  MismatchOffsetDistribution MOD;
+  OffsetDistribution OD;
+  size_t CurrentSize = 0;
+  AlignedBuffer ABuffer;
+  AlignedBuffer BBuffer;
+  SmallParameterProvider<MemcmpContext> PP;
+};
+
+std::unique_ptr<BenchmarkRunner> getRunner(const StudyConfiguration &Conf) {
+  return std::make_unique<MemcmpContext>(Conf);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/Memcpy.cpp b/libc/utils/benchmarks/Memcpy.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/Memcpy.cpp
@@ -0,0 +1,69 @@
+//===-------- Benchmark memcpy implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "LibcMemoryBenchmarkMain.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// The context encapsulates the buffers, the parameters, and the measurement
+// logic.
+struct MemcpyContext : public BenchmarkRunner {
+  using FunctionPrototype = void *(*)(void *, const void *, size_t);
+
+  struct ParameterType {
+    uint16_t SrcOffset = 0;
+    uint16_t DstOffset = 0;
+  };
+
+  explicit MemcpyContext(const StudyConfiguration &Conf)
+      : OD(Conf), SrcBuffer(Conf.BufferSize), DstBuffer(Conf.BufferSize),
+        PP(*this) {}
+
+  // Needed by the ParameterProvider to update the current batch of
+  // parameters.
+  void Randomize(MutableArrayRef<ParameterType> Parameters) {
+    for (auto &P : Parameters) {
+      P.DstOffset = OD(Gen);
+      P.SrcOffset = OD(Gen);
+    }
+  }
+
+  ArrayRef<StringRef> getFunctionNames() const override {
+    static std::array<StringRef, 1> kFunctionNames = {"memcpy"};
+    return kFunctionNames;
+  }
+
+  BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                            StringRef FunctionName, size_t Size) override {
+    FunctionPrototype Function =
+        StringSwitch<FunctionPrototype>(FunctionName).Case("memcpy", &::memcpy);
+    return llvm::libc_benchmarks::benchmark(
+        Options, PP, [this, Function, Size](ParameterType p) {
+          Function(DstBuffer + p.DstOffset, SrcBuffer + p.SrcOffset, Size);
+          return DstBuffer + p.DstOffset;
+        });
+  }
+
+private:
+  std::default_random_engine Gen;
+  OffsetDistribution OD;
+  AlignedBuffer SrcBuffer;
+  AlignedBuffer DstBuffer;
+  SmallParameterProvider<MemcpyContext> PP;
+};
+
+std::unique_ptr<BenchmarkRunner> getRunner(const StudyConfiguration &Conf) {
+  return std::make_unique<MemcpyContext>(Conf);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/Memset.cpp b/libc/utils/benchmarks/Memset.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/Memset.cpp
@@ -0,0 +1,66 @@
+//===-------- Benchmark memset implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "LibcMemoryBenchmarkMain.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+namespace llvm {
+namespace libc_benchmarks {
+
+// The context encapsulates the buffers, the parameters, and the measurement
+// logic.
+struct MemsetContext : public BenchmarkRunner {
+  using FunctionPrototype = void *(*)(void *, int, size_t);
+
+  struct ParameterType {
+    uint16_t DstOffset = 0;
+  };
+
+  explicit MemsetContext(const StudyConfiguration &Conf)
+      : OD(Conf), DstBuffer(Conf.BufferSize), MemsetValue(Conf.MemsetValue),
+        PP(*this) {}
+
+  // Needed by the ParameterProvider to update the current batch of
+  // parameters.
+  void Randomize(MutableArrayRef<ParameterType> Parameters) {
+    for (auto &P : Parameters) {
+      P.DstOffset = OD(Gen);
+    }
+  }
+
+  ArrayRef<StringRef> getFunctionNames() const override {
+    static std::array<StringRef, 1> kFunctionNames = {"memset"};
+    return kFunctionNames;
+  }
+
+  BenchmarkResult benchmark(const BenchmarkOptions &Options,
+                            StringRef FunctionName, size_t Size) override {
+    FunctionPrototype Function =
+        StringSwitch<FunctionPrototype>(FunctionName).Case("memset", &::memset);
+    return llvm::libc_benchmarks::benchmark(
+        Options, PP, [this, Function, Size](ParameterType p) {
+          Function(DstBuffer + p.DstOffset, MemsetValue, Size);
+          return DstBuffer + p.DstOffset;
+        });
+  }
+
+private:
+  std::default_random_engine Gen;
+  OffsetDistribution OD;
+  AlignedBuffer DstBuffer;
+  const uint8_t MemsetValue;
+  SmallParameterProvider<MemsetContext> PP;
+};
+
+std::unique_ptr<BenchmarkRunner> getRunner(const StudyConfiguration &Conf) {
+  return std::make_unique<MemsetContext>(Conf);
+}
+
+} // namespace libc_benchmarks
+} // namespace llvm
diff --git a/libc/utils/benchmarks/README.md b/libc/utils/benchmarks/README.md
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/README.md
@@ -0,0 +1,85 @@
+# Libc mem* benchmarks
+
+This framework evaluates and compares the relative performance of memory
+function implementations on a particular host.
+
+It will also be used to track the performance of implementations over time.
+
+## Quick start
+
+### Setup
+
+Make sure `matplotlib`, `scipy` and `numpy` are set up correctly:
+
+```shell
+apt-get install python3-pip
+pip3 install matplotlib scipy numpy
+```
+
+### Run and display `memcpy` benchmark
+
+The following commands run the benchmark and display a 95% confidence
+interval curve of **time per copied byte**. The plot also shows **host
+information** and the **benchmarking configuration**.
+
+```shell
+cd llvm-project
+cmake -B/tmp/build -Sllvm -DLLVM_ENABLE_PROJECTS=libc -DCMAKE_BUILD_TYPE=Release
+make -C /tmp/build -j display-libc-memcpy-benchmark-small
+```
+
+## Benchmarking regimes
+
+Using a profiler to observe size distributions for calls into libc functions,
+it was found that most operations act on a small number of bytes.
+
+Function           | % of calls with size ≤ 128 | % of calls with size ≤ 1024
+------------------ | --------------------------: | ---------------------------:
+memcpy             | 96%                          | 99%
+memset             | 91%                          | 99.9%
+memcmp¹            | 99.5%                        | ~100%
+
+Benchmarking configurations come in two flavors:
+
+ - [small](libc/utils/benchmarks/configuration_small.json)
+   - Exercises sizes up to `1KiB`, representative of normal usage
+   - The data is kept in the `L1` cache to prevent measuring the memory
+     subsystem
+ - [big](libc/utils/benchmarks/configuration_big.json)
+   - Exercises sizes up to `32MiB` to test large operations
+   - Caching effects can show up here, which makes comparisons across
+     different hosts unreliable
+
+_¹ The size refers to the size of the buffers to compare, not the number of
+bytes until the first difference._
+
+## Benchmarking targets
+
+The benchmarking process occurs in two steps:
+
+1. Benchmark the functions and produce a `json` file
+2. Display (or render) the `json` file
+
+Targets are of the form `<action>-libc-<function>-benchmark-<configuration>`
+
+ - `action` is one of:
+   - `run`, runs the benchmark and writes the `json` file
+   - `display`, displays the graph on screen
+   - `render`, renders the graph on disk as a `png` file
+ - `function` is one of: `memcpy`, `memcmp`, `memset`
+ - `configuration` is one of: `small`, `big`
+
+## Superposing curves
+
+It is possible to **merge** several `json` files into a single graph. This is
+useful to **compare** implementations.
+
+In the following example we superpose the curves for `memcpy`, `memset` and
+`memcmp`:
+
+```shell
+> make -C /tmp/build run-libc-memcpy-benchmark-small run-libc-memcmp-benchmark-small run-libc-memset-benchmark-small
+> python libc/utils/benchmarks/render.py /tmp/last-libc-memcpy-benchmark-small.json /tmp/last-libc-memcmp-benchmark-small.json /tmp/last-libc-memset-benchmark-small.json
+```
+
+## Useful `render.py` flags
+
+ - To save the produced graph, use `--output=/tmp/benchmark_curve.png`.
+ - To prevent the graph from appearing on the screen, use `--headless`.
+
+## Under the hood
+
+To learn more about the design decisions behind the benchmarking framework,
+have a look at the [RATIONALE.md](RATIONALE.md) file.
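The measurement files can also be post-processed directly, for example to compare two runs numerically rather than visually. Below is a minimal sketch, assuming a study file produced by one of the `run-*` targets; the field names (`Functions`, `Name`, `Sizes`, `Runtimes`) are the ones `render.py` below consumes, and the input path is illustrative.

```python
import json
from collections import defaultdict

# Path is illustrative; any file produced by a `run-*` target works.
with open("/tmp/last-libc-memcpy-benchmark-small.json") as f:
    study = json.load(f)

for function in study["Functions"]:
    # "Sizes" and "Runtimes" are parallel arrays holding one runtime
    # sample per (run, size) measurement.
    samples = defaultdict(list)
    for size, runtime in zip(function["Sizes"], function["Runtimes"]):
        samples[size].append(runtime)
    # Print the mean runtime per size.
    for size in sorted(samples):
        mean = sum(samples[size]) / len(samples[size])
        print("%s size=%d mean=%.3gs" % (function["Name"], size, mean))
```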
diff --git a/libc/utils/benchmarks/configuration_big.json b/libc/utils/benchmarks/configuration_big.json
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/configuration_big.json
@@ -0,0 +1,24 @@
+{
+    "Options":{
+        "MinDuration":0.001,
+        "MaxDuration":1,
+        "InitialIterations":100,
+        "MaxIterations":10000000,
+        "MinSamples":1,
+        "MaxSamples":1,
+        "Epsilon":0.01,
+        "ScalingFactor":1.4
+    },
+    "Configuration":{
+        "Runs":5,
+        "BufferSize":134217728,
+        "Size":{
+            "From":0,
+            "To":33554432,
+            "Step":1048576
+        },
+        "AddressAlignment":1,
+        "MemsetValue":0,
+        "MemcmpMismatchAt":0
+    }
+}
diff --git a/libc/utils/benchmarks/configuration_small.json b/libc/utils/benchmarks/configuration_small.json
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/configuration_small.json
@@ -0,0 +1,24 @@
+{
+    "Options":{
+        "MinDuration":0.001,
+        "MaxDuration":1,
+        "InitialIterations":100,
+        "MaxIterations":10000000,
+        "MinSamples":4,
+        "MaxSamples":1000,
+        "Epsilon":0.01,
+        "ScalingFactor":1.4
+    },
+    "Configuration":{
+        "Runs":10,
+        "BufferSize":8192,
+        "Size":{
+            "From":0,
+            "To":1024,
+            "Step":1
+        },
+        "AddressAlignment":1,
+        "MemsetValue":0,
+        "MemcmpMismatchAt":0
+    }
+}
diff --git a/libc/utils/benchmarks/render.py b/libc/utils/benchmarks/render.py
new file mode 100644
--- /dev/null
+++ b/libc/utils/benchmarks/render.py
@@ -0,0 +1,175 @@
+"""Reads JSON files produced by the benchmarking framework and renders them.
+
+Installation:
+> apt-get install python3-pip
+> pip3 install matplotlib scipy numpy
+
+Run:
+> python3 render.py <files>
+
+Rendering can occur on disk by specifying the --output option or on screen if
+the --headless flag is not set.
+"""
+
+import argparse
+import collections
+import json
+import math
+import pprint
+import sys
+import matplotlib.pyplot as plt
+from matplotlib.ticker import EngFormatter
+import numpy as np
+import scipy.stats
+
+
+def format_freq(number):
+    """Returns a human readable frequency."""
+    magnitude = 0
+    while math.fabs(number) >= 1000:
+        number /= 1000.0
+        magnitude += 1
+    return "%g%sHz" % (number, ["", "k", "M", "G"][magnitude])
+
+
+def format_size(number):
+    """Returns number in human readable form."""
+    magnitude = 0
+    while number >= 1000 and number % 1000 == 0:
+        number /= 1000
+        magnitude += 1
+    return "%g%s" % (number, ["", "K", "M", "G"][magnitude])
+
+
+def mean_confidence_interval(dataset, confidence=0.95):
+    """Returns the mean and half confidence interval for the dataset."""
+    a = 1.0 * np.array(dataset)
+    n = len(a)
+    m, se = np.mean(a), scipy.stats.sem(a)
+    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
+    return m, h
+
+
+def add_plot(function_name, points):
+    """Plots measurements for a function."""
+    n = len(points.keys())
+    x = np.zeros(n)
+    y = np.zeros(n)
+    yerr = np.zeros(n)
+
+    for i, key in enumerate(sorted(points.keys())):
+        values = points[key]
+        m, e = mean_confidence_interval(values)
+        x[i] = key
+        y[i] = m
+        yerr[i] = e
+
+    plt.plot(x, y, linewidth=1, label=function_name)
+    plt.fill_between(x, y - yerr, y + yerr, alpha=0.5)
+
+
+def get_title(host):
+    """Formats the Host object into a title for the plot."""
+    cpu_name = host["CpuName"]
+    cpu_freq = format_freq(host["CpuFrequency"])
+    cache_strings = []
+    for cache in host["Caches"]:
+        prefix = {
+            "Instruction": "i",
+            "Data": "d",
+            "Unified": "u",
+        }.get(cache["Type"])
+        cache_strings.append(r"%sL_%d %s_{/%d}" %
+                             (prefix, cache["Level"], format_size(
+                                 cache["Size"]), cache["NumSharing"]))
+    title = "%s (%s)" % (cpu_name, cpu_freq)
+    subtitle = r"$" + ", ".join(sorted(cache_strings)) + r"$"
".join(sorted(cache_strings)) + r"$" + return title + "\n" + subtitle + + +def get_host(jsons): + """Returns the host of the different json objects iff they are all the same. + """ + host = None + for root in jsons: + if host and host != root["Host"]: + sys.exit("The datasets are not coming from the same Host") + if not host: + host = root["Host"] + return host + + +def get_configuration(jsons): + """Returns the configuration of the different json objects iff they are all + the same. + """ + config = None + for root in jsons: + if config and config != root["Configuration"]: + return None + if not config: + config = root["Configuration"] + return config + + +def setup_graphs(files): + """Setups the graphs to render from the json files.""" + jsons = [] + for file in files: + with open(file) as json_file: + jsons.append(json.load(json_file)) + if not jsons: + sys.exit("Nothing to process") + + for root in jsons: + for function in root["Functions"]: + function_name = function["Name"] + sizes = function["Sizes"] + runtimes = function["Runtimes"] + assert len(sizes) == len(runtimes) + values = collections.defaultdict(lambda: []) + for i in range(len(sizes)): + values[sizes[i]].append(runtimes[i]) + add_plot(function_name, values) + + config = get_configuration(jsons) + if config: + plt.figtext( + 0.95, + 0.15, + pprint.pformat(config), + verticalalignment="bottom", + horizontalalignment="right", + multialignment="left", + fontsize="small", + bbox=dict(boxstyle="round", facecolor="wheat")) + + axes = plt.gca() + axes.set_title(get_title(get_host(jsons))) + axes.set_ylim(bottom=0) + axes.set_xlabel("Size") + axes.set_ylabel("Time") + axes.xaxis.set_major_formatter(EngFormatter(unit="B")) + axes.yaxis.set_major_formatter(EngFormatter(unit="s")) + plt.legend() + plt.grid() + + +def main(): + parser = argparse.ArgumentParser( + description="Process benchmark json files.") + parser.add_argument("files", nargs="+", help="The json files to read from.") + parser.add_argument("--output", help="The output file to write the graph.") + parser.add_argument( + "--headless", + help="If set do not display the graph.", + action="store_true") + args = parser.parse_args() + setup_graphs(args.files) + if args.output: + plt.savefig(args.output) + if not args.headless: + plt.show() + +if __name__ == "__main__": + main()