diff --git a/libc/benchmarks/automemcpy/FunctionDescriptor.h b/libc/benchmarks/automemcpy/FunctionDescriptor.h new file mode 100644 --- /dev/null +++ b/libc/benchmarks/automemcpy/FunctionDescriptor.h @@ -0,0 +1,159 @@ +//===-- Pod structs to describe a memory function----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H +#define LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H + +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { +namespace automemcpy { + +// Boilerplate code to be able to sort and hash types. +#define COMPARABLE_AND_HASHABLE(T, ...) \ + inline auto asTuple() const { return std::tie(__VA_ARGS__); } \ + bool operator==(const T &O) const { return asTuple() == O.asTuple(); } \ + bool operator<(const T &O) const { return asTuple() < O.asTuple(); } \ + struct Hasher { \ + std::size_t operator()(const T &K) const { \ + return llvm::hash_value(K.asTuple()); \ + } \ + }; + +// Represents the maximum value for the size parameter of a memory function. +// This is an `int` so we can use it as an expression in Z3. +// It also allows for a more readable and compact representation when storing +// the SizeSpan in the autogenerated C++ file. +static constexpr int kMaxSize = INT_MAX; + +// This mimics the `Arg` type in libc/src/string/memory_utils/elements.h without +// having to depend on it. +enum class AlignArg { _1, _2, ARRAY_SIZE }; + +// Describes a range of sizes. +// We use the begin/end representation instead of first/last to allow for empty +// range (i.e. 
Begin == End) +struct SizeSpan { + size_t Begin = 0; + size_t End = 0; + + COMPARABLE_AND_HASHABLE(SizeSpan, Begin, End) +}; + +// Describes a contiguous region. +// In such a region all sizes are handled individually. +// e.g. with Span = {0, 2}; +// if(size == 0) return Handle<0>(); +// if(size == 1) return Handle<1>(); +struct Contiguous { + SizeSpan Span; + + COMPARABLE_AND_HASHABLE(Contiguous, Span) +}; + +// This struct represents a range of sizes over which to use an overlapping +// strategy. An overlapping strategy of size N handles all sizes from N to 2xN. +// The span may represent several contiguous overlaps. +// e.g. with Span = {16, 128}; +// if(size >= 16 and size < 32) return Handle>(); +// if(size >= 32 and size < 64) return Handle>(); +// if(size >= 64 and size < 128) return Handle>(); +struct Overlap { + SizeSpan Span; + + COMPARABLE_AND_HASHABLE(Overlap, Span) +}; + +// Describes a region using a loop handling BlockSize bytes at a time. The +// remaining bytes of the loop are handled with an overlapping operation. +struct Loop { + SizeSpan Span; + size_t BlockSize = 0; + + COMPARABLE_AND_HASHABLE(Loop, Span, BlockSize) +}; + +// Same as `Loop` but starts by aligning a buffer on `Alignment` bytes. +// A first operation handling 'Alignment` bytes is performed followed by a +// sequence of Loop.BlockSize bytes operation. The Loop starts processing from +// the next aligned byte in the chosen buffer. The remaining bytes of the loop +// are handled with an overlapping operation. +struct AlignedLoop { + Loop Loop; + size_t Alignment = 0; // Size of the alignment. + AlignArg AlignTo = AlignArg::_1; // Which buffer to align. + + COMPARABLE_AND_HASHABLE(AlignedLoop, Loop, Alignment, AlignTo) +}; + +// Some processors offer special instruction to handle the memory function +// completely, we refer to such instructions as accelerators. 
+struct Accelerator { + SizeSpan Span; + + COMPARABLE_AND_HASHABLE(Accelerator, Span) +}; + +// The memory functions are assembled out of primitives that can be implemented +// with regular scalar operations (SCALAR), with the help of vector or bitcount +// instructions (NATIVE) or by deferring it to the compiler (BUILTIN). +enum class ElementTypeClass { + SCALAR, + NATIVE, + BUILTIN, +}; + +// A simple enum to categorize which function is being implemented. +enum class FunctionType { + MEMCPY, + MEMCMP, + BCMP, + MEMSET, + BZERO, +}; + +// This struct describes the skeleton of the implementation, it does not go into +// every detail but is enough to uniquely identify the implementation. +struct FunctionDescriptor { + FunctionType Type; + Optional Contiguous; + Optional Overlap; + Optional Loop; + Optional AlignedLoop; + Optional Accelerator; + ElementTypeClass ElementClass; + + COMPARABLE_AND_HASHABLE(FunctionDescriptor, Type, Contiguous, Overlap, Loop, + AlignedLoop, Accelerator, ElementClass) + + inline size_t id() const { return llvm::hash_value(asTuple()); } +}; + +// Same as above but with the function name. +struct NamedFunctionDescriptor { + StringRef Name; + FunctionDescriptor Desc; +}; + +template llvm::hash_code hash_value(const ArrayRef &V) { + return llvm::hash_combine_range(V.begin(), V.end()); +} +template llvm::hash_code hash_value(const T &O) { + return llvm::hash_value(O.asTuple()); +} + +} // namespace automemcpy +} // namespace llvm + +#endif /* LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H */ diff --git a/libc/benchmarks/automemcpy/README.md b/libc/benchmarks/automemcpy/README.md new file mode 100644 --- /dev/null +++ b/libc/benchmarks/automemcpy/README.md @@ -0,0 +1,111 @@ +This folder contains an implementation of [automemcpy: A framework for automatic generation of fundamental memory operations](https://research.google/pubs/pub50338/). 
+ +It uses the [Z3 theorem prover](https://github.com/Z3Prover/z3) to enumerate a subset of valid memory function implementations. These implementations are then materialized as C++ code and can be [benchmarked](../) against various [size distributions](../distributions). This process helps the design of efficient implementations for a particular environment (size distribution, processor or custom compilation options). + +This is not enabled by default, as it is mostly useful when working on tuning the library implementation. To build it, use `LIBC_BUILD_AUTOMEMCPY=ON` (see below). + +## Prerequisites + +You may need to install `Z3` from source if it's not available on your system. +Here we show instructions to install it into `<install dir>`. +You may need to `sudo` to `make install`. + +```shell +mkdir -p ~/git +cd ~/git +git clone https://github.com/Z3Prover/z3.git +cd z3 && python scripts/mk_make.py --prefix=<install dir> +cd build +make -j +make install +``` + +## Configuration + +```shell +mkdir -p <build dir> +cd <llvm-project dir>/llvm +cmake -DCMAKE_C_COMPILER=/usr/bin/clang \ + -DCMAKE_CXX_COMPILER=/usr/bin/clang++ \ + -DLLVM_ENABLE_PROJECTS="libc" \ + -DLLVM_ENABLE_Z3_SOLVER=ON \ + -DLLVM_Z3_INSTALL_DIR=<install dir> \ + -DLIBC_BUILD_AUTOMEMCPY=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -B<build dir> +``` + +## Targets and compilation + +There are three main CMake targets + 1. `automemcpy_implementations` + - runs `Z3` and materializes valid memory functions as C++ code, a message will display its on-disk location. + - the source code is then compiled using the native host optimizations (i.e. `-march=native` or `-mcpu=native` depending on the architecture). + 2. `automemcpy` + - the binary that benchmarks the autogenerated implementations. + 3. `automemcpy_result_analyser` + - the binary that analyses the benchmark results. + +You only need to compile the two binaries, as they both pull the autogenerated code as a dependency. 
+ +```shell +make -C <build dir> -j automemcpy automemcpy_result_analyser +``` + +## Running the benchmarks + +Make sure to save the results of the benchmark as a JSON file. + +```shell +<build dir>/bin/automemcpy --benchmark_out_format=json --benchmark_out=<results dir>/results.json +``` + +### Additional useful options + + + - `--benchmark_min_time=.2` + + By default, each function is benchmarked for at least one second; here we lower it to 200ms. + + - `--benchmark_filter="BM_Memset|BM_Bzero"` + + By default, all functions are benchmarked; here we restrict them to `memset` and `bzero`. + +Other options might be useful; use `--help` for more information. + +## Analyzing the benchmarks + +Analysis is performed by running `automemcpy_result_analyser` on one or more JSON result files. + +```shell +<build dir>/bin/automemcpy_result_analyser <results dir>/results.json +``` + +What it does: + 1. Gathers all throughput values for each function / distribution pair and picks the median one.\ + This allows picking a representative value over many runs of the benchmark. Please make sure all the runs happen under similar circumstances. + + 2. For each distribution, look at the span of throughputs for functions of the same type (e.g. For distribution `A`, memcpy throughput spans from 2GiB/s to 5GiB/s). + + 3. For each distribution, give a normalized score to each function (e.g. For distribution `A`, function `M` scores 0.65).\ + This score is then turned into a grade `EXCELLENT`, `VERY_GOOD`, `GOOD`, `PASSABLE`, `INADEQUATE`, `MEDIOCRE`, `BAD` - so that each distribution categorizes how functions perform according to them. + + 4. A [Majority Judgement](https://en.wikipedia.org/wiki/Majority_judgment) process is then used to categorize each function. This enables finer analysis of how distributions agree on which function is better. In the following example, `Function_1` and `Function_2` are rated `EXCELLENT` but looking at the grade's distribution might help decide which is best. 
+ +| | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | INADEQUATE | MEDIOCRE | BAD | +|------------|:---------:|:---------:|:----:|:--------:|:----------:|:--------:|:---:| +| Function_1 | 7 | 1 | 2 | | | | | +| Function_2 | 6 | 4 | | | | | | + +The tool outputs the histogram of grades for each function. In case of a tie, other dimensions might help decide (e.g. code size, performance on other microarchitectures). + +``` +EXCELLENT |█▁▂ | Function_0 +EXCELLENT |█▅ | Function_1 +VERY_GOOD |▂█▁ ▁ | Function_2 +GOOD | ▁█▄ | Function_3 +PASSABLE | ▂▆▄█ | Function_4 +INADEQUATE | ▃▃█▁ | Function_5 +MEDIOCRE | █▆▁| Function_6 +BAD | ▁▁█| Function_7 +```