diff --git a/libc/src/__support/swisstable/asimd.h b/libc/src/__support/swisstable/asimd.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/asimd.h
@@ -0,0 +1,86 @@
+//===-- SwissTable ASIMD Specialization -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/swisstable/common.h"
+#include <arm_neon.h>
+
+namespace __llvm_libc::cpp::swisstable {
+
+// According to abseil-cpp, ARM's 16-byte ASIMD operations may introduce
+// too much latency, so we operate on 8-byte groups instead.
+// Reference:
+// https://github.com/abseil/abseil-cpp/commit/6481443560a92d0a3a55a31807de0cd712cd4f88
+// With ASIMD, some bitmasks are not iterable, because we do not want to
+// clear the lower bits in each stride with an extra `AND` operation.
+using BitMask = BitMaskAdaptor<uint64_t, 0x8080808080808080ULL, 8>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+  int8x8_t data;
+
+  // Load a group of control words from an arbitrary address.
+  static Group load(const void *__restrict addr) {
+    return {vld1_s8(static_cast<const int8_t *>(addr))};
+  }
+
+  // Load a group of control words from an aligned address.
+  // Notice that aligned and unaligned loads are the same
+  // instruction in ASIMD.
+  static Group aligned_load(const void *__restrict addr) {
+    return {vld1_s8(static_cast<const int8_t *>(addr))};
+  }
+
+  // Store a group of control words to an aligned address.
+  void aligned_store(void *addr) const {
+    vst1_s8(static_cast<int8_t *>(addr), data);
+  }
+
+  // Find out the lanes equal to the given byte and return the bitmask
+  // with corresponding bits set.
+  IteratableBitMask match_byte(uint8_t byte) const {
+    auto duplicated = vdup_n_s8(static_cast<int8_t>(byte));
+    auto cmp = vceq_s8(data, duplicated);
+    auto converted = vget_lane_u64(vreinterpret_u64_u8(cmp), 0);
+    return {converted & BitMask::MASK};
+  }
+
+  // Find out the lanes equal to EMPTY and return the bitmask
+  // with corresponding bits set.
+  BitMask mask_empty() const {
+    auto duplicated = vdup_n_s8(static_cast<int8_t>(EMPTY));
+    auto cmp = vceq_s8(data, duplicated);
+    auto converted = vget_lane_u64(vreinterpret_u64_u8(cmp), 0);
+    return {converted};
+  }
+
+  // Find out the lanes equal to EMPTY or DELETED (highest bit set) and
+  // return the bitmask with corresponding bits set.
+  BitMask mask_empty_or_deleted() const {
+    auto converted = vget_lane_u64(vreinterpret_u64_s8(data), 0);
+    return {converted & BitMask::MASK};
+  }
+
+  // Find out the lanes representing full cells (highest bit clear) and
+  // return the bitmask with corresponding bits set.
+  BitMask mask_full() const { return mask_empty_or_deleted().invert(); }
+
+  // Performs the following transformation on all bytes in the group:
+  // - `EMPTY => EMPTY`
+  // - `DELETED => EMPTY`
+  // - `FULL => DELETED`
+  Group convert_special_to_empty_and_full_to_deleted() const {
+    auto dup = vdup_n_s8(static_cast<int8_t>(0x80));
+    auto zero = vdup_n_s8(0x00);
+    auto special = vcgt_s8(zero, data);
+    return {vorr_s8(dup, vreinterpret_s8_u8(special))};
+  }
+};
+
+} // namespace __llvm_libc::cpp::swisstable
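Under the 8-bit stride above, vceq_s8 turns every matching lane into 0xFF, the AND with BitMask::MASK keeps only bit 7 of each lane, and recovering the slot index is a count-trailing-zeros divided by 8. A scalar sketch of that index arithmetic, independent of NEON (first_match_index is a hypothetical helper for illustration, not part of this patch):

#include <stdint.h>
#include <stdio.h>

// Scalar model: each 8-bit stride of `mask` holds the comparison result
// for one control byte; only bit 7 of each stride survives the AND with
// 0x8080808080808080.
static int first_match_index(uint64_t mask) {
  // __builtin_ctzll plays the role of unsafe_ctz; mask must be nonzero.
  return __builtin_ctzll(mask) / 8;
}

int main() {
  // Lanes 1 and 6 matched: bits 15 and 55 remain set after masking.
  uint64_t mask = (1ULL << 15) | (1ULL << 55);
  printf("%d\n", first_match_index(mask)); // prints 1
}
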
diff --git a/libc/src/__support/swisstable/common.h b/libc/src/__support/swisstable/common.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/common.h
@@ -0,0 +1,89 @@
+//===-- SwissTable Common Definitions ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <stddef.h>
+#include <stdint.h>
+
+namespace __llvm_libc::cpp::swisstable {
+// Special values of the control byte.
+using CtrlWord = uint8_t;
+using HashWord = uint64_t;
+constexpr static inline CtrlWord EMPTY = 0b11111111u;
+constexpr static inline CtrlWord DELETED = 0b10000000u;
+
+// Implementations of the bitmask.
+// The backend word type may vary depending on different microarchitectures.
+// For example, with x86 SSE2, the bitmask is just the 16-bit unsigned integer
+// corresponding to lanes in a SIMD register.
+template <typename T, T WORD_MASK, size_t WORD_STRIDE> struct BitMaskAdaptor {
+  // A mask covering every bit that may be set in a bitmask word.
+  constexpr static inline T MASK = WORD_MASK;
+  // A stride in the bitmask may use multiple bits.
+  constexpr static inline size_t STRIDE = WORD_STRIDE;
+
+  T word;
+
+  // Invert zeros and ones inside the word.
+  BitMaskAdaptor invert() const { return {static_cast<T>(word ^ WORD_MASK)}; }
+
+  // Operator helper to do bit manipulations.
+  BitMaskAdaptor operator^(T value) const {
+    return {static_cast<T>(this->word ^ value)};
+  }
+
+  // Check if any bit is set inside the word.
+  bool any_bit_set() const { return word != 0; }
+
+  // Count trailing zeros with respect to stride.
+  size_t trailing_zeros() const { return safe_ctz(word) / WORD_STRIDE; }
+
+  // Count trailing zeros with respect to stride. (Assumes the bitmask is
+  // nonzero.)
+  size_t lowest_set_bit_nonzero() const {
+    return unsafe_ctz(word) / WORD_STRIDE;
+  }
+
+  // Count leading zeros with respect to stride.
+  size_t leading_zeros() const { return safe_clz(word) / WORD_STRIDE; }
+};
+
+template <class BitMask> struct IteratableBitMaskAdaptor : public BitMask {
+  // Use the bitmask as an iterator. Update the state and return the current
+  // lowest set bit. To make the bitmask iterable, each stride must contain
+  // zero or exactly one set bit.
+  void remove_lowest_bit() {
+    // Remove the lowest set bit inside the word:
+    //   word              = 011110100 (original value)
+    //   word - 1          = 011110011 (invert all bits up to the lowest set bit)
+    //   word & (word - 1) = 011110000 (value with the lowest set bit cleared)
+    this->word = this->word & (this->word - 1);
+  }
+  using value_type = size_t;
+  using iterator = BitMask;
+  using const_iterator = BitMask;
+  size_t operator*() { return this->lowest_set_bit_nonzero(); }
+  IteratableBitMaskAdaptor &operator++() {
+    this->remove_lowest_bit();
+    return *this;
+  }
+  IteratableBitMaskAdaptor begin() { return *this; }
+  IteratableBitMaskAdaptor end() { return {0}; }
+  bool operator==(const IteratableBitMaskAdaptor &other) {
+    return this->word == other.word;
+  }
+  bool operator!=(const IteratableBitMaskAdaptor &other) {
+    return this->word != other.word;
+  }
+};
+
+} // namespace __llvm_libc::cpp::swisstable
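The begin/end/operator* surface above exists so that probing code can consume a match bitmask with a plain range-for. A minimal scalar sketch of the same protocol, using a stride-1 word like the SSE2 backend produces (self-contained; it mirrors the adaptor rather than instantiating it):

#include <stdint.h>
#include <stdio.h>

int main() {
  // A stride-1 bitmask as the SSE2 backend would produce: bits 2 and 9 set.
  uint16_t word = (1u << 2) | (1u << 9);
  // Equivalent to `for (size_t idx : group.match_byte(...))`:
  // operator* is lowest_set_bit_nonzero, operator++ is remove_lowest_bit,
  // and end() compares equal once the word reaches zero.
  while (word != 0) {
    int idx = __builtin_ctz(word); // lowest_set_bit_nonzero for STRIDE == 1
    printf("match at slot %d\n", idx);
    word &= (word - 1); // remove_lowest_bit
  }
}
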
diff --git a/libc/src/__support/swisstable/dispatch.h b/libc/src/__support/swisstable/dispatch.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/dispatch.h
@@ -0,0 +1,16 @@
+//===-- SwissTable Platform Dispatch ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "src/__support/architectures.h"
+#if defined(LLVM_LIBC_ARCH_X86_64) && defined(__SSE2__)
+#include "src/__support/swisstable/sse2.h"
+#elif defined(LLVM_LIBC_ARCH_ANY_ARM) && defined(__ARM_NEON) &&               \
+    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#include "src/__support/swisstable/asimd.h"
+#else
+#include "src/__support/swisstable/generic.h"
+#endif
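Consumers are expected to include only dispatch.h; every backend exports the same Group, BitMask, and IteratableBitMask names. A sketch of how a lookup routine might drive this interface (the table layout, the h2 fragment, and find_slot itself are assumptions for illustration, not part of this patch):

#include <stddef.h>
#include <stdint.h>

#include "src/__support/swisstable/dispatch.h"

using namespace __llvm_libc::cpp::swisstable;

// Hypothetical lookup: `ctrl` is the control-byte array of a power-of-two
// sized table, `mask` its size minus one, and `h2` the 7-bit hash fragment
// stored in full control bytes.
size_t find_slot(const CtrlWord *ctrl, size_t mask, uint8_t h2, size_t hash) {
  for (size_t pos = hash & mask;; pos = (pos + sizeof(Group)) & mask) {
    Group group = Group::load(ctrl + pos);
    // Probe every lane whose control byte equals h2.
    for (size_t offset : group.match_byte(h2)) {
      size_t slot = (pos + offset) & mask;
      // A real table would compare keys here and continue on mismatch.
      return slot;
    }
    // An EMPTY lane terminates the probe chain: the key is absent.
    if (group.mask_empty().any_bit_set())
      return SIZE_MAX;
  }
}
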
diff --git a/libc/src/__support/swisstable/generic.h b/libc/src/__support/swisstable/generic.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/generic.h
@@ -0,0 +1,149 @@
+//===-- SwissTable Generic Fallback -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "src/__support/endian.h"
+#include "src/__support/swisstable/common.h"
+#include "src/string/memory_utils/memcpy_implementations.h"
+
+namespace __llvm_libc::cpp::swisstable {
+
+// Helper function to spread a byte across the whole word.
+// Cumulatively, the procedure looks like:
+//   byte                = 0x00000000000000ff
+//   byte | (byte << 8)  = 0x000000000000ffff
+//   byte | (byte << 16) = 0x00000000ffffffff
+//   byte | (byte << 32) = 0xffffffffffffffff
+constexpr static inline uintptr_t repeat(uintptr_t byte) {
+  size_t shift_amount = 8;
+  while (shift_amount < sizeof(uintptr_t) * 8) {
+    byte |= byte << shift_amount;
+    shift_amount <<= 1;
+  }
+  return byte;
+}
+
+using BitMask = BitMaskAdaptor<uintptr_t, repeat(0x80), 8>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+  uintptr_t data;
+
+  // Load a group of control words from an arbitrary address.
+  static Group load(const void *__restrict addr) {
+    uintptr_t data;
+    inline_memcpy(reinterpret_cast<char *>(&data),
+                  static_cast<const char *>(addr), sizeof(data));
+    return {data};
+  }
+
+  // Load a group of control words from an aligned address.
+  // The address must be aligned to the group size so that the
+  // direct word load below is well-defined.
+  static Group aligned_load(const void *__restrict addr) {
+    uintptr_t data = *static_cast<const uintptr_t *>(addr);
+    return {data};
+  }
+
+  // Store a group of control words to an aligned address.
+  void aligned_store(void *addr) const {
+    *static_cast<uintptr_t *>(addr) = data;
+  }
+
+  // Find out the lanes equal to the given byte and return the bitmask
+  // with corresponding bits set.
+  IteratableBitMask match_byte(uint8_t byte) const {
+    // Given byte = 0x10, suppose the data is:
+    //
+    //       data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]
+    //
+    // First, we compare the byte using the XOR operation:
+    //
+    //       [ 0x10 | 0x10 | 0x10 | 0x10 | ... ]      (0)
+    //     ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]      (1)
+    //     = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ]      (2)
+    //
+    // Notice that the equal positions are now 0x00, so if we subtract 0x01
+    // from every byte, those positions have to borrow from the upper
+    // bits (assume no borrow from the hidden parts):
+    //
+    //       [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ]      (2)
+    //     - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ]      (3)
+    //     = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ]      (4)
+    //
+    // But there may be some bytes whose highest bit is already set after
+    // the XOR operation. To rule out these positions, we AND the result
+    // with the NOT of the XOR result:
+    //
+    //       [ 0xFF | 0xFF | 0xEF | 0x1E | ... ]      (5, NOT (2))
+    //     & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ]      (4)
+    //     = [ 0xFE | 0xFF | 0x0F | 0x00 | ... ]      (6)
+    //
+    // To make the bitmask iterable, only one bit may be set in each
+    // stride, so we AND each byte with 0x80 and keep only the highest bit:
+    //
+    //       [ 0xFE | 0xFF | 0x0F | 0x00 | ... ]      (6)
+    //     & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ]      (7)
+    //     = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ]      (8)
+    //
+    // However, false positives are possible. For example, if the data is
+    // [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ], the second lane is also
+    // reported as a match. This only happens when a key differs from the
+    // searched byte in nothing but its lowest bit. The claims are:
+    //
+    // - This never happens for `EMPTY` and `DELETED`, only for full
+    //   entries.
+    // - The check for key equality will catch these.
+    // - This only happens if there is at least 1 true match.
+    // - The chance of this happening is very low (< 1% chance per byte).
+    auto cmp = data ^ repeat(byte);
+    auto result =
+        Endian::to_little_endian((cmp - repeat(0x01)) & ~cmp & repeat(0x80));
+    return {result};
+  }
+
+  // Find out the lanes equal to EMPTY and return the bitmask
+  // with corresponding bits set.
+  BitMask mask_empty() const {
+    // If the high bit is set, the byte must be either
+    // 1111_1111 (EMPTY) or 1000_0000 (DELETED), so we can isolate EMPTY
+    // by checking that the top two bits are both 1, i.e. by ANDing the
+    // word with itself shifted left by one.
+    return {Endian::to_little_endian(data & (data << 1) & repeat(0x80))};
+  }
+
+  // Find out the lanes equal to EMPTY or DELETED (highest bit set) and
+  // return the bitmask with corresponding bits set.
+  BitMask mask_empty_or_deleted() const {
+    return {Endian::to_little_endian(data) & repeat(0x80)};
+  }
+
+  // Find out the lanes representing full cells (highest bit clear) and
+  // return the bitmask with corresponding bits set.
+  BitMask mask_full() const { return mask_empty_or_deleted().invert(); }
+
+  // Performs the following transformation on all bytes in the group:
+  // - `EMPTY => EMPTY`
+  // - `DELETED => EMPTY`
+  // - `FULL => DELETED`
+  Group convert_special_to_empty_and_full_to_deleted() const {
+    // Set the highest bit only for positions whose highest bit was not
+    // set before, i.e. the full positions:
+    //
+    //   data  = [ 00000000 | 11111111 | 10000000 | ... ]
+    //   ~data = [ 11111111 | 00000000 | 01111111 | ... ]
+    //   full  = [ 10000000 | 00000000 | 00000000 | ... ]
+    auto full = (~data) & repeat(0x80);
+
+    // Invert the bits and convert `01111111` back to `10000000` by
+    // adding `1` at that bit; `full >> 7` puts a 1 in the lowest bit of
+    // exactly the full bytes, and the carry will not propagate outside
+    // the byte:
+    //   ~full     = [ 01111111 | 11111111 | 11111111 | ... ]
+    //   full >> 7 = [ 00000001 | 00000000 | 00000000 | ... ]
+    //   result    = [ 10000000 | 11111111 | 11111111 | ... ]
+    return {~full + (full >> 7)};
+  }
+};
+
+} // namespace __llvm_libc::cpp::swisstable
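The SWAR trick in match_byte is easy to validate in isolation. A self-contained reproduction on a little-endian uint64_t, mirroring the worked example in the comment (repeat is re-declared locally so the snippet compiles on its own):

#include <stdint.h>
#include <stdio.h>

static uint64_t repeat(uint64_t byte) {
  for (unsigned s = 8; s < 64; s <<= 1)
    byte |= byte << s;
  return byte;
}

int main() {
  // Control bytes 0x10, 0x10, 0x00, 0xF1 in slots 0..3 (little endian).
  uint64_t data = 0x00000000F1001010ULL;
  uint64_t cmp = data ^ repeat(0x10);
  uint64_t mask = (cmp - repeat(0x01)) & ~cmp & repeat(0x80);
  // Bits 7 and 15 are set: slots 0 and 1 match.
  for (uint64_t m = mask; m != 0; m &= m - 1)
    printf("match at slot %d\n", (int)(__builtin_ctzll(m) / 8));
}
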
diff --git a/libc/src/__support/swisstable/sse2.h b/libc/src/__support/swisstable/sse2.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/sse2.h
@@ -0,0 +1,78 @@
+//===-- SwissTable SSE2 Specialization --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/swisstable/common.h"
+#include <immintrin.h>
+
+namespace __llvm_libc::cpp::swisstable {
+
+// With SSE2, every bitmask is iterable, because `_mm_movemask_epi8`
+// encodes each lane with a single bit.
+using BitMask = BitMaskAdaptor<uint16_t, 0xffffu, 1>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+  __m128i data;
+
+  // Load a group of control words from an arbitrary address.
+  static Group load(const void *__restrict addr) {
+    return {_mm_loadu_si128(static_cast<const __m128i *>(addr))};
+  }
+
+  // Load a group of control words from an aligned address.
+  static Group aligned_load(const void *__restrict addr) {
+    return {_mm_load_si128(static_cast<const __m128i *>(addr))};
+  }
+
+  // Store a group of control words to an aligned address.
+  void aligned_store(void *addr) const {
+    _mm_store_si128(static_cast<__m128i *>(addr), data);
+  }
+
+  // Find out the lanes equal to the given byte and return the bitmask
+  // with corresponding bits set.
+  IteratableBitMask match_byte(uint8_t byte) const {
+    auto cmp = _mm_cmpeq_epi8(data, _mm_set1_epi8(byte));
+    auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(cmp));
+    return {bitmask};
+  }
+
+  // Find out the lanes equal to EMPTY and return the bitmask
+  // with corresponding bits set.
+  BitMask mask_empty() const { return match_byte(EMPTY); }
+
+  // Find out the lanes equal to EMPTY or DELETED (highest bit set) and
+  // return the bitmask with corresponding bits set.
+  BitMask mask_empty_or_deleted() const {
+    auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(data));
+    return {bitmask};
+  }
+
+  // Find out the lanes representing full cells (highest bit clear) and
+  // return the bitmask with corresponding bits set.
+  BitMask mask_full() const { return mask_empty_or_deleted().invert(); }
+
+  // Performs the following transformation on all bytes in the group:
+  // - `EMPTY => EMPTY`
+  // - `DELETED => EMPTY`
+  // - `FULL => DELETED`
+  Group convert_special_to_empty_and_full_to_deleted() const {
+    // Recall that EMPTY and DELETED are distinguished from full cells by
+    // their highest bit, which makes them negative when considered as
+    // signed integers, while full cells are non-negative. So we first
+    // identify the strictly negative lanes, then OR the comparison result
+    // with 0x80: special lanes (all ones) remain EMPTY, while full lanes
+    // (all zeros) become DELETED.
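+    //
+    // A worked example on three lanes (shown as unsigned bytes):
+    //   data      = [ 0x41 | 0xFF | 0x80 ]  (FULL, EMPTY, DELETED)
+    //   special   = [ 0x00 | 0xFF | 0xFF ]  (0 > lane, as signed bytes)
+    //   converted = [ 0x80 | 0xFF | 0xFF ]  (DELETED, EMPTY, EMPTY)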
+    __m128i zero = _mm_setzero_si128();
+    __m128i special = _mm_cmpgt_epi8(zero, data);
+    __m128i converted =
+        _mm_or_si128(special, _mm_set1_epi8(static_cast<char>(0x80)));
+    return {converted};
+  }
+};
+
+} // namespace __llvm_libc::cpp::swisstable
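Since all three specializations must agree on semantics, the SSE2 group operations can be sanity-checked with a small host program compiled with -msse2 (a standalone sketch; the expected mask values follow from the control bytes defined in common.h):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main() {
  // One EMPTY (0xFF), one DELETED (0x80), fourteen FULL (0x23) lanes.
  alignas(16) uint8_t ctrl[16] = {0xFF, 0x80, 0x23, 0x23, 0x23, 0x23,
                                  0x23, 0x23, 0x23, 0x23, 0x23, 0x23,
                                  0x23, 0x23, 0x23, 0x23};
  __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ctrl));

  // match_byte(0x23): movemask keeps bit 7 of each comparison lane.
  __m128i cmp = _mm_cmpeq_epi8(data, _mm_set1_epi8(0x23));
  printf("full matches:     %04x\n", _mm_movemask_epi8(cmp)); // fffc

  // mask_empty_or_deleted(): lanes whose highest bit is set.
  printf("empty or deleted: %04x\n", _mm_movemask_epi8(data)); // 0003
}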