diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -33,6 +33,13 @@
 # Flags to pass down to the compiler while building the libc functions.
 set(LIBC_COMPILE_OPTIONS_DEFAULT "" CACHE STRING "Architecture to tell clang to optimize for (e.g. -march=... or -mcpu=...)")
 
+option(LIBC_FAST_UNSAFE_WORD_READ "Use the faster strlen algorithm that depends on undefined behavior, and may fail on some systems." OFF)
+if(LIBC_FAST_UNSAFE_WORD_READ)
+  list(APPEND LIBC_COMPILE_OPTIONS_DEFAULT "-DLIBC_FAST_UNSAFE_WORD_READ")
+endif()
+
+
+
 # Check --print-resource-dir to find the compiler resource dir if this flag
 # is supported by the compiler.
 execute_process(
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -186,6 +186,7 @@
   HDRS
     strlen.h
   DEPENDS
+    .string_utils
     libc.include.string
 )
 
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -19,22 +19,133 @@
 namespace __llvm_libc {
 namespace internal {
 
-// Returns the length of a string, denoted by the first occurrence
-// of a null terminator.
-static inline size_t string_length(const char *src) {
+template <typename T> constexpr T repeat_byte(T byte) {
+  constexpr size_t BITS_IN_BYTE = 8;
+  constexpr size_t BYTE_MASK = 0xff;
+  T result = 0;
+  byte = byte & BYTE_MASK;
+  for (size_t i = 0; i < sizeof(T); ++i)
+    result = (result << BITS_IN_BYTE) | byte;
+  return result;
+}
+
+// The goal of this function is to take in a block of arbitrary size and return
+// if it has any bytes equal to zero without branching. This is done by
+// transforming the block such that zero bytes become non-zero and non-zero
+// bytes become zero.
+// The first transformation relies on the properties of carrying in arithmetic
+// subtraction. Specifically, if 0x01 is subtracted from a byte that is 0x00,
+// then the result for that byte must be equal to 0xff (or 0xfe if the next byte
+// needs a carry as well).
+// The next transformation is a simple mask. All zero bytes will have the high
+// bit set after the subtraction, so each byte is masked with 0x80. This narrows
+// the set of bytes that result in a non-zero value to only zero bytes and bytes
+// with the high bit and any other bit set.
+// The final transformation masks the result of the previous transformations
+// with the inverse of the original byte. This means that any byte that had the
+// high bit set will no longer have it set, narrowing the list of bytes which
+// result in non-zero values to just the zero byte.
+template <typename T> constexpr bool has_zeroes(T block) {
+  constexpr T LOW_BITS = repeat_byte<T>(0x01);
+  constexpr T HIGH_BITS = repeat_byte<T>(0x80);
+  T subtracted = block - LOW_BITS;
+  T inverted = ~block;
+  return (subtracted & inverted & HIGH_BITS) != 0;
+}
+
+template <typename T>
+static inline size_t string_length_unsafe(const char *src) {
+  const char *char_ptr = src;
+  // Step 1: read 1 byte at a time to align to block size
+  for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(T) != 0; ++char_ptr) {
+    if (*char_ptr == 0)
+      return char_ptr - src;
+  }
+  // Step 2: read blocks
+  for (const T *block_ptr = reinterpret_cast<const T *>(char_ptr);
+       !has_zeroes<T>(*block_ptr); ++block_ptr) {
+    char_ptr = reinterpret_cast<const char *>(block_ptr);
+  }
+  // Step 3: find the zero in the block
+  for (; *char_ptr != 0; ++char_ptr) {
+    ;
+  }
+  return char_ptr - src;
+}
+
+static inline size_t string_length_safe(const char *src) {
   size_t length;
   for (length = 0; *src; ++src, ++length)
     ;
   return length;
 }
 
+// Returns the length of a string, denoted by the first occurrence
+// of a null terminator.
+static inline size_t string_length(const char *src) {
+#ifdef LIBC_FAST_UNSAFE_WORD_READ
+  // TODO: pick size of var based on requested size.
+  return string_length_unsafe<uintptr_t>(src);
+#else
+  return string_length_safe(src);
+#endif
+}
+
+template <typename T>
+static inline void *find_first_character_unsafe(const unsigned char *src,
+                                                unsigned char ch, size_t n) {
+  const unsigned char *char_ptr = src;
+  size_t cur = 0;
+
+  // Step 1: read 1 byte at a time to align to block size
+  for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(T) != 0 && cur < n;
+       ++char_ptr, ++cur) {
+    if (*char_ptr == ch)
+      return const_cast<unsigned char *>(char_ptr);
+  }
+
+  const T ch_mask = repeat_byte<T>(ch);
+
+  // Step 2: read blocks
+  for (const T *block_ptr = reinterpret_cast<const T *>(char_ptr);
+       !has_zeroes<T>((*block_ptr) ^ ch_mask) && cur < n;
+       ++block_ptr, cur += sizeof(T)) {
+    char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
+  }
+
+  // Step 3: find the zero in the block
+  for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) {
+    ;
+  }
+
+  if (*char_ptr != ch || cur >= n)
+    return static_cast<void *>(nullptr);
+
+  return const_cast<unsigned char *>(char_ptr);
+}
+
+static inline void *find_first_character_safe(const unsigned char *src,
+                                              unsigned char ch, size_t n) {
+  for (; n && *src != ch; --n, ++src)
+    ;
+  return n ? const_cast<unsigned char *>(src) : nullptr;
+}
+
 // Returns the first occurrence of 'ch' within the first 'n' characters of
 // 'src'. If 'ch' is not found, returns nullptr.
 static inline void *find_first_character(const unsigned char *src,
                                          unsigned char ch, size_t n) {
-  for (; n && *src != ch; --n, ++src)
-    ;
-  return n ? const_cast<unsigned char *>(src) : nullptr;
+#ifdef LIBC_FAST_UNSAFE_WORD_READ
+  // If n is small, the overhead of generating the ch_mask may be greater than
+  // the gains from reading larger chunks.
+  // The number 4 is a guess, but it seems reasonable.
+  if (n > (sizeof(uintptr_t) * 4)) {
+    // TODO: pick size of var based on requested size.
+    return find_first_character_unsafe<uintptr_t>(src, ch, n);
+  }
+#else
+  return find_first_character_safe(src, ch, n);
+#endif
 }
 
 // Returns the maximum length span that contains only characters not found in