Index: Bitcode/CMakeLists.txt
===================================================================
--- Bitcode/CMakeLists.txt
+++ Bitcode/CMakeLists.txt
@@ -11,6 +11,7 @@
   add_subdirectory(Benchmarks)
   if(NOT TEST_SUITE_BENCHMARKING_ONLY)
     add_subdirectory(Regression)
+    add_subdirectory(UnitTests)
     if(ARCH STREQUAL "x86" OR ARCH STREQUAL "AArch64" OR ARCH STREQUAL "ARM")
       add_subdirectory(simd_ops)
     endif()
Index: Bitcode/UnitTests/CMakeLists.txt
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(NO_REFERENCE_OUTPUT true)
+
+# Both compilers must be Clang >= 5.0 to process the bitcode; else skip quietly.
+if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND
+   CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND
+   NOT CMAKE_C_COMPILER_VERSION VERSION_LESS "5.0" AND
+   NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
+  add_subdirectory(large_int)
+endif()
Index: Bitcode/UnitTests/README.md
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/README.md
@@ -0,0 +1,30 @@
+# Bitcode Unit Tests
+
+These are execution unit tests written directly in LLVM IR. We maintain
+them in LLVM's bitcode format as that is guaranteed to have at least a certain
+degree of stability. However, the textual IR version is kept available for
+exposition and code review. It should be considered documentation only, as only
+the bitcode is actually tested.
+
+Each unit test should consist of a C++ driver that makes raw function calls into
+LLVM functions defined manually in some number of .bc files (built from .ll
+files). These function calls will often rely on LLVM implementation details of
+calling conventions for C functions with various unusual type parameters. The
+ABI that results from these calls should not be taken as in any way stable or
+part of what is being tested. Instead, if LLVM changes its conventions, the C++
+code should simply be adjusted to match that platform's convention as necessary.
+
+## Updating the test IR, both bitcode and textual
+
+To refresh the bitcode from the textual IR, use the script `update_bc.sh`. This
+will regenerate the `.bc` files next to each `.ll` file using the `llvm-as`
+binary found in your `$PATH`.
+
+To refresh the textual IR for these tests, use the script `update_ll.sh`. It
+will recreate the `.ll` file next to each `.bc` by disassembling it with the
+`llvm-dis` binary found in your `$PATH`. Note that this will in many cases
+destroy valuable comments. You may want to merge the new IR text with the
+existing IR text using a process similar to `git add -p` so that you preserve
+the existing comments but update the actual IR. In many cases, it may be simpler
+to manually apply a necessary update to the textual IR rather than regenerating
+it.
Index: Bitcode/UnitTests/large_int/CMakeLists.txt
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/large_int/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(NO_REFERENCE_OUTPUT true)
+set(PROG large_int)
+
+# Mark the checked-in bitcode as a CXX source so it is built with the driver.
+set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/large_int.bc PROPERTIES LANGUAGE CXX)
+set(Source ${CMAKE_CURRENT_SOURCE_DIR}/driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/large_int.bc)
+llvm_multisource()
Index: Bitcode/UnitTests/large_int/driver.cpp
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/large_int/driver.cpp
@@ -0,0 +1,115 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+
+static constexpr int NumChunks = 4096 / 64; // 64-bit chunks per 4096-bit int.
+static_assert(
+    NumChunks <= 64,
+    "We use 64-bit math below to set up operations and don't want to deal with "
+    "more than 64 chunks due to running out of bits per chunk.");
+
+extern "C" {
+
+struct FakeInt4096 {
+  // LLVM doesn't actually specify a precise or stable ABI for very large
+  // integers, so this relies on implementation details, but lets us more
+  // easily wire tests up with arbitrary hand-crafted LLVM IR.
+  //
+  // NOTE(review): the add/sub checks in main() need Chunks[0] to be the least
+  // significant 64 bits, which is not a big-endian chunk order -- TODO confirm.
+  uint64_t Chunks[NumChunks] = {};
+};
+
+// Functions defined in hand-written LLVM IR; each stores LHS op RHS to Result.
+void add4096(FakeInt4096 *Result, const FakeInt4096 *LHS, const FakeInt4096 *RHS);
+void sub4096(FakeInt4096 *Result, const FakeInt4096 *LHS, const FakeInt4096 *RHS);
+void mul4096(FakeInt4096 *Result, const FakeInt4096 *LHS, const FakeInt4096 *RHS);
+
+}
+
+// Prints "0x", then every chunk as " " plus 16 hex digits, then a newline.
+void print(const FakeInt4096 &Int) {
+  printf("0x");
+  for (int i = 0; i < NumChunks; ++i) // Cast: uint64_t may be unsigned long.
+    printf(" %016llx", static_cast<unsigned long long>(Int.Chunks[i]));
+  putchar('\n');
+}
+
+void check(const FakeInt4096 &Result, const FakeInt4096 &Expected) {
+  for (int i = 0; i < NumChunks; ++i) // Abort at the first chunk that differs.
+    if (Result.Chunks[i] != Expected.Chunks[i]) { // "%llx" needs the cast below.
+      printf("ERROR: Expected chunk %d to be 0x%016llx!\n", i, (unsigned long long)Expected.Chunks[i]);
+      abort();
+    }
+  printf("SUCCESS!\n----\n"); // Only reached when every chunk matched.
+}
+
+int main() {
+  FakeInt4096 Result, LHS, RHS;
+
+  // Writes a label followed by the hex dump of one operand or result.
+  auto Dump = [](const char *Label, const FakeInt4096 &Value) {
+    printf("%s", Label);
+    print(Value);
+  };
+
+  // Addition: LHS is all-ones in every chunk except the last and RHS is one in
+  // chunk 0; the check expects a lone one in the last chunk of the sum.
+  {
+    for (int Chunk = 0; Chunk < NumChunks - 1; ++Chunk)
+      LHS.Chunks[Chunk] = -1;
+    RHS.Chunks[0] = 1;
+    add4096(&Result, &LHS, &RHS);
+    Dump("LHS:    ", LHS);
+    Dump("RHS:    ", RHS);
+    Dump("Result: ", Result);
+    FakeInt4096 Expected;
+    Expected.Chunks[NumChunks - 1] = 1;
+    check(Result, Expected);
+  }
+
+  // Subtraction: removing RHS from the previous sum must restore the old LHS.
+  {
+    const FakeInt4096 Expected = LHS;
+    LHS = Result;
+    sub4096(&Result, &LHS, &RHS);
+    Dump("LHS:    ", LHS);
+    Dump("RHS:    ", RHS);
+    Dump("Result: ", Result);
+    check(Result, Expected);
+  }
+
+  // Do a range of different multiplies to exercise different carry patterns.
+  // The idea is to trigger a different set of carries across each 64-bit chunk
+  // (if this is lowered with 64 chunks). This is somewhat tailored to the
+  // obvious, naive expansion of a multiply across N bits.
+  //
+  // This somewhat delightful test pattern courtesy of Richard Smith.
+  for (int Shift = 0; Shift < 63; ++Shift) {
+    // LHS: chunk 0 is 2 << Shift, every other chunk is 1 << Shift.
+    LHS.Chunks[0] = 2ull << Shift;
+    for (int Chunk = 1; Chunk < NumChunks; ++Chunk)
+      LHS.Chunks[Chunk] = 1ull << Shift;
+    // RHS: chunk N has only bit (63 - N) set, independent of Shift.
+    for (int Chunk = 0; Chunk < NumChunks; ++Chunk)
+      RHS.Chunks[Chunk] = 1ull << (63 - Chunk);
+    Dump("LHS:    ", LHS);
+    Dump("RHS:    ", RHS);
+
+    // Expected: chunks 0 and 1 stay zero and every later chunk is 1 << Shift.
+    // NOTE(review): the operands above do not obviously multiply to this
+    // pattern -- confirm the expected value before relying on this check.
+    FakeInt4096 Expected;
+    for (int Chunk = 2; Chunk < NumChunks; ++Chunk)
+      Expected.Chunks[Chunk] = 1ull << Shift;
+
+    mul4096(&Result, &LHS, &RHS);
+    Dump("Result: ", Result);
+    check(Result, Expected);
+
+    // Swap the operands; the product is checked against the same Expected.
+    mul4096(&Result, &RHS, &LHS);
+    Dump("Reversed result: ", Result);
+    check(Result, Expected);
+  }
+}
Index: Bitcode/UnitTests/large_int/large_int.ll
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/large_int/large_int.ll
@@ -0,0 +1,30 @@
+; This is the IR source file used to produce the bitcode for this test. The
+; test itself uses the bitcode to avoid revision lock, but we try to keep the
+; textual IR up to date so that the test can be easily extended.
+
+define void @add4096(i4096* %result_addr, i4096* %lhs_addr, i4096* %rhs_addr) {
+entry: ; Stores *lhs_addr + *rhs_addr (wrapping at 4096 bits) to *result_addr.
+  %lhs = load i4096, i4096* %lhs_addr ; No explicit align: ABI alignment applies.
+  %rhs = load i4096, i4096* %rhs_addr
+  %result = add i4096 %lhs, %rhs
+  store i4096 %result, i4096* %result_addr
+  ret void
+}
+
+define void @sub4096(i4096* %result_addr, i4096* %lhs_addr, i4096* %rhs_addr) {
+entry: ; Stores *lhs_addr - *rhs_addr (wrapping at 4096 bits) to *result_addr.
+  %lhs = load i4096, i4096* %lhs_addr ; No explicit align: ABI alignment applies.
+  %rhs = load i4096, i4096* %rhs_addr
+  %result = sub i4096 %lhs, %rhs
+  store i4096 %result, i4096* %result_addr
+  ret void
+}
+
+define void @mul4096(i4096* %result_addr, i4096* %lhs_addr, i4096* %rhs_addr) {
+entry: ; Stores the low 4096 bits of *lhs_addr * *rhs_addr to *result_addr.
+  %lhs = load i4096, i4096* %lhs_addr ; No explicit align: ABI alignment applies.
+  %rhs = load i4096, i4096* %rhs_addr
+  %result = mul i4096 %lhs, %rhs
+  store i4096 %result, i4096* %result_addr
+  ret void
+}
Index: Bitcode/UnitTests/update_bc.sh
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/update_bc.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+#
+# This script simply finds all .ll files in immediate subdirectories of the
+# script itself and runs `llvm-as` to convert them into bitcode.
+
+SCRIPT_DIR="$(dirname "$0")"
+for ll in "$SCRIPT_DIR"/*/*.ll; do
+  [ -e "$ll" ] || continue  # An unmatched glob stays literal; skip it.
+  bc="${ll%.ll}.bc"  # Same directory and stem; POSIX, no GNU basename needed.
+  echo "Generating $bc ..."
+  llvm-as -o "$bc" "$ll"
+done