Index: Bitcode/CMakeLists.txt =================================================================== --- Bitcode/CMakeLists.txt +++ Bitcode/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(Benchmarks) if(NOT TEST_SUITE_BENCHMARKING_ONLY) add_subdirectory(Regression) + add_subdirectory(UnitTests) if(ARCH STREQUAL "x86" OR ARCH STREQUAL "AArch64" OR ARCH STREQUAL "ARM") add_subdirectory(simd_ops) endif() Index: Bitcode/UnitTests/CMakeLists.txt =================================================================== --- /dev/null +++ Bitcode/UnitTests/CMakeLists.txt @@ -0,0 +1,9 @@ +set(NO_REFERENCE_OUTPUT true) + +# We need at least clang 5.0 to process the bitcode here. +if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + NOT CMAKE_C_COMPILER_VERSION VERSION_LESS "5.0" AND + NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") + add_subdirectory(large_int) +endif() Index: Bitcode/UnitTests/README.md =================================================================== --- /dev/null +++ Bitcode/UnitTests/README.md @@ -0,0 +1,30 @@ +# Bitcode Unit Tests + +These are execution unit tests written directly in LLVM IR. We store and maintain +them in LLVM's bitcode format as that is guaranteed to have at least a certain +degree of stability. However, the textual IR version is kept available for +exposition and code review. It should be considered documentation only, as only +the bitcode is actually tested. + +Each unittest should consist of a C++ driver that makes raw function calls into +LLVM functions defined manually in some number of .bc files (built from .ll +files). These function calls will often rely on LLVM implementation details of +calling conventions for C functions with various unusual type parameters. The +ABI that results from these calls should not be taken as in any way stable or +part of what is being tested. 
Instead, if LLVM changes its conventions, the C++
+code should simply be adjusted to match that platform's convention as necessary.
+
+## Updating the test IR, both bitcode and textual
+
+To refresh the bitcode from the textual IR, use the script `update_bc.sh`. This
+will regenerate the `.bc` files next to each `.ll` file using the `llvm-as`
+binary found in your `$PATH`.
+
+To refresh the textual IR for these tests, use the script `update_ll.sh`. It
+will recreate the `.ll` file next to each `.bc` by disassembling it with the
+`llvm-dis` binary found in your `$PATH`. Note that this will in many cases
+destroy valuable comments. You may want to merge the new IR text with the
+existing IR text using a process similar to `git add -p` so that you preserve
+the existing comments but update the actual IR. In many cases, it may be simpler
+to manually apply a necessary update to the textual IR rather than regenerating
+it.
Index: Bitcode/UnitTests/large_int/CMakeLists.txt
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/large_int/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(NO_REFERENCE_OUTPUT true)
+
+SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/large_int.bc PROPERTIES LANGUAGE CXX)
+
+set(Source ${CMAKE_CURRENT_SOURCE_DIR}/driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/large_int.bc)
+set(PROG large_int)
+llvm_multisource()
Index: Bitcode/UnitTests/large_int/driver.cpp
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/large_int/driver.cpp
@@ -0,0 +1,126 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static constexpr int NumChunks = 4096 / 64;
+static_assert(
+    NumChunks <= 64,
+    "We use 64-bit math below to set up operations and don't want to deal with "
+    "more than 64 chunks due to running out of bits per chunk.");
+
+extern "C" {
+
+struct FakeInt4096 {
+  // LLVM doesn't actually specify a precise or stable ABI for very large
+  // integers, so this relies on implementation details, but lets us more
+  // easily wire tests up with arbitrary hand-crafted LLVM IR.
+  //
+  // Note that these chunks appear to be operated on in big-endian even on
+  // little-endian systems.
+  uint64_t Chunks[NumChunks] = {};
+};
+
+// Functions defined in hand-written LLVM IR.
+void add4096(FakeInt4096 *Result, const FakeInt4096 *LHS, const FakeInt4096 *RHS);
+void sub4096(FakeInt4096 *Result, const FakeInt4096 *LHS, const FakeInt4096 *RHS);
+void mul4096(FakeInt4096 *Result, const FakeInt4096 *LHS, const FakeInt4096 *RHS);
+
+}
+
+// Print every chunk of Int as zero-padded hex on a single line, in array
+// order. The cast matches the %llx conversion on platforms where uint64_t is
+// not `unsigned long long`.
+void print(const FakeInt4096 &Int) {
+  printf("0x");
+  for (int i = 0; i < NumChunks; ++i)
+    printf(" %016llx", static_cast<unsigned long long>(Int.Chunks[i]));
+
+  putchar('\n');
+}
+
+// Compare Result against Expected chunk by chunk; report the first mismatch
+// and abort, otherwise print a success banner.
+void check(const FakeInt4096 &Result, const FakeInt4096 &Expected) {
+  for (int i = 0; i < NumChunks; ++i)
+    if (Result.Chunks[i] != Expected.Chunks[i]) {
+      printf("ERROR: Expected chunk %d to be 0x%016llx!\n", i,
+             static_cast<unsigned long long>(Expected.Chunks[i]));
+      abort();
+    }
+  printf("SUCCESS!\n----\n");
+}
+
+int main() {
+  FakeInt4096 Result, LHS, RHS;
+
+  // Addition: all-ones in every chunk but the last, plus a one in chunk 0, is
+  // expected to leave only a one in the last chunk.
+  {
+    for (int i = 0; i < (NumChunks - 1); ++i)
+      LHS.Chunks[i] = -1;
+    RHS.Chunks[0] = 1;
+    add4096(&Result, &LHS, &RHS);
+    printf("LHS: ");
+    print(LHS);
+    printf("RHS: ");
+    print(RHS);
+    printf("Result: ");
+    print(Result);
+    FakeInt4096 Expected;
+    Expected.Chunks[NumChunks - 1] = 1;
+    check(Result, Expected);
+  }
+
+  // Subtraction: taking RHS back out of the sum is expected to reproduce the
+  // original LHS.
+  {
+    FakeInt4096 Expected = LHS;
+    LHS = Result;
+    sub4096(&Result, &LHS, &RHS);
+    printf("LHS: ");
+    print(LHS);
+    printf("RHS: ");
+    print(RHS);
+    printf("Result: ");
+    print(Result);
+    check(Result, Expected);
+  }
+
+  // Do a range of different multiplies to exercise different carry patterns.
+  // The idea is to trigger a different set of carries across each 64-bit chunk
+  // (if this is lowered with 64 chunks). This is somewhat tailored to the
+  // obvious, naive expansion of a multiply across N bits.
+  //
+  // This somewhat delightful test pattern courtesy of Richard Smith.
+  for (int j = 0; j < 63; ++j) {
+    // Set each 64-bit chunk to a different value shifted to a different range.
+    LHS.Chunks[0] = 2ull << j;
+    for (int i = 1; i < NumChunks; ++i)
+      LHS.Chunks[i] = 1ull << j;
+    // Set chunk i to the single bit `1 << (63 - i)`, so the set bit steps down
+    // by one position in each successive chunk.
+    for (int i = 0; i < NumChunks; ++i)
+      RHS.Chunks[i] = 1ull << (63 - i);
+    printf("LHS: ");
+    print(LHS);
+    printf("RHS: ");
+    print(RHS);
+
+    // We expect to end up with exactly `1 << j` in all but the first chunk
+    // (index 0, which stays zero) due to the carried bits.
+    FakeInt4096 Expected;
+    for (int i = 1; i < NumChunks; ++i)
+      Expected.Chunks[i] = 1ull << j;
+
+    mul4096(&Result, &LHS, &RHS);
+    printf("Result: ");
+    print(Result);
+    check(Result, Expected);
+
+    // Reverse the operands and ensure we still get the same answer.
+    mul4096(&Result, &RHS, &LHS);
+    printf("Reversed result: ");
+    print(Result);
+    check(Result, Expected);
+  }
+}
Index: Bitcode/UnitTests/large_int/large_int.ll
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/large_int/large_int.ll
@@ -0,0 +1,30 @@
+; This is the IR source file used to produce the bitcode for this test. The
+; test itself uses the bitcode to avoid revision lock, but we try to keep the
+; textual IR up to date so that the test can be easily extended.
+
+define void @add4096(i4096* %result_addr, i4096* %lhs_addr, i4096* %rhs_addr) {
+entry:
+  %lhs = load i4096, i4096* %lhs_addr
+  %rhs = load i4096, i4096* %rhs_addr
+  %result = add i4096 %lhs, %rhs
+  store i4096 %result, i4096* %result_addr
+  ret void
+}
+
+define void @sub4096(i4096* %result_addr, i4096* %lhs_addr, i4096* %rhs_addr) {
+entry:
+  %lhs = load i4096, i4096* %lhs_addr
+  %rhs = load i4096, i4096* %rhs_addr
+  %result = sub i4096 %lhs, %rhs
+  store i4096 %result, i4096* %result_addr
+  ret void
+}
+
+define void @mul4096(i4096* %result_addr, i4096* %lhs_addr, i4096* %rhs_addr) {
+entry:
+  %lhs = load i4096, i4096* %lhs_addr
+  %rhs = load i4096, i4096* %rhs_addr
+  %result = mul i4096 %lhs, %rhs
+  store i4096 %result, i4096* %result_addr
+  ret void
+}
Index: Bitcode/UnitTests/update_bc.sh
===================================================================
--- /dev/null
+++ Bitcode/UnitTests/update_bc.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+# This script simply finds all .ll files in immediate subdirectories of the
+# script itself and runs `llvm-as` to convert them into bitcode.
+
+# Stop at the first failing command so a broken .ll file is not silently skipped.
+set -e
+
+SCRIPT_DIR="$(dirname "$0")"
+
+for ll in "$SCRIPT_DIR"/*/*.ll; do
+  # An unmatched glob is left as the literal pattern; skip it.
+  [ -e "$ll" ] || continue
+  # Strip the .ll suffix with POSIX parameter expansion rather than the
+  # GNU-only `basename --suffix`.
+  bc="${ll%.ll}.bc"
+  echo "Generating $bc ..."
+  llvm-as -o "$bc" "$ll"
+done