diff --git a/SingleSource/UnitTests/Vectorizer/CMakeLists.txt b/SingleSource/UnitTests/Vectorizer/CMakeLists.txt
--- a/SingleSource/UnitTests/Vectorizer/CMakeLists.txt
+++ b/SingleSource/UnitTests/Vectorizer/CMakeLists.txt
@@ -1 +1,2 @@
+list(APPEND CXXFLAGS -std=c++17)
 llvm_singlesource()
diff --git a/SingleSource/UnitTests/Vectorizer/runtime-checks.cpp b/SingleSource/UnitTests/Vectorizer/runtime-checks.cpp
new file mode 100644
--- /dev/null
+++ b/SingleSource/UnitTests/Vectorizer/runtime-checks.cpp
@@ -0,0 +1,277 @@
+#include <algorithm>
+#include <cstdlib>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <random>
+
+// Tests for memory runtime checks generated by the vectorizer. Runs scalar and
+// vectorized versions of a loop requiring runtime checks on the same inputs
+// with pointers to the same buffer using various offsets. Fails if they do not
+// produce the same results.
+//
+static std::mt19937 rng;
+
+// Initialize arrays A with random numbers.
+template <typename Ty>
+static void init_data(const std::unique_ptr<Ty[]> &A, unsigned N) {
+  std::uniform_int_distribution<int> distrib(-1000, 1000);
+  for (unsigned i = 0; i < N; i++)
+    A[i] = distrib(rng);
+}
+
+// Compare the first NumElements elements of Reference and Tmp. On a mismatch,
+// report the offset being tested on stderr and exit with status 1.
+template <typename Ty>
+static void check(const std::unique_ptr<Ty[]> &Reference,
+                  const std::unique_ptr<Ty[]> &Tmp, unsigned NumElements,
+                  int Offset) {
+  if (!std::equal(&Reference[0], &Reference[0] + NumElements, &Tmp[0])) {
+    std::cerr << "Miscompare with offset " << Offset << "\n";
+    std::exit(1);
+  }
+}
+
+// Define lambdas ScalarFn and VectorFn in the enclosing scope. Both run the
+// same Loop over pointers A and B with trip count TC; only the vectorize
+// pragma differs.
+#define DEFINE_SCALAR_AND_VECTOR_FN2(Loop) \
+  auto ScalarFn = [](auto *A, auto *B, unsigned TC) { \
+    _Pragma("clang loop vectorize(disable)") Loop \
+  }; \
+  auto VectorFn = [](auto *A, auto *B, unsigned TC) { \
+    _Pragma("clang loop vectorize(enable)") Loop \
+  };
+
+// Same as DEFINE_SCALAR_AND_VECTOR_FN2, but the lambdas take a third pointer C.
+#define DEFINE_SCALAR_AND_VECTOR_FN3(Loop) \
+  auto ScalarFn = [](auto *A, auto *B, auto *C, unsigned TC) { \
+    _Pragma("clang loop vectorize(disable)") Loop \
+  }; \
+  auto VectorFn = [](auto *A, auto *B, auto *C, unsigned TC) { \
+    _Pragma("clang loop vectorize(enable)") Loop \
+  };
+
+template <typename Ty> using Fn2Ty = std::function<void(Ty *, Ty *, unsigned)>;
+
+// Run both \p ScalarFn and \p VectorFn on the same inputs with pointers to the
+// same buffer. Fail if they do not produce the same results.
+template <typename Ty>
+static __attribute__((noinline)) void
+checkOverlappingMemoryOneRuntimeCheck(Fn2Ty<Ty> ScalarFn, Fn2Ty<Ty> VectorFn,
+                                      const char *Name) {
+  std::cout << "Checking " << Name << "\n";
+  unsigned N = 100;
+  // Make sure we have enough extra elements so we can be liberal with offsets.
+  unsigned NumArrayElements = N * 8;
+  auto CheckWithOffset = [&](int Offset) {
+    std::unique_ptr<Ty[]> Input1(new Ty[NumArrayElements]);
+    std::unique_ptr<Ty[]> Reference(new Ty[NumArrayElements]);
+    std::unique_ptr<Ty[]> ToCheck(new Ty[NumArrayElements]);
+
+    init_data(Input1, NumArrayElements);
+    for (unsigned i = 0; i < NumArrayElements; i++) {
+      Reference[i] = Input1[i];
+      ToCheck[i] = Input1[i];
+    }
+
+    // Run scalar function to generate reference output.
+    auto *ReferenceStart = &Reference[0] + NumArrayElements / 2;
+    ScalarFn(ReferenceStart + Offset, ReferenceStart, N);
+
+    // Run vector function to generate output to check.
+    auto *StartPtr = &ToCheck[0] + NumArrayElements / 2;
+    VectorFn(StartPtr + Offset, StartPtr, N);
+
+    // Compare scalar and vector output.
+    check(Reference, ToCheck, NumArrayElements, Offset);
+  };
+
+  for (int i = -200; i <= 200; i++)
+    CheckWithOffset(i);
+}
+
+template <typename Ty>
+using Fn3Ty = std::function<void(Ty *, Ty *, Ty *, unsigned)>;
+
+// Like checkOverlappingMemoryOneRuntimeCheck, but for loops taking three
+// pointers: the first and third point into the same buffer (at the tested
+// offset), the second points to a separate input buffer.
+template <typename Ty>
+static __attribute__((noinline)) void
+checkOverlappingMemoryTwoRuntimeChecks(Fn3Ty<Ty> ScalarFn, Fn3Ty<Ty> VectorFn,
+                                       const char *Name) {
+  std::cout << "Checking " << Name << "\n";
+  unsigned N = 100;
+  // Make sure we have enough extra elements so we can be liberal with offsets.
+  unsigned NumArrayElements = N * 8;
+  auto CheckWithOffsetSecond = [&](int Offset) {
+    std::unique_ptr<Ty[]> Input1(new Ty[NumArrayElements]);
+    std::unique_ptr<Ty[]> Input2(new Ty[NumArrayElements]);
+    std::unique_ptr<Ty[]> Reference(new Ty[NumArrayElements]);
+    std::unique_ptr<Ty[]> ToCheck(new Ty[NumArrayElements]);
+
+    init_data(Input1, NumArrayElements);
+    init_data(Input2, NumArrayElements);
+    for (unsigned i = 0; i < NumArrayElements; i++) {
+      Reference[i] = Input1[i];
+      ToCheck[i] = Input1[i];
+    }
+
+    // Run scalar function to generate reference output.
+    auto *ReferenceStart = &Reference[0] + NumArrayElements / 2;
+    ScalarFn(ReferenceStart + Offset, &Input2[0], ReferenceStart, N);
+
+    // Run vector function to generate output to check.
+    auto *StartPtr = &ToCheck[0] + NumArrayElements / 2;
+    VectorFn(StartPtr + Offset, &Input2[0], StartPtr, N);
+
+    // Compare scalar and vector output.
+    check(Reference, ToCheck, NumArrayElements, Offset);
+  };
+
+  for (int i = -200; i <= 200; i++)
+    CheckWithOffsetSecond(i);
+}
+
+// Seed the RNG with a fixed value, then check every loop pattern for char, int
+// and long long elements.
+int main(void) {
+  rng = std::mt19937(15);
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN2(
+      for (unsigned i = 0; i < TC; i++)
+        A[i] = B[i] + 10;
+    );
+
+    checkOverlappingMemoryOneRuntimeCheck<char>(
+        ScalarFn, VectorFn, "1 read, 1 write, step 1, char");
+    checkOverlappingMemoryOneRuntimeCheck<int>(ScalarFn, VectorFn,
+                                               "1 read, 1 write, step 1, int");
+    checkOverlappingMemoryOneRuntimeCheck<long long>(
+        ScalarFn, VectorFn, "1 read, 1 write, step 1, long long");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN2(
+      for (unsigned i = 0; i < TC; i++)
+        A[i] = B[i + 3] + 10;
+    );
+
+    checkOverlappingMemoryOneRuntimeCheck<char>(
+        ScalarFn, VectorFn, "1 read, 1 write, offset 3, char");
+    checkOverlappingMemoryOneRuntimeCheck<int>(
+        ScalarFn, VectorFn, "1 read, 1 write, offset 3, int");
+    checkOverlappingMemoryOneRuntimeCheck<long long>(
+        ScalarFn, VectorFn, "1 read, 1 write, offset 3, long long");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN2(
+      for (unsigned i = 3; i < TC; i++)
+        A[i] = B[i - 3] + 10;
+    );
+
+    checkOverlappingMemoryOneRuntimeCheck<char>(
+        ScalarFn, VectorFn, "1 read, 1 write, offset -3, char");
+    checkOverlappingMemoryOneRuntimeCheck<int>(
+        ScalarFn, VectorFn, "1 read, 1 write, offset -3, int");
+    checkOverlappingMemoryOneRuntimeCheck<long long>(
+        ScalarFn, VectorFn, "1 read, 1 write, offset -3, long long");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN2(
+      for (unsigned i = TC; i > 0; i--)
+        A[i] = B[i] + 10;
+    );
+
+    checkOverlappingMemoryOneRuntimeCheck<char>(
+        ScalarFn, VectorFn, "1 read, 1 write, index count down, char");
+    checkOverlappingMemoryOneRuntimeCheck<int>(
+        ScalarFn, VectorFn, "1 read, 1 write, index count down, int");
+    checkOverlappingMemoryOneRuntimeCheck<long long>(
+        ScalarFn, VectorFn, "1 read, 1 write, index count down, long long");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN2(
+      for (unsigned i = TC; i > 2; i -= 2)
+        A[i] = B[i] + 10;
+    );
+
+    checkOverlappingMemoryOneRuntimeCheck<char>(
+        ScalarFn, VectorFn, "1 read, 1 write, index count down 2, char");
+    checkOverlappingMemoryOneRuntimeCheck<int>(
+        ScalarFn, VectorFn, "1 read, 1 write, index count down 2, int");
+    checkOverlappingMemoryOneRuntimeCheck<long long>(
+        ScalarFn, VectorFn, "1 read, 1 write, index count down 2, long long");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN2(
+      for (unsigned i = 0, j = 0; i < TC; i++) {
+        A[i] = B[j] + 10;
+        j += 2;
+      });
+    checkOverlappingMemoryOneRuntimeCheck<char>(
+        ScalarFn, VectorFn,
+        "1 read, 1 write, 2 inductions, different steps, char");
+    checkOverlappingMemoryOneRuntimeCheck<int>(
+        ScalarFn, VectorFn,
+        "1 read, 1 write, 2 inductions, different steps, int");
+    checkOverlappingMemoryOneRuntimeCheck<long long>(
+        ScalarFn, VectorFn,
+        "1 read, 1 write, 2 inductions, different steps, long long");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN2(
+      _Pragma("clang loop unroll(disable)")
+      for (unsigned i = 0; i < TC; i += 2) {
+        A[i] = B[i] + 10;
+      });
+
+    checkOverlappingMemoryOneRuntimeCheck<char>(
+        ScalarFn, VectorFn, "1 read, 1 write, induction increment 2, char");
+    checkOverlappingMemoryOneRuntimeCheck<int>(
+        ScalarFn, VectorFn, "1 read, 1 write, induction increment 2, int");
+    checkOverlappingMemoryOneRuntimeCheck<long long>(
+        ScalarFn, VectorFn,
+        "1 read, 1 write, induction increment 2, long long");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN3(
+      for (unsigned i = 0; i < TC; i++)
+        A[i] = B[i] + C[i] + 10;
+    );
+
+    checkOverlappingMemoryTwoRuntimeChecks<int>(
+        ScalarFn, VectorFn, "2 reads, 1 write, simple indices, int");
+    checkOverlappingMemoryTwoRuntimeChecks<char>(
+        ScalarFn, VectorFn, "2 reads, 1 write, simple indices, char");
+    checkOverlappingMemoryTwoRuntimeChecks<long long>(
+        ScalarFn, VectorFn, "2 reads, 1 write, simple indices, long long");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN3(
+      for (unsigned i = 0; i < TC; i++) {
+        auto X = C[i] + 10;
+        A[i] = X;
+        B[i] = X + 9;
+      }
+    );
+
+    checkOverlappingMemoryTwoRuntimeChecks<char>(
+        ScalarFn, VectorFn, "1 read, 2 writes, simple indices, char");
+    checkOverlappingMemoryTwoRuntimeChecks<int>(
+        ScalarFn, VectorFn, "1 read, 2 writes, simple indices, int");
+    checkOverlappingMemoryTwoRuntimeChecks<long long>(
+        ScalarFn, VectorFn, "1 read, 2 writes, simple indices, long long");
+  }
+
+  return 0;
+}
diff --git a/SingleSource/UnitTests/Vectorizer/runtime-checks.reference_output b/SingleSource/UnitTests/Vectorizer/runtime-checks.reference_output
new file mode 100644
--- /dev/null
+++ b/SingleSource/UnitTests/Vectorizer/runtime-checks.reference_output
@@ -0,0 +1,28 @@
+Checking 1 read, 1 write, step 1, char
+Checking 1 read, 1 write, step 1, int
+Checking 1 read, 1 write, step 1, long long
+Checking 1 read, 1 write, offset 3, char
+Checking 1 read, 1 write, offset 3, int
+Checking 1 read, 1 write, offset 3, long long
+Checking 1 read, 1 write, offset -3, char
+Checking 1 read, 1 write, offset -3, int
+Checking 1 read, 1 write, offset -3, long long
+Checking 1 read, 1 write, index count down, char
+Checking 1 read, 1 write, index count down, int
+Checking 1 read, 1 write, index count down, long long
+Checking 1 read, 1 write, index count down 2, char
+Checking 1 read, 1 write, index count down 2, int
+Checking 1 read, 1 write, index count down 2, long long
+Checking 1 read, 1 write, 2 inductions, different steps, char
+Checking 1 read, 1 write, 2 inductions, different steps, int
+Checking 1 read, 1 write, 2 inductions, different steps, long long
+Checking 1 read, 1 write, induction increment 2, char
+Checking 1 read, 1 write, induction increment 2, int
+Checking 1 read, 1 write, induction increment 2, long long
+Checking 2 reads, 1 write, simple indices, int
+Checking 2 reads, 1 write, simple indices, char
+Checking 2 reads, 1 write, simple indices, long long
+Checking 1 read, 2 writes, simple indices, char
+Checking 1 read, 2 writes, simple indices, int
+Checking 1 read, 2 writes, simple indices, long long
+exit 0