Add nested-loop tests for vectorizer memory runtime checks.

common.h gains DEFINE_SCALAR_AND_VECTOR_FN4, which wraps InnerLoopCode in an
i/j loop nest and applies the vectorize pragma to the inner loop only.
runtime-checks.cpp gains checkOverlappingMemoryTwoRuntimeChecksNested, which
compares scalar and vector results for offsets in [-2 * (N + 1), 2 * (N + 1)]
between the first and third pointer arguments.

Index: SingleSource/UnitTests/Vectorizer/common.h
===================================================================
--- SingleSource/UnitTests/Vectorizer/common.h
+++ SingleSource/UnitTests/Vectorizer/common.h
@@ -17,6 +17,24 @@
     _Pragma("clang loop vectorize(enable)") Loop                               \
   };
 
+#define DEFINE_SCALAR_AND_VECTOR_FN4(InnerLoopCode)                            \
+  auto ScalarFn = [](auto *A, auto *B, auto *C, unsigned TC) {                 \
+    for (unsigned long i = 0; i < TC; i++) {                                   \
+      _Pragma("clang loop vectorize(disable) interleave_count(1)")             \
+      for (unsigned long j = 0; j < TC; j++) {                                 \
+        InnerLoopCode                                                          \
+      }                                                                        \
+    }                                                                          \
+  };                                                                           \
+  auto VectorFn = [](auto *A, auto *B, auto *C, unsigned TC) {                 \
+    for (unsigned long i = 0; i < TC; i++) {                                   \
+      _Pragma("clang loop vectorize(enable)")                                  \
+      for (unsigned long j = 0; j < TC; j++) {                                 \
+        InnerLoopCode                                                          \
+      }                                                                        \
+    }                                                                          \
+  };
+
 static std::mt19937 rng;
 
 // Initialize arrays A with random numbers.
Index: SingleSource/UnitTests/Vectorizer/runtime-checks.cpp
===================================================================
--- SingleSource/UnitTests/Vectorizer/runtime-checks.cpp
+++ SingleSource/UnitTests/Vectorizer/runtime-checks.cpp
@@ -7,6 +7,7 @@
 
 #include "common.h"
 
+
 // Tests for memory runtime checks generated by the vectorizer. Runs scalar and
 // vectorized versions of a loop requiring runtime checks on the same inputs
 // with pointers to the same buffer using various offsets between reads and
@@ -108,6 +109,52 @@
     CheckWithOffsetSecond(i);
 }
 
+
+
+template <typename Ty>
+using Fn3Ty = std::function<void(Ty *, Ty *, Ty *, unsigned)>;
+template <typename Ty>
+static void checkOverlappingMemoryTwoRuntimeChecksNested(Fn3Ty<Ty> ScalarFn,
+                                                         Fn3Ty<Ty> VectorFn,
+                                                         const char *Name) {
+  std::cout << "Checking " << Name << "\n";
+
+  const int N = 100;
+  // Make sure we have enough extra elements so we can be liberal with offsets.
+  const unsigned NumArrayElements = (N * (N + 1)) * 8;
+  std::unique_ptr<Ty[]> Input1(new Ty[NumArrayElements]);
+  std::unique_ptr<Ty[]> Input2(new Ty[NumArrayElements]);
+  std::unique_ptr<Ty[]> Reference(new Ty[NumArrayElements]);
+  std::unique_ptr<Ty[]> ToCheck(new Ty[NumArrayElements]);
+
+  auto CheckWithOffsetSecond = [&](int Offset) {
+    init_data(Input1, NumArrayElements);
+    init_data(Input2, NumArrayElements);
+    for (unsigned i = 0; i < NumArrayElements; i++) {
+      Reference[i] = Input1[i];
+      ToCheck[i] = Input1[i];
+    }
+
+    // Run scalar function to generate reference output.
+    Ty *ReferenceStart = &Reference[NumArrayElements / 2];
+    ScalarFn(ReferenceStart + Offset, &Input2[0], ReferenceStart, N);
+
+    // Run vector function to generate output to check.
+    Ty *StartPtr = &ToCheck[NumArrayElements / 2];
+    callThroughOptnone(VectorFn, StartPtr + Offset, &Input2[0], StartPtr, N);
+
+    // Compare scalar and vector output.
+    check(Reference, ToCheck, NumArrayElements, Offset);
+  };
+
+  // With a nested loop, sometimes the runtime checks will fail and sometimes
+  // succeed. For example, with large offsets you'd expect for the first and
+  // last one or two executions of the inner loop there is no overlap.
+  for (int i = -(2 * (N + 1)); i <= (2 * (N + 1)); i++)
+    CheckWithOffsetSecond(i);
+}
+
+
 int main(void) {
   rng = std::mt19937(15);
 
@@ -261,5 +308,34 @@
         ScalarFn, VectorFn, "1 read, 2 writes, simple indices, uint64_t");
   }
 
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      auto X = C[(i * TC) + j];
+      A[(i * (TC + 1)) + j] = X;
+    );
+
+    checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>(
+        ScalarFn, VectorFn, "1 read, 1 write, nested loop, uint8_t");
+    checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>(
+        ScalarFn, VectorFn, "1 read, 1 write, nested loop, uint32_t");
+    checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>(
+        ScalarFn, VectorFn, "1 read, 1 write, nested loop, uint64_t");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      auto X = C[(i * TC) + j];
+      A[(i * (TC + 1)) + j] += X;
+    );
+
+    checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>(
+        ScalarFn, VectorFn, "2 reads, 1 write, nested loop, uint8_t");
+    checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>(
+        ScalarFn, VectorFn, "2 reads, 1 write, nested loop, uint32_t");
+    checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>(
+        ScalarFn, VectorFn, "2 reads, 1 write, nested loop, uint64_t");
+  }
+
+
   return 0;
 }
Index: SingleSource/UnitTests/Vectorizer/runtime-checks.reference_output
===================================================================
--- SingleSource/UnitTests/Vectorizer/runtime-checks.reference_output
+++ SingleSource/UnitTests/Vectorizer/runtime-checks.reference_output
@@ -28,4 +28,10 @@
 Checking 1 read, 2 writes, simple indices, uint8_t
 Checking 1 read, 2 writes, simple indices, uint32_t
 Checking 1 read, 2 writes, simple indices, uint64_t
+Checking 1 read, 1 write, nested loop, uint8_t
+Checking 1 read, 1 write, nested loop, uint32_t
+Checking 1 read, 1 write, nested loop, uint64_t
+Checking 2 reads, 1 write, nested loop, uint8_t
+Checking 2 reads, 1 write, nested loop, uint32_t
+Checking 2 reads, 1 write, nested loop, uint64_t
 exit 0