This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
SingleSource/UnitTests/Vector/AVX512VL/
-
UnitTests/
-
Vector/
-
AVX512VL/
1/3
i64gather_32.c

Differential D79158

[X86] Bugfix on check function of _mm_mmask_i64gather_epi32 and _mm_mmask_i64gather_ps for rL349334
ClosedPublic

Authored by FreddyYe on Apr 30 2020, 1:51 AM.

Download Raw Diff

Details

Reviewers

craig.topper
RKSimon
LuoYuanke
pengfei
skan
yubing

Summary

_mm_mmask_i64gather_epi32 and _mm_mmask_i64gather_ps operate on the lower 64 bits and zero the higher 64 bits of the return value. The old version steps by 64 bits in the do_intrin_ loop, which overlaps the higher 64 bits of the last iteration. This is incorrect usage of the intrinsics. In particular, when the compiler places dst128_f and mask128 at adjacent addresses, this test fails, which is happening on LLVM HEAD. So I changed the loop step to 128 bits and added a new check function to check these two intrinsics (I could not find a similar check function in other tests).

Diff Detail

Event Timeline

FreddyYe created this revision.Apr 30 2020, 1:51 AM

craig.topper added inline comments.May 2 2020, 12:00 AM

SingleSource/UnitTests/Vector/AVX512VL/i64gather_32.c
54	I wonder if it would be better to change to a 64-bit store _mm_storel_epi64 instead?

FreddyYe marked an inline comment as done.May 5 2020, 11:44 PM

FreddyYe added inline comments.

SingleSource/UnitTests/Vector/AVX512VL/i64gather_32.c
54	I'm confused about this, too. A 64-bit store is unable to verify the higher 64 bits of the return value (even if they are zero). I think it depends on the original design purpose of this test suite.

LGTM

SingleSource/UnitTests/Vector/AVX512VL/i64gather_32.c
54	Ok, then what you've done here seems fine.

This revision is now accepted and ready to land.May 6 2020, 12:09 AM

Committed in https://github.com/llvm/llvm-test-suite/commit/33904171b17a6ecebb8822f1179fec559a7be9a1

Revision Contents

Path

Size

SingleSource/

UnitTests/

Vector/

AVX512VL/

i64gather_32.c

38 lines

Diff 261146

SingleSource/UnitTests/Vector/AVX512VL/i64gather_32.c

Show All 40 Lines	if (i % 2 == 0) {
mask256[i / 4] = (i * 31) & 0xff;		mask256[i / 4] = (i * 31) & 0xff;
}		}
}		}
}		}
}		}

void do_mm_mmask_i64gather_epi32() {		void do_mm_mmask_i64gather_epi32() {
int i;		int i;
for (i = 0; i < NUM; i += 2) {		for (i = 0; i < NUM; i += 4) {
__m128i ind = _mm_loadu_si128((const __m128i *)(g_index + i));		__m128i ind = _mm_loadu_si128((const __m128i *)(g_index + i));
__m128i old_dst = _mm_loadu_si128((__m128i const *)(dst_i + i));		__m128i old_dst = _mm_loadu_si128((__m128i const *)(dst_i + i));
__m128i gtr =		__m128i gtr =
_mm_mmask_i64gather_epi32(old_dst, mask128[i / 2], ind, src_i, SCALE);		_mm_mmask_i64gather_epi32(old_dst, mask128[i / 4], ind, src_i, SCALE);
_mm_storeu_si128((__m128i *)(dst128_i + i), gtr);		_mm_storeu_si128((__m128i *)(dst128_i + i), gtr);
		craig.topperUnsubmitted Not Done Reply Inline Actions I wonder if it would be better to change to a 64-bit store _mm_storel_epi64 instead? craig.topper: I wonder if it would be better to change to a 64-bit store _mm_storel_epi64 instead?
		FreddyYeAuthorUnsubmitted Done Reply Inline Actions I'm confused about this, too. A 64-bit store is unable to verify the higher 64 bits of the return value (even if they are zero). I think it depends on the original design purpose of this test suite. FreddyYe: I'm confused about this, too. A 64-bit store is unable to verify the higher 64 bits of the return…
		craig.topperUnsubmitted Not Done Reply Inline Actions Ok, then what you've done here seems fine. craig.topper: Ok, then what you've done here seems fine.
}		}
}		}

void do_mm_mmask_i64gather_ps() {		void do_mm_mmask_i64gather_ps() {
int i;		int i;
for (i = 0; i < NUM; i += 2) {		for (i = 0; i < NUM; i += 4) {
__m128i ind = _mm_loadu_si128((const __m128i *)(g_index + i));		__m128i ind = _mm_loadu_si128((const __m128i *)(g_index + i));
__m128 old_dst = _mm_loadu_ps(dst_f + i);		__m128 old_dst = _mm_loadu_ps(dst_f + i);
__m128 gtr =		__m128 gtr =
_mm_mmask_i64gather_ps(old_dst, mask128[i / 2], ind, src_f, SCALE);		_mm_mmask_i64gather_ps(old_dst, mask128[i / 4], ind, src_f, SCALE);
_mm_storeu_ps(dst128_f + i, gtr);		_mm_storeu_ps(dst128_f + i, gtr);
}		}
}		}

void do_mm256_mmask_i64gather_epi32() {		void do_mm256_mmask_i64gather_epi32() {
int i;		int i;
for (i = 0; i < NUM; i += 4) {		for (i = 0; i < NUM; i += 4) {
__m256i ind = _mm256_loadu_si256((const __m256i *)(g_index + i));		__m256i ind = _mm256_loadu_si256((const __m256i *)(g_index + i));
Show All 32 Lines	if (v != res_dst[i]) {
printf("Expected value %d, actual %d\n", v, res_dst[i]);		printf("Expected value %d, actual %d\n", v, res_dst[i]);

return -1;		return -1;
}		}
}		}
return 0;		return 0;
}		}

		int checkh(int id, int res_dst, int pass_thru_vals, int mask, int src,
		int elems_in_vector) {
		int i;
		for (i = 0; i < NUM; i++) {
		int kmask = mask[i / elems_in_vector];
		int kmask_bit = kmask & (1 << (i % elems_in_vector));

		int v;
		if (i % elems_in_vector >= elems_in_vector / 2)
		v = 0;
		else
		v = kmask_bit ? src[g_index[i]] : pass_thru_vals[i];
		// printf("v= %d, g_index[i] = %ld, src[g_index[i]]= %d, res_dst[i]=%d\n ",
		// v, g_index[i], src[g_index[i]], res_dst[i]);

		if (v != res_dst[i]) {
		printf("The testcase #%d FAILed at %d iteration\n", id, i);

		printf("Expected value %d, actual %d\n", v, res_dst[i]);

		return -1;
		}
		}
		return 0;
		}

int main() {		int main() {
int error = 0;		int error = 0;

init_data();		init_data();

do_mm_mmask_i64gather_epi32();		do_mm_mmask_i64gather_epi32();
error \|= check(1, dst128_i, dst_i, mask128, src_i, 2);		error \|= checkh(1, dst128_i, dst_i, mask128, src_i, 4);

do_mm_mmask_i64gather_ps();		do_mm_mmask_i64gather_ps();
error \|= check(2, (int )dst128_f, (int )dst_f, mask128, (int *)src_f, 2);		error \|= checkh(2, (int )dst128_f, (int )dst_f, mask128, (int *)src_f, 4);

do_mm256_mmask_i64gather_epi32();		do_mm256_mmask_i64gather_epi32();
error \|= check(3, dst256_i, dst_i, mask256, src_i, 4);		error \|= check(3, dst256_i, dst_i, mask256, src_i, 4);

do_mm256_mmask_i64gather_ps();		do_mm256_mmask_i64gather_ps();
error \|= check(4, (int )dst256_f, (int )dst_f, mask256, (int *)src_f, 4);		error \|= check(4, (int )dst256_f, (int )dst_f, mask256, (int *)src_f, 4);

if (error != 0) {		if (error != 0) {
printf("FAILED\n");		printf("FAILED\n");
return 1;		return 1;
}		}

printf("PASSED\n");		printf("PASSED\n");
return 0;		return 0;
}		}