Diff 125526

lib/Headers/avx512bwintrin.h

Show First 20 Lines • Show All 1,848 Lines • ▼ Show 20 Lines	_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
return (__m512i) __builtin_ia32_selectb_512(__M,		return (__m512i) __builtin_ia32_selectb_512(__M,
(__v64qi) _mm512_set1_epi8(__A),		(__v64qi) _mm512_set1_epi8(__A),
(__v64qi) _mm512_setzero_si512());		(__v64qi) _mm512_setzero_si512());
}		}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS		static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)		_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{		{
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,		return (__mmask64) (( __A & 0xFFFFFFFF) \| ( __B << 32));
		RKSimon: Is this right? The Intel docs say it should be: `k[31:0] := a[31:0]`; `k[63:32] := b[31:0]`; `k[MAX:64] := 0`. Also, is the cast on __A necessary? Same for the others.
		jina.nahias (author): You are right, I fixed it.
(__mmask64) __B);
}		}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS		static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)		_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
{		{
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,		return (__mmask32) (( __A & 0xFFFF) \| ( __B << 16));
(__mmask32) __B);
}		}

static __inline__ __m512i __DEFAULT_FN_ATTRS		static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)		_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
{		{
return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,		return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
(__v32hi) __W,		(__v32hi) __W,
(__mmask32) __U);		(__mmask32) __U);
▲ Show 20 Lines • Show All 267 Lines • Show Last 20 Lines

lib/Headers/avx512fintrin.h

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 8,781 Lines • ▼ Show 20 Lines
	_mm512_kortestz (__mmask16 __A, __mmask16 __B)			_mm512_kortestz (__mmask16 __A, __mmask16 __B)
	{			{
	return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);			return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
	}			}

	static __inline__ __mmask16 __DEFAULT_FN_ATTRS			static __inline__ __mmask16 __DEFAULT_FN_ATTRS
	_mm512_kunpackb (__mmask16 __A, __mmask16 __B)			_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
	{			{
	return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);			return (__mmask16) (( __A & 0xFF) \| ( __B << 8));
	}			}

	static __inline__ __mmask16 __DEFAULT_FN_ATTRS			static __inline__ __mmask16 __DEFAULT_FN_ATTRS
	_mm512_kxnor (__mmask16 __A, __mmask16 __B)			_mm512_kxnor (__mmask16 __A, __mmask16 __B)
	{			{
	return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);			return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
	}			}

	▲ Show 20 Lines • Show All 1,435 Lines • Show Last 20 Lines

test/CodeGen/avx512bw-builtins.c

Show First 20 Lines • Show All 1,620 Lines • ▼ Show 20 Lines	__m512i test_mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
// CHECK: insertelement <64 x i8> %{{.}}, i8 %{{.}}, i32 60		// CHECK: insertelement <64 x i8> %{{.}}, i8 %{{.}}, i32 60
// CHECK: insertelement <64 x i8> %{{.}}, i8 %{{.}}, i32 61		// CHECK: insertelement <64 x i8> %{{.}}, i8 %{{.}}, i32 61
// CHECK: insertelement <64 x i8> %{{.}}, i8 %{{.}}, i32 62		// CHECK: insertelement <64 x i8> %{{.}}, i8 %{{.}}, i32 62
// CHECK: insertelement <64 x i8> %{{.}}, i8 %{{.}}, i32 63		// CHECK: insertelement <64 x i8> %{{.}}, i8 %{{.}}, i32 63
// CHECK: select <64 x i1> %{{.}}, <64 x i8> %{{.}}, <64 x i8> %{{.*}}		// CHECK: select <64 x i1> %{{.}}, <64 x i8> %{{.}}, <64 x i8> %{{.*}}
return _mm512_maskz_set1_epi8(__M, __A);		return _mm512_maskz_set1_epi8(__M, __A);
}		}

__mmask64 test_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {		__mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_mm512_kunpackd		// CHECK-LABEL: @test_mm512_kunpackd
// CHECK: @llvm.x86.avx512.kunpck.dq		// CHECK: bitcast <64 x i1> %{{.*}} to i64
return _mm512_kunpackd(__A, __B);		// CHECK: bitcast <64 x i1> %{{.*}} to i64
		// CHECK: and i64 %{{.*}}, 4294967295
		// CHECK: shl i64 %{{.*}}, 32
		// CHECK: or i64 %{{.}}, %{{.}}
		// CHECK: bitcast i64 %{{.*}} to <64 x i1>
		return _mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, __A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F);
}		}

__mmask32 test_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {		__mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_mm512_kunpackw		// CHECK-LABEL: @test_mm512_kunpackw
// CHECK: @llvm.x86.avx512.kunpck.wd		// CHECK: bitcast <32 x i1> %{{.*}} to i32
return _mm512_kunpackw(__A, __B);		// CHECK: bitcast <32 x i1> %{{.*}} to i32
		// CHECK: and i32 %{{.*}}, 65535
		// CHECK: shl i32 %{{.*}}, 16
		// CHECK: or i32 %{{.}}, %{{.}}
		// CHECK: bitcast i32 %{{.*}} to <32 x i1>
		return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F);
}		}

__m512i test_mm512_mask_loadu_epi16(__m512i __W, __mmask32 __U, void const *__P) {		__m512i test_mm512_mask_loadu_epi16(__m512i __W, __mmask32 __U, void const *__P) {
// CHECK-LABEL: @test_mm512_mask_loadu_epi16		// CHECK-LABEL: @test_mm512_mask_loadu_epi16
// CHECK: @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %{{.}}, i32 1, <32 x i1> %{{.}}, <32 x i16> %{{.*}})		// CHECK: @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %{{.}}, i32 1, <32 x i1> %{{.}}, <32 x i16> %{{.*}})
return _mm512_mask_loadu_epi16(__W, __U, __P);		return _mm512_mask_loadu_epi16(__W, __U, __P);
}		}

▲ Show 20 Lines • Show All 311 Lines • Show Last 20 Lines

test/CodeGen/avx512f-builtins.c

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 6,235 Lines • ▼ Show 20 Lines
	}			}

	int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {			int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
	// CHECK-LABEL: @test_mm512_kortestz			// CHECK-LABEL: @test_mm512_kortestz
	// CHECK: @llvm.x86.avx512.kortestz.w			// CHECK: @llvm.x86.avx512.kortestz.w
	return _mm512_kortestz(__A, __B);			return _mm512_kortestz(__A, __B);
	}			}

	__mmask16 test_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {			__mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
	// CHECK-LABEL: @test_mm512_kunpackb			// CHECK-LABEL: @test_mm512_kunpackb
	// CHECK: @llvm.x86.avx512.kunpck.bw			// CHECK: bitcast <16 x i1> %{{.*}} to i16
	return _mm512_kunpackb(__A, __B);			// CHECK: bitcast <16 x i1> %{{.*}} to i16
				// CHECK: and i32 %{{.*}}, 255
				craig.topper: Does this really produce kunpackb in the backend? The type promotion here makes me skeptical.
				jina.nahias (author): Yes, the code we get is: `vpcmpneqd %zmm1, %zmm0, %k0`; `vpcmpneqd %zmm3, %zmm2, %k1`; `kunpckbw %k1, %k0, %k1`; `vpcmpneqd %zmm5, %zmm4, %k0 {%k1}`; `kmovd %k0, %eax`; `kill: %AX<def> %AX<kill> %EAX<kill>`; `vzeroupper`.
				// CHECK: shl i32 %{{.*}}, 8
				// CHECK: or i32 %{{.}}, %{{.}}
				// CHECK: bitcast i16 %{{.*}} to <16 x i1>
				return _mm512_mask_cmpneq_epu32_mask(_mm512_kunpackb(_mm512_cmpneq_epu32_mask(__A, __B),
				craig.topper: Please add some line breaks to this.
				_mm512_cmpneq_epu32_mask(__C, __D)),
				__E, __F);
	}			}

	__mmask16 test_mm512_kxnor(__mmask16 __A, __mmask16 __B) {			__mmask16 test_mm512_kxnor(__mmask16 __A, __mmask16 __B) {
	// CHECK-LABEL: @test_mm512_kxnor			// CHECK-LABEL: @test_mm512_kxnor
	// CHECK: @llvm.x86.avx512.kxnor.w			// CHECK: @llvm.x86.avx512.kxnor.w
	return _mm512_kxnor(__A, __B);			return _mm512_kxnor(__A, __B);
	}			}

	▲ Show 20 Lines • Show All 2,230 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86][AVX512] lowering kunpack intrinsic - clang part
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 125526

lib/Headers/avx512bwintrin.h

lib/Headers/avx512fintrin.h

test/CodeGen/avx512bw-builtins.c

test/CodeGen/avx512f-builtins.c

This is an archive of the discontinued LLVM Phabricator instance.

[X86][AVX512] lowering kunpack intrinsic - clang partClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 125526

lib/Headers/avx512bwintrin.h

lib/Headers/avx512fintrin.h

test/CodeGen/avx512bw-builtins.c

test/CodeGen/avx512f-builtins.c

[X86][AVX512] lowering kunpack intrinsic - clang part
ClosedPublic