Diff 323653

clang/include/clang/Basic/BuiltinsX86.def

	Show First 20 Lines • Show All 1,872 Lines • ▼ Show 20 Lines

	// generic reduction intrinsics			// generic reduction intrinsics
	TARGET_BUILTIN(__builtin_ia32_reduce_add_d512, "iV16i", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_add_d512, "iV16i", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_add_q512, "OiV8Oi", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_add_q512, "OiV8Oi", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f")
				TARGET_BUILTIN(__builtin_ia32_reduce_fmax_pd512, "dV8d", "ncV:512:", "avx512f")
				TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ps512, "fV16f", "ncV:512:", "avx512f")
				TARGET_BUILTIN(__builtin_ia32_reduce_fmin_pd512, "dV8d", "ncV:512:", "avx512f")
				TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ps512, "fV16f", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_mul_q512, "OiV8Oi", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_mul_q512, "OiV8Oi", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_or_d512, "iV16i", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_or_d512, "iV16i", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_or_q512, "OiV8Oi", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_or_q512, "OiV8Oi", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_smax_d512, "iV16i", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_smax_d512, "iV16i", "ncV:512:", "avx512f")
	TARGET_BUILTIN(__builtin_ia32_reduce_smax_q512, "OiV8Oi", "ncV:512:", "avx512f")			TARGET_BUILTIN(__builtin_ia32_reduce_smax_q512, "OiV8Oi", "ncV:512:", "avx512f")
	▲ Show 20 Lines • Show All 95 Lines • Show Last 20 Lines

clang/lib/CodeGen/CGBuiltin.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 13,845 Lines • ▼ Show 20 Lines	case X86::BI__builtin_ia32_reduce_and_q512: {
Function *F =		Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_and, Ops[0]->getType());		CGM.getIntrinsic(Intrinsic::vector_reduce_and, Ops[0]->getType());
return Builder.CreateCall(F, {Ops[0]});		return Builder.CreateCall(F, {Ops[0]});
}		}
case X86::BI__builtin_ia32_reduce_fadd_pd512:		case X86::BI__builtin_ia32_reduce_fadd_pd512:
case X86::BI__builtin_ia32_reduce_fadd_ps512: {		case X86::BI__builtin_ia32_reduce_fadd_ps512: {
Function *F =		Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());		CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
Builder.getFastMathFlags().setAllowReassoc(true);		Builder.getFastMathFlags().setAllowReassoc();
return Builder.CreateCall(F, {Ops[0], Ops[1]});		return Builder.CreateCall(F, {Ops[0], Ops[1]});
}		}
case X86::BI__builtin_ia32_reduce_fmul_pd512:		case X86::BI__builtin_ia32_reduce_fmul_pd512:
case X86::BI__builtin_ia32_reduce_fmul_ps512: {		case X86::BI__builtin_ia32_reduce_fmul_ps512: {
Function *F =		Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());		CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
Builder.getFastMathFlags().setAllowReassoc(true);		Builder.getFastMathFlags().setAllowReassoc();
return Builder.CreateCall(F, {Ops[0], Ops[1]});		return Builder.CreateCall(F, {Ops[0], Ops[1]});
}		}
		case X86::BI__builtin_ia32_reduce_fmax_pd512:
		case X86::BI__builtin_ia32_reduce_fmax_ps512: {
		// Lower the 512-bit floating-point max reduction to the generic
		// llvm.vector.reduce.fmax intrinsic, overloaded on the vector operand type.
		Function *F =
		CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
		// getFastMathFlags() exposes the builder's own persistent flag set, so
		// save it here and restore it when this scope exits. Without the guard,
		// nnan would stick to every later FP instruction this builder emits.
		IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
		// nnan lets the reduction assume there are no NaN elements in the input.
		Builder.getFastMathFlags().setNoNaNs();
		return Builder.CreateCall(F, {Ops[0]});
		}
		case X86::BI__builtin_ia32_reduce_fmin_pd512:
		case X86::BI__builtin_ia32_reduce_fmin_ps512: {
		// Lower the 512-bit floating-point min reduction to the generic
		// llvm.vector.reduce.fmin intrinsic, overloaded on the vector operand type.
		Function *F =
		CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
		// getFastMathFlags() exposes the builder's own persistent flag set, so
		// save it here and restore it when this scope exits. Without the guard,
		// nnan would stick to every later FP instruction this builder emits.
		IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
		// nnan lets the reduction assume there are no NaN elements in the input.
		Builder.getFastMathFlags().setNoNaNs();
		return Builder.CreateCall(F, {Ops[0]});
		}
case X86::BI__builtin_ia32_reduce_mul_d512:		case X86::BI__builtin_ia32_reduce_mul_d512:
case X86::BI__builtin_ia32_reduce_mul_q512: {		case X86::BI__builtin_ia32_reduce_mul_q512: {
Function *F =		Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType());		CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType());
return Builder.CreateCall(F, {Ops[0]});		return Builder.CreateCall(F, {Ops[0]});
}		}
case X86::BI__builtin_ia32_reduce_or_d512:		case X86::BI__builtin_ia32_reduce_or_d512:
case X86::BI__builtin_ia32_reduce_or_q512: {		case X86::BI__builtin_ia32_reduce_or_q512: {
▲ Show 20 Lines • Show All 3,800 Lines • Show Last 20 Lines

clang/lib/Headers/avx512fintrin.h

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,294 Lines • ▼ Show 20 Lines	_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);		return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}		}

/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as		/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
* outputs. This class of vector operation forms the basis of many scientific		* outputs. This class of vector operation forms the basis of many scientific
* computations. In vector-reduction arithmetic, the evaluation order is		* computations. In vector-reduction arithmetic, the evaluation order is
* independent of the order of the input elements of V.		* independent of the order of the input elements of V.

* For floating point types, we always assume the elements are reassociable even		* For floating-point intrinsics:
* if -fast-math is off.		* 1. When using fadd/fmul intrinsics, the order of operations within the
		* vector is unspecified (associative math).
		spatelUnsubmitted Not Done Reply Inline Actions If I understand correctly, there's nothing preventing -0.0 or NaN values in these ops. But if either of those are present, then the result is potentially indeterminate. For the LLVM intrinsic, we have this text in LangRef: "The result will always be a number unless all elements of the vector are NaN. For a vector with minimum element magnitude 0.0 and containing both +0.0 and -0.0 elements, the sign of the result is unspecified." spatel: If I understand correctly, there's nothing preventing -0.0 or NaN values in these ops. But if…
		pengfeiAuthorUnsubmitted Done Reply Inline Actions Thanks @spatel for the information. I checked that the LLVM intrinsic does ignore the sign of zeros. https://godbolt.org/z/a9Yj8a. So we can remove the no-signed-zeros assumption. But the X86 fmin/fmax instructions differ from the LLVM intrinsic, i.e. the result might be NaN even if only one of the elements is NaN. Besides, the LangRef also says "If the intrinsic call has the nnan fast-math flag, then the operation can assume that NaNs are not present in the input vector." So we still need the no-NaN assumption here. pengfei: Thanks @spatel for the information. I checked that the LLVM intrinsic does ignore the sign of…
		spatelUnsubmitted Not Done Reply Inline Actions I understand the behavior that we want to specify for this API, but we are not stating it clearly for the user. I think we should do that. How about: * For floating-point intrinsics: * 1. When using fadd/fmul intrinsics, the order of operations within the vector is unspecified (associative math). * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector produce unspecified results. spatel: I understand the behavior that we want to specify for this API, but we are not stating it…
		pengfeiAuthorUnsubmitted Done Reply Inline Actions Great! It's much clearer now. Thanks @spatel! pengfei: Great! It's much clearer now. Thanks @spatel!
		* 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
		* produce unspecified results.

* Used bisection method. At each step, we partition the vector with previous		* Used bisection method. At each step, we partition the vector with previous
* step in half, and the operation is performed on its two halves.		* step in half, and the operation is performed on its two halves.
* This takes log2(n) steps where n is the number of elements in the vector.		* This takes log2(n) steps where n is the number of elements in the vector.
*/		*/

static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {		static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
return __builtin_ia32_reduce_add_q512(__W);		return __builtin_ia32_reduce_add_q512(__W);
▲ Show 20 Lines • Show All 206 Lines • ▼ Show 20 Lines
}		}

static __inline__ unsigned int __DEFAULT_FN_ATTRS512		static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {		_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
__V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);		__V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
return __builtin_ia32_reduce_umin_d512((__v16si)__V);		return __builtin_ia32_reduce_umin_d512((__v16si)__V);
}		}

#define _mm512_mask_reduce_operator(op) \
__m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
__m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
__m256d __t3 = _mm256_##op(__t1, __t2); \
__m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
__m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
__m128d __t6 = _mm_##op(__t4, __t5); \
__m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
__m128d __t8 = _mm_##op(__t6, __t7); \
return __t8[0]

static __inline__ double __DEFAULT_FN_ATTRS512		static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_reduce_max_pd(__m512d __V) {		_mm512_reduce_max_pd(__m512d __V) {
_mm512_mask_reduce_operator(max_pd);		return __builtin_ia32_reduce_fmax_pd512(__V);
}		}

static __inline__ double __DEFAULT_FN_ATTRS512		static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_reduce_min_pd(__m512d __V) {		_mm512_reduce_min_pd(__m512d __V) {
_mm512_mask_reduce_operator(min_pd);		return __builtin_ia32_reduce_fmin_pd512(__V);
}		}

static __inline__ double __DEFAULT_FN_ATTRS512		static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {		_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
__V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);		__V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
_mm512_mask_reduce_operator(max_pd);		return __builtin_ia32_reduce_fmax_pd512(__V);
}		}

static __inline__ double __DEFAULT_FN_ATTRS512		static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {		_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
__V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);		__V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
_mm512_mask_reduce_operator(min_pd);		return __builtin_ia32_reduce_fmin_pd512(__V);
}		}
#undef _mm512_mask_reduce_operator

#define _mm512_mask_reduce_operator(op) \
__m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
__m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
__m256 __t3 = _mm256_##op(__t1, __t2); \
__m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
__m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
__m128 __t6 = _mm_##op(__t4, __t5); \
__m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
__m128 __t8 = _mm_##op(__t6, __t7); \
__m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
__m128 __t10 = _mm_##op(__t8, __t9); \
return __t10[0]

static __inline__ float __DEFAULT_FN_ATTRS512		static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_max_ps(__m512 __V) {		_mm512_reduce_max_ps(__m512 __V) {
_mm512_mask_reduce_operator(max_ps);		return __builtin_ia32_reduce_fmax_ps512(__V);
}		}

static __inline__ float __DEFAULT_FN_ATTRS512		static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_min_ps(__m512 __V) {		_mm512_reduce_min_ps(__m512 __V) {
_mm512_mask_reduce_operator(min_ps);		return __builtin_ia32_reduce_fmin_ps512(__V);
}		}

static __inline__ float __DEFAULT_FN_ATTRS512		static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {		_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
__V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);		__V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
_mm512_mask_reduce_operator(max_ps);		return __builtin_ia32_reduce_fmax_ps512(__V);
}		}

static __inline__ float __DEFAULT_FN_ATTRS512		static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {		_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
__V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);		__V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
_mm512_mask_reduce_operator(min_ps);		return __builtin_ia32_reduce_fmin_ps512(__V);
}		}
#undef _mm512_mask_reduce_operator

/// Moves the least significant 32 bits of a vector of [16 x i32] to a		/// Moves the least significant 32 bits of a vector of [16 x i32] to a
/// 32-bit signed integer value.		/// 32-bit signed integer value.
///		///
/// \headerfile <x86intrin.h>		/// \headerfile <x86intrin.h>
///		///
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.		/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
///		///
Show All 15 Lines

clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c

	// RUN: %clang_cc1 -fexperimental-new-pass-manager -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror \| FileCheck %s			// RUN: %clang_cc1 -fexperimental-new-pass-manager -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror \| FileCheck %s

	#include <immintrin.h>			#include <immintrin.h>

	long long test_mm512_reduce_max_epi64(__m512i __W){			long long test_mm512_reduce_max_epi64(__m512i __W){
	// CHECK-LABEL: @test_mm512_reduce_max_epi64(			// CHECK-LABEL: @test_mm512_reduce_max_epi64(
	// CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})			// CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
	return _mm512_reduce_max_epi64(__W);			return _mm512_reduce_max_epi64(__W);
	}			}

	unsigned long long test_mm512_reduce_max_epu64(__m512i __W){			unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
	// CHECK-LABEL: @test_mm512_reduce_max_epu64(			// CHECK-LABEL: @test_mm512_reduce_max_epu64(
	// CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})			// CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
	return _mm512_reduce_max_epu64(__W);			return _mm512_reduce_max_epu64(__W);
	}			}

	double test_mm512_reduce_max_pd(__m512d __W){			double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
	// CHECK-LABEL: @test_mm512_reduce_max_pd(			// CHECK-LABEL: @test_mm512_reduce_max_pd(
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			// CHECK-NOT: nnan
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			// CHECK: call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
	// CHECK: call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.}}, <4 x double> %{{.}})			// CHECK-NOT: nnan
	// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>			return _mm512_reduce_max_pd(__W) + ExtraAddOp;
	// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
	// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.}}, <2 x double> %{{.}})
	// CHECK: shufflevector <2 x double> %{{.}}, <2 x double> %{{.}}, <2 x i32> <i32 1, i32 0>
	// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.}}, <2 x double> %{{.}})
	// CHECK: extractelement <2 x double> %{{.*}}, i32 0
	return _mm512_reduce_max_pd(__W);
	}			}

	long long test_mm512_reduce_min_epi64(__m512i __W){			long long test_mm512_reduce_min_epi64(__m512i __W){
	// CHECK-LABEL: @test_mm512_reduce_min_epi64(			// CHECK-LABEL: @test_mm512_reduce_min_epi64(
	// CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})			// CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
	return _mm512_reduce_min_epi64(__W);			return _mm512_reduce_min_epi64(__W);
	}			}

	unsigned long long test_mm512_reduce_min_epu64(__m512i __W){			unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
	// CHECK-LABEL: @test_mm512_reduce_min_epu64(			// CHECK-LABEL: @test_mm512_reduce_min_epu64(
	// CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})			// CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
	return _mm512_reduce_min_epu64(__W);			return _mm512_reduce_min_epu64(__W);
	}			}

	double test_mm512_reduce_min_pd(__m512d __W){			double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
	// CHECK-LABEL: @test_mm512_reduce_min_pd(			// CHECK-LABEL: @test_mm512_reduce_min_pd(
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			// CHECK-NOT: nnan
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			// CHECK: call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
	// CHECK: call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.}}, <4 x double> %{{.}})			// CHECK-NOT: nnan
	// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>			return _mm512_reduce_min_pd(__W) * ExtraMulOp;
	// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
	// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.}}, <2 x double> %{{.}})
	// CHECK: shufflevector <2 x double> %{{.}}, <2 x double> %{{.}}, <2 x i32> <i32 1, i32 0>
	// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.}}, <2 x double> %{{.}})
	// CHECK: extractelement <2 x double> %{{.*}}, i32 0
	return _mm512_reduce_min_pd(__W);
	}			}

	long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){			long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_max_epi64(			// CHECK-LABEL: @test_mm512_mask_reduce_max_epi64(
	// CHECK: bitcast i8 %{{.*}} to <8 x i1>			// CHECK: bitcast i8 %{{.*}} to <8 x i1>
	// CHECK: select <8 x i1> %{{.}}, <8 x i64> %{{.}}, <8 x i64> %{{.*}}			// CHECK: select <8 x i1> %{{.}}, <8 x i64> %{{.}}, <8 x i64> %{{.*}}
	// CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})			// CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
	return _mm512_mask_reduce_max_epi64(__M, __W);			return _mm512_mask_reduce_max_epi64(__M, __W);
	}			}

	unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){			unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_max_epu64(			// CHECK-LABEL: @test_mm512_mask_reduce_max_epu64(
	// CHECK: bitcast i8 %{{.*}} to <8 x i1>			// CHECK: bitcast i8 %{{.*}} to <8 x i1>
	// CHECK: select <8 x i1> %{{.}}, <8 x i64> %{{.}}, <8 x i64> %{{.*}}			// CHECK: select <8 x i1> %{{.}}, <8 x i64> %{{.}}, <8 x i64> %{{.*}}
	// CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})			// CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
	return _mm512_mask_reduce_max_epu64(__M, __W);			return _mm512_mask_reduce_max_epu64(__M, __W);
	}			}

	double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){			double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_max_pd(			// CHECK-LABEL: @test_mm512_mask_reduce_max_pd(
	// CHECK: bitcast i8 %{{.*}} to <8 x i1>			// CHECK: bitcast i8 %{{.*}} to <8 x i1>
	// CHECK: select <8 x i1> %{{.}}, <8 x double> %{{.}}, <8 x double> %{{.*}}			// CHECK: select <8 x i1> %{{.}}, <8 x double> %{{.}}, <8 x double> %{{.*}}
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			// CHECK: call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.}}, <4 x double> %{{.}})
	// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
	// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
	// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.}}, <2 x double> %{{.}})
	// CHECK: shufflevector <2 x double> %{{.}}, <2 x double> %{{.}}, <2 x i32> <i32 1, i32 0>
	// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.}}, <2 x double> %{{.}})
	// CHECK: extractelement <2 x double> %{{.*}}, i32 0
	return _mm512_mask_reduce_max_pd(__M, __W);			return _mm512_mask_reduce_max_pd(__M, __W);
	}			}

	long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){			long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_min_epi64(			// CHECK-LABEL: @test_mm512_mask_reduce_min_epi64(
	// CHECK: bitcast i8 %{{.*}} to <8 x i1>			// CHECK: bitcast i8 %{{.*}} to <8 x i1>
	// CHECK: select <8 x i1> %{{.}}, <8 x i64> %{{.}}, <8 x i64> %{{.*}}			// CHECK: select <8 x i1> %{{.}}, <8 x i64> %{{.}}, <8 x i64> %{{.*}}
	// CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})			// CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
	return _mm512_mask_reduce_min_epi64(__M, __W);			return _mm512_mask_reduce_min_epi64(__M, __W);
	}			}

	unsigned long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){			unsigned long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_min_epu64(			// CHECK-LABEL: @test_mm512_mask_reduce_min_epu64(
	// CHECK: bitcast i8 %{{.*}} to <8 x i1>			// CHECK: bitcast i8 %{{.*}} to <8 x i1>
	// CHECK: select <8 x i1> %{{.}}, <8 x i64> %{{.}}, <8 x i64> %{{.*}}			// CHECK: select <8 x i1> %{{.}}, <8 x i64> %{{.}}, <8 x i64> %{{.*}}
	// CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})			// CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
	return _mm512_mask_reduce_min_epu64(__M, __W);			return _mm512_mask_reduce_min_epu64(__M, __W);
	}			}

	double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){			double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_min_pd(			// CHECK-LABEL: @test_mm512_mask_reduce_min_pd(
	// CHECK: bitcast i8 %{{.*}} to <8 x i1>			// CHECK: bitcast i8 %{{.*}} to <8 x i1>
	// CHECK: select <8 x i1> %{{.}}, <8 x double> %{{.}}, <8 x double> %{{.*}}			// CHECK: select <8 x i1> %{{.}}, <8 x double> %{{.}}, <8 x double> %{{.*}}
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			// CHECK: call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.}}, <4 x double> %{{.}})
	// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
	// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
	// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.}}, <2 x double> %{{.}})
	// CHECK: shufflevector <2 x double> %{{.}}, <2 x double> %{{.}}, <2 x i32> <i32 1, i32 0>
	// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.}}, <2 x double> %{{.}})
	// CHECK: extractelement <2 x double> %{{.*}}, i32 0
	return _mm512_mask_reduce_min_pd(__M, __W);			return _mm512_mask_reduce_min_pd(__M, __W);
	}			}

	int test_mm512_reduce_max_epi32(__m512i __W){			int test_mm512_reduce_max_epi32(__m512i __W){
	// CHECK-LABEL: @test_mm512_reduce_max_epi32(			// CHECK-LABEL: @test_mm512_reduce_max_epi32(
	// CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})			// CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
	return _mm512_reduce_max_epi32(__W);			return _mm512_reduce_max_epi32(__W);
	}			}

	unsigned int test_mm512_reduce_max_epu32(__m512i __W){			unsigned int test_mm512_reduce_max_epu32(__m512i __W){
	// CHECK-LABEL: @test_mm512_reduce_max_epu32(			// CHECK-LABEL: @test_mm512_reduce_max_epu32(
	// CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})			// CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
	return _mm512_reduce_max_epu32(__W);			return _mm512_reduce_max_epu32(__W);
	}			}

	float test_mm512_reduce_max_ps(__m512 __W){			float test_mm512_reduce_max_ps(__m512 __W){
	// CHECK-LABEL: define{{.*}} float @test_mm512_reduce_max_ps(			// CHECK-LABEL: @test_mm512_reduce_max_ps(
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			// CHECK: call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.}}, <8 x float> %{{.}})
	// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: shufflevector <4 x float> %{{.}}, <4 x float> %{{.}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
	// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: shufflevector <4 x float> %{{.}}, <4 x float> %{{.}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: extractelement <4 x float> %{{.*}}, i32 0
	return _mm512_reduce_max_ps(__W);			return _mm512_reduce_max_ps(__W);
	}			}

	int test_mm512_reduce_min_epi32(__m512i __W){			int test_mm512_reduce_min_epi32(__m512i __W){
	// CHECK-LABEL: @test_mm512_reduce_min_epi32(			// CHECK-LABEL: @test_mm512_reduce_min_epi32(
	// CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})			// CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
	return _mm512_reduce_min_epi32(__W);			return _mm512_reduce_min_epi32(__W);
	}			}

	unsigned int test_mm512_reduce_min_epu32(__m512i __W){			unsigned int test_mm512_reduce_min_epu32(__m512i __W){
	// CHECK-LABEL: @test_mm512_reduce_min_epu32(			// CHECK-LABEL: @test_mm512_reduce_min_epu32(
	// CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})			// CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
	return _mm512_reduce_min_epu32(__W);			return _mm512_reduce_min_epu32(__W);
	}			}

	float test_mm512_reduce_min_ps(__m512 __W){			float test_mm512_reduce_min_ps(__m512 __W){
	// CHECK-LABEL: define{{.*}} float @test_mm512_reduce_min_ps(			// CHECK-LABEL: @test_mm512_reduce_min_ps(
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			// CHECK: call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.}}, <8 x float> %{{.}})
	// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: shufflevector <4 x float> %{{.}}, <4 x float> %{{.}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
	// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: shufflevector <4 x float> %{{.}}, <4 x float> %{{.}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: extractelement <4 x float> %{{.*}}, i32 0
	return _mm512_reduce_min_ps(__W);			return _mm512_reduce_min_ps(__W);
	}			}

	int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){			int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_max_epi32(			// CHECK-LABEL: @test_mm512_mask_reduce_max_epi32(
	// CHECK: bitcast i16 %{{.*}} to <16 x i1>			// CHECK: bitcast i16 %{{.*}} to <16 x i1>
	// CHECK: select <16 x i1> %{{.}}, <16 x i32> %{{.}}, <16 x i32> %{{.*}}			// CHECK: select <16 x i1> %{{.}}, <16 x i32> %{{.}}, <16 x i32> %{{.*}}
	// CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})			// CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
	return _mm512_mask_reduce_max_epi32(__M, __W);			return _mm512_mask_reduce_max_epi32(__M, __W);
	}			}

	unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){			unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_max_epu32(			// CHECK-LABEL: @test_mm512_mask_reduce_max_epu32(
	// CHECK: bitcast i16 %{{.*}} to <16 x i1>			// CHECK: bitcast i16 %{{.*}} to <16 x i1>
	// CHECK: select <16 x i1> %{{.}}, <16 x i32> %{{.}}, <16 x i32> %{{.*}}			// CHECK: select <16 x i1> %{{.}}, <16 x i32> %{{.}}, <16 x i32> %{{.*}}
	// CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})			// CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
	return _mm512_mask_reduce_max_epu32(__M, __W);			return _mm512_mask_reduce_max_epu32(__M, __W);
	}			}

	float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){			float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
	// CHECK-LABEL: define{{.*}} float @test_mm512_mask_reduce_max_ps(			// CHECK-LABEL: @test_mm512_mask_reduce_max_ps(
	// CHECK: bitcast i16 %{{.*}} to <16 x i1>			// CHECK: bitcast i16 %{{.*}} to <16 x i1>
	// CHECK: select <16 x i1> %{{.}}, <16 x float> %{{.}}, <16 x float> %{{.*}}			// CHECK: select <16 x i1> %{{.}}, <16 x float> %{{.}}, <16 x float> %{{.*}}
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			// CHECK: call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.}}, <8 x float> %{{.}})
	// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: shufflevector <4 x float> %{{.}}, <4 x float> %{{.}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
	// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: shufflevector <4 x float> %{{.}}, <4 x float> %{{.}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: extractelement <4 x float> %{{.*}}, i32 0
	return _mm512_mask_reduce_max_ps(__M, __W);			return _mm512_mask_reduce_max_ps(__M, __W);
	}			}

	int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){			int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_min_epi32(			// CHECK-LABEL: @test_mm512_mask_reduce_min_epi32(
	// CHECK: bitcast i16 %{{.*}} to <16 x i1>			// CHECK: bitcast i16 %{{.*}} to <16 x i1>
	// CHECK: select <16 x i1> %{{.}}, <16 x i32> %{{.}}, <16 x i32> %{{.*}}			// CHECK: select <16 x i1> %{{.}}, <16 x i32> %{{.}}, <16 x i32> %{{.*}}
	// CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})			// CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
	return _mm512_mask_reduce_min_epi32(__M, __W);			return _mm512_mask_reduce_min_epi32(__M, __W);
	}			}

	unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){			unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){
	// CHECK-LABEL: @test_mm512_mask_reduce_min_epu32(			// CHECK-LABEL: @test_mm512_mask_reduce_min_epu32(
	// CHECK: bitcast i16 %{{.*}} to <16 x i1>			// CHECK: bitcast i16 %{{.*}} to <16 x i1>
	// CHECK: select <16 x i1> %{{.}}, <16 x i32> %{{.}}, <16 x i32> %{{.*}}			// CHECK: select <16 x i1> %{{.}}, <16 x i32> %{{.}}, <16 x i32> %{{.*}}
	// CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})			// CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
	return _mm512_mask_reduce_min_epu32(__M, __W);			return _mm512_mask_reduce_min_epu32(__M, __W);
	}			}

	float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){			float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
	// CHECK-LABEL: define{{.*}} float @test_mm512_mask_reduce_min_ps(			// CHECK-LABEL: @test_mm512_mask_reduce_min_ps(
	// CHECK: bitcast i16 %{{.*}} to <16 x i1>			// CHECK: bitcast i16 %{{.*}} to <16 x i1>
	// CHECK: select <16 x i1> %{{.}}, <16 x float> %{{.}}, <16 x float> %{{.*}}			// CHECK: select <16 x i1> %{{.}}, <16 x float> %{{.}}, <16 x float> %{{.*}}
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			// CHECK: call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
	// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.}}, <8 x float> %{{.}})
	// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: shufflevector <4 x float> %{{.}}, <4 x float> %{{.}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
	// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: shufflevector <4 x float> %{{.}}, <4 x float> %{{.}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.}}, <4 x float> %{{.}})
	// CHECK: extractelement <4 x float> %{{.*}}, i32 0
	return _mm512_mask_reduce_min_ps(__M, __W);			return _mm512_mask_reduce_min_ps(__M, __W);
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 323653

clang/include/clang/Basic/BuiltinsX86.def

clang/lib/CodeGen/CGBuiltin.cpp

clang/lib/Headers/avx512fintrin.h

clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 323653

clang/include/clang/Basic/BuiltinsX86.def

clang/lib/CodeGen/CGBuiltin.cpp

clang/lib/Headers/avx512fintrin.h

clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c

[X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)
ClosedPublic