This is an archive of the discontinued LLVM Phabricator instance.

Differential D20859

[X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)
ClosedPublic

Authored by RKSimon on Jun 1 2016, 5:38 AM.

Download Raw Diff

Details

Reviewers

spatel
ab
andreadb
mkuper
craig.topper

Commits

rG00880511b156: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero)…
rC271436: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero)…
rL271436: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero)…

Summary

The 'cvtt' truncation (round to zero) conversions can be safely represented as generic __builtin_convertvector (fptosi) calls instead of x86 intrinsics.

We already do this (implicitly) for the scalar equivalents.

Note: I looked at updating _mm_cvttpd_epi32 as well, but this still requires a lot more backend work to correctly lower (both for debug and optimized builds).

Diff Detail

Repository: rL LLVM

Event Timeline

RKSimon updated this revision to Diff 59204. Jun 1 2016, 5:38 AM

RKSimon retitled this revision to [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang).

RKSimon updated this object.

RKSimon added reviewers: ab, mkuper, craig.topper, spatel, andreadb.

RKSimon set the repository for this revision to rL LLVM.

RKSimon added a subscriber: cfe-commits.

RKSimon mentioned this in D20860: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (llvm). Jun 1 2016, 5:40 AM

RKSimon added a child revision: D20860: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (llvm).

LGTM

This revision is now accepted and ready to land. Jun 1 2016, 7:58 AM

Closed by commit rL271436: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero)… (authored by RKSimon). · Explain Why · Jun 1 2016, 2:53 PM

This revision was automatically updated to reflect the committed changes.

RKSimon mentioned this in D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR. Jul 7 2016, 11:50 AM

RKSimon mentioned this in D22106: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR. Jul 7 2016, 11:53 AM

RKSimon mentioned this in rL275981: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using…. Jul 19 2016, 8:15 AM

RKSimon mentioned this in rL276102: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using…. Jul 20 2016, 3:25 AM

hans mentioned this in rL276990: Merging r275981 and r276740:. Jul 28 2016, 8:46 AM

hans mentioned this in rL276991: Merging r276102:. Jul 28 2016, 8:49 AM

Revision Contents

Path

Size

cfe/

trunk/

include/

clang/

Basic/

BuiltinsX86.def

3 lines

lib/

Headers/

avxintrin.h

4 lines

emmintrin.h

2 lines

test/

CodeGen/

avx-builtins.c

4 lines

builtins-x86.c

3 lines

sse2-builtins.c

2 lines

Diff 59284

cfe/trunk/include/clang/Basic/BuiltinsX86.def

	Show First 20 Lines • Show All 333 Lines • ▼ Show 20 Lines
	TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_cvtdq2ps, "V4fV4i", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_cvtdq2ps, "V4fV4i", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_pause, "v", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_pause, "v", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "", "sse2")
	TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "", "sse2")			TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "", "sse2")
	▲ Show 20 Lines • Show All 106 Lines • ▼ Show 20 Lines
	TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "", "avx")			TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "", "avx")			TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "", "avx")			TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "", "avx")			TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "", "avx")			TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")			TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")			TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")			TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")			TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx")			TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx")			TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx")			TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "", "avx")			TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "", "avx")			TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "", "avx")			TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "", "avx")			TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "", "avx")
	TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "", "avx")			TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "", "avx")
	▲ Show 20 Lines • Show All 1,789 Lines • Show Last 20 Lines

cfe/trunk/lib/Headers/avxintrin.h

	Show First 20 Lines • Show All 2,102 Lines • ▼ Show 20 Lines
	_mm256_cvtps_pd(__m128 __a)			_mm256_cvtps_pd(__m128 __a)
	{			{
	return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);			return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
	}			}

	static __inline __m128i __DEFAULT_FN_ATTRS			static __inline __m128i __DEFAULT_FN_ATTRS
	_mm256_cvttpd_epi32(__m256d __a)			_mm256_cvttpd_epi32(__m256d __a)
	{			{
	return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);			return (__m128i)__builtin_convertvector((__v4df) __a, __v4si);
	}			}

	static __inline __m128i __DEFAULT_FN_ATTRS			static __inline __m128i __DEFAULT_FN_ATTRS
	_mm256_cvtpd_epi32(__m256d __a)			_mm256_cvtpd_epi32(__m256d __a)
	{			{
	return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);			return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
	}			}

	static __inline __m256i __DEFAULT_FN_ATTRS			static __inline __m256i __DEFAULT_FN_ATTRS
	_mm256_cvttps_epi32(__m256 __a)			_mm256_cvttps_epi32(__m256 __a)
	{			{
	return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);			return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si);
	}			}

	static __inline double __DEFAULT_FN_ATTRS			static __inline double __DEFAULT_FN_ATTRS
	_mm256_cvtsd_f64(__m256d __a)			_mm256_cvtsd_f64(__m256d __a)
	{			{
	return __a[0];			return __a[0];
	}			}

	▲ Show 20 Lines • Show All 788 Lines • Show Last 20 Lines

cfe/trunk/lib/Headers/emmintrin.h

	Show First 20 Lines • Show All 1,738 Lines • ▼ Show 20 Lines
	/// This intrinsic corresponds to the \c VCVTTPS2DQ / CVTTPS2DQ instruction.			/// This intrinsic corresponds to the \c VCVTTPS2DQ / CVTTPS2DQ instruction.
	///			///
	/// \param __a			/// \param __a
	/// A 128-bit vector of [4 x float].			/// A 128-bit vector of [4 x float].
	/// \returns A 128-bit vector of [4 x i32] containing the converted values.			/// \returns A 128-bit vector of [4 x i32] containing the converted values.
	static __inline__ __m128i __DEFAULT_FN_ATTRS			static __inline__ __m128i __DEFAULT_FN_ATTRS
	_mm_cvttps_epi32(__m128 __a)			_mm_cvttps_epi32(__m128 __a)
	{			{
	return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);			return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si);
	}			}

	/// \brief Returns a vector of [4 x i32] where the lowest element is the input			/// \brief Returns a vector of [4 x i32] where the lowest element is the input
	/// operand and the remaining elements are zero.			/// operand and the remaining elements are zero.
	///			///
	/// \headerfile <x86intrin.h>			/// \headerfile <x86intrin.h>
	///			///
	/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.			/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
	▲ Show 20 Lines • Show All 691 Lines • Show Last 20 Lines

cfe/trunk/test/CodeGen/avx-builtins.c

	Show First 20 Lines • Show All 280 Lines • ▼ Show 20 Lines
	__m256d test_mm256_cvtps_pd(__m128 A) {			__m256d test_mm256_cvtps_pd(__m128 A) {
	// CHECK-LABEL: test_mm256_cvtps_pd			// CHECK-LABEL: test_mm256_cvtps_pd
	// CHECK: fpext <4 x float> %{{.*}} to <4 x double>			// CHECK: fpext <4 x float> %{{.*}} to <4 x double>
	return _mm256_cvtps_pd(A);			return _mm256_cvtps_pd(A);
	}			}

	__m128i test_mm256_cvttpd_epi32(__m256d A) {			__m128i test_mm256_cvttpd_epi32(__m256d A) {
	// CHECK-LABEL: test_mm256_cvttpd_epi32			// CHECK-LABEL: test_mm256_cvttpd_epi32
	// CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}})			// CHECK: fptosi <4 x double> %{{.*}} to <4 x i32>
	return _mm256_cvttpd_epi32(A);			return _mm256_cvttpd_epi32(A);
	}			}

	__m256i test_mm256_cvttps_epi32(__m256 A) {			__m256i test_mm256_cvttps_epi32(__m256 A) {
	// CHECK-LABEL: test_mm256_cvttps_epi32			// CHECK-LABEL: test_mm256_cvttps_epi32
	// CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}})			// CHECK: fptosi <8 x float> %{{.*}} to <8 x i32>
	return _mm256_cvttps_epi32(A);			return _mm256_cvttps_epi32(A);
	}			}

	__m256d test_mm256_div_pd(__m256d A, __m256d B) {			__m256d test_mm256_div_pd(__m256d A, __m256d B) {
	// CHECK-LABEL: test_mm256_div_pd			// CHECK-LABEL: test_mm256_div_pd
	// CHECK: fdiv <4 x double>			// CHECK: fdiv <4 x double>
	return _mm256_div_pd(A, B);			return _mm256_div_pd(A, B);
	}			}
	▲ Show 20 Lines • Show All 1,105 Lines • Show Last 20 Lines

cfe/trunk/test/CodeGen/builtins-x86.c

Show First 20 Lines • Show All 329 Lines • ▼ Show 20 Lines	#endif
tmp_V4i = __builtin_ia32_cvttpd2dq(tmp_V2d);		tmp_V4i = __builtin_ia32_cvttpd2dq(tmp_V2d);
tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d);		tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d);
tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i);		tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i);
tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d);		tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d);
#ifdef USE_64		#ifdef USE_64
tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);		tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
#endif		#endif
tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);		tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
(void) __builtin_ia32_clflush(tmp_vCp);		(void) __builtin_ia32_clflush(tmp_vCp);
(void) __builtin_ia32_lfence();		(void) __builtin_ia32_lfence();
(void) __builtin_ia32_mfence();		(void) __builtin_ia32_mfence();
tmp_V4s = __builtin_ia32_psllwi(tmp_V4s, tmp_i);		tmp_V4s = __builtin_ia32_psllwi(tmp_V4s, tmp_i);
tmp_V2i = __builtin_ia32_pslldi(tmp_V2i, tmp_i);		tmp_V2i = __builtin_ia32_pslldi(tmp_V2i, tmp_i);
tmp_V1LLi = __builtin_ia32_psllqi(tmp_V1LLi, tmp_i);		tmp_V1LLi = __builtin_ia32_psllqi(tmp_V1LLi, tmp_i);
tmp_V4s = __builtin_ia32_psrawi(tmp_V4s, tmp_i);		tmp_V4s = __builtin_ia32_psrawi(tmp_V4s, tmp_i);
tmp_V2i = __builtin_ia32_psradi(tmp_V2i, tmp_i);		tmp_V2i = __builtin_ia32_psradi(tmp_V2i, tmp_i);
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	#endif
tmp_V4d = __builtin_ia32_blendvpd256(tmp_V4d, tmp_V4d, tmp_V4d);		tmp_V4d = __builtin_ia32_blendvpd256(tmp_V4d, tmp_V4d, tmp_V4d);
tmp_V8f = __builtin_ia32_blendvps256(tmp_V8f, tmp_V8f, tmp_V8f);		tmp_V8f = __builtin_ia32_blendvps256(tmp_V8f, tmp_V8f, tmp_V8f);
tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);		tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);		tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);		tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);		tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);		tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);		tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);		tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);		tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);		tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);		tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);		tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);		tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
tmp_V8f = __builtin_ia32_rsqrtps256(tmp_V8f);		tmp_V8f = __builtin_ia32_rsqrtps256(tmp_V8f);
tmp_V8f = __builtin_ia32_rcpps256(tmp_V8f);		tmp_V8f = __builtin_ia32_rcpps256(tmp_V8f);
tmp_V4d = __builtin_ia32_roundpd256(tmp_V4d, 0x1);		tmp_V4d = __builtin_ia32_roundpd256(tmp_V4d, 0x1);
▲ Show 20 Lines • Show All 71 Lines • Show Last 20 Lines

cfe/trunk/test/CodeGen/sse2-builtins.c

	Show First 20 Lines • Show All 527 Lines • ▼ Show 20 Lines
	__m128i test_mm_cvttpd_epi32(__m128d A) {			__m128i test_mm_cvttpd_epi32(__m128d A) {
	// CHECK-LABEL: test_mm_cvttpd_epi32			// CHECK-LABEL: test_mm_cvttpd_epi32
	// CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})			// CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
	return _mm_cvttpd_epi32(A);			return _mm_cvttpd_epi32(A);
	}			}

	__m128i test_mm_cvttps_epi32(__m128 A) {			__m128i test_mm_cvttps_epi32(__m128 A) {
	// CHECK-LABEL: test_mm_cvttps_epi32			// CHECK-LABEL: test_mm_cvttps_epi32
	// CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})			// CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
	return _mm_cvttps_epi32(A);			return _mm_cvttps_epi32(A);
	}			}

	int test_mm_cvttsd_si32(__m128d A) {			int test_mm_cvttsd_si32(__m128d A) {
	// CHECK-LABEL: test_mm_cvttsd_si32			// CHECK-LABEL: test_mm_cvttsd_si32
	// CHECK: extractelement <2 x double> %{{.*}}, i32 0			// CHECK: extractelement <2 x double> %{{.*}}, i32 0
	// CHECK: fptosi double %{{.*}} to i32			// CHECK: fptosi double %{{.*}} to i32
	return _mm_cvttsd_si32(A);			return _mm_cvttsd_si32(A);
	▲ Show 20 Lines • Show All 938 Lines • Show Last 20 Lines