Index: include/clang/Basic/BuiltinsX86.def =================================================================== --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -304,8 +304,12 @@ TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "", "sse") TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "", "sse") +TARGET_BUILTIN(__builtin_ia32_cvtsi2ss, "V4fV4fi","","sse") +TARGET_BUILTIN(__builtin_ia32_cvtsi642ss, "V4fV4fLLi","","sse") TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse") @@ -330,8 +334,13 @@ TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtsi642sd, "V2dV2dLLi","","sse2") TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2") @@ -458,7 +467,9 @@ TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx") Index: lib/Headers/avxintrin.h =================================================================== --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -2117,7 +2117,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { - return (__m128i)__builtin_convertvector((__v4df) __a, __v4si); + return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); } static __inline __m128i __DEFAULT_FN_ATTRS @@ -2129,7 +2129,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { - return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si); + return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); } static __inline double __DEFAULT_FN_ATTRS Index: lib/Headers/emmintrin.h =================================================================== --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -417,8 +417,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b) { - __a[0] = __b[0]; - return __a; + return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -444,7 +443,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { - return __a[0]; + return __builtin_ia32_cvttsd2si((__v2df)__a); } static __inline__ __m64 __DEFAULT_FN_ATTRS @@ -1672,8 +1671,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, long long __b) { - __a[0] = __b; - return __a; + return (__m128d)__builtin_ia32_cvtsi642sd((__v2df)__a, __b); } /// \brief Converts the first (lower) element of a vector of [2 x double] into a @@ -1707,7 +1705,7 @@ static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { - return __a[0]; + return __builtin_ia32_cvttsd2si64((__v2df)__a); } #endif @@ -1755,7 +1753,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { - return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si); + return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); } /// \brief Returns a vector of [4 x i32] where the lowest element is the input Index: lib/Headers/xmmintrin.h =================================================================== --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -1350,7 +1350,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_si32(__m128 __a) { - return __a[0]; + return __builtin_ia32_cvttss2si((__v4sf)__a); } /// \brief Converts a float value contained in the lower 32 bits of a vector of @@ -1386,7 +1386,7 @@ static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttss_si64(__m128 __a) { - return __a[0]; + return __builtin_ia32_cvttss2si64((__v4sf)__a); } /// \brief Converts two low-order float values in a 128-bit vector of @@ -1442,8 +1442,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi32_ss(__m128 __a, int __b) { - __a[0] = __b; - return __a; + return (__m128)__builtin_ia32_cvtsi2ss((__v4sf)__a, __b); } /// \brief Converts a 32-bit signed integer value into a floating point value @@ -1489,8 +1488,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi64_ss(__m128 __a, long long __b) { - __a[0] = __b; - return __a; + return (__m128)__builtin_ia32_cvtsi642ss((__v4sf)__a, __b); } #endif Index: test/CodeGen/avx-builtins.c =================================================================== --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -286,13 +286,13 @@ __m128i test_mm256_cvttpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvttpd_epi32 - // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32> + // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}}) return _mm256_cvttpd_epi32(A); } __m256i test_mm256_cvttps_epi32(__m256 A) { // CHECK-LABEL: test_mm256_cvttps_epi32 - // CHECK: fptosi <8 x float> %{{.*}} to <8 x i32> + // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}}) return _mm256_cvttps_epi32(A); } Index: test/CodeGen/avx512f-builtins.c =================================================================== --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -7430,22 +7430,19 @@ __m128d test_mm_cvti64_sd(__m128d A, long long B) { // CHECK-LABEL: test_mm_cvti64_sd - // CHECK: sitofp i64 %{{.*}} to double - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %{{.*}}, i64 %{{.*}}) return _mm_cvti64_sd(A, B); } __m128 test_mm_cvti32_ss(__m128 A, int B) { // CHECK-LABEL: test_mm_cvti32_ss - // CHECK: sitofp i32 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %{{.*}}, i32 %{{.*}}) return _mm_cvti32_ss(A, B); } __m128 test_mm_cvti64_ss(__m128 A, long long B) { // CHECK-LABEL: test_mm_cvti64_ss - // CHECK: sitofp i64 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %{{.*}}, i64 %{{.*}}) return _mm_cvti64_ss(A, B); } Index: test/CodeGen/builtins-x86.c =================================================================== --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -286,13 +286,17 @@ tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i); tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f); + tmp_V4f = __builtin_ia32_cvtsi2ss(tmp_V4f, tmp_i); tmp_i = __builtin_ia32_cvtss2si(tmp_V4f); + tmp_i = __builtin_ia32_cvttss2si(tmp_V4f); tmp_i = __builtin_ia32_rdtsc(); tmp_i = __builtin_ia32_rdtscp(&tmp_Ui); tmp_LLi = __builtin_ia32_rdpmc(tmp_i); #ifdef USE_64 + tmp_V4f = __builtin_ia32_cvtsi642ss(tmp_V4f, tmp_LLi); tmp_LLi = __builtin_ia32_cvtss2si64(tmp_V4f); + tmp_LLi = __builtin_ia32_cvttss2si64(tmp_V4f); #endif tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f); (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp); @@ -328,10 +332,15 @@ tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d); tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i); tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d); + tmp_i = __builtin_ia32_cvttsd2si(tmp_V2d); + tmp_V4f = __builtin_ia32_cvtsd2ss(tmp_V4f, tmp_V2d); #ifdef USE_64 tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); + tmp_LLi = __builtin_ia32_cvttsd2si64(tmp_V2d); + tmp_V2d = __builtin_ia32_cvtsi642sd(tmp_V2d, tmp_LLi); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); + tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); (void) __builtin_ia32_mfence(); @@ -411,7 +420,9 @@ tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); + tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); + tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7); tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7); tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7); Index: test/CodeGen/sse-builtins.c =================================================================== --- test/CodeGen/sse-builtins.c +++ test/CodeGen/sse-builtins.c @@ -263,15 +263,13 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) { // CHECK-LABEL: test_mm_cvtsi32_ss - // CHECK: sitofp i32 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %{{.*}}, i32 %{{.*}}) return _mm_cvtsi32_ss(A, B); } __m128 test_mm_cvtsi64_ss(__m128 A, long long B) { // CHECK-LABEL: test_mm_cvtsi64_ss - // CHECK: sitofp i64 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %{{.*}}, i64 %{{.*}}) return _mm_cvtsi64_ss(A, B); } @@ -295,22 +293,19 @@ int test_mm_cvtt_ss2si(__m128 A) { // CHECK-LABEL: test_mm_cvtt_ss2si - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) return _mm_cvtt_ss2si(A); } int test_mm_cvttss_si32(__m128 A) { // CHECK-LABEL: test_mm_cvttss_si32 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) return _mm_cvttss_si32(A); } long long test_mm_cvttss_si64(__m128 A) { // CHECK-LABEL: test_mm_cvttss_si64 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i64 + // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}}) return _mm_cvttss_si64(A); } Index: test/CodeGen/sse2-builtins.c =================================================================== --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -507,7 +507,7 @@ __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) { // CHECK-LABEL: test_mm_cvtsd_ss - // CHECK: fptrunc double %{{.*}} to float + // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}}) return _mm_cvtsd_ss(A, B); } @@ -541,8 +541,7 @@ __m128d test_mm_cvtsi64_sd(__m128d A, long long B) { // CHECK-LABEL: test_mm_cvtsi64_sd - // CHECK: sitofp i64 %{{.*}} to double - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %{{.*}}, i64 %{{.*}}) return _mm_cvtsi64_sd(A, B); } @@ -569,21 +568,19 @@ __m128i test_mm_cvttps_epi32(__m128 A) { // CHECK-LABEL: test_mm_cvttps_epi32 - // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32> + // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}}) return _mm_cvttps_epi32(A); } int test_mm_cvttsd_si32(__m128d A) { // CHECK-LABEL: test_mm_cvttsd_si32 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fptosi double %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}}) return _mm_cvttsd_si32(A); } long long test_mm_cvttsd_si64(__m128d A) { // CHECK-LABEL: test_mm_cvttsd_si64 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fptosi double %{{.*}} to i64 + // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}}) return _mm_cvttsd_si64(A); }