Skip to content

Commit 89f6576

Browse files
author
Asaf Badouh
committed Jun 2, 2016
[X86][AVX512] add intrinsics of Scalar FP to integer
Differential Revision: http://reviews.llvm.org/D20861 llvm-svn: 271499
1 parent 9e7d0a9 commit 89f6576

File tree

2 files changed

+122
-0
lines changed

2 files changed

+122
-0
lines changed
 

‎clang/lib/Headers/avx512fintrin.h

+45
Original file line number | Diff line number | Diff line change
@@ -9331,6 +9331,33 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
93319331
(__v4sf)_mm_setzero_ps(), \
93329332
(__mmask8)(U), (int)(R)); })
93339333

9334+
static __inline__ __m128 __DEFAULT_FN_ATTRS
9335+
_mm_mask_cvtsd_ss (__m128 W, __mmask8 U, __m128 A, __m128d B)
9336+
{
9337+
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(A),
9338+
(__v2df)(B),
9339+
(__v4sf)(W),
9340+
(__mmask8)(U), _MM_FROUND_CUR_DIRECTION);
9341+
}
9342+
9343+
static __inline__ __m128 __DEFAULT_FN_ATTRS
9344+
_mm_maskz_cvtsd_ss (__mmask8 U, __m128 A, __m128d B)
9345+
{
9346+
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(A),
9347+
(__v2df)(B),
9348+
(__v4sf)_mm_setzero_ps(),
9349+
(__mmask8)(U), _MM_FROUND_CUR_DIRECTION);
9350+
}
9351+
9352+
/* Alternate "i32"/"i64" spellings for the scalar FP <-> integer conversions.
 * Each name is a plain object-like alias that expands to the "si32"/"si64"
 * intrinsic on its right, so a use of the alias is a use of that intrinsic.
 * The alias targets are not defined in this part of the header; presumably
 * they come from the SSE/SSE2 headers -- TODO confirm. */
#define _mm_cvtss_i32 _mm_cvtss_si32
9353+
#define _mm_cvtss_i64 _mm_cvtss_si64
9354+
#define _mm_cvtsd_i32 _mm_cvtsd_si32
9355+
#define _mm_cvtsd_i64 _mm_cvtsd_si64
9356+
#define _mm_cvti32_sd _mm_cvtsi32_sd
9357+
#define _mm_cvti64_sd _mm_cvtsi64_sd
9358+
#define _mm_cvti32_ss _mm_cvtsi32_ss
9359+
#define _mm_cvti64_ss _mm_cvtsi64_ss
9360+
93349361
/* Expands to a call of __builtin_ia32_cvtsi2sd64: A is cast to __m128d and
 * then __v2df, B to long long, and R to int; the builtin's result is cast to
 * __m128d. The expansion is a statement expression marked __extension__.
 * NOTE(review): presumably a macro rather than an inline function because R
 * has to reach the builtin as a compile-time constant -- confirm. */
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
93359362
(__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
93369363
(int)(R)); })
@@ -9371,6 +9398,24 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
93719398
(__v2df)_mm_setzero_pd(), \
93729399
(__mmask8)(U), (int)(R)); })
93739400

9401+
static __inline__ __m128d __DEFAULT_FN_ATTRS
9402+
_mm_mask_cvtss_sd (__m128d W, __mmask8 U, __m128d A, __m128 B)
9403+
{
9404+
return __builtin_ia32_cvtss2sd_round_mask((__v2df)(A),
9405+
(__v4sf)(B),
9406+
(__v2df)(W),
9407+
(__mmask8)(U), _MM_FROUND_CUR_DIRECTION);
9408+
}
9409+
9410+
static __inline__ __m128d __DEFAULT_FN_ATTRS
9411+
_mm_maskz_cvtss_sd (__mmask8 U, __m128d A, __m128 B)
9412+
{
9413+
return __builtin_ia32_cvtss2sd_round_mask((__v2df)(A),
9414+
(__v4sf)(B),
9415+
(__v2df)_mm_setzero_pd(),
9416+
(__mmask8)(U), _MM_FROUND_CUR_DIRECTION);
9417+
}
9418+
93749419
static __inline__ __m128d __DEFAULT_FN_ATTRS
93759420
_mm_cvtu32_sd (__m128d __A, unsigned __B)
93769421
{

‎clang/test/CodeGen/avx512f-builtins.c

+77
Original file line number | Diff line number | Diff line change
@@ -7266,3 +7266,80 @@ __m512 test_mm512_setr_ps (float __A, float __B, float __C, float __D,
72667266
return _mm512_setr_ps( __A, __B, __C, __D, __E, __F, __G, __H,
72677267
__I, __J, __K, __L, __M, __N, __O, __P);
72687268
}
7269+
7270+
// Codegen test for the _mm_cvtss_i32 spelling: the check lines in the body
// expect the emitted IR to contain a call to @llvm.x86.sse.cvtss2si taking a
// <4 x float> and returning i32.
int test_mm_cvtss_i32(__m128 A) {
7271+
// CHECK-LABEL: test_mm_cvtss_i32
7272+
// CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
7273+
return _mm_cvtss_i32(A);
7274+
}
7275+
7276+
// Codegen test for the _mm_cvtss_i64 spelling: the check lines in the body
// expect the emitted IR to contain a call to @llvm.x86.sse.cvtss2si64 taking
// a <4 x float> and returning i64.
long long test_mm_cvtss_i64(__m128 A) {
7277+
// CHECK-LABEL: test_mm_cvtss_i64
7278+
// CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
7279+
return _mm_cvtss_i64(A);
7280+
}
7281+
7282+
// Codegen test for the _mm_cvti32_sd spelling: the check lines in the body
// expect an i32 -> double sitofp followed by an insertelement of that double
// into index 0 of a <2 x double>.
__m128d test_mm_cvti32_sd(__m128d A, int B) {
7283+
// CHECK-LABEL: test_mm_cvti32_sd
7284+
// CHECK: sitofp i32 %{{.*}} to double
7285+
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
7286+
return _mm_cvti32_sd(A, B);
7287+
}
7288+
7289+
// Codegen test for the _mm_cvti64_sd spelling: the check lines in the body
// expect an i64 -> double sitofp followed by an insertelement of that double
// into index 0 of a <2 x double>.
__m128d test_mm_cvti64_sd(__m128d A, long long B) {
7290+
// CHECK-LABEL: test_mm_cvti64_sd
7291+
// CHECK: sitofp i64 %{{.*}} to double
7292+
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
7293+
return _mm_cvti64_sd(A, B);
7294+
}
7295+
7296+
// Codegen test for the _mm_cvti32_ss spelling: the check lines in the body
// expect an i32 -> float sitofp followed by an insertelement of that float
// into index 0 of a <4 x float>.
__m128 test_mm_cvti32_ss(__m128 A, int B) {
7297+
// CHECK-LABEL: test_mm_cvti32_ss
7298+
// CHECK: sitofp i32 %{{.*}} to float
7299+
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
7300+
return _mm_cvti32_ss(A, B);
7301+
}
7302+
7303+
// Codegen test for the _mm_cvti64_ss spelling: the check lines in the body
// expect an i64 -> float sitofp followed by an insertelement of that float
// into index 0 of a <4 x float>.
__m128 test_mm_cvti64_ss(__m128 A, long long B) {
7303+
// CHECK-LABEL: test_mm_cvti64_ss
7304+
// CHECK: sitofp i64 %{{.*}} to float
7305+
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
7306+
return _mm_cvti64_ss(A, B);
7307+
}
7309+
7310+
// Codegen test for the _mm_cvtsd_i32 spelling: the check lines in the body
// expect the emitted IR to contain a call to @llvm.x86.sse2.cvtsd2si taking a
// <2 x double> and returning i32.
int test_mm_cvtsd_i32(__m128d A) {
7310+
// CHECK-LABEL: test_mm_cvtsd_i32
7311+
// CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
7312+
return _mm_cvtsd_i32(A);
7313+
}
7315+
7316+
// Codegen test for the _mm_cvtsd_i64 spelling: the check lines in the body
// expect the emitted IR to contain a call to @llvm.x86.sse2.cvtsd2si64 taking
// a <2 x double> and returning i64.
long long test_mm_cvtsd_i64(__m128d A) {
7316+
// CHECK-LABEL: test_mm_cvtsd_i64
7317+
// CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
7318+
return _mm_cvtsd_i64(A);
7319+
}
7321+
7322+
// Codegen test for _mm_mask_cvtss_sd: the check lines in the body expect the
// emitted IR to reference @llvm.x86.avx512.mask.cvtss2sd.round. Only the
// intrinsic name is matched; its operands are not checked.
__m128d test_mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) {
7323+
// CHECK-LABEL: @test_mm_mask_cvtss_sd
7324+
// CHECK: @llvm.x86.avx512.mask.cvtss2sd.round
7325+
return _mm_mask_cvtss_sd(__W, __U, __A, __B);
7326+
}
7327+
7328+
// Codegen test for _mm_maskz_cvtss_sd: the check lines in the body expect the
// emitted IR to reference @llvm.x86.avx512.mask.cvtss2sd.round. Only the
// intrinsic name is matched; its operands are not checked.
__m128d test_mm_maskz_cvtss_sd( __mmask8 __U, __m128d __A, __m128 __B) {
7329+
// CHECK-LABEL: @test_mm_maskz_cvtss_sd
7330+
// CHECK: @llvm.x86.avx512.mask.cvtss2sd.round
7331+
return _mm_maskz_cvtss_sd( __U, __A, __B);
7332+
}
7333+
7334+
// Codegen test for _mm_mask_cvtsd_ss: the check lines in the body expect the
// emitted IR to reference @llvm.x86.avx512.mask.cvtsd2ss.round. Only the
// intrinsic name is matched; its operands are not checked.
__m128 test_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) {
7335+
// CHECK-LABEL: @test_mm_mask_cvtsd_ss
7336+
// CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round
7337+
return _mm_mask_cvtsd_ss(__W, __U, __A, __B);
7338+
}
7339+
7340+
// Codegen test for _mm_maskz_cvtsd_ss: the check lines in the body expect the
// emitted IR to reference @llvm.x86.avx512.mask.cvtsd2ss.round. Only the
// intrinsic name is matched; its operands are not checked.
__m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
7341+
// CHECK-LABEL: @test_mm_maskz_cvtsd_ss
7342+
// CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round
7343+
return _mm_maskz_cvtsd_ss(__U, __A, __B);
7344+
}
7345+

0 commit comments

Comments
 (0)
Please sign in to comment.