/*
 * Patch overview (two files, unified-diff format).
 *
 * NOTE: the line structure of this patch has been collapsed; original hunk
 * line breaks and leading/trailing whitespace cannot be recovered from this
 * text, so the patch body below is left exactly as found.
 *
 * 1) lib/Headers/avx512fintrin.h -- adds nine statement-expression macros in
 *    three families, each with plain / mask / maskz forms:
 *
 *      _mm512_[mask[z]_]cvtt_roundps_epu32 -> __builtin_ia32_cvttps2udq512_mask
 *      _mm512_[mask[z]_]cvt_roundps_ph     -> __builtin_ia32_vcvtps2ph512_mask
 *      _mm512_[mask[z]_]cvt_roundph_ps     -> __builtin_ia32_vcvtph2ps512_mask
 *
 *    In every family the plain form passes an "undefined" vector together
 *    with an all-ones mask ((__mmask16) -1), the mask form forwards the
 *    caller's pass-through vector and mask, and the maskz form passes a zero
 *    vector together with the caller's mask.
 *
 * 2) test/CodeGen/avx512f-builtins.c -- adds nine tests (one per new macro);
 *    each has a CHECK-LABEL line and a CHECK line naming the expected
 *    llvm.x86.avx512.mask.* intrinsic, and calls the macro with
 *    _MM_FROUND_CUR_DIRECTION as its last argument. The remaining hunks
 *    replace the return statement of existing scalar conversion tests; as
 *    rendered here the removed and added text is identical, so these are
 *    presumably whitespace-only changes -- TODO confirm against the
 *    original patch.
 *
 * NOTE(review): parameter naming is inconsistent between the new macros.
 *    _mm512_mask_cvtt_roundps_epu32(__W, __U, ...) and
 *    _mm512_mask_cvt_roundph_ps(__W, __U, ...) use __W for the pass-through
 *    vector and __U for the mask, whereas
 *    _mm512_mask_cvt_roundps_ph(__U, __W, ...) casts __U to (__v16hi) and
 *    __W to (__mmask16), and _mm512_maskz_cvt_roundps_ph(__W, ...) names its
 *    mask __W. Argument positions (vector first, mask second) agree; only
 *    the names are reversed.
 *
 * NOTE(review): the cvt_roundps_ph macros cast the builtin result to
 *    (__m256i); the cvtt_roundps_epu32 and cvt_roundph_ps macros apply no
 *    result cast. Consider adding explicit (__m512i) / (__m512) casts for
 *    consistency -- confirm the builtins' return types first.
 *
 * NOTE(review): three test functions that appear only as context lines
 *    declare a return type that differs from their sibling test over the
 *    same intrinsic:
 *      float              test_mm_cvtt_roundss_i64   (sibling _si64: long long)
 *      int                test_mm_cvt_roundss_si64   (sibling _i64:  long long)
 *      unsigned long long test_mm_cvtt_roundsd_si64  (sibling _i64:  long long)
 *    Confirm against the intrinsics' documented return types.
 */
Index: cfe/trunk/lib/Headers/avx512fintrin.h =================================================================== --- cfe/trunk/lib/Headers/avx512fintrin.h +++ cfe/trunk/lib/Headers/avx512fintrin.h @@ -3585,6 +3585,27 @@ /* Conversion */ +#define _mm512_cvtt_roundps_epu32( __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)\ + _mm512_undefined_epi32 (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_cvtt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvtt_roundps_epu32( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)\ + _mm512_setzero_si512 (),\ + (__mmask16)( __U),( __R));\ +}) + + static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu32(__m512 __A) { @@ -3795,6 +3816,28 @@ _MM_FROUND_CUR_DIRECTION); } +#define _mm512_cvt_roundps_ph( __A, __I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)_mm256_undefined_si256 (),\ + (__mmask16) -1);\ +}) + +#define _mm512_mask_cvt_roundps_ph( __U, __W, __A, __I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)( __U),\ + (__mmask16)( __W));\ +}) + +#define _mm512_maskz_cvt_roundps_ph( __W, __A, __I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)\ + _mm256_setzero_si256 (),\ + (__mmask16)( __W));\ +}) + #define _mm512_cvtps_ph(A, I) __extension__ ({ \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ @@ -3809,7 +3852,28 @@ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ (__mmask16)(W)); }) - + +#define _mm512_cvt_roundph_ps( __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( 
__A),\ + (__v16sf)\ + _mm512_undefined_ps (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_cvt_roundph_ps( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\ + (__v16sf)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvt_roundph_ps( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U),( __R));\ +}) + + static __inline __m512 __DEFAULT_FN_ATTRS _mm512_cvtph_ps(__m256i __A) { Index: cfe/trunk/test/CodeGen/avx512f-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/avx512f-builtins.c +++ cfe/trunk/test/CodeGen/avx512f-builtins.c @@ -2948,19 +2948,19 @@ int test_mm_cvt_roundsd_si32(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_si32 // CHECK: @llvm.x86.avx512.vcvtsd2si32 - return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundsd_i32(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_i32 // CHECK: @llvm.x86.avx512.vcvtsd2si32 - return _mm_cvt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvt_roundsd_u32(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_u32 // CHECK: @llvm.x86.avx512.vcvtsd2usi32 - return _mm_cvt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvtsd_u32(__m128d __A) { @@ -2972,7 +2972,7 @@ unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_u64 // CHECK: @llvm.x86.avx512.vcvtsd2usi64 - return _mm_cvt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvtsd_u64(__m128d __A) { @@ -2984,31 +2984,31 @@ int test_mm_cvt_roundss_si32(__m128 __A) { // CHECK-LABEL: 
@test_mm_cvt_roundss_si32 // CHECK: @llvm.x86.avx512.vcvtss2si32 - return _mm_cvt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundss_i32(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_i32 // CHECK: @llvm.x86.avx512.vcvtss2si32 - return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundss_si64(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_si64 // CHECK: @llvm.x86.avx512.vcvtss2si64 - return _mm_cvt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvt_roundss_i64(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_i64 // CHECK: @llvm.x86.avx512.vcvtss2si64 - return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvt_roundss_u32(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_u32 // CHECK: @llvm.x86.avx512.vcvtss2usi32 - return _mm_cvt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvtss_u32(__m128 __A) { @@ -3020,7 +3020,7 @@ unsigned long long test_mm_cvt_roundss_u64(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_u64 // CHECK: @llvm.x86.avx512.vcvtss2usi64 - return _mm_cvt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvtss_u64(__m128 __A) { @@ -3032,13 +3032,13 @@ int test_mm_cvtt_roundsd_i32(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_i32 // CHECK: @llvm.x86.avx512.cvttsd2si - return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvtt_roundsd_si32(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_si32 // CHECK: @llvm.x86.avx512.cvttsd2si - return 
_mm_cvtt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvttsd_i32(__m128d __A) { @@ -3050,13 +3050,13 @@ unsigned long long test_mm_cvtt_roundsd_si64(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_si64 // CHECK: @llvm.x86.avx512.cvttsd2si64 - return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvtt_roundsd_i64(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_i64 // CHECK: @llvm.x86.avx512.cvttsd2si64 - return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvttsd_i64(__m128d __A) { @@ -3068,7 +3068,7 @@ unsigned test_mm_cvtt_roundsd_u32(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_u32 // CHECK: @llvm.x86.avx512.cvttsd2usi - return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvttsd_u32(__m128d __A) { @@ -3080,7 +3080,7 @@ unsigned long long test_mm_cvtt_roundsd_u64(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_u64 // CHECK: @llvm.x86.avx512.cvttsd2usi64 - return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvttsd_u64(__m128d __A) { @@ -3092,13 +3092,13 @@ int test_mm_cvtt_roundss_i32(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_i32 // CHECK: @llvm.x86.avx512.cvttss2si - return _mm_cvtt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvtt_roundss_si32(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_si32 // CHECK: @llvm.x86.avx512.cvttss2si - return _mm_cvtt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvttss_i32(__m128 __A) { @@ -3110,13 +3110,13 @@ 
float test_mm_cvtt_roundss_i64(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_i64 // CHECK: @llvm.x86.avx512.cvttss2si64 - return _mm_cvtt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvtt_roundss_si64(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_si64 // CHECK: @llvm.x86.avx512.cvttss2si64 - return _mm_cvtt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvttss_i64(__m128 __A) { @@ -3128,7 +3128,7 @@ unsigned test_mm_cvtt_roundss_u32(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_u32 // CHECK: @llvm.x86.avx512.cvttss2usi - return _mm_cvtt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvttss_u32(__m128 __A) { @@ -3140,7 +3140,7 @@ unsigned long long test_mm_cvtt_roundss_u64(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_u64 // CHECK: @llvm.x86.avx512.cvttss2usi64 - return _mm_cvtt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvttss_u64(__m128 __A) { @@ -3149,6 +3149,70 @@ return _mm_cvttss_u64(__A); } +__m512i test_mm512_cvtt_roundps_epu32(__m512 __A) +{ + // CHECK-LABEL: @test_mm512_cvtt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + return _mm512_cvtt_roundps_epu32(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512i test_mm512_mask_cvtt_roundps_epu32(__m512i __W, __mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + return _mm512_mask_cvtt_roundps_epu32(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512i test_mm512_maskz_cvtt_roundps_epu32( __mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + + return _mm512_maskz_cvtt_roundps_epu32(__U, __A, 
_MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_cvt_roundps_ph(__m512 __A) +{ + // CHECK-LABEL: @test_mm512_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 + return _mm512_cvt_roundps_ph(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_mask_cvt_roundps_ph(__m256i __W , __mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 + return _mm512_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_maskz_cvt_roundps_ph(__mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 + return _mm512_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_cvt_roundph_ps(__m256i __A) +{ + // CHECK-LABEL: @test_mm512_cvt_roundph_ps + // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 + return _mm512_cvt_roundph_ps(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundph_ps + // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 + return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundph_ps + // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 + return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + __m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps