Add AVX-512F scalar float/double to integer conversion intrinsics.

BuiltinsX86.def declares sixteen __builtin_ia32_vcvt* / __builtin_ia32_vcvtt*
builtins for the "avx512f" feature. avx512fintrin.h adds the _mm_cvt_round* and
_mm_cvtt_round* macros, which forward their second argument to the builtin, and
inline wrappers that pass _MM_FROUND_CUR_DIRECTION instead. avx512f-builtins.c
adds one CodeGen test per new intrinsic; each test's return type matches the
result type of the builtin it exercises.

Index: include/clang/Basic/BuiltinsX86.def
===================================================================
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -1794,6 +1794,22 @@
 TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2usi32, "UiV4fIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi","","avx512f")
 
 #undef BUILTIN
 #undef TARGET_BUILTIN
Index: lib/Headers/avx512fintrin.h
===================================================================
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -3974,6 +3974,189 @@
 __builtin_ia32_vcomiss ((__v4sf) (__A), (__v4sf) (__B), ( __P), ( __R));\
 })
 
+#define _mm_cvt_roundsd_si64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtsd2si64 ((__v2df)( __A),( __R));\
+})
+
+#define _mm_cvt_roundsd_i64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtsd2si64 ((__v2df)( __A),( __R));\
+})
+
+#define _mm_cvt_roundsd_si32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtsd2si32 ((__v2df)( __A),( __R));\
+})
+
+#define _mm_cvt_roundsd_i32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtsd2si32 ((__v2df)( __A),( __R));\
+})
+
+#define _mm_cvt_roundsd_u32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtsd2usi32 ((__v2df)( __A),( __R));\
+})
+
+static __inline__ unsigned __DEFAULT_FN_ATTRS
+_mm_cvtsd_u32 (__m128d __A)
+{
+  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
+             _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsd_u64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtsd2usi64 ((__v2df)( __A),( __R));\
+})
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_cvtsd_u64 (__m128d __A)
+{
+  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
+                 __A,
+                 _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundss_si32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtss2si32 ((__v4sf)( __A),( __R));\
+})
+
+#define _mm_cvt_roundss_i32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtss2si32 ((__v4sf)( __A),( __R));\
+})
+
+#define _mm_cvt_roundss_si64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtss2si64 ((__v4sf)( __A),( __R));\
+})
+
+#define _mm_cvt_roundss_i64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtss2si64 ((__v4sf)( __A),( __R));\
+})
+
+#define _mm_cvt_roundss_u32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtss2usi32 ((__v4sf)( __A),( __R));\
+})
+
+static __inline__ unsigned __DEFAULT_FN_ATTRS
+_mm_cvtss_u32 (__m128 __A)
+{
+  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
+             _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundss_u64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvtss2usi64 ((__v4sf)( __A),( __R));\
+})
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_cvtss_u64 (__m128 __A)
+{
+  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
+                 __A,
+                 _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundsd_i32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttsd2si32 ((__v2df)( __A),( __R));\
+})
+
+#define _mm_cvtt_roundsd_si32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttsd2si32 ((__v2df)( __A),( __R));\
+})
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm_cvttsd_i32 (__m128d __A)
+{
+  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
+            _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundsd_si64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttsd2si64 ((__v2df)( __A),( __R));\
+})
+
+#define _mm_cvtt_roundsd_i64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttsd2si64 ((__v2df)( __A),( __R));\
+})
+
+static __inline__ long long __DEFAULT_FN_ATTRS
+_mm_cvttsd_i64 (__m128d __A)
+{
+  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
+              _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundsd_u32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttsd2usi32 ((__v2df)( __A),( __R));\
+})
+
+static __inline__ unsigned __DEFAULT_FN_ATTRS
+_mm_cvttsd_u32 (__m128d __A)
+{
+  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
+              _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundsd_u64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttsd2usi64 ((__v2df)( __A),( __R));\
+})
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_cvttsd_u64 (__m128d __A)
+{
+  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
+                  __A,
+                  _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundss_i32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttss2si32 ((__v4sf)( __A),( __R));\
+})
+
+#define _mm_cvtt_roundss_si32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttss2si32 ((__v4sf)( __A),( __R));\
+})
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm_cvttss_i32 (__m128 __A)
+{
+  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
+            _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundss_i64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttss2si64 ((__v4sf)( __A),( __R));\
+})
+
+#define _mm_cvtt_roundss_si64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttss2si64 ((__v4sf)( __A),( __R));\
+})
+
+static __inline__ long long __DEFAULT_FN_ATTRS
+_mm_cvttss_i64 (__m128 __A)
+{
+  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
+              _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundss_u32( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttss2usi32 ((__v4sf)( __A),( __R));\
+})
+
+static __inline__ unsigned __DEFAULT_FN_ATTRS
+_mm_cvttss_u32 (__m128 __A)
+{
+  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
+              _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundss_u64( __A, __R) __extension__ ({ \
+__builtin_ia32_vcvttss2usi64 ((__v4sf)( __A),( __R));\
+})
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_cvttss_u64 (__m128 __A)
+{
+  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
+                  __A,
+                  _MM_FROUND_CUR_DIRECTION);
+}
 
 #undef __DEFAULT_FN_ATTRS
 
Index: test/CodeGen/avx512f-builtins.c
===================================================================
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -2576,5 +2576,218 @@
   return _mm512_kmov(__A);
 }
 
+long long test_mm_cvt_roundsd_si64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundsd_si64
+  // CHECK: @llvm.x86.avx512.vcvtsd2si64
+  return _mm_cvt_roundsd_si64(__A, 4);
+}
+
+long long test_mm_cvt_roundsd_i64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundsd_i64
+  // CHECK: @llvm.x86.avx512.vcvtsd2si64
+  return _mm_cvt_roundsd_i64(__A, 4);
+}
+
+int test_mm_cvt_roundsd_si32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundsd_si32
+  // CHECK: @llvm.x86.avx512.vcvtsd2si32
+  return _mm_cvt_roundsd_si32(__A, 4);
+}
+
+int test_mm_cvt_roundsd_i32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundsd_i32
+  // CHECK: @llvm.x86.avx512.vcvtsd2si32
+  return _mm_cvt_roundsd_i32(__A, 4);
+}
+
+unsigned test_mm_cvt_roundsd_u32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundsd_u32
+  // CHECK: @llvm.x86.avx512.vcvtsd2usi32
+  return _mm_cvt_roundsd_u32(__A, 4);
+}
+unsigned test_mm_cvtsd_u32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtsd_u32
+  // CHECK: @llvm.x86.avx512.vcvtsd2usi32
+  return _mm_cvtsd_u32(__A);
+}
+
+unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundsd_u64
+  // CHECK: @llvm.x86.avx512.vcvtsd2usi64
+  return _mm_cvt_roundsd_u64(__A, 4);
+}
+
+unsigned long long test_mm_cvtsd_u64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtsd_u64
+  // CHECK: @llvm.x86.avx512.vcvtsd2usi64
+  return _mm_cvtsd_u64(__A);
+}
+
+int test_mm_cvt_roundss_si32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundss_si32
+  // CHECK: @llvm.x86.avx512.vcvtss2si32
+  return _mm_cvt_roundss_si32(__A, 4);
+}
+
+int test_mm_cvt_roundss_i32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundss_i32
+  // CHECK: @llvm.x86.avx512.vcvtss2si32
+  return _mm_cvt_roundss_i32(__A, 4);
+}
+long long test_mm_cvt_roundss_si64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundss_si64
+  // CHECK: @llvm.x86.avx512.vcvtss2si64
+  return _mm_cvt_roundss_si64(__A, 4);
+}
+
+long long test_mm_cvt_roundss_i64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundss_i64
+  // CHECK: @llvm.x86.avx512.vcvtss2si64
+  return _mm_cvt_roundss_i64(__A, 4);
+}
+
+unsigned test_mm_cvt_roundss_u32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundss_u32
+  // CHECK: @llvm.x86.avx512.vcvtss2usi32
+  return _mm_cvt_roundss_u32(__A, 4);
+}
+
+unsigned test_mm_cvtss_u32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtss_u32
+  // CHECK: @llvm.x86.avx512.vcvtss2usi32
+  return _mm_cvtss_u32(__A);
+}
+
+unsigned long long test_mm_cvt_roundss_u64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvt_roundss_u64
+  // CHECK: @llvm.x86.avx512.vcvtss2usi64
+  return _mm_cvt_roundss_u64(__A, 4);
+}
+
+unsigned long long test_mm_cvtss_u64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtss_u64
+  // CHECK: @llvm.x86.avx512.vcvtss2usi64
+  return _mm_cvtss_u64(__A);
+}
+
+int test_mm_cvtt_roundsd_i32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundsd_i32
+  // CHECK: @llvm.x86.avx512.cvttsd2si
+  return _mm_cvtt_roundsd_i32(__A, 4);
+}
+
+int test_mm_cvtt_roundsd_si32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundsd_si32
+  // CHECK: @llvm.x86.avx512.cvttsd2si
+  return _mm_cvtt_roundsd_si32(__A, 4);
+}
+
+int test_mm_cvttsd_i32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttsd_i32
+  // CHECK: @llvm.x86.avx512.cvttsd2si
+  return _mm_cvttsd_i32(__A);
+}
+
+long long test_mm_cvtt_roundsd_si64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundsd_si64
+  // CHECK: @llvm.x86.avx512.cvttsd2si64
+  return _mm_cvtt_roundsd_si64(__A, 4);
+}
+
+long long test_mm_cvtt_roundsd_i64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundsd_i64
+  // CHECK: @llvm.x86.avx512.cvttsd2si64
+  return _mm_cvtt_roundsd_i64(__A, 4);
+}
+
+long long test_mm_cvttsd_i64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttsd_i64
+  // CHECK: @llvm.x86.avx512.cvttsd2si64
+  return _mm_cvttsd_i64(__A);
+}
+
+unsigned test_mm_cvtt_roundsd_u32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundsd_u32
+  // CHECK: @llvm.x86.avx512.cvttsd2usi
+  return _mm_cvtt_roundsd_u32(__A, 4);
+}
+
+unsigned test_mm_cvttsd_u32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttsd_u32
+  // CHECK: @llvm.x86.avx512.cvttsd2usi
+  return _mm_cvttsd_u32(__A);
+}
+
+unsigned long long test_mm_cvtt_roundsd_u64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundsd_u64
+  // CHECK: @llvm.x86.avx512.cvttsd2usi64
+  return _mm_cvtt_roundsd_u64(__A, 4);
+}
+
+unsigned long long test_mm_cvttsd_u64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttsd_u64
+  // CHECK: @llvm.x86.avx512.cvttsd2usi64
+  return _mm_cvttsd_u64(__A);
+}
+
+int test_mm_cvtt_roundss_i32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundss_i32
+  // CHECK: @llvm.x86.avx512.cvttss2si
+  return _mm_cvtt_roundss_i32(__A, 4);
+}
+
+int test_mm_cvtt_roundss_si32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundss_si32
+  // CHECK: @llvm.x86.avx512.cvttss2si
+  return _mm_cvtt_roundss_si32(__A, 4);
+}
+
+int test_mm_cvttss_i32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttss_i32
+  // CHECK: @llvm.x86.avx512.cvttss2si
+  return _mm_cvttss_i32(__A);
+}
+
+long long test_mm_cvtt_roundss_i64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundss_i64
+  // CHECK: @llvm.x86.avx512.cvttss2si64
+  return _mm_cvtt_roundss_i64(__A, 4);
+}
+
+long long test_mm_cvtt_roundss_si64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundss_si64
+  // CHECK: @llvm.x86.avx512.cvttss2si64
+  return _mm_cvtt_roundss_si64(__A, 4);
+}
+
+long long test_mm_cvttss_i64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttss_i64
+  // CHECK: @llvm.x86.avx512.cvttss2si64
+  return _mm_cvttss_i64(__A);
+}
+
+unsigned test_mm_cvtt_roundss_u32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundss_u32
+  // CHECK: @llvm.x86.avx512.cvttss2usi
+  return _mm_cvtt_roundss_u32(__A, 4);
+}
+
+unsigned test_mm_cvttss_u32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttss_u32
+  // CHECK: @llvm.x86.avx512.cvttss2usi
+  return _mm_cvttss_u32(__A);
+}
+
+unsigned long long test_mm_cvtt_roundss_u64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtt_roundss_u64
+  // CHECK: @llvm.x86.avx512.cvttss2usi64
+  return _mm_cvtt_roundss_u64(__A, 4);
+}
+
+unsigned long long test_mm_cvttss_u64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttss_u64
+  // CHECK: @llvm.x86.avx512.cvttss2usi64
+  return _mm_cvttss_u64(__A);
+}