[X86] Express 512-bit packed FP max/min intrinsics as compare + select.

The 512-bit compare macros are moved ahead of the arithmetic section so that
_mm512_{max,min}_{pd,ps} can use them.  Both families are written so that the
second operand (__B) is produced whenever the ordered comparison is false,
i.e. for equal operands (including +0.0 vs -0.0) and when either element is
NaN:

  max(__A, __B) = (__B < __A) ? __A : __B
  min(__A, __B) = (__A < __B) ? __A : __B

Index: lib/Headers/avx512fintrin.h
===================================================================
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -818,6 +818,118 @@
   return (__m512i)((__v8du)__a ^ (__v8du)__b);
 }
 
+/* Compare */
+
+#define _mm512_cmp_round_ps_mask(A, B, P, R) \
+  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
+                                          (__v16sf)(__m512)(B), (int)(P), \
+                                          (__mmask16)-1, (int)(R))
+
+#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
+  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
+                                          (__v16sf)(__m512)(B), (int)(P), \
+                                          (__mmask16)(U), (int)(R))
+
+#define _mm512_cmp_ps_mask(A, B, P) \
+  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
+  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_cmpeq_ps_mask(A, B) \
+    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
+#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
+    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
+
+#define _mm512_cmplt_ps_mask(A, B) \
+    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
+#define _mm512_mask_cmplt_ps_mask(k, A, B) \
+    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
+
+#define _mm512_cmple_ps_mask(A, B) \
+    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
+#define _mm512_mask_cmple_ps_mask(k, A, B) \
+    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
+
+#define _mm512_cmpunord_ps_mask(A, B) \
+    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
+#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
+    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
+
+#define _mm512_cmpneq_ps_mask(A, B) \
+    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
+#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
+    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
+
+#define _mm512_cmpnlt_ps_mask(A, B) \
+    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
+#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
+    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
+
+#define _mm512_cmpnle_ps_mask(A, B) \
+    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
+#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
+    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
+
+#define _mm512_cmpord_ps_mask(A, B) \
+    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
+#define _mm512_mask_cmpord_ps_mask(k, A, B) \
+    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
+
+#define _mm512_cmp_round_pd_mask(A, B, P, R) \
+  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)(__m512d)(B), (int)(P), \
+                                         (__mmask8)-1, (int)(R))
+
+#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
+  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)(__m512d)(B), (int)(P), \
+                                         (__mmask8)(U), (int)(R))
+
+#define _mm512_cmp_pd_mask(A, B, P) \
+  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
+  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_cmpeq_pd_mask(A, B) \
+    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
+#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
+    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
+
+#define _mm512_cmplt_pd_mask(A, B) \
+    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
+#define _mm512_mask_cmplt_pd_mask(k, A, B) \
+    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
+
+#define _mm512_cmple_pd_mask(A, B) \
+    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
+#define _mm512_mask_cmple_pd_mask(k, A, B) \
+    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
+
+#define _mm512_cmpunord_pd_mask(A, B) \
+    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
+#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
+    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
+
+#define _mm512_cmpneq_pd_mask(A, B) \
+    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
+#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
+    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
+
+#define _mm512_cmpnlt_pd_mask(A, B) \
+    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
+#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
+    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
+
+#define _mm512_cmpnle_pd_mask(A, B) \
+    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
+#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
+    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
+
+#define _mm512_cmpord_pd_mask(A, B) \
+    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
+#define _mm512_mask_cmpord_pd_mask(k, A, B) \
+    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
+
 /* Arithmetic */
 
 static __inline __m512d __DEFAULT_FN_ATTRS
@@ -961,8 +1073,9 @@
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_max_pd(__m512d __A, __m512d __B)
 {
-  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
-                                           _MM_FROUND_CUR_DIRECTION);
+  /* Like VMAXPD: yield __B when __A <= __B or either element is NaN. */
+  return (__m512d) __builtin_ia32_selectpd_512 (
+    _mm512_cmplt_pd_mask(__B, __A), (__v8df) __A, (__v8df) __B);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -998,8 +1111,9 @@
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_max_ps(__m512 __A, __m512 __B)
 {
-  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
-                                          _MM_FROUND_CUR_DIRECTION);
+  /* Like VMAXPS: yield __B when __A <= __B or either element is NaN. */
+  return (__m512) __builtin_ia32_selectps_512 (
+    _mm512_cmplt_ps_mask(__B, __A), (__v16sf) __A, (__v16sf) __B);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -1196,8 +1310,9 @@
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_min_pd(__m512d __A, __m512d __B)
 {
-  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
-                                           _MM_FROUND_CUR_DIRECTION);
+  /* Like VMINPD: yield __B when __A >= __B or either element is NaN. */
+  return (__m512d) __builtin_ia32_selectpd_512 (
+    _mm512_cmplt_pd_mask(__A, __B), (__v8df) __A, (__v8df) __B);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -1233,8 +1348,9 @@
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_min_ps(__m512 __A, __m512 __B)
 {
-  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
-                                          _MM_FROUND_CUR_DIRECTION);
+  /* Like VMINPS: yield __B when __A >= __B or either element is NaN. */
+  return (__m512) __builtin_ia32_selectps_512 (
+    _mm512_cmplt_ps_mask(__A, __B), (__v16sf) __A, (__v16sf) __B);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -3449,118 +3565,6 @@
                                             (__v16si) __A);
 }
 
-/* Compare */
-
-#define _mm512_cmp_round_ps_mask(A, B, P, R) \
-  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
-                                          (__v16sf)(__m512)(B), (int)(P), \
-                                          (__mmask16)-1, (int)(R))
-
-#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
-  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
-                                          (__v16sf)(__m512)(B), (int)(P), \
-                                          (__mmask16)(U), (int)(R))
-
-#define _mm512_cmp_ps_mask(A, B, P) \
-  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
-#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
-  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_cmpeq_ps_mask(A, B) \
-    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
-#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
-    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
-
-#define _mm512_cmplt_ps_mask(A, B) \
-    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
-#define _mm512_mask_cmplt_ps_mask(k, A, B) \
-    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
-
-#define _mm512_cmple_ps_mask(A, B) \
-    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
-#define _mm512_mask_cmple_ps_mask(k, A, B) \
-    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
-
-#define _mm512_cmpunord_ps_mask(A, B) \
-    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
-#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
-    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
-
-#define _mm512_cmpneq_ps_mask(A, B) \
-    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
-#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
-    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
-
-#define _mm512_cmpnlt_ps_mask(A, B) \
-    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
-#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
-    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
-
-#define _mm512_cmpnle_ps_mask(A, B) \
-    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
-#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
-    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
-
-#define _mm512_cmpord_ps_mask(A, B) \
-    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
-#define _mm512_mask_cmpord_ps_mask(k, A, B) \
-    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
-
-#define _mm512_cmp_round_pd_mask(A, B, P, R) \
-  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
-                                         (__v8df)(__m512d)(B), (int)(P), \
-                                         (__mmask8)-1, (int)(R))
-
-#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
-  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
-                                         (__v8df)(__m512d)(B), (int)(P), \
-                                         (__mmask8)(U), (int)(R))
-
-#define _mm512_cmp_pd_mask(A, B, P) \
-  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
-#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
-  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_cmpeq_pd_mask(A, B) \
-    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
-#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
-    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
-
-#define _mm512_cmplt_pd_mask(A, B) \
-    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
-#define _mm512_mask_cmplt_pd_mask(k, A, B) \
-    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
-
-#define _mm512_cmple_pd_mask(A, B) \
-    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
-#define _mm512_mask_cmple_pd_mask(k, A, B) \
-    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
-
-#define _mm512_cmpunord_pd_mask(A, B) \
-    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
-#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
-    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
-
-#define _mm512_cmpneq_pd_mask(A, B) \
-    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
-#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
-    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
-
-#define _mm512_cmpnlt_pd_mask(A, B) \
-    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
-#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
-    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
-
-#define _mm512_cmpnle_pd_mask(A, B) \
-    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
-#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
-    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
-
-#define _mm512_cmpord_pd_mask(A, B) \
-    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
-#define _mm512_mask_cmpord_pd_mask(k, A, B) \
-    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
-
 /* Conversion */
 
 #define _mm512_cvtt_roundps_epu32(A, R) \
Index: test/CodeGen/avx512f-builtins.c
===================================================================
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -8426,7 +8426,8 @@
 __m512d test_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
   // CHECK-LABEL: @test_mm512_mask_max_pd
-  // CHECK: @llvm.x86.avx512.max.pd.512
+  // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_max_pd (__W,__U,__A,__B);
 }
@@ -8434,7 +8435,10 @@
 __m512d test_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
 {
   // CHECK-LABEL: @test_mm512_maskz_max_pd
-  // CHECK: @llvm.x86.avx512.max.pd.512
+  // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK: store <8 x double> zeroinitializer, <8 x double>* %.compoundliteral.i.i, align 64
+  // CHECK: load <8 x double>, <8 x double>* %.compoundliteral.i.i, align 64
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_max_pd (__U,__A,__B);
 }
@@ -8442,7 +8446,8 @@
 __m512 test_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
   // CHECK-LABEL: @test_mm512_mask_max_ps
-  // CHECK: @llvm.x86.avx512.max.ps.512
+  // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_max_ps (__W,__U,__A,__B);
 }
@@ -8473,7 +8478,10 @@
 __m512 test_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
   // CHECK-LABEL: @test_mm512_maskz_max_ps
-  // CHECK: @llvm.x86.avx512.max.ps.512
+  // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  // CHECK: store <16 x float> zeroinitializer, <16 x float>* %.compoundliteral.i.i, align 64
+  // CHECK: load <16 x float>, <16 x float>* %.compoundliteral.i.i, align 64
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_max_ps (__U,__A,__B);
 }
@@ -8504,7 +8512,8 @@
 __m512d test_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
   // CHECK-LABEL: @test_mm512_mask_min_pd
-  // CHECK: @llvm.x86.avx512.min.pd.512
+  // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_min_pd (__W,__U,__A,__B);
 }
@@ -8512,7 +8521,11 @@
 __m512d test_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
 {
   // CHECK-LABEL: @test_mm512_maskz_min_pd
-  // CHECK: @llvm.x86.avx512.min.pd.512
+  // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK: store <8 x double> zeroinitializer, <8 x double>* %.compoundliteral.i.i, align 64
+  // CHECK: load <8 x double>, <8 x double>* %.compoundliteral.i.i, align 64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_min_pd (__U,__A,__B);
 }
 
@@ -8542,7 +8555,8 @@
 __m512 test_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
   // CHECK-LABEL: @test_mm512_mask_min_ps
-  // CHECK: @llvm.x86.avx512.min.ps.512
+  // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_min_ps (__W,__U,__A,__B);
 }
@@ -8550,7 +8564,10 @@
 __m512 test_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
   // CHECK-LABEL: @test_mm512_maskz_min_ps
-  // CHECK: @llvm.x86.avx512.min.ps.512
+  // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}}
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  // CHECK: store <16 x float> zeroinitializer, <16 x float>* %.compoundliteral.i.i, align 64
+  // CHECK: load <16 x float>, <16 x float>* %.compoundliteral.i.i, align 64
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_min_ps (__U,__A,__B);
 }