diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -838,8 +838,8 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er") @@ -848,8 +848,8 @@ TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rcp28sd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rcp28ss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er") TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") @@ -1041,18 +1041,18 @@ TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_addsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_addss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") 
+TARGET_BUILTIN(__builtin_ia32_minss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_addsd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divsd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulsd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subsd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxsd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minsd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") @@ -1235,8 +1235,8 @@ TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangesd128_mask_round, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangess128_mask_round, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq") @@ -1345,10 +1345,10 @@ TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2OiIiUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexpsd128_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexpss128_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantsd_mask_round, "V2dV2dV2dIiV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantss_mask_round, "V4fV4fV4fIiV4fUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8sC*V8sUc", "nV:128:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16sC*V16sUs", "nV:256:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16cC*V16cUs", "nV:128:", "avx512bw,avx512vl") @@ -1415,12 +1415,12 @@ TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, 
"V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rndscalesd_mask_round, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rndscaless_mask_round, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefsd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8OiV8Oii", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_psraq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") @@ -1461,8 +1461,8 @@ TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtsd_mask_round, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtss_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") @@ -1739,9 +1739,9 @@ TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "ncV:128:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "ncV:256:", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_mask_round, "V4fV4fV2dV4fUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtss2sd_mask_round, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl") @@ -1780,12 +1780,12 @@ TARGET_BUILTIN(__builtin_ia32_maxph256, "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_maxph128, "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl") -TARGET_BUILTIN(__builtin_ia32_addsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_divsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_mulsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") 
-TARGET_BUILTIN(__builtin_ia32_subsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_maxsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_minsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_addsh_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_divsh_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_mulsh_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_subsh_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_maxsh_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_minsh_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_cmpph512_mask, "UiV32xV32xIiUiIi", "ncV:512:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl") @@ -1819,16 +1819,16 @@ TARGET_BUILTIN(__builtin_ia32_reduceph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_rcpsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_rsqrtsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_getmantsh_round_mask, "V8xV8xV8xIiV8xUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_getexpsh128_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_scalefsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_rndscalesh_round_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_getmantsh_mask_round, "V8xV8xV8xIiV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_getexpsh128_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_scalefsh_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_rndscalesh_mask_round, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_reducesh_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_sqrtph, "V8xV8x", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_sqrtph256, "V16xV16x", "ncV:256:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_sqrtph512, "V32xV32xIi", "ncV:512:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_sqrtsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_sqrtsh_mask_round, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_fpclassph128_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclassph256_mask, "UsV16xIiUs", "ncV:256:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclassph512_mask, "UiV32xIiUi", "ncV:512:", "avx512fp16") @@ -1840,10 +1840,10 @@ TARGET_BUILTIN(__builtin_ia32_vcvtph2pd128_mask, "V2dV8xV2dUc", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_mask, "V4dV8xV4dUc", "ncV:256:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_round_mask, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_round_mask, "V8xV8xV4fV8xUcIi", 
"ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_round_mask, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_round_mask, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_mask_round, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_mask_round, "V8xV8xV4fV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_mask_round, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_mask_round, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_vcvtph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16") @@ -1949,10 +1949,10 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_round_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_round_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_mask_round3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_mask_round, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_mask_round3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_vfmulcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_vfcmulcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14268,9 +14268,9 @@ } return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } - case X86::BI__builtin_ia32_sqrtsh_round_mask: - case X86::BI__builtin_ia32_sqrtsd_round_mask: - case X86::BI__builtin_ia32_sqrtss_round_mask: { + case X86::BI__builtin_ia32_sqrtsh_mask_round: + case X86::BI__builtin_ia32_sqrtsd_mask_round: + case X86::BI__builtin_ia32_sqrtss_mask_round: { unsigned CC = cast(Ops[4])->getZExtValue(); // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), // otherwise keep the intrinsic. 
@@ -14280,13 +14280,13 @@ switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_sqrtsh_round_mask: + case X86::BI__builtin_ia32_sqrtsh_mask_round: IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh; break; - case X86::BI__builtin_ia32_sqrtsd_round_mask: + case X86::BI__builtin_ia32_sqrtsd_mask_round: IID = Intrinsic::x86_avx512_mask_sqrt_sd; break; - case X86::BI__builtin_ia32_sqrtss_round_mask: + case X86::BI__builtin_ia32_sqrtss_mask_round: IID = Intrinsic::x86_avx512_mask_sqrt_ss; break; } @@ -15204,20 +15204,20 @@ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); return EmitX86Select(*this, Ops[3], Call, Ops[0]); } - case X86::BI__builtin_ia32_vfcmaddcsh_round_mask: + case X86::BI__builtin_ia32_vfcmaddcsh_mask_round: IsConjFMA = true; LLVM_FALLTHROUGH; - case X86::BI__builtin_ia32_vfmaddcsh_round_mask: { + case X86::BI__builtin_ia32_vfmaddcsh_mask_round: { Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1)); return EmitX86Select(*this, And, Call, Ops[0]); } - case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3: + case X86::BI__builtin_ia32_vfcmaddcsh_mask_round3: IsConjFMA = true; LLVM_FALLTHROUGH; - case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: { + case X86::BI__builtin_ia32_vfmaddcsh_mask_round3: { Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -856,7 +856,7 @@ (__mmask16)(U), (int)(R))) #define _mm_range_round_ss(A, B, C, R) \ - ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rangess128_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8) -1, (int)(C),\ @@ -865,7 +865,7 @@ #define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_mask_range_round_ss(W, U, A, B, C, R) \ - ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rangess128_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W),\ (__mmask8)(U), (int)(C),\ @@ -874,7 +874,7 @@ #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_range_round_ss(U, A, B, C, R) \ - ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rangess128_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C),\ @@ -883,7 +883,7 @@ #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_range_round_sd(A, B, C, R) \ - ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rangesd128_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8) -1, (int)(C),\ @@ -892,7 +892,7 @@ #define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_mask_range_round_sd(W, U, A, B, C, R) \ - ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ + 
((__m128d)__builtin_ia32_rangesd128_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W),\ (__mmask8)(U), (int)(C),\ @@ -901,7 +901,7 @@ #define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_maskz_range_round_sd(U, A, B, C, R) \ - ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rangesd128_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C),\ diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h --- a/clang/lib/Headers/avx512erintrin.h +++ b/clang/lib/Headers/avx512erintrin.h @@ -112,19 +112,19 @@ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rsqrt28ss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rsqrt28ss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(S), \ (__mmask8)(M), (int)(R))) #define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rsqrt28ss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(M), (int)(R))) @@ -139,19 +139,19 @@ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rsqrt28sd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rsqrt28sd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(S), \ (__mmask8)(M), (int)(R))) #define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rsqrt28sd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(M), (int)(R))) @@ -215,19 +215,19 @@ _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rcp28ss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_rcp28_round_ss(S, M, A, B, R) \ - ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rcp28ss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(S), \ (__mmask8)(M), (int)(R))) #define _mm_maskz_rcp28_round_ss(M, A, B, R) \ - ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rcp28ss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(M), (int)(R))) @@ -242,19 +242,19 @@ _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ + 
((__m128d)__builtin_ia32_rcp28sd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_rcp28_round_sd(S, M, A, B, R) \ - ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rcp28sd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(S), \ (__mmask8)(M), (int)(R))) #define _mm_maskz_rcp28_round_sd(M, A, B, R) \ - ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rcp28sd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(M), (int)(R))) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1016,7 +1016,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, @@ -1025,7 +1025,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, @@ -1033,26 +1033,26 @@ } #define _mm_max_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_maxss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_max_round_ss(W, U, A, B, R) \ - ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_maxss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_max_round_ss(U, A, B, R) \ - ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_maxss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, @@ -1061,7 +1061,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, @@ -1069,19 +1069,19 @@ } #define _mm_max_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_maxsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_max_round_sd(W, U, A, B, R) \ - ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_maxsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_max_round_sd(U, A, B, R) \ - ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ + 
((__m128d)__builtin_ia32_maxsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) @@ -1251,7 +1251,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, @@ -1260,7 +1260,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, @@ -1268,26 +1268,26 @@ } #define _mm_min_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_minss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_min_round_ss(W, U, A, B, R) \ - ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_minss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_min_round_ss(U, A, B, R) \ - ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_minss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, @@ -1296,7 +1296,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, @@ -1304,19 +1304,19 @@ } #define _mm_min_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_minsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_min_round_sd(W, U, A, B, R) \ - ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_minsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_min_round_sd(U, A, B, R) \ - ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_minsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) @@ -1904,19 +1904,19 @@ } #define _mm_add_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_addss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_add_round_ss(W, U, A, B, R) \ - ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_addss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_add_round_ss(U, A, B, R) \ - 
((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_addss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) @@ -1933,19 +1933,19 @@ return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } #define _mm_add_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_addsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_add_round_sd(W, U, A, B, R) \ - ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_addsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_add_round_sd(U, A, B, R) \ - ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_addsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) @@ -2018,19 +2018,19 @@ return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } #define _mm_sub_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_subss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_sub_round_ss(W, U, A, B, R) \ - ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_subss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_sub_round_ss(U, A, B, R) \ - ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_subss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) @@ -2048,19 +2048,19 @@ } #define _mm_sub_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_subsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_sub_round_sd(W, U, A, B, R) \ - ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_subsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_sub_round_sd(U, A, B, R) \ - ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_subsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) @@ -2133,19 +2133,19 @@ return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } #define _mm_mul_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_mulss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_mul_round_ss(W, U, A, B, R) \ - ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_mulss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_mul_round_ss(U, A, B, R) \ - ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_mulss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) @@ -2163,19 +2163,19 @@ 
} #define _mm_mul_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_mulsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_mul_round_sd(W, U, A, B, R) \ - ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_mulsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_mul_round_sd(U, A, B, R) \ - ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_mulsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) @@ -2249,19 +2249,19 @@ } #define _mm_div_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_divss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_div_round_ss(W, U, A, B, R) \ - ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_divss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_div_round_ss(U, A, B, R) \ - ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_divss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) @@ -2279,19 +2279,19 @@ } #define _mm_div_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_divsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_div_round_sd(W, U, A, B, R) \ - ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_divsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_div_round_sd(U, A, B, R) \ - ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_divsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) @@ -5466,7 +5466,7 @@ _MM_FROUND_CUR_DIRECTION)) #define _mm_getexp_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_getexpsd128_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) @@ -5475,14 +5475,14 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd (__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_getexpsd128_mask_round ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, + return (__m128d) __builtin_ia32_getexpsd128_mask_round ( (__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, @@ -5490,7 +5490,7 @@ } #define _mm_mask_getexp_round_sd(W, U, A, B, R) \ - ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_getexpsd128_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ 
(__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) @@ -5498,7 +5498,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, + return (__m128d) __builtin_ia32_getexpsd128_mask_round ( (__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, @@ -5506,13 +5506,13 @@ } #define _mm_maskz_getexp_round_sd(U, A, B, R) \ - ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_getexpsd128_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm_getexp_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getexpss128_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) @@ -5520,14 +5520,14 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_getexpss128_mask_round ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_getexpss128_mask_round ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, @@ -5535,7 +5535,7 @@ } #define _mm_mask_getexp_round_ss(W, U, A, B, R) \ - ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getexpss128_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R))) @@ -5543,7 +5543,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_getexpss128_mask_round ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, @@ -5551,20 +5551,20 @@ } #define _mm_maskz_getexp_round_ss(U, A, B, R) \ - ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getexpss128_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) #define _mm_getmant_round_sd(A, B, C, D, R) \ - ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_getmantsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_getmant_sd(A, B, C, D) \ - ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_getmantsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ @@ -5572,7 +5572,7 @@ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_sd(W, U, A, B, C, D) \ - ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_getmantsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)(__m128d)(W), \ @@ -5580,14 +5580,14 @@ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ - ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + 
((__m128d)__builtin_ia32_getmantsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_getmant_sd(U, A, B, C, D) \ - ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_getmantsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ @@ -5595,21 +5595,21 @@ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ - ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_getmantsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm_getmant_round_ss(A, B, C, D, R) \ - ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getmantss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_getmant_ss(A, B, C, D) \ - ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getmantss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ @@ -5617,7 +5617,7 @@ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_ss(W, U, A, B, C, D) \ - ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getmantss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)(__m128)(W), \ @@ -5625,14 +5625,14 @@ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ - ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getmantss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_getmant_ss(U, A, B, C, D) \ - ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getmantss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ @@ -5640,7 +5640,7 @@ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ - ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_getmantss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ @@ -6325,84 +6325,84 @@ } #define _mm_roundscale_round_sd(A, B, imm, R) \ - ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rndscalesd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(imm), \ (int)(R))) #define _mm_roundscale_sd(A, B, imm) \ - ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rndscalesd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(imm), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_sd(W, U, A, B, imm) \ - ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rndscalesd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(imm), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ - 
((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rndscalesd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(I), \ (int)(R))) #define _mm_maskz_roundscale_sd(U, A, B, I) \ - ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rndscalesd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(I), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ - ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_rndscalesd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(I), \ (int)(R))) #define _mm_roundscale_round_ss(A, B, imm, R) \ - ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rndscaless_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(imm), \ (int)(R))) #define _mm_roundscale_ss(A, B, imm) \ - ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rndscaless_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(imm), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_ss(W, U, A, B, I) \ - ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rndscaless_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(I), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ - ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rndscaless_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(I), \ (int)(R))) #define _mm_maskz_roundscale_ss(U, A, B, I) \ - ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rndscaless_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(I), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ - ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_rndscaless_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(I), \ @@ -6509,7 +6509,7 @@ } #define _mm_scalef_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_scalefsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) @@ -6517,7 +6517,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd (__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, (__v2df)( __B), (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); @@ -6526,7 +6526,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, + return (__m128d) __builtin_ia32_scalefsd_mask_round ( (__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, @@ -6534,7 +6534,7 @@ } #define _mm_mask_scalef_round_sd(W, U, A, B, R) \ - 
((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_scalefsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) @@ -6542,7 +6542,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, + return (__m128d) __builtin_ia32_scalefsd_mask_round ( (__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, @@ -6550,13 +6550,13 @@ } #define _mm_maskz_scalef_round_sd(U, A, B, R) \ - ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_scalefsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm_scalef_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_scalefss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) @@ -6564,7 +6564,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); @@ -6573,7 +6573,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, + return (__m128) __builtin_ia32_scalefss_mask_round ( (__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, @@ -6581,7 +6581,7 @@ } #define _mm_mask_scalef_round_ss(W, U, A, B, R) \ - ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_scalefss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R))) @@ -6589,7 +6589,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, + return (__m128) __builtin_ia32_scalefss_mask_round ( (__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, @@ -6597,7 +6597,7 @@ } #define _mm_maskz_scalef_round_ss(U, A, B, R) \ - ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_scalefss_mask_round((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), \ @@ -6733,7 +6733,7 @@ (__v16sf)_mm512_setzero_ps())) #define _mm_sqrt_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_sqrtsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) @@ -6741,7 +6741,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, + return (__m128d) __builtin_ia32_sqrtsd_mask_round ( (__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, @@ -6749,7 +6749,7 @@ } #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ - ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_sqrtsd_mask_round((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) @@ 
@@ -6757,7 +6757,7 @@
 static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
- return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
+ return (__m128d) __builtin_ia32_sqrtsd_mask_round ( (__v2df) __A,
                 (__v2df) __B,
                 (__v2df) _mm_setzero_pd (),
                 (__mmask8) __U,
@@ -6765,13 +6765,13 @@
 }

 #define _mm_maskz_sqrt_round_sd(U, A, B, R) \
-  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
+  ((__m128d)__builtin_ia32_sqrtsd_mask_round((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)))

 #define _mm_sqrt_round_ss(A, B, R) \
-  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+  ((__m128)__builtin_ia32_sqrtss_mask_round((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)))
@@ -6779,7 +6779,7 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
- return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
+ return (__m128) __builtin_ia32_sqrtss_mask_round ( (__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf) __W,
                 (__mmask8) __U,
@@ -6787,7 +6787,7 @@
 }

 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
-  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+  ((__m128)__builtin_ia32_sqrtss_mask_round((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), (__mmask8)(U), \
                                             (int)(R)))
@@ -6795,7 +6795,7 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
- return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
+ return (__m128) __builtin_ia32_sqrtss_mask_round ( (__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf) _mm_setzero_ps (),
                 (__mmask8) __U,
@@ -6803,7 +6803,7 @@
 }

 #define _mm_maskz_sqrt_round_ss(U, A, B, R) \
-  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+  ((__m128)__builtin_ia32_sqrtss_mask_round((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)))
@@ -9017,19 +9017,19 @@
 }

 #define _mm_cvt_roundsd_ss(A, B, R) \
-  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+  ((__m128)__builtin_ia32_cvtsd2ss_mask_round((__v4sf)(__m128)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v4sf)_mm_undefined_ps(), \
                                               (__mmask8)-1, (int)(R)))

 #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
-  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+  ((__m128)__builtin_ia32_cvtsd2ss_mask_round((__v4sf)(__m128)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

 #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
-  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+  ((__m128)__builtin_ia32_cvtsd2ss_mask_round((__v4sf)(__m128)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
@@ -9037,7 +9037,7 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
 {
-  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
+  return __builtin_ia32_cvtsd2ss_mask_round ((__v4sf)__A,
                                              (__v2df)__B,
                                              (__v4sf)__W,
                                              (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
@@ -9046,7 +9046,7 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 {
-  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
+  return __builtin_ia32_cvtsd2ss_mask_round ((__v4sf)__A,
                                              (__v2df)__B,
                                              (__v4sf)_mm_setzero_ps(),
                                              (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
@@ -9090,19 +9090,19 @@
 #endif

 #define _mm_cvt_roundss_sd(A, B, R) \
-  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+  ((__m128d)__builtin_ia32_cvtss2sd_mask_round((__v2df)(__m128d)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v2df)_mm_undefined_pd(), \
                                                (__mmask8)-1, (int)(R)))

 #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
-  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+  ((__m128d)__builtin_ia32_cvtss2sd_mask_round((__v2df)(__m128d)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

 #define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
-  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+  ((__m128d)__builtin_ia32_cvtss2sd_mask_round((__v2df)(__m128d)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))
@@ -9110,7 +9110,7 @@
 static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
 {
-  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
+  return __builtin_ia32_cvtss2sd_mask_round((__v2df)__A,
                                             (__v4sf)__B,
                                             (__v2df)__W,
                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
@@ -9119,7 +9119,7 @@
 static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
 {
-  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
+  return __builtin_ia32_cvtss2sd_mask_round((__v2df)__A,
                                             (__v4sf)__B,
                                             (__v2df)_mm_setzero_pd(),
                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -589,17 +589,17 @@
 }

 #define _mm_add_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_addsh_round_mask( \
+  ((__m128h)__builtin_ia32_addsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_add_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_addsh_round_mask( \
+  ((__m128h)__builtin_ia32_addsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 #define _mm_maskz_add_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_addsh_round_mask( \
+  ((__m128h)__builtin_ia32_addsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

@@ -625,17 +625,17 @@
 }

 #define _mm_sub_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_subsh_round_mask( \
+  ((__m128h)__builtin_ia32_subsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_sub_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_subsh_round_mask( \
+  ((__m128h)__builtin_ia32_subsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 #define _mm_maskz_sub_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_subsh_round_mask( \
+  ((__m128h)__builtin_ia32_subsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

@@ -661,17 +661,17 @@
 }

 #define _mm_mul_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_mulsh_round_mask( \
+  ((__m128h)__builtin_ia32_mulsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_mul_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_mulsh_round_mask( \
+  ((__m128h)__builtin_ia32_mulsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 #define _mm_maskz_mul_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_mulsh_round_mask( \
+  ((__m128h)__builtin_ia32_mulsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

@@ -697,23 +697,23 @@
 }

 #define _mm_div_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_divsh_round_mask( \
+  ((__m128h)__builtin_ia32_divsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_div_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_divsh_round_mask( \
+  ((__m128h)__builtin_ia32_divsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 #define _mm_maskz_div_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_divsh_round_mask( \
+  ((__m128h)__builtin_ia32_divsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A,
                                                            __m128h __B) {
-  return (__m128h)__builtin_ia32_minsh_round_mask(
+  return (__m128h)__builtin_ia32_minsh_mask_round(
       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
 }
@@ -722,7 +722,7 @@
                                                                 __mmask8 __U,
                                                                 __m128h __A,
                                                                 __m128h __B) {
-  return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B,
+  return (__m128h)__builtin_ia32_minsh_mask_round((__v8hf)__A, (__v8hf)__B,
                                                   (__v8hf)__W, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
 }
@@ -730,29 +730,29 @@
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U,
                                                                  __m128h __A,
                                                                  __m128h __B) {
-  return (__m128h)__builtin_ia32_minsh_round_mask(
+  return (__m128h)__builtin_ia32_minsh_mask_round(
       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_min_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_minsh_round_mask( \
+  ((__m128h)__builtin_ia32_minsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_min_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_minsh_round_mask( \
+  ((__m128h)__builtin_ia32_minsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 #define _mm_maskz_min_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_minsh_round_mask( \
+  ((__m128h)__builtin_ia32_minsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A,
                                                            __m128h __B) {
-  return (__m128h)__builtin_ia32_maxsh_round_mask(
+  return (__m128h)__builtin_ia32_maxsh_mask_round(
       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
 }
@@ -761,7 +761,7 @@
                                                                 __mmask8 __U,
                                                                 __m128h __A,
                                                                 __m128h __B) {
-  return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B,
+  return (__m128h)__builtin_ia32_maxsh_mask_round((__v8hf)__A, (__v8hf)__B,
                                                   (__v8hf)__W, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
 }
@@ -769,23 +769,23 @@
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U,
                                                                  __m128h __A,
                                                                  __m128h __B) {
-  return (__m128h)__builtin_ia32_maxsh_round_mask(
+  return (__m128h)__builtin_ia32_maxsh_mask_round(
       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_max_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_maxsh_round_mask( \
+  ((__m128h)__builtin_ia32_maxsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_max_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_maxsh_round_mask( \
+  ((__m128h)__builtin_ia32_maxsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 #define _mm_maskz_max_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_maxsh_round_mask( \
+  ((__m128h)__builtin_ia32_maxsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

@@ -1205,134 +1205,134 @@
 }

 #define _mm_getmant_round_sh(A, B, C, D, R) \
-  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+  ((__m128h)__builtin_ia32_getmantsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
       (__v8hf)_mm_setzero_ph(), (__mmask8)-1, (int)(R)))

 #define _mm_getmant_sh(A, B, C, D) \
-  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+  ((__m128h)__builtin_ia32_getmantsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
       (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

 #define _mm_mask_getmant_sh(W, U, A, B, C, D) \
-  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+  ((__m128h)__builtin_ia32_getmantsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
       (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

 #define _mm_mask_getmant_round_sh(W, U, A, B, C, D, R) \
-  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+  ((__m128h)__builtin_ia32_getmantsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
       (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))

 #define _mm_maskz_getmant_sh(U, A, B, C, D) \
-  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+  ((__m128h)__builtin_ia32_getmantsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
       (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

 #define _mm_maskz_getmant_round_sh(U, A, B, C, D, R) \
-  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+  ((__m128h)__builtin_ia32_getmantsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
       (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))

 #define _mm_getexp_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
+  ((__m128h)__builtin_ia32_getexpsh128_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A,
                                                               __m128h __B) {
-  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
+  return (__m128h)__builtin_ia32_getexpsh128_mask_round(
       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
-  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
+  return (__m128h)__builtin_ia32_getexpsh128_mask_round(
       (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_mask_getexp_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
+  ((__m128h)__builtin_ia32_getexpsh128_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) {
-  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
+  return (__m128h)__builtin_ia32_getexpsh128_mask_round(
       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_maskz_getexp_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
+  ((__m128h)__builtin_ia32_getexpsh128_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

 #define _mm_scalef_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_scalefsh_round_mask( \
+  ((__m128h)__builtin_ia32_scalefsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A,
                                                               __m128h __B) {
-  return (__m128h)__builtin_ia32_scalefsh_round_mask(
+  return (__m128h)__builtin_ia32_scalefsh_mask_round(
       (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
-  return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B,
+  return (__m128h)__builtin_ia32_scalefsh_mask_round((__v8hf)__A, (__v8hf)__B,
                                                      (__v8hf)__W, (__mmask8)__U,
                                                      _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_mask_scalef_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_scalefsh_round_mask( \
+  ((__m128h)__builtin_ia32_scalefsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
-  return (__m128h)__builtin_ia32_scalefsh_round_mask(
+  return (__m128h)__builtin_ia32_scalefsh_mask_round(
       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_maskz_scalef_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_scalefsh_round_mask( \
+  ((__m128h)__builtin_ia32_scalefsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

 #define _mm_roundscale_round_sh(A, B, imm, R) \
-  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+  ((__m128h)__builtin_ia32_rndscalesh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(imm), (int)(R)))

 #define _mm_roundscale_sh(A, B, imm) \
-  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+  ((__m128h)__builtin_ia32_rndscalesh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))

 #define _mm_mask_roundscale_sh(W, U, A, B, I) \
-  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+  ((__m128h)__builtin_ia32_rndscalesh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))

 #define _mm_mask_roundscale_round_sh(W, U, A, B, I, R) \
-  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+  ((__m128h)__builtin_ia32_rndscalesh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(I), (int)(R)))

 #define _mm_maskz_roundscale_sh(U, A, B, I) \
-  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+  ((__m128h)__builtin_ia32_rndscalesh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))

 #define _mm_maskz_roundscale_round_sh(U, A, B, I, R) \
-  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+  ((__m128h)__builtin_ia32_rndscalesh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(I), (int)(R)))

@@ -1401,23 +1401,23 @@
 }

 #define _mm_sqrt_round_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
+  ((__m128h)__builtin_ia32_sqrtsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_sqrt_round_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
+  ((__m128h)__builtin_ia32_sqrtsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
       (__mmask8)(U), (int)(R)))

 #define _mm_maskz_sqrt_round_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
+  ((__m128h)__builtin_ia32_sqrtsh_mask_round( \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
       (__mmask8)(U), (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A,
                                                             __m128h __B) {
-  return (__m128h)__builtin_ia32_sqrtsh_round_mask(
+  return (__m128h)__builtin_ia32_sqrtsh_mask_round(
       (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
       (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
 }
@@ -1426,7 +1426,7 @@
                                                                 __mmask32 __U,
                                                                 __m128h __A,
                                                                 __m128h __B) {
-  return (__m128h)__builtin_ia32_sqrtsh_round_mask(
+  return (__m128h)__builtin_ia32_sqrtsh_mask_round(
       (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W),
       (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
 }
@@ -1434,7 +1434,7 @@
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sh(__mmask32 __U,
                                                                   __m128h __A,
                                                                   __m128h __B) {
-  return (__m128h)__builtin_ia32_sqrtsh_round_mask(
+  return (__m128h)__builtin_ia32_sqrtsh_mask_round(
       (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
       (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
 }
@@ -1518,22 +1518,22 @@
 }

 #define _mm_cvt_roundsh_ss(A, B, R) \
-  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
+  ((__m128)__builtin_ia32_vcvtsh2ss_mask_round((__v4sf)(A), (__v8hf)(B), \
                                                (__v4sf)_mm_undefined_ps(), \
                                                (__mmask8)(-1), (int)(R)))

 #define _mm_mask_cvt_roundsh_ss(W, U, A, B, R) \
-  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask( \
+  ((__m128)__builtin_ia32_vcvtsh2ss_mask_round( \
       (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R)))

 #define _mm_maskz_cvt_roundsh_ss(U, A, B, R) \
-  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
+  ((__m128)__builtin_ia32_vcvtsh2ss_mask_round((__v4sf)(A), (__v8hf)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R)))

 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
                                                             __m128h __B) {
-  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
+  return (__m128)__builtin_ia32_vcvtsh2ss_mask_round(
       (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
 }
@@ -1542,7 +1542,7 @@
                                                                 __mmask8 __U,
                                                                 __m128 __A,
                                                                 __m128h __B) {
-  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
+  return (__m128)__builtin_ia32_vcvtsh2ss_mask_round((__v4sf)__A, (__v8hf)__B,
                                                      (__v4sf)__W, (__mmask8)__U,
                                                      _MM_FROUND_CUR_DIRECTION);
 }
@@ -1550,28 +1550,28 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U,
                                                                   __m128 __A,
                                                                   __m128h __B) {
-  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
+  return (__m128)__builtin_ia32_vcvtsh2ss_mask_round(
       (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_cvt_roundss_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
+  ((__m128h)__builtin_ia32_vcvtss2sh_mask_round((__v8hf)(A), (__v4sf)(B), \
                                                 (__v8hf)_mm_undefined_ph(), \
                                                 (__mmask8)(-1), (int)(R)))

 #define _mm_mask_cvt_roundss_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask( \
+  ((__m128h)__builtin_ia32_vcvtss2sh_mask_round( \
       (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))

 #define _mm_maskz_cvt_roundss_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
+  ((__m128h)__builtin_ia32_vcvtss2sh_mask_round((__v8hf)(A), (__v4sf)(B), \
                                                 (__v8hf)_mm_setzero_ph(), \
                                                 (__mmask8)(U), (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
                                                              __m128 __B) {
-  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+  return (__m128h)__builtin_ia32_vcvtss2sh_mask_round(
       (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
 }
@@ -1580,7 +1580,7 @@
                                                                 __mmask8 __U,
                                                                 __m128h __A,
                                                                 __m128 __B) {
-  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+  return (__m128h)__builtin_ia32_vcvtss2sh_mask_round(
       (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }
@@ -1588,28 +1588,28 @@
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U,
                                                                    __m128h __A,
                                                                    __m128 __B) {
-  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+  return (__m128h)__builtin_ia32_vcvtss2sh_mask_round(
       (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_cvt_roundsd_sh(A, B, R) \
-  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
+  ((__m128h)__builtin_ia32_vcvtsd2sh_mask_round((__v8hf)(A), (__v2df)(B), \
                                                 (__v8hf)_mm_undefined_ph(), \
                                                 (__mmask8)(-1), (int)(R)))

 #define _mm_mask_cvt_roundsd_sh(W, U, A, B, R) \
-  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask( \
+  ((__m128h)__builtin_ia32_vcvtsd2sh_mask_round( \
       (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))

 #define _mm_maskz_cvt_roundsd_sh(U, A, B, R) \
-  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
+  ((__m128h)__builtin_ia32_vcvtsd2sh_mask_round((__v8hf)(A), (__v2df)(B), \
                                                 (__v8hf)_mm_setzero_ph(), \
                                                 (__mmask8)(U), (int)(R)))

 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
                                                              __m128d __B) {
-  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+  return (__m128h)__builtin_ia32_vcvtsd2sh_mask_round(
       (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
 }
@@ -1618,35 +1618,35 @@
                                                                 __mmask8 __U,
                                                                 __m128h __A,
                                                                 __m128d __B) {
-  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+  return (__m128h)__builtin_ia32_vcvtsd2sh_mask_round(
       (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
-  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+  return (__m128h)__builtin_ia32_vcvtsd2sh_mask_round(
       (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm_cvt_roundsh_sd(A, B, R) \
-  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
+  ((__m128d)__builtin_ia32_vcvtsh2sd_mask_round((__v2df)(A), (__v8hf)(B), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)(-1), (int)(R)))

 #define _mm_mask_cvt_roundsh_sd(W, U, A, B, R) \
-  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask( \
+  ((__m128d)__builtin_ia32_vcvtsh2sd_mask_round( \
       (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R)))

 #define _mm_maskz_cvt_roundsh_sd(U, A, B, R) \
-  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
+  ((__m128d)__builtin_ia32_vcvtsh2sd_mask_round((__v2df)(A), (__v8hf)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(R)))

 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
                                                              __m128h __B) {
-  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+  return (__m128d)__builtin_ia32_vcvtsh2sd_mask_round(
       (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
 }
@@ -1655,14 +1655,14 @@
                                                                 __mmask8 __U,
                                                                 __m128d __A,
                                                                 __m128h __B) {
-  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+  return (__m128d)__builtin_ia32_vcvtsh2sd_mask_round(
       (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
-  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+  return (__m128d)__builtin_ia32_vcvtsh2sd_mask_round(
       (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
       _MM_FROUND_CUR_DIRECTION);
 }
@@ -2941,7 +2941,7 @@

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
-  return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask(
+  return (__m128h)__builtin_ia32_vfcmaddcsh_mask_round(
       (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
 }

@@ -2954,7 +2954,7 @@

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
-  return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask3(
+  return (__m128h)__builtin_ia32_vfcmaddcsh_mask_round3(
       (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
 }

@@ -2964,7 +2964,7 @@
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_fcmadd_round_sch(A, U, B, C, R) \
-  ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask( \
+  ((__m128h)__builtin_ia32_vfcmaddcsh_mask_round( \
       (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
       (__mmask8)(U), (int)(R)))

@@ -2974,7 +2974,7 @@
       (__mmask8)(U), (int)(R)))

 #define _mm_mask3_fcmadd_round_sch(A, B, C, U, R) \
-  ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( \
+  ((__m128h)__builtin_ia32_vfcmaddcsh_mask_round3( \
       (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
       (__mmask8)(U), (int)(R)))

@@ -2988,7 +2988,7 @@

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
-  return (__m128h)__builtin_ia32_vfmaddcsh_round_mask(
+  return (__m128h)__builtin_ia32_vfmaddcsh_mask_round(
       (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
 }

@@ -3001,7 +3001,7 @@

 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
-  return (__m128h)__builtin_ia32_vfmaddcsh_round_mask3(
+  return (__m128h)__builtin_ia32_vfmaddcsh_mask_round3(
       (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
 }

@@ -3011,7 +3011,7 @@
       (__mmask8)-1, (int)(R)))

 #define _mm_mask_fmadd_round_sch(A, U, B, C, R) \
-  ((__m128h)__builtin_ia32_vfmaddcsh_round_mask( \
+  ((__m128h)__builtin_ia32_vfmaddcsh_mask_round( \
       (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
       (__mmask8)(U), (int)(R)))

@@ -3021,7 +3021,7 @@
       (__mmask8)(U), (int)(R)))

 #define _mm_mask3_fmadd_round_sch(A, B, C, U, R) \
-  ((__m128h)__builtin_ia32_vfmaddcsh_round_mask3( \
+  ((__m128h)__builtin_ia32_vfmaddcsh_mask_round3( \
       (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
       (__mmask8)(U), (int)(R)))

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -4627,31 +4627,31 @@
   case X86::BI__builtin_ia32_cmpsd_mask:
   case X86::BI__builtin_ia32_cmpss_mask:
   case X86::BI__builtin_ia32_cmpsh_mask:
-  case X86::BI__builtin_ia32_vcvtsh2sd_round_mask:
-  case X86::BI__builtin_ia32_vcvtsh2ss_round_mask:
-  case X86::BI__builtin_ia32_cvtss2sd_round_mask:
-  case X86::BI__builtin_ia32_getexpsd128_round_mask:
-  case X86::BI__builtin_ia32_getexpss128_round_mask:
-  case X86::BI__builtin_ia32_getexpsh128_round_mask:
+  case X86::BI__builtin_ia32_vcvtsh2sd_mask_round:
+  case X86::BI__builtin_ia32_vcvtsh2ss_mask_round:
+  case X86::BI__builtin_ia32_cvtss2sd_mask_round:
+  case X86::BI__builtin_ia32_getexpsd128_mask_round:
+  case X86::BI__builtin_ia32_getexpss128_mask_round:
+  case X86::BI__builtin_ia32_getexpsh128_mask_round:
   case X86::BI__builtin_ia32_getmantpd512_mask:
   case X86::BI__builtin_ia32_getmantps512_mask:
   case X86::BI__builtin_ia32_getmantph512_mask:
-  case X86::BI__builtin_ia32_maxsd_round_mask:
-  case X86::BI__builtin_ia32_maxss_round_mask:
-  case X86::BI__builtin_ia32_maxsh_round_mask:
-  case X86::BI__builtin_ia32_minsd_round_mask:
-  case X86::BI__builtin_ia32_minss_round_mask:
-  case X86::BI__builtin_ia32_minsh_round_mask:
-  case X86::BI__builtin_ia32_rcp28sd_round_mask:
-  case X86::BI__builtin_ia32_rcp28ss_round_mask:
+  case X86::BI__builtin_ia32_maxsd_mask_round:
+  case X86::BI__builtin_ia32_maxss_mask_round:
+  case X86::BI__builtin_ia32_maxsh_mask_round:
+  case X86::BI__builtin_ia32_minsd_mask_round:
+  case X86::BI__builtin_ia32_minss_mask_round:
+  case X86::BI__builtin_ia32_minsh_mask_round:
+  case X86::BI__builtin_ia32_rcp28sd_mask_round:
+  case X86::BI__builtin_ia32_rcp28ss_mask_round:
   case X86::BI__builtin_ia32_reducepd512_mask:
   case X86::BI__builtin_ia32_reduceps512_mask:
   case X86::BI__builtin_ia32_reduceph512_mask:
   case X86::BI__builtin_ia32_rndscalepd_mask:
   case X86::BI__builtin_ia32_rndscaleps_mask:
   case X86::BI__builtin_ia32_rndscaleph_mask:
-  case X86::BI__builtin_ia32_rsqrt28sd_round_mask:
-  case X86::BI__builtin_ia32_rsqrt28ss_round_mask:
+  case X86::BI__builtin_ia32_rsqrt28sd_mask_round:
+  case X86::BI__builtin_ia32_rsqrt28ss_mask_round:
     ArgNum = 4;
     break;
   case X86::BI__builtin_ia32_fixupimmpd512_mask:
@@ -4662,19 +4662,19 @@
   case X86::BI__builtin_ia32_fixupimmsd_maskz:
   case X86::BI__builtin_ia32_fixupimmss_mask:
   case X86::BI__builtin_ia32_fixupimmss_maskz:
-  case X86::BI__builtin_ia32_getmantsd_round_mask:
-  case X86::BI__builtin_ia32_getmantss_round_mask:
-  case X86::BI__builtin_ia32_getmantsh_round_mask:
+  case X86::BI__builtin_ia32_getmantsd_mask_round:
+  case X86::BI__builtin_ia32_getmantss_mask_round:
+  case X86::BI__builtin_ia32_getmantsh_mask_round:
   case X86::BI__builtin_ia32_rangepd512_mask:
   case X86::BI__builtin_ia32_rangeps512_mask:
-  case X86::BI__builtin_ia32_rangesd128_round_mask:
-  case X86::BI__builtin_ia32_rangess128_round_mask:
+  case X86::BI__builtin_ia32_rangesd128_mask_round:
+  case X86::BI__builtin_ia32_rangess128_mask_round:
   case X86::BI__builtin_ia32_reducesd_mask:
   case X86::BI__builtin_ia32_reducess_mask:
   case X86::BI__builtin_ia32_reducesh_mask:
-  case X86::BI__builtin_ia32_rndscalesd_round_mask:
-  case X86::BI__builtin_ia32_rndscaless_round_mask:
-  case X86::BI__builtin_ia32_rndscalesh_round_mask:
+  case X86::BI__builtin_ia32_rndscalesd_mask_round:
+  case X86::BI__builtin_ia32_rndscaless_mask_round:
+  case X86::BI__builtin_ia32_rndscalesh_mask_round:
     ArgNum = 5;
     break;
   case X86::BI__builtin_ia32_vcvtsd2si64:
@@ -4752,30 +4752,30 @@
     ArgNum = 3;
     HasRC = true;
     break;
-  case X86::BI__builtin_ia32_addsh_round_mask:
-  case X86::BI__builtin_ia32_addss_round_mask:
-  case X86::BI__builtin_ia32_addsd_round_mask:
-  case X86::BI__builtin_ia32_divsh_round_mask:
-  case X86::BI__builtin_ia32_divss_round_mask:
-  case X86::BI__builtin_ia32_divsd_round_mask:
-  case X86::BI__builtin_ia32_mulsh_round_mask:
-  case X86::BI__builtin_ia32_mulss_round_mask:
-  case X86::BI__builtin_ia32_mulsd_round_mask:
-  case X86::BI__builtin_ia32_subsh_round_mask:
-  case X86::BI__builtin_ia32_subss_round_mask:
-  case X86::BI__builtin_ia32_subsd_round_mask:
+  case X86::BI__builtin_ia32_addsh_mask_round:
+  case X86::BI__builtin_ia32_addss_mask_round:
+  case X86::BI__builtin_ia32_addsd_mask_round:
+  case X86::BI__builtin_ia32_divsh_mask_round:
+  case X86::BI__builtin_ia32_divss_mask_round:
+  case X86::BI__builtin_ia32_divsd_mask_round:
+  case X86::BI__builtin_ia32_mulsh_mask_round:
+  case X86::BI__builtin_ia32_mulss_mask_round:
+  case X86::BI__builtin_ia32_mulsd_mask_round:
+  case X86::BI__builtin_ia32_subsh_mask_round:
+  case X86::BI__builtin_ia32_subss_mask_round:
+  case X86::BI__builtin_ia32_subsd_mask_round:
   case X86::BI__builtin_ia32_scalefph512_mask:
   case X86::BI__builtin_ia32_scalefpd512_mask:
   case X86::BI__builtin_ia32_scalefps512_mask:
-  case X86::BI__builtin_ia32_scalefsd_round_mask:
-  case X86::BI__builtin_ia32_scalefss_round_mask:
-  case X86::BI__builtin_ia32_scalefsh_round_mask:
-  case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
-  case X86::BI__builtin_ia32_vcvtss2sh_round_mask:
-  case X86::BI__builtin_ia32_vcvtsd2sh_round_mask:
-  case X86::BI__builtin_ia32_sqrtsd_round_mask:
-  case X86::BI__builtin_ia32_sqrtss_round_mask:
-  case X86::BI__builtin_ia32_sqrtsh_round_mask:
+  case X86::BI__builtin_ia32_scalefsd_mask_round:
+  case X86::BI__builtin_ia32_scalefss_mask_round:
+  case X86::BI__builtin_ia32_scalefsh_mask_round:
+  case X86::BI__builtin_ia32_cvtsd2ss_mask_round:
+  case X86::BI__builtin_ia32_vcvtss2sh_mask_round:
+  case X86::BI__builtin_ia32_vcvtsd2sh_mask_round:
+  case X86::BI__builtin_ia32_sqrtsd_mask_round:
+  case X86::BI__builtin_ia32_sqrtss_mask_round:
+  case X86::BI__builtin_ia32_sqrtsh_mask_round:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_maskz:
   case X86::BI__builtin_ia32_vfmaddsd3_mask3:
@@ -4810,14 +4810,14 @@
   case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
   case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
   case X86::BI__builtin_ia32_vfmaddcsh_mask:
-  case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
-  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddcsh_mask_round:
+  case X86::BI__builtin_ia32_vfmaddcsh_mask_round3:
   case X86::BI__builtin_ia32_vfmaddcph512_mask:
   case X86::BI__builtin_ia32_vfmaddcph512_maskz:
   case X86::BI__builtin_ia32_vfmaddcph512_mask3:
   case X86::BI__builtin_ia32_vfcmaddcsh_mask:
-  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
-  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+  case X86::BI__builtin_ia32_vfcmaddcsh_mask_round:
+  case X86::BI__builtin_ia32_vfcmaddcsh_mask_round3:
   case X86::BI__builtin_ia32_vfcmaddcph512_mask:
   case X86::BI__builtin_ia32_vfcmaddcph512_maskz:
   case X86::BI__builtin_ia32_vfcmaddcph512_mask3:
@@ -5195,9 +5195,9 @@
   case X86::BI__builtin_ia32_rangeps128_mask:
   case X86::BI__builtin_ia32_rangeps256_mask:
   case X86::BI__builtin_ia32_rangeps512_mask:
-  case X86::BI__builtin_ia32_getmantsd_round_mask:
-  case X86::BI__builtin_ia32_getmantss_round_mask:
-  case X86::BI__builtin_ia32_getmantsh_round_mask:
+  case X86::BI__builtin_ia32_getmantsd_mask_round:
+  case X86::BI__builtin_ia32_getmantss_mask_round:
+  case X86::BI__builtin_ia32_getmantsh_mask_round:
   case X86::BI__builtin_ia32_vec_set_v16qi:
   case X86::BI__builtin_ia32_vec_set_v16hi:
     i = 2; l = 0; u = 15;
@@ -5392,9 +5392,9 @@
     break;
   case X86::BI__builtin_ia32_reducesd_mask:
   case X86::BI__builtin_ia32_reducess_mask:
-  case X86::BI__builtin_ia32_rndscalesd_round_mask:
-  case X86::BI__builtin_ia32_rndscaless_round_mask:
-  case X86::BI__builtin_ia32_rndscalesh_round_mask:
+  case X86::BI__builtin_ia32_rndscalesd_mask_round:
+  case X86::BI__builtin_ia32_rndscaless_mask_round:
+  case X86::BI__builtin_ia32_rndscalesh_mask_round:
   case X86::BI__builtin_ia32_reducesh_mask:
     i = 4; l = 0; u = 255;
     break;
diff --git a/clang/test/Sema/builtins-x86.c b/clang/test/Sema/builtins-x86.c
--- a/clang/test/Sema/builtins-x86.c
+++ b/clang/test/Sema/builtins-x86.c
@@ -98,8 +98,8 @@
   return __builtin_ia32_getmantps512_mask(a, 0, b, (__mmask16)-1, 10); // expected-error {{invalid rounding argument}}
 }

-__m128 test__builtin_ia32_getmantss_round_mask(__m128 a, __m128 b, __m128 c) {
-  return __builtin_ia32_getmantss_round_mask(a, b, 0, c, (__mmask8)-1, 10); // expected-error {{invalid rounding argument}}
+__m128 test__builtin_ia32_getmantss_mask_round(__m128 a, __m128 b, __m128 c) {
+  return __builtin_ia32_getmantss_mask_round(a, b, 0, c, (__mmask8)-1, 10); // expected-error {{invalid rounding argument}}
 }

 __m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c, __m128i mask) {